aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/display/dc/dml
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml')
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile41
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c189
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/calcs_logger.h578
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/custom_float.c197
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dce_calcs.c3623
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c1933
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c145
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c1752
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dc_features.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c123
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h30
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c2292
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h57
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c743
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h76
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c354
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c95
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c124
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c357
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c359
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c799
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c579
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c113
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c386
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c7199
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c1733
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c2527
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h76
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c3686
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h60
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c6231
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h1144
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c615
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c700
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h88
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h179
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c212
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h292
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c1889
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c284
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h2
63 files changed, 39207 insertions, 3168 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index eee6672bd32d..ca7d24000621 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -34,7 +34,7 @@ dml_ccflags := -mhard-float -maltivec
endif
ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+ifneq ($(call gcc-min-version, 70100),y)
IS_OLD_GCC = 1
endif
endif
@@ -58,9 +58,9 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
ifdef CONFIG_DRM_AMD_DC_DCN
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
@@ -70,9 +70,23 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags)
@@ -85,6 +99,9 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags)
@@ -93,18 +110,32 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags)
-DML = display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o \
+DML = calcs/dce_calcs.o calcs/custom_float.o calcs/bw_fixed.o
ifdef CONFIG_DRM_AMD_DC_DCN
+DML += display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o
+DML += dcn10/dcn10_fpu.o
DML += dcn20/dcn20_fpu.o
DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o
DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o
DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o
-DML += dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
+DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o
+DML += dcn314/display_mode_vba_314.o dcn314/display_rq_dlg_calc_314.o
+DML += dcn32/display_mode_vba_32.o dcn32/display_rq_dlg_calc_32.o dcn32/display_mode_vba_util_32.o
+DML += dcn31/dcn31_fpu.o
+DML += dcn32/dcn32_fpu.o
+DML += dcn321/dcn321_fpu.o
DML += dcn301/dcn301_fpu.o
+DML += dcn302/dcn302_fpu.o
+DML += dcn303/dcn303_fpu.o
+DML += dcn314/dcn314_fpu.o
DML += dsc/rc_calc_fpu.o
+DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o
endif
AMD_DAL_DML = $(addprefix $(AMDDALPATH)/dc/dml/,$(DML))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c
new file mode 100644
index 000000000000..3aa8dd0acd5e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "dm_services.h"
+#include "bw_fixed.h"
+
+#define MAX_I64 \
+ ((int64_t)((1ULL << 63) - 1))
+
+#define MIN_I64 \
+ (-MAX_I64 - 1)
+
+#define FRACTIONAL_PART_MASK \
+ ((1ULL << BW_FIXED_BITS_PER_FRACTIONAL_PART) - 1)
+
+#define GET_FRACTIONAL_PART(x) \
+ (FRACTIONAL_PART_MASK & (x))
+
+static uint64_t abs_i64(int64_t arg)
+{
+ if (arg >= 0)
+ return (uint64_t)(arg);
+ else
+ return (uint64_t)(-arg);
+}
+
+struct bw_fixed bw_int_to_fixed_nonconst(int64_t value)
+{
+ struct bw_fixed res;
+
+ ASSERT(value < BW_FIXED_MAX_I32 && value > BW_FIXED_MIN_I32);
+ res.value = value << BW_FIXED_BITS_PER_FRACTIONAL_PART;
+ return res;
+}
+
+struct bw_fixed bw_frc_to_fixed(int64_t numerator, int64_t denominator)
+{
+ struct bw_fixed res;
+ bool arg1_negative = numerator < 0;
+ bool arg2_negative = denominator < 0;
+ uint64_t arg1_value;
+ uint64_t arg2_value;
+ uint64_t remainder;
+
+ /* determine integer part */
+ uint64_t res_value;
+
+ ASSERT(denominator != 0);
+
+ arg1_value = abs_i64(numerator);
+ arg2_value = abs_i64(denominator);
+ res_value = div64_u64_rem(arg1_value, arg2_value, &remainder);
+
+ ASSERT(res_value <= BW_FIXED_MAX_I32);
+
+ /* determine fractional part */
+ {
+ uint32_t i = BW_FIXED_BITS_PER_FRACTIONAL_PART;
+
+ do {
+ remainder <<= 1;
+
+ res_value <<= 1;
+
+ if (remainder >= arg2_value) {
+ res_value |= 1;
+ remainder -= arg2_value;
+ }
+ } while (--i != 0);
+ }
+
+ /* round up LSB */
+ {
+ uint64_t summand = (remainder << 1) >= arg2_value;
+
+ ASSERT(res_value <= MAX_I64 - summand);
+
+ res_value += summand;
+ }
+
+ res.value = (int64_t)(res_value);
+
+ if (arg1_negative ^ arg2_negative)
+ res.value = -res.value;
+ return res;
+}
+
+struct bw_fixed bw_floor2(
+ const struct bw_fixed arg,
+ const struct bw_fixed significance)
+{
+ struct bw_fixed result;
+ int64_t multiplicand;
+
+ multiplicand = div64_s64(arg.value, abs_i64(significance.value));
+ result.value = abs_i64(significance.value) * multiplicand;
+ ASSERT(abs_i64(result.value) <= abs_i64(arg.value));
+ return result;
+}
+
+struct bw_fixed bw_ceil2(
+ const struct bw_fixed arg,
+ const struct bw_fixed significance)
+{
+ struct bw_fixed result;
+ int64_t multiplicand;
+
+ multiplicand = div64_s64(arg.value, abs_i64(significance.value));
+ result.value = abs_i64(significance.value) * multiplicand;
+ if (abs_i64(result.value) < abs_i64(arg.value)) {
+ if (arg.value < 0)
+ result.value -= abs_i64(significance.value);
+ else
+ result.value += abs_i64(significance.value);
+ }
+ return result;
+}
+
+struct bw_fixed bw_mul(const struct bw_fixed arg1, const struct bw_fixed arg2)
+{
+ struct bw_fixed res;
+
+ bool arg1_negative = arg1.value < 0;
+ bool arg2_negative = arg2.value < 0;
+
+ uint64_t arg1_value = abs_i64(arg1.value);
+ uint64_t arg2_value = abs_i64(arg2.value);
+
+ uint64_t arg1_int = BW_FIXED_GET_INTEGER_PART(arg1_value);
+ uint64_t arg2_int = BW_FIXED_GET_INTEGER_PART(arg2_value);
+
+ uint64_t arg1_fra = GET_FRACTIONAL_PART(arg1_value);
+ uint64_t arg2_fra = GET_FRACTIONAL_PART(arg2_value);
+
+ uint64_t tmp;
+
+ res.value = arg1_int * arg2_int;
+
+ ASSERT(res.value <= BW_FIXED_MAX_I32);
+
+ res.value <<= BW_FIXED_BITS_PER_FRACTIONAL_PART;
+
+ tmp = arg1_int * arg2_fra;
+
+ ASSERT(tmp <= (uint64_t)(MAX_I64 - res.value));
+
+ res.value += tmp;
+
+ tmp = arg2_int * arg1_fra;
+
+ ASSERT(tmp <= (uint64_t)(MAX_I64 - res.value));
+
+ res.value += tmp;
+
+ tmp = arg1_fra * arg2_fra;
+
+ tmp = (tmp >> BW_FIXED_BITS_PER_FRACTIONAL_PART) +
+ (tmp >= (uint64_t)(bw_frc_to_fixed(1, 2).value));
+
+ ASSERT(tmp <= (uint64_t)(MAX_I64 - res.value));
+
+ res.value += tmp;
+
+ if (arg1_negative ^ arg2_negative)
+ res.value = -res.value;
+ return res;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/calcs_logger.h b/drivers/gpu/drm/amd/display/dc/dml/calcs/calcs_logger.h
new file mode 100644
index 000000000000..62435bfc274d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/calcs_logger.h
@@ -0,0 +1,578 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _CALCS_CALCS_LOGGER_H_
+#define _CALCS_CALCS_LOGGER_H_
+#define DC_LOGGER ctx->logger
+
+static void print_bw_calcs_dceip(struct dc_context *ctx, const struct bw_calcs_dceip *dceip)
+{
+
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_dceip");
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_calcs_version version %d", dceip->version);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] large_cursor: %d", dceip->large_cursor);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] dmif_pipe_en_fbc_chunk_tracker: %d", dceip->dmif_pipe_en_fbc_chunk_tracker);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] display_write_back_supported: %d", dceip->display_write_back_supported);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] argb_compression_support: %d", dceip->argb_compression_support);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] pre_downscaler_enabled: %d", dceip->pre_downscaler_enabled);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] underlay_downscale_prefetch_enabled: %d",
+ dceip->underlay_downscale_prefetch_enabled);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] graphics_lb_nodownscaling_multi_line_prefetching: %d",
+ dceip->graphics_lb_nodownscaling_multi_line_prefetching);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] limit_excessive_outstanding_dmif_requests: %d",
+ dceip->limit_excessive_outstanding_dmif_requests);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] cursor_max_outstanding_group_num: %d",
+ dceip->cursor_max_outstanding_group_num);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] lines_interleaved_into_lb: %d", dceip->lines_interleaved_into_lb);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] low_power_tiling_mode: %d", dceip->low_power_tiling_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] chunk_width: %d", dceip->chunk_width);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_graphics_pipes: %d", dceip->number_of_graphics_pipes);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_underlay_pipes: %d", dceip->number_of_underlay_pipes);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] max_dmif_buffer_allocated: %d", dceip->max_dmif_buffer_allocated);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] graphics_dmif_size: %d", dceip->graphics_dmif_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] underlay_luma_dmif_size: %d", dceip->underlay_luma_dmif_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] underlay_chroma_dmif_size: %d", dceip->underlay_chroma_dmif_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] scatter_gather_lines_of_pte_prefetching_in_linear_mode: %d",
+ dceip->scatter_gather_lines_of_pte_prefetching_in_linear_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] display_write_back420_luma_mcifwr_buffer_size: %d",
+ dceip->display_write_back420_luma_mcifwr_buffer_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] display_write_back420_chroma_mcifwr_buffer_size: %d",
+ dceip->display_write_back420_chroma_mcifwr_buffer_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] scatter_gather_pte_request_rows_in_tiling_mode: %d",
+ dceip->scatter_gather_pte_request_rows_in_tiling_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_vscaler_efficiency10_bit_per_component: %d",
+ bw_fixed_to_int(dceip->underlay_vscaler_efficiency10_bit_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_vscaler_efficiency12_bit_per_component: %d",
+ bw_fixed_to_int(dceip->underlay_vscaler_efficiency12_bit_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency6_bit_per_component: %d",
+ bw_fixed_to_int(dceip->graphics_vscaler_efficiency6_bit_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency8_bit_per_component: %d",
+ bw_fixed_to_int(dceip->graphics_vscaler_efficiency8_bit_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency10_bit_per_component: %d",
+ bw_fixed_to_int(dceip->graphics_vscaler_efficiency10_bit_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency12_bit_per_component: %d",
+ bw_fixed_to_int(dceip->graphics_vscaler_efficiency12_bit_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] alpha_vscaler_efficiency: %d",
+ bw_fixed_to_int(dceip->alpha_vscaler_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_write_pixels_per_dispclk: %d",
+ bw_fixed_to_int(dceip->lb_write_pixels_per_dispclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_size_per_component444: %d",
+ bw_fixed_to_int(dceip->lb_size_per_component444));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_and_dram_clock_state_change_gated_before_cursor: %d",
+ bw_fixed_to_int(dceip->stutter_and_dram_clock_state_change_gated_before_cursor));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay420_luma_lb_size_per_component: %d",
+ bw_fixed_to_int(dceip->underlay420_luma_lb_size_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay420_chroma_lb_size_per_component: %d",
+ bw_fixed_to_int(dceip->underlay420_chroma_lb_size_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay422_lb_size_per_component: %d",
+ bw_fixed_to_int(dceip->underlay422_lb_size_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_chunk_width: %d", bw_fixed_to_int(dceip->cursor_chunk_width));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_dcp_buffer_lines: %d",
+ bw_fixed_to_int(dceip->cursor_dcp_buffer_lines));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_maximum_width_efficient_for_tiling: %d",
+ bw_fixed_to_int(dceip->underlay_maximum_width_efficient_for_tiling));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_maximum_height_efficient_for_tiling: %d",
+ bw_fixed_to_int(dceip->underlay_maximum_height_efficient_for_tiling));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display: %d",
+ bw_fixed_to_int(dceip->peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation: %d",
+ bw_fixed_to_int(dceip->peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] minimum_outstanding_pte_request_limit: %d",
+ bw_fixed_to_int(dceip->minimum_outstanding_pte_request_limit));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_total_outstanding_pte_requests_allowed_by_saw: %d",
+ bw_fixed_to_int(dceip->maximum_total_outstanding_pte_requests_allowed_by_saw));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] linear_mode_line_request_alternation_slice: %d",
+ bw_fixed_to_int(dceip->linear_mode_line_request_alternation_slice));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] request_efficiency: %d", bw_fixed_to_int(dceip->request_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_per_request: %d", bw_fixed_to_int(dceip->dispclk_per_request));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_ramping_factor: %d",
+ bw_fixed_to_int(dceip->dispclk_ramping_factor));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_pipe_throughput_factor: %d",
+ bw_fixed_to_int(dceip->display_pipe_throughput_factor));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_all_surfaces_burst_time: %d",
+ bw_fixed_to_int(dceip->mcifwr_all_surfaces_burst_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_request_buffer_size: %d",
+ bw_fixed_to_int(dceip->dmif_request_buffer_size));
+
+
+}
+
+static void print_bw_calcs_vbios(struct dc_context *ctx, const struct bw_calcs_vbios *vbios)
+{
+
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_vbios vbios");
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines memory_type: %d", vbios->memory_type);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines memory_type: %d", vbios->memory_type);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] dram_channel_width_in_bits: %d", vbios->dram_channel_width_in_bits);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_channels: %d", vbios->number_of_dram_channels);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_banks: %d", vbios->number_of_dram_banks);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_yclk: %d", bw_fixed_to_int(vbios->low_yclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid_yclk: %d", bw_fixed_to_int(vbios->mid_yclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_yclk: %d", bw_fixed_to_int(vbios->high_yclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_sclk: %d", bw_fixed_to_int(vbios->low_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid1_sclk: %d", bw_fixed_to_int(vbios->mid1_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid2_sclk: %d", bw_fixed_to_int(vbios->mid2_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid3_sclk: %d", bw_fixed_to_int(vbios->mid3_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid4_sclk: %d", bw_fixed_to_int(vbios->mid4_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid5_sclk: %d", bw_fixed_to_int(vbios->mid5_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid6_sclk: %d", bw_fixed_to_int(vbios->mid6_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_sclk: %d", bw_fixed_to_int(vbios->high_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_voltage_max_dispclk: %d",
+ bw_fixed_to_int(vbios->low_voltage_max_dispclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid_voltage_max_dispclk;: %d",
+ bw_fixed_to_int(vbios->mid_voltage_max_dispclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_voltage_max_dispclk;: %d",
+ bw_fixed_to_int(vbios->high_voltage_max_dispclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_voltage_max_phyclk: %d",
+ bw_fixed_to_int(vbios->low_voltage_max_phyclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid_voltage_max_phyclk: %d",
+ bw_fixed_to_int(vbios->mid_voltage_max_phyclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_voltage_max_phyclk: %d",
+ bw_fixed_to_int(vbios->high_voltage_max_phyclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] data_return_bus_width: %d", bw_fixed_to_int(vbios->data_return_bus_width));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] trc: %d", bw_fixed_to_int(vbios->trc));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmifmc_urgent_latency: %d", bw_fixed_to_int(vbios->dmifmc_urgent_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_self_refresh_exit_latency: %d",
+ bw_fixed_to_int(vbios->stutter_self_refresh_exit_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_self_refresh_entry_latency: %d",
+ bw_fixed_to_int(vbios->stutter_self_refresh_entry_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_change_latency: %d",
+ bw_fixed_to_int(vbios->nbp_state_change_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwrmc_urgent_latency: %d",
+ bw_fixed_to_int(vbios->mcifwrmc_urgent_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bool] scatter_gather_enable: %d", vbios->scatter_gather_enable);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] down_spread_percentage: %d",
+ bw_fixed_to_int(vbios->down_spread_percentage));
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] cursor_width: %d", vbios->cursor_width);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] average_compression_rate: %d", vbios->average_compression_rate);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_request_slots_gmc_reserves_for_dmif_per_channel: %d",
+ vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] blackout_duration: %d", bw_fixed_to_int(vbios->blackout_duration));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_blackout_recovery_time: %d",
+ bw_fixed_to_int(vbios->maximum_blackout_recovery_time));
+
+
+}
+
+static void print_bw_calcs_data(struct dc_context *ctx, struct bw_calcs_data *data)
+{
+
+ int i, j, k;
+
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_data data");
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_displays: %d", data->number_of_displays);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines underlay_surface_type: %d", data->underlay_surface_type);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines panning_and_bezel_adjustment: %d",
+ data->panning_and_bezel_adjustment);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines graphics_tiling_mode: %d", data->graphics_tiling_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] graphics_lb_bpc: %d", data->graphics_lb_bpc);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] underlay_lb_bpc: %d", data->underlay_lb_bpc);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines underlay_tiling_mode: %d", data->underlay_tiling_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines d0_underlay_mode: %d", data->d0_underlay_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] d1_display_write_back_dwb_enable: %d", data->d1_display_write_back_dwb_enable);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines d1_underlay_mode: %d", data->d1_underlay_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] cpup_state_change_enable: %d", data->cpup_state_change_enable);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] cpuc_state_change_enable: %d", data->cpuc_state_change_enable);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] nbp_state_change_enable: %d", data->nbp_state_change_enable);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] stutter_mode_enable: %d", data->stutter_mode_enable);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] y_clk_level: %d", data->y_clk_level);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] sclk_level: %d", data->sclk_level);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_underlay_surfaces: %d", data->number_of_underlay_surfaces);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_wrchannels: %d", data->number_of_dram_wrchannels);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] chunk_request_delay: %d", data->chunk_request_delay);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_channels: %d", data->number_of_dram_channels);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines underlay_micro_tile_mode: %d", data->underlay_micro_tile_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines graphics_micro_tile_mode: %d", data->graphics_micro_tile_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] max_phyclk: %d", bw_fixed_to_int(data->max_phyclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_efficiency: %d", bw_fixed_to_int(data->dram_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_width_after_surface_type: %d",
+ bw_fixed_to_int(data->src_width_after_surface_type));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_height_after_surface_type: %d",
+ bw_fixed_to_int(data->src_height_after_surface_type));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr_after_surface_type: %d",
+ bw_fixed_to_int(data->hsr_after_surface_type));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr_after_surface_type: %d", bw_fixed_to_int(data->vsr_after_surface_type));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_width_after_rotation: %d",
+ bw_fixed_to_int(data->src_width_after_rotation));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_height_after_rotation: %d",
+ bw_fixed_to_int(data->src_height_after_rotation));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr_after_rotation: %d", bw_fixed_to_int(data->hsr_after_rotation));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr_after_rotation: %d", bw_fixed_to_int(data->vsr_after_rotation));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_height_pixels: %d", bw_fixed_to_int(data->source_height_pixels));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr_after_stereo: %d", bw_fixed_to_int(data->hsr_after_stereo));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr_after_stereo: %d", bw_fixed_to_int(data->vsr_after_stereo));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_width_in_lb: %d", bw_fixed_to_int(data->source_width_in_lb));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_line_pitch: %d", bw_fixed_to_int(data->lb_line_pitch));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_maximum_source_efficient_for_tiling: %d",
+ bw_fixed_to_int(data->underlay_maximum_source_efficient_for_tiling));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] num_lines_at_frame_start: %d",
+ bw_fixed_to_int(data->num_lines_at_frame_start));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_dmif_size_in_time: %d", bw_fixed_to_int(data->min_dmif_size_in_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_mcifwr_size_in_time: %d",
+ bw_fixed_to_int(data->min_mcifwr_size_in_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_requests_for_dmif_size: %d",
+ bw_fixed_to_int(data->total_requests_for_dmif_size));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] peak_pte_request_to_eviction_ratio_limiting: %d",
+ bw_fixed_to_int(data->peak_pte_request_to_eviction_ratio_limiting));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] useful_pte_per_pte_request: %d",
+ bw_fixed_to_int(data->useful_pte_per_pte_request));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_request_rows: %d",
+ bw_fixed_to_int(data->scatter_gather_pte_request_rows));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_row_height: %d",
+ bw_fixed_to_int(data->scatter_gather_row_height));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_requests_in_vblank: %d",
+ bw_fixed_to_int(data->scatter_gather_pte_requests_in_vblank));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] inefficient_linear_pitch_in_bytes: %d",
+ bw_fixed_to_int(data->inefficient_linear_pitch_in_bytes));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_total_data: %d", bw_fixed_to_int(data->cursor_total_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_total_request_groups: %d",
+ bw_fixed_to_int(data->cursor_total_request_groups));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_total_pte_requests: %d",
+ bw_fixed_to_int(data->scatter_gather_total_pte_requests));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_total_pte_request_groups: %d",
+ bw_fixed_to_int(data->scatter_gather_total_pte_request_groups));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] tile_width_in_pixels: %d", bw_fixed_to_int(data->tile_width_in_pixels));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_total_number_of_data_request_page_close_open: %d",
+ bw_fixed_to_int(data->dmif_total_number_of_data_request_page_close_open));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_total_number_of_data_request_page_close_open: %d",
+ bw_fixed_to_int(data->mcifwr_total_number_of_data_request_page_close_open));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] bytes_per_page_close_open: %d",
+ bw_fixed_to_int(data->bytes_per_page_close_open));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_total_page_close_open_time: %d",
+ bw_fixed_to_int(data->mcifwr_total_page_close_open_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_requests_for_adjusted_dmif_size: %d",
+ bw_fixed_to_int(data->total_requests_for_adjusted_dmif_size));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dmifmc_urgent_trips: %d",
+ bw_fixed_to_int(data->total_dmifmc_urgent_trips));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dmifmc_urgent_latency: %d",
+ bw_fixed_to_int(data->total_dmifmc_urgent_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_reads_required_data: %d",
+ bw_fixed_to_int(data->total_display_reads_required_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_reads_required_dram_access_data: %d",
+ bw_fixed_to_int(data->total_display_reads_required_dram_access_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_writes_required_data: %d",
+ bw_fixed_to_int(data->total_display_writes_required_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_writes_required_dram_access_data: %d",
+ bw_fixed_to_int(data->total_display_writes_required_dram_access_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_required_data: %d",
+ bw_fixed_to_int(data->display_reads_required_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_required_dram_access_data: %d",
+ bw_fixed_to_int(data->display_reads_required_dram_access_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_total_page_close_open_time: %d",
+ bw_fixed_to_int(data->dmif_total_page_close_open_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_cursor_memory_interface_buffer_size_in_time: %d",
+ bw_fixed_to_int(data->min_cursor_memory_interface_buffer_size_in_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_read_buffer_size_in_time: %d",
+ bw_fixed_to_int(data->min_read_buffer_size_in_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_time_for_data_transfer: %d",
+ bw_fixed_to_int(data->display_reads_time_for_data_transfer));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_writes_time_for_data_transfer: %d",
+ bw_fixed_to_int(data->display_writes_time_for_data_transfer));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_required_dram_bandwidth: %d",
+ bw_fixed_to_int(data->dmif_required_dram_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_required_dram_bandwidth: %d",
+ bw_fixed_to_int(data->mcifwr_required_dram_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_dmifmc_urgent_latency_for_page_close_open: %d",
+ bw_fixed_to_int(data->required_dmifmc_urgent_latency_for_page_close_open));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_mcifmcwr_urgent_latency: %d",
+ bw_fixed_to_int(data->required_mcifmcwr_urgent_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_dram_bandwidth_gbyte_per_second: %d",
+ bw_fixed_to_int(data->required_dram_bandwidth_gbyte_per_second));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_bandwidth: %d", bw_fixed_to_int(data->dram_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_required_sclk: %d", bw_fixed_to_int(data->dmif_required_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_required_sclk: %d", bw_fixed_to_int(data->mcifwr_required_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_sclk: %d", bw_fixed_to_int(data->required_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] downspread_factor: %d", bw_fixed_to_int(data->downspread_factor));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_scaler_efficiency: %d", bw_fixed_to_int(data->v_scaler_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scaler_limits_factor: %d", bw_fixed_to_int(data->scaler_limits_factor));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_pipe_pixel_throughput: %d",
+ bw_fixed_to_int(data->display_pipe_pixel_throughput));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_with_ramping: %d",
+ bw_fixed_to_int(data->total_dispclk_required_with_ramping));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_without_ramping: %d",
+ bw_fixed_to_int(data->total_dispclk_required_without_ramping));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_read_request_bandwidth: %d",
+ bw_fixed_to_int(data->total_read_request_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_write_request_bandwidth: %d",
+ bw_fixed_to_int(data->total_write_request_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_total_read_request_bandwidth: %d",
+ bw_fixed_to_int(data->dispclk_required_for_total_read_request_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_with_ramping_with_request_bandwidth: %d",
+ bw_fixed_to_int(data->total_dispclk_required_with_ramping_with_request_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_without_ramping_with_request_bandwidth: %d",
+ bw_fixed_to_int(data->total_dispclk_required_without_ramping_with_request_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk: %d", bw_fixed_to_int(data->dispclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] blackout_recovery_time: %d", bw_fixed_to_int(data->blackout_recovery_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_pixels_per_data_fifo_entry: %d",
+ bw_fixed_to_int(data->min_pixels_per_data_fifo_entry));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] sclk_deep_sleep: %d", bw_fixed_to_int(data->sclk_deep_sleep));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] chunk_request_time: %d", bw_fixed_to_int(data->chunk_request_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_request_time: %d", bw_fixed_to_int(data->cursor_request_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] line_source_pixels_transfer_time: %d",
+ bw_fixed_to_int(data->line_source_pixels_transfer_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmifdram_access_efficiency: %d",
+ bw_fixed_to_int(data->dmifdram_access_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwrdram_access_efficiency: %d",
+ bw_fixed_to_int(data->mcifwrdram_access_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_average_bandwidth_no_compression: %d",
+ bw_fixed_to_int(data->total_average_bandwidth_no_compression));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_average_bandwidth: %d",
+ bw_fixed_to_int(data->total_average_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_stutter_cycle_duration: %d",
+ bw_fixed_to_int(data->total_stutter_cycle_duration));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_burst_time: %d", bw_fixed_to_int(data->stutter_burst_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] time_in_self_refresh: %d", bw_fixed_to_int(data->time_in_self_refresh));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_efficiency: %d", bw_fixed_to_int(data->stutter_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] worst_number_of_trips_to_memory: %d",
+ bw_fixed_to_int(data->worst_number_of_trips_to_memory));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] immediate_flip_time: %d", bw_fixed_to_int(data->immediate_flip_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] latency_for_non_dmif_clients: %d",
+ bw_fixed_to_int(data->latency_for_non_dmif_clients));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] latency_for_non_mcifwr_clients: %d",
+ bw_fixed_to_int(data->latency_for_non_mcifwr_clients));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmifmc_urgent_latency_supported_in_high_sclk_and_yclk: %d",
+ bw_fixed_to_int(data->dmifmc_urgent_latency_supported_in_high_sclk_and_yclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_dram_speed_change_margin: %d",
+ bw_fixed_to_int(data->nbp_state_dram_speed_change_margin));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_time_for_data_transfer_and_urgent_latency: %d",
+ bw_fixed_to_int(data->display_reads_time_for_data_transfer_and_urgent_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_speed_change_margin: %d",
+ bw_fixed_to_int(data->dram_speed_change_margin));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_vblank_dram_speed_change_margin: %d",
+ bw_fixed_to_int(data->min_vblank_dram_speed_change_margin));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_stutter_refresh_duration: %d",
+ bw_fixed_to_int(data->min_stutter_refresh_duration));
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] total_stutter_dmif_buffer_size: %d", data->total_stutter_dmif_buffer_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] total_bytes_requested: %d", data->total_bytes_requested);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] min_stutter_dmif_buffer_size: %d", data->min_stutter_dmif_buffer_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] num_stutter_bursts: %d", data->num_stutter_bursts);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_blank_nbp_state_dram_speed_change_latency_supported: %d",
+ bw_fixed_to_int(data->v_blank_nbp_state_dram_speed_change_latency_supported));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_dram_speed_change_latency_supported: %d",
+ bw_fixed_to_int(data->nbp_state_dram_speed_change_latency_supported));
+
+ for (i = 0; i < maximum_number_of_surfaces; i++) {
+ DC_LOG_BANDWIDTH_CALCS(" [bool] fbc_en[%d]:%d\n", i, data->fbc_en[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] lpt_en[%d]:%d", i, data->lpt_en[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] displays_match_flag[%d]:%d", i, data->displays_match_flag[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] use_alpha[%d]:%d", i, data->use_alpha[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] orthogonal_rotation[%d]:%d", i, data->orthogonal_rotation[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] enable[%d]:%d", i, data->enable[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] access_one_channel_only[%d]:%d", i, data->access_one_channel_only[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] scatter_gather_enable_for_pipe[%d]:%d",
+ i, data->scatter_gather_enable_for_pipe[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] interlace_mode[%d]:%d",
+ i, data->interlace_mode[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] display_pstate_change_enable[%d]:%d",
+ i, data->display_pstate_change_enable[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] line_buffer_prefetch[%d]:%d", i, data->line_buffer_prefetch[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] bytes_per_pixel[%d]:%d", i, data->bytes_per_pixel[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] max_chunks_non_fbc_mode[%d]:%d",
+ i, data->max_chunks_non_fbc_mode[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] lb_bpc[%d]:%d", i, data->lb_bpc[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bpphdmi[%d]:%d", i, data->output_bpphdmi[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bppdp4_lane_hbr[%d]:%d", i, data->output_bppdp4_lane_hbr[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bppdp4_lane_hbr2[%d]:%d",
+ i, data->output_bppdp4_lane_hbr2[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bppdp4_lane_hbr3[%d]:%d",
+ i, data->output_bppdp4_lane_hbr3[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines stereo_mode[%d]:%d", i, data->stereo_mode[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_buffer_transfer_time[%d]:%d",
+ i, bw_fixed_to_int(data->dmif_buffer_transfer_time[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] displays_with_same_mode[%d]:%d",
+ i, bw_fixed_to_int(data->displays_with_same_mode[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_dmif_buffer_size[%d]:%d",
+ i, bw_fixed_to_int(data->stutter_dmif_buffer_size[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_refresh_duration[%d]:%d",
+ i, bw_fixed_to_int(data->stutter_refresh_duration[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_exit_watermark[%d]:%d",
+ i, bw_fixed_to_int(data->stutter_exit_watermark[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_entry_watermark[%d]:%d",
+ i, bw_fixed_to_int(data->stutter_entry_watermark[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] h_total[%d]:%d", i, bw_fixed_to_int(data->h_total[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_total[%d]:%d", i, bw_fixed_to_int(data->v_total[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pixel_rate[%d]:%d", i, bw_fixed_to_int(data->pixel_rate[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_width[%d]:%d", i, bw_fixed_to_int(data->src_width[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pitch_in_pixels[%d]:%d",
+ i, bw_fixed_to_int(data->pitch_in_pixels[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pitch_in_pixels_after_surface_type[%d]:%d",
+ i, bw_fixed_to_int(data->pitch_in_pixels_after_surface_type[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_height[%d]:%d", i, bw_fixed_to_int(data->src_height[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scale_ratio[%d]:%d", i, bw_fixed_to_int(data->scale_ratio[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] h_taps[%d]:%d", i, bw_fixed_to_int(data->h_taps[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_taps[%d]:%d", i, bw_fixed_to_int(data->v_taps[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] h_scale_ratio[%d]:%d", i, bw_fixed_to_int(data->h_scale_ratio[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_scale_ratio[%d]:%d", i, bw_fixed_to_int(data->v_scale_ratio[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] rotation_angle[%d]:%d",
+ i, bw_fixed_to_int(data->rotation_angle[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] compression_rate[%d]:%d",
+ i, bw_fixed_to_int(data->compression_rate[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr[%d]:%d", i, bw_fixed_to_int(data->hsr[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr[%d]:%d", i, bw_fixed_to_int(data->vsr[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_width_rounded_up_to_chunks[%d]:%d",
+ i, bw_fixed_to_int(data->source_width_rounded_up_to_chunks[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_width_pixels[%d]:%d",
+ i, bw_fixed_to_int(data->source_width_pixels[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_height_rounded_up_to_chunks[%d]:%d",
+ i, bw_fixed_to_int(data->source_height_rounded_up_to_chunks[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_bandwidth[%d]:%d",
+ i, bw_fixed_to_int(data->display_bandwidth[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] request_bandwidth[%d]:%d",
+ i, bw_fixed_to_int(data->request_bandwidth[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] bytes_per_request[%d]:%d",
+ i, bw_fixed_to_int(data->bytes_per_request[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] useful_bytes_per_request[%d]:%d",
+ i, bw_fixed_to_int(data->useful_bytes_per_request[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lines_interleaved_in_mem_access[%d]:%d",
+ i, bw_fixed_to_int(data->lines_interleaved_in_mem_access[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] latency_hiding_lines[%d]:%d",
+ i, bw_fixed_to_int(data->latency_hiding_lines[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_partitions[%d]:%d",
+ i, bw_fixed_to_int(data->lb_partitions[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_partitions_max[%d]:%d",
+ i, bw_fixed_to_int(data->lb_partitions_max[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_with_ramping[%d]:%d",
+ i, bw_fixed_to_int(data->dispclk_required_with_ramping[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_without_ramping[%d]:%d",
+ i, bw_fixed_to_int(data->dispclk_required_without_ramping[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] data_buffer_size[%d]:%d",
+ i, bw_fixed_to_int(data->data_buffer_size[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] outstanding_chunk_request_limit[%d]:%d",
+ i, bw_fixed_to_int(data->outstanding_chunk_request_limit[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] urgent_watermark[%d]:%d",
+ i, bw_fixed_to_int(data->urgent_watermark[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_change_watermark[%d]:%d",
+ i, bw_fixed_to_int(data->nbp_state_change_watermark[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_filter_init[%d]:%d", i, bw_fixed_to_int(data->v_filter_init[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_cycle_duration[%d]:%d",
+ i, bw_fixed_to_int(data->stutter_cycle_duration[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] average_bandwidth[%d]:%d",
+ i, bw_fixed_to_int(data->average_bandwidth[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] average_bandwidth_no_compression[%d]:%d",
+ i, bw_fixed_to_int(data->average_bandwidth_no_compression[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_request_limit[%d]:%d",
+ i, bw_fixed_to_int(data->scatter_gather_pte_request_limit[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_size_per_component[%d]:%d",
+ i, bw_fixed_to_int(data->lb_size_per_component[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] memory_chunk_size_in_bytes[%d]:%d",
+ i, bw_fixed_to_int(data->memory_chunk_size_in_bytes[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pipe_chunk_size_in_bytes[%d]:%d",
+ i, bw_fixed_to_int(data->pipe_chunk_size_in_bytes[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] number_of_trips_to_memory_for_getting_apte_row[%d]:%d",
+ i, bw_fixed_to_int(data->number_of_trips_to_memory_for_getting_apte_row[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] adjusted_data_buffer_size[%d]:%d",
+ i, bw_fixed_to_int(data->adjusted_data_buffer_size[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] adjusted_data_buffer_size_in_memory[%d]:%d",
+ i, bw_fixed_to_int(data->adjusted_data_buffer_size_in_memory[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pixels_per_data_fifo_entry[%d]:%d",
+ i, bw_fixed_to_int(data->pixels_per_data_fifo_entry[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_requests_in_row[%d]:%d",
+ i, bw_fixed_to_int(data->scatter_gather_pte_requests_in_row[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pte_request_per_chunk[%d]:%d",
+ i, bw_fixed_to_int(data->pte_request_per_chunk[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_page_width[%d]:%d",
+ i, bw_fixed_to_int(data->scatter_gather_page_width[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_page_height[%d]:%d",
+ i, bw_fixed_to_int(data->scatter_gather_page_height[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_lines_in_per_line_out_in_beginning_of_frame[%d]:%d",
+ i, bw_fixed_to_int(data->lb_lines_in_per_line_out_in_beginning_of_frame[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_lines_in_per_line_out_in_middle_of_frame[%d]:%d",
+ i, bw_fixed_to_int(data->lb_lines_in_per_line_out_in_middle_of_frame[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_width_pixels[%d]:%d",
+ i, bw_fixed_to_int(data->cursor_width_pixels[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] minimum_latency_hiding[%d]:%d",
+ i, bw_fixed_to_int(data->minimum_latency_hiding[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_latency_hiding[%d]:%d",
+ i, bw_fixed_to_int(data->maximum_latency_hiding[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] minimum_latency_hiding_with_cursor[%d]:%d",
+ i, bw_fixed_to_int(data->minimum_latency_hiding_with_cursor[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_latency_hiding_with_cursor[%d]:%d",
+ i, bw_fixed_to_int(data->maximum_latency_hiding_with_cursor[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_pixels_for_first_output_pixel[%d]:%d",
+ i, bw_fixed_to_int(data->src_pixels_for_first_output_pixel[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_pixels_for_last_output_pixel[%d]:%d",
+ i, bw_fixed_to_int(data->src_pixels_for_last_output_pixel[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_data_for_first_output_pixel[%d]:%d",
+ i, bw_fixed_to_int(data->src_data_for_first_output_pixel[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_data_for_last_output_pixel[%d]:%d",
+ i, bw_fixed_to_int(data->src_data_for_last_output_pixel[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] active_time[%d]:%d", i, bw_fixed_to_int(data->active_time[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] horizontal_blank_and_chunk_granularity_factor[%d]:%d",
+ i, bw_fixed_to_int(data->horizontal_blank_and_chunk_granularity_factor[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_latency_hiding[%d]:%d",
+ i, bw_fixed_to_int(data->cursor_latency_hiding[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_blank_dram_speed_change_margin[%d]:%d",
+ i, bw_fixed_to_int(data->v_blank_dram_speed_change_margin[i]));
+ }
+
+ for (i = 0; i < maximum_number_of_surfaces; i++) {
+ for (j = 0; j < 3; j++) {
+ for (k = 0; k < 8; k++) {
+
+ DC_LOG_BANDWIDTH_CALCS("\n [bw_fixed] line_source_transfer_time[%d][%d][%d]:%d",
+ i, j, k, bw_fixed_to_int(data->line_source_transfer_time[i][j][k]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_speed_change_line_source_transfer_time[%d][%d][%d]:%d",
+ i, j, k,
+ bw_fixed_to_int(data->dram_speed_change_line_source_transfer_time[i][j][k]));
+ }
+ }
+ }
+
+ for (i = 0; i < 3; i++) {
+ for (j = 0; j < 8; j++) {
+
+ DC_LOG_BANDWIDTH_CALCS("\n [uint32_t] num_displays_with_margin[%d][%d]:%d",
+ i, j, data->num_displays_with_margin[i][j]);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_burst_time[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->dmif_burst_time[i][j]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_burst_time[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->mcifwr_burst_time[i][j]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_dram_speed_change_margin[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->min_dram_speed_change_margin[i][j]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_dram_speed_change[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->dispclk_required_for_dram_speed_change[i][j]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] blackout_duration_margin[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->blackout_duration_margin[i][j]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_blackout_duration[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->dispclk_required_for_blackout_duration[i][j]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_blackout_recovery[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->dispclk_required_for_blackout_recovery[i][j]));
+ }
+ }
+
+ for (i = 0; i < 6; i++) {
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_required_sclk_for_urgent_latency[%d]:%d",
+ i, bw_fixed_to_int(data->dmif_required_sclk_for_urgent_latency[i]));
+ }
+}
+;
+
+#endif /* _CALCS_CALCS_LOGGER_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/custom_float.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/custom_float.c
new file mode 100644
index 000000000000..31d167bc548f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/custom_float.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "dm_services.h"
+#include "custom_float.h"
+
+
+static bool build_custom_float(
+ struct fixed31_32 value,
+ const struct custom_float_format *format,
+ bool *negative,
+ uint32_t *mantissa,
+ uint32_t *exponenta)
+{
+ uint32_t exp_offset = (1 << (format->exponenta_bits - 1)) - 1;
+
+ const struct fixed31_32 mantissa_constant_plus_max_fraction =
+ dc_fixpt_from_fraction(
+ (1LL << (format->mantissa_bits + 1)) - 1,
+ 1LL << format->mantissa_bits);
+
+ struct fixed31_32 mantiss;
+
+ if (dc_fixpt_eq(
+ value,
+ dc_fixpt_zero)) {
+ *negative = false;
+ *mantissa = 0;
+ *exponenta = 0;
+ return true;
+ }
+
+ if (dc_fixpt_lt(
+ value,
+ dc_fixpt_zero)) {
+ *negative = format->sign;
+ value = dc_fixpt_neg(value);
+ } else {
+ *negative = false;
+ }
+
+ if (dc_fixpt_lt(
+ value,
+ dc_fixpt_one)) {
+ uint32_t i = 1;
+
+ do {
+ value = dc_fixpt_shl(value, 1);
+ ++i;
+ } while (dc_fixpt_lt(
+ value,
+ dc_fixpt_one));
+
+ --i;
+
+ if (exp_offset <= i) {
+ *mantissa = 0;
+ *exponenta = 0;
+ return true;
+ }
+
+ *exponenta = exp_offset - i;
+ } else if (dc_fixpt_le(
+ mantissa_constant_plus_max_fraction,
+ value)) {
+ uint32_t i = 1;
+
+ do {
+ value = dc_fixpt_shr(value, 1);
+ ++i;
+ } while (dc_fixpt_lt(
+ mantissa_constant_plus_max_fraction,
+ value));
+
+ *exponenta = exp_offset + i - 1;
+ } else {
+ *exponenta = exp_offset;
+ }
+
+ mantiss = dc_fixpt_sub(
+ value,
+ dc_fixpt_one);
+
+ if (dc_fixpt_lt(
+ mantiss,
+ dc_fixpt_zero) ||
+ dc_fixpt_lt(
+ dc_fixpt_one,
+ mantiss))
+ mantiss = dc_fixpt_zero;
+ else
+ mantiss = dc_fixpt_shl(
+ mantiss,
+ format->mantissa_bits);
+
+ *mantissa = dc_fixpt_floor(mantiss);
+
+ return true;
+}
+
+static bool setup_custom_float(
+ const struct custom_float_format *format,
+ bool negative,
+ uint32_t mantissa,
+ uint32_t exponenta,
+ uint32_t *result)
+{
+ uint32_t i = 0;
+ uint32_t j = 0;
+
+ uint32_t value = 0;
+
+ /* verification code:
+ * once calculation is ok we can remove it
+ */
+
+ const uint32_t mantissa_mask =
+ (1 << (format->mantissa_bits + 1)) - 1;
+
+ const uint32_t exponenta_mask =
+ (1 << (format->exponenta_bits + 1)) - 1;
+
+ if (mantissa & ~mantissa_mask) {
+ BREAK_TO_DEBUGGER();
+ mantissa = mantissa_mask;
+ }
+
+ if (exponenta & ~exponenta_mask) {
+ BREAK_TO_DEBUGGER();
+ exponenta = exponenta_mask;
+ }
+
+ /* end of verification code */
+
+ while (i < format->mantissa_bits) {
+ uint32_t mask = 1 << i;
+
+ if (mantissa & mask)
+ value |= mask;
+
+ ++i;
+ }
+
+ while (j < format->exponenta_bits) {
+ uint32_t mask = 1 << j;
+
+ if (exponenta & mask)
+ value |= mask << i;
+
+ ++j;
+ }
+
+ if (negative && format->sign)
+ value |= 1 << (i + j);
+
+ *result = value;
+
+ return true;
+}
+
+bool convert_to_custom_float_format(
+ struct fixed31_32 value,
+ const struct custom_float_format *format,
+ uint32_t *result)
+{
+ uint32_t mantissa;
+ uint32_t exponenta;
+ bool negative;
+
+ return build_custom_float(
+ value, format, &negative, &mantissa, &exponenta) &&
+ setup_custom_float(
+ format, negative, mantissa, exponenta, result);
+}
+
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dce_calcs.c
new file mode 100644
index 000000000000..0100a6053ab6
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dce_calcs.c
@@ -0,0 +1,3623 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <linux/slab.h>
+
+#include "resource.h"
+#include "dm_services.h"
+#include "dce_calcs.h"
+#include "dc.h"
+#include "core_types.h"
+#include "dal_asic_id.h"
+#include "calcs_logger.h"
+
+/*
+ * NOTE:
+ * This file is gcc-parseable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
+/*******************************************************************************
+ * Private Functions
+ ******************************************************************************/
+
+static enum bw_calcs_version bw_calcs_version_from_asic_id(struct hw_asic_id asic_id)
+{
+ switch (asic_id.chip_family) {
+
+ case FAMILY_CZ:
+ if (ASIC_REV_IS_STONEY(asic_id.hw_internal_rev))
+ return BW_CALCS_VERSION_STONEY;
+ return BW_CALCS_VERSION_CARRIZO;
+
+ case FAMILY_VI:
+ if (ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev))
+ return BW_CALCS_VERSION_POLARIS12;
+ if (ASIC_REV_IS_POLARIS10_P(asic_id.hw_internal_rev))
+ return BW_CALCS_VERSION_POLARIS10;
+ if (ASIC_REV_IS_POLARIS11_M(asic_id.hw_internal_rev))
+ return BW_CALCS_VERSION_POLARIS11;
+ if (ASIC_REV_IS_VEGAM(asic_id.hw_internal_rev))
+ return BW_CALCS_VERSION_VEGAM;
+ return BW_CALCS_VERSION_INVALID;
+
+ case FAMILY_AI:
+ return BW_CALCS_VERSION_VEGA10;
+
+ default:
+ return BW_CALCS_VERSION_INVALID;
+ }
+}
+
+static void calculate_bandwidth(
+ const struct bw_calcs_dceip *dceip,
+ const struct bw_calcs_vbios *vbios,
+ struct bw_calcs_data *data)
+
+{
+ const int32_t pixels_per_chunk = 512;
+ const int32_t high = 2;
+ const int32_t mid = 1;
+ const int32_t low = 0;
+ const uint32_t s_low = 0;
+ const uint32_t s_mid1 = 1;
+ const uint32_t s_mid2 = 2;
+ const uint32_t s_mid3 = 3;
+ const uint32_t s_mid4 = 4;
+ const uint32_t s_mid5 = 5;
+ const uint32_t s_mid6 = 6;
+ const uint32_t s_high = 7;
+ const uint32_t dmif_chunk_buff_margin = 1;
+
+ uint32_t max_chunks_fbc_mode;
+ int32_t num_cursor_lines;
+
+ int32_t i, j, k;
+ struct bw_fixed *yclk;
+ struct bw_fixed *sclk;
+ bool d0_underlay_enable;
+ bool d1_underlay_enable;
+ bool fbc_enabled;
+ bool lpt_enabled;
+ enum bw_defines sclk_message;
+ enum bw_defines yclk_message;
+ enum bw_defines *tiling_mode;
+ enum bw_defines *surface_type;
+ enum bw_defines voltage;
+ enum bw_defines pipe_check;
+ enum bw_defines hsr_check;
+ enum bw_defines vsr_check;
+ enum bw_defines lb_size_check;
+ enum bw_defines fbc_check;
+ enum bw_defines rotation_check;
+ enum bw_defines mode_check;
+ enum bw_defines nbp_state_change_enable_blank;
+ /*initialize variables*/
+ int32_t number_of_displays_enabled = 0;
+ int32_t number_of_displays_enabled_with_margin = 0;
+ int32_t number_of_aligned_displays_with_no_margin = 0;
+
+ yclk = kcalloc(3, sizeof(*yclk), GFP_KERNEL);
+ if (!yclk)
+ return;
+
+ sclk = kcalloc(8, sizeof(*sclk), GFP_KERNEL);
+ if (!sclk)
+ goto free_yclk;
+
+ tiling_mode = kcalloc(maximum_number_of_surfaces, sizeof(*tiling_mode), GFP_KERNEL);
+ if (!tiling_mode)
+ goto free_sclk;
+
+ surface_type = kcalloc(maximum_number_of_surfaces, sizeof(*surface_type), GFP_KERNEL);
+ if (!surface_type)
+ goto free_tiling_mode;
+
+ yclk[low] = vbios->low_yclk;
+ yclk[mid] = vbios->mid_yclk;
+ yclk[high] = vbios->high_yclk;
+ sclk[s_low] = vbios->low_sclk;
+ sclk[s_mid1] = vbios->mid1_sclk;
+ sclk[s_mid2] = vbios->mid2_sclk;
+ sclk[s_mid3] = vbios->mid3_sclk;
+ sclk[s_mid4] = vbios->mid4_sclk;
+ sclk[s_mid5] = vbios->mid5_sclk;
+ sclk[s_mid6] = vbios->mid6_sclk;
+ sclk[s_high] = vbios->high_sclk;
+ /*''''''''''''''''''*/
+ /* surface assignment:*/
+ /* 0: d0 underlay or underlay luma*/
+ /* 1: d0 underlay chroma*/
+ /* 2: d1 underlay or underlay luma*/
+ /* 3: d1 underlay chroma*/
+ /* 4: d0 graphics*/
+ /* 5: d1 graphics*/
+ /* 6: d2 graphics*/
+ /* 7: d3 graphics, same mode as d2*/
+ /* 8: d4 graphics, same mode as d2*/
+ /* 9: d5 graphics, same mode as d2*/
+ /* ...*/
+ /* maximum_number_of_surfaces-2: d1 display_write_back420 luma*/
+ /* maximum_number_of_surfaces-1: d1 display_write_back420 chroma*/
+ /* underlay luma and chroma surface parameters from spreadsheet*/
+
+
+
+
+ if (data->d0_underlay_mode == bw_def_none)
+ d0_underlay_enable = false;
+ else
+ d0_underlay_enable = true;
+ if (data->d1_underlay_mode == bw_def_none)
+ d1_underlay_enable = false;
+ else
+ d1_underlay_enable = true;
+ data->number_of_underlay_surfaces = d0_underlay_enable + d1_underlay_enable;
+ switch (data->underlay_surface_type) {
+ case bw_def_420:
+ surface_type[0] = bw_def_underlay420_luma;
+ surface_type[2] = bw_def_underlay420_luma;
+ data->bytes_per_pixel[0] = 1;
+ data->bytes_per_pixel[2] = 1;
+ surface_type[1] = bw_def_underlay420_chroma;
+ surface_type[3] = bw_def_underlay420_chroma;
+ data->bytes_per_pixel[1] = 2;
+ data->bytes_per_pixel[3] = 2;
+ data->lb_size_per_component[0] = dceip->underlay420_luma_lb_size_per_component;
+ data->lb_size_per_component[1] = dceip->underlay420_chroma_lb_size_per_component;
+ data->lb_size_per_component[2] = dceip->underlay420_luma_lb_size_per_component;
+ data->lb_size_per_component[3] = dceip->underlay420_chroma_lb_size_per_component;
+ break;
+ case bw_def_422:
+ surface_type[0] = bw_def_underlay422;
+ surface_type[2] = bw_def_underlay422;
+ data->bytes_per_pixel[0] = 2;
+ data->bytes_per_pixel[2] = 2;
+ data->lb_size_per_component[0] = dceip->underlay422_lb_size_per_component;
+ data->lb_size_per_component[2] = dceip->underlay422_lb_size_per_component;
+ break;
+ default:
+ surface_type[0] = bw_def_underlay444;
+ surface_type[2] = bw_def_underlay444;
+ data->bytes_per_pixel[0] = 4;
+ data->bytes_per_pixel[2] = 4;
+ data->lb_size_per_component[0] = dceip->lb_size_per_component444;
+ data->lb_size_per_component[2] = dceip->lb_size_per_component444;
+ break;
+ }
+ if (d0_underlay_enable) {
+ switch (data->underlay_surface_type) {
+ case bw_def_420:
+ data->enable[0] = 1;
+ data->enable[1] = 1;
+ break;
+ default:
+ data->enable[0] = 1;
+ data->enable[1] = 0;
+ break;
+ }
+ }
+ else {
+ data->enable[0] = 0;
+ data->enable[1] = 0;
+ }
+ if (d1_underlay_enable) {
+ switch (data->underlay_surface_type) {
+ case bw_def_420:
+ data->enable[2] = 1;
+ data->enable[3] = 1;
+ break;
+ default:
+ data->enable[2] = 1;
+ data->enable[3] = 0;
+ break;
+ }
+ }
+ else {
+ data->enable[2] = 0;
+ data->enable[3] = 0;
+ }
+ data->use_alpha[0] = 0;
+ data->use_alpha[1] = 0;
+ data->use_alpha[2] = 0;
+ data->use_alpha[3] = 0;
+ data->scatter_gather_enable_for_pipe[0] = vbios->scatter_gather_enable;
+ data->scatter_gather_enable_for_pipe[1] = vbios->scatter_gather_enable;
+ data->scatter_gather_enable_for_pipe[2] = vbios->scatter_gather_enable;
+ data->scatter_gather_enable_for_pipe[3] = vbios->scatter_gather_enable;
+ /*underlay0 same and graphics display pipe0*/
+ data->interlace_mode[0] = data->interlace_mode[4];
+ data->interlace_mode[1] = data->interlace_mode[4];
+ /*underlay1 same and graphics display pipe1*/
+ data->interlace_mode[2] = data->interlace_mode[5];
+ data->interlace_mode[3] = data->interlace_mode[5];
+ /*underlay0 same and graphics display pipe0*/
+ data->h_total[0] = data->h_total[4];
+ data->v_total[0] = data->v_total[4];
+ data->h_total[1] = data->h_total[4];
+ data->v_total[1] = data->v_total[4];
+ /*underlay1 same and graphics display pipe1*/
+ data->h_total[2] = data->h_total[5];
+ data->v_total[2] = data->v_total[5];
+ data->h_total[3] = data->h_total[5];
+ data->v_total[3] = data->v_total[5];
+ /*underlay0 same and graphics display pipe0*/
+ data->pixel_rate[0] = data->pixel_rate[4];
+ data->pixel_rate[1] = data->pixel_rate[4];
+ /*underlay1 same and graphics display pipe1*/
+ data->pixel_rate[2] = data->pixel_rate[5];
+ data->pixel_rate[3] = data->pixel_rate[5];
+ if ((data->underlay_tiling_mode == bw_def_array_linear_general || data->underlay_tiling_mode == bw_def_array_linear_aligned)) {
+ tiling_mode[0] = bw_def_linear;
+ tiling_mode[1] = bw_def_linear;
+ tiling_mode[2] = bw_def_linear;
+ tiling_mode[3] = bw_def_linear;
+ }
+ else {
+ tiling_mode[0] = bw_def_landscape;
+ tiling_mode[1] = bw_def_landscape;
+ tiling_mode[2] = bw_def_landscape;
+ tiling_mode[3] = bw_def_landscape;
+ }
+ data->lb_bpc[0] = data->underlay_lb_bpc;
+ data->lb_bpc[1] = data->underlay_lb_bpc;
+ data->lb_bpc[2] = data->underlay_lb_bpc;
+ data->lb_bpc[3] = data->underlay_lb_bpc;
+ data->compression_rate[0] = bw_int_to_fixed(1);
+ data->compression_rate[1] = bw_int_to_fixed(1);
+ data->compression_rate[2] = bw_int_to_fixed(1);
+ data->compression_rate[3] = bw_int_to_fixed(1);
+ data->access_one_channel_only[0] = 0;
+ data->access_one_channel_only[1] = 0;
+ data->access_one_channel_only[2] = 0;
+ data->access_one_channel_only[3] = 0;
+ data->cursor_width_pixels[0] = bw_int_to_fixed(0);
+ data->cursor_width_pixels[1] = bw_int_to_fixed(0);
+ data->cursor_width_pixels[2] = bw_int_to_fixed(0);
+ data->cursor_width_pixels[3] = bw_int_to_fixed(0);
+ /* graphics surface parameters from spreadsheet*/
+ fbc_enabled = false;
+ lpt_enabled = false;
+ for (i = 4; i <= maximum_number_of_surfaces - 3; i++) {
+ if (i < data->number_of_displays + 4) {
+ if (i == 4 && data->d0_underlay_mode == bw_def_underlay_only) {
+ data->enable[i] = 0;
+ data->use_alpha[i] = 0;
+ }
+ else if (i == 4 && data->d0_underlay_mode == bw_def_blend) {
+ data->enable[i] = 1;
+ data->use_alpha[i] = 1;
+ }
+ else if (i == 4) {
+ data->enable[i] = 1;
+ data->use_alpha[i] = 0;
+ }
+ else if (i == 5 && data->d1_underlay_mode == bw_def_underlay_only) {
+ data->enable[i] = 0;
+ data->use_alpha[i] = 0;
+ }
+ else if (i == 5 && data->d1_underlay_mode == bw_def_blend) {
+ data->enable[i] = 1;
+ data->use_alpha[i] = 1;
+ }
+ else {
+ data->enable[i] = 1;
+ data->use_alpha[i] = 0;
+ }
+ }
+ else {
+ data->enable[i] = 0;
+ data->use_alpha[i] = 0;
+ }
+ data->scatter_gather_enable_for_pipe[i] = vbios->scatter_gather_enable;
+ surface_type[i] = bw_def_graphics;
+ data->lb_size_per_component[i] = dceip->lb_size_per_component444;
+ if (data->graphics_tiling_mode == bw_def_array_linear_general || data->graphics_tiling_mode == bw_def_array_linear_aligned) {
+ tiling_mode[i] = bw_def_linear;
+ }
+ else {
+ tiling_mode[i] = bw_def_tiled;
+ }
+ data->lb_bpc[i] = data->graphics_lb_bpc;
+ if ((data->fbc_en[i] == 1 && (dceip->argb_compression_support || data->d0_underlay_mode != bw_def_blended))) {
+ data->compression_rate[i] = bw_int_to_fixed(vbios->average_compression_rate);
+ data->access_one_channel_only[i] = data->lpt_en[i];
+ }
+ else {
+ data->compression_rate[i] = bw_int_to_fixed(1);
+ data->access_one_channel_only[i] = 0;
+ }
+ if (data->fbc_en[i] == 1) {
+ fbc_enabled = true;
+ if (data->lpt_en[i] == 1) {
+ lpt_enabled = true;
+ }
+ }
+ data->cursor_width_pixels[i] = bw_int_to_fixed(vbios->cursor_width);
+ }
+ /* display_write_back420*/
+ data->scatter_gather_enable_for_pipe[maximum_number_of_surfaces - 2] = 0;
+ data->scatter_gather_enable_for_pipe[maximum_number_of_surfaces - 1] = 0;
+ if (data->d1_display_write_back_dwb_enable == 1) {
+ data->enable[maximum_number_of_surfaces - 2] = 1;
+ data->enable[maximum_number_of_surfaces - 1] = 1;
+ }
+ else {
+ data->enable[maximum_number_of_surfaces - 2] = 0;
+ data->enable[maximum_number_of_surfaces - 1] = 0;
+ }
+ surface_type[maximum_number_of_surfaces - 2] = bw_def_display_write_back420_luma;
+ surface_type[maximum_number_of_surfaces - 1] = bw_def_display_write_back420_chroma;
+ data->lb_size_per_component[maximum_number_of_surfaces - 2] = dceip->underlay420_luma_lb_size_per_component;
+ data->lb_size_per_component[maximum_number_of_surfaces - 1] = dceip->underlay420_chroma_lb_size_per_component;
+ data->bytes_per_pixel[maximum_number_of_surfaces - 2] = 1;
+ data->bytes_per_pixel[maximum_number_of_surfaces - 1] = 2;
+ data->interlace_mode[maximum_number_of_surfaces - 2] = data->interlace_mode[5];
+ data->interlace_mode[maximum_number_of_surfaces - 1] = data->interlace_mode[5];
+ data->h_taps[maximum_number_of_surfaces - 2] = bw_int_to_fixed(1);
+ data->h_taps[maximum_number_of_surfaces - 1] = bw_int_to_fixed(1);
+ data->v_taps[maximum_number_of_surfaces - 2] = bw_int_to_fixed(1);
+ data->v_taps[maximum_number_of_surfaces - 1] = bw_int_to_fixed(1);
+ data->rotation_angle[maximum_number_of_surfaces - 2] = bw_int_to_fixed(0);
+ data->rotation_angle[maximum_number_of_surfaces - 1] = bw_int_to_fixed(0);
+ tiling_mode[maximum_number_of_surfaces - 2] = bw_def_linear;
+ tiling_mode[maximum_number_of_surfaces - 1] = bw_def_linear;
+ data->lb_bpc[maximum_number_of_surfaces - 2] = 8;
+ data->lb_bpc[maximum_number_of_surfaces - 1] = 8;
+ data->compression_rate[maximum_number_of_surfaces - 2] = bw_int_to_fixed(1);
+ data->compression_rate[maximum_number_of_surfaces - 1] = bw_int_to_fixed(1);
+ data->access_one_channel_only[maximum_number_of_surfaces - 2] = 0;
+ data->access_one_channel_only[maximum_number_of_surfaces - 1] = 0;
+ /*assume display pipe1 has dwb enabled*/
+ data->h_total[maximum_number_of_surfaces - 2] = data->h_total[5];
+ data->h_total[maximum_number_of_surfaces - 1] = data->h_total[5];
+ data->v_total[maximum_number_of_surfaces - 2] = data->v_total[5];
+ data->v_total[maximum_number_of_surfaces - 1] = data->v_total[5];
+ data->pixel_rate[maximum_number_of_surfaces - 2] = data->pixel_rate[5];
+ data->pixel_rate[maximum_number_of_surfaces - 1] = data->pixel_rate[5];
+ data->src_width[maximum_number_of_surfaces - 2] = data->src_width[5];
+ data->src_width[maximum_number_of_surfaces - 1] = data->src_width[5];
+ data->src_height[maximum_number_of_surfaces - 2] = data->src_height[5];
+ data->src_height[maximum_number_of_surfaces - 1] = data->src_height[5];
+ data->pitch_in_pixels[maximum_number_of_surfaces - 2] = data->src_width[5];
+ data->pitch_in_pixels[maximum_number_of_surfaces - 1] = data->src_width[5];
+ data->h_scale_ratio[maximum_number_of_surfaces - 2] = bw_int_to_fixed(1);
+ data->h_scale_ratio[maximum_number_of_surfaces - 1] = bw_int_to_fixed(1);
+ data->v_scale_ratio[maximum_number_of_surfaces - 2] = bw_int_to_fixed(1);
+ data->v_scale_ratio[maximum_number_of_surfaces - 1] = bw_int_to_fixed(1);
+ data->stereo_mode[maximum_number_of_surfaces - 2] = bw_def_mono;
+ data->stereo_mode[maximum_number_of_surfaces - 1] = bw_def_mono;
+ data->cursor_width_pixels[maximum_number_of_surfaces - 2] = bw_int_to_fixed(0);
+ data->cursor_width_pixels[maximum_number_of_surfaces - 1] = bw_int_to_fixed(0);
+ data->use_alpha[maximum_number_of_surfaces - 2] = 0;
+ data->use_alpha[maximum_number_of_surfaces - 1] = 0;
+ /*mode check calculations:*/
+ /* mode within dce ip capabilities*/
+ /* fbc*/
+ /* hsr*/
+ /* vsr*/
+ /* lb size*/
+ /*effective scaling source and ratios:*/
+ /*for graphics, non-stereo, non-interlace surfaces when the size of the source and destination are the same, only one tap is used*/
+ /*420 chroma has half the width, height, horizontal and vertical scaling ratios than luma*/
+ /*rotating a graphic or underlay surface swaps the width, height, horizontal and vertical scaling ratios*/
+ /*in top-bottom stereo mode there is 2:1 vertical downscaling for each eye*/
+ /*in side-by-side stereo mode there is 2:1 horizontal downscaling for each eye*/
+ /*in interlace mode there is 2:1 vertical downscaling for each field*/
+ /*in panning or bezel adjustment mode the source width has an extra 128 pixels*/
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (bw_equ(data->h_scale_ratio[i], bw_int_to_fixed(1)) && bw_equ(data->v_scale_ratio[i], bw_int_to_fixed(1)) && surface_type[i] == bw_def_graphics && data->stereo_mode[i] == bw_def_mono && data->interlace_mode[i] == 0) {
+ data->h_taps[i] = bw_int_to_fixed(1);
+ data->v_taps[i] = bw_int_to_fixed(1);
+ }
+ if (surface_type[i] == bw_def_display_write_back420_chroma || surface_type[i] == bw_def_underlay420_chroma) {
+ data->pitch_in_pixels_after_surface_type[i] = bw_div(data->pitch_in_pixels[i], bw_int_to_fixed(2));
+ data->src_width_after_surface_type = bw_div(data->src_width[i], bw_int_to_fixed(2));
+ data->src_height_after_surface_type = bw_div(data->src_height[i], bw_int_to_fixed(2));
+ data->hsr_after_surface_type = bw_div(data->h_scale_ratio[i], bw_int_to_fixed(2));
+ data->vsr_after_surface_type = bw_div(data->v_scale_ratio[i], bw_int_to_fixed(2));
+ }
+ else {
+ data->pitch_in_pixels_after_surface_type[i] = data->pitch_in_pixels[i];
+ data->src_width_after_surface_type = data->src_width[i];
+ data->src_height_after_surface_type = data->src_height[i];
+ data->hsr_after_surface_type = data->h_scale_ratio[i];
+ data->vsr_after_surface_type = data->v_scale_ratio[i];
+ }
+ if ((bw_equ(data->rotation_angle[i], bw_int_to_fixed(90)) || bw_equ(data->rotation_angle[i], bw_int_to_fixed(270))) && surface_type[i] != bw_def_display_write_back420_luma && surface_type[i] != bw_def_display_write_back420_chroma) {
+ data->src_width_after_rotation = data->src_height_after_surface_type;
+ data->src_height_after_rotation = data->src_width_after_surface_type;
+ data->hsr_after_rotation = data->vsr_after_surface_type;
+ data->vsr_after_rotation = data->hsr_after_surface_type;
+ }
+ else {
+ data->src_width_after_rotation = data->src_width_after_surface_type;
+ data->src_height_after_rotation = data->src_height_after_surface_type;
+ data->hsr_after_rotation = data->hsr_after_surface_type;
+ data->vsr_after_rotation = data->vsr_after_surface_type;
+ }
+ switch (data->stereo_mode[i]) {
+ case bw_def_top_bottom:
+ data->source_width_pixels[i] = data->src_width_after_rotation;
+ data->source_height_pixels = bw_mul(bw_int_to_fixed(2), data->src_height_after_rotation);
+ data->hsr_after_stereo = data->hsr_after_rotation;
+ data->vsr_after_stereo = bw_mul(bw_int_to_fixed(1), data->vsr_after_rotation);
+ break;
+ case bw_def_side_by_side:
+ data->source_width_pixels[i] = bw_mul(bw_int_to_fixed(2), data->src_width_after_rotation);
+ data->source_height_pixels = data->src_height_after_rotation;
+ data->hsr_after_stereo = bw_mul(bw_int_to_fixed(1), data->hsr_after_rotation);
+ data->vsr_after_stereo = data->vsr_after_rotation;
+ break;
+ default:
+ data->source_width_pixels[i] = data->src_width_after_rotation;
+ data->source_height_pixels = data->src_height_after_rotation;
+ data->hsr_after_stereo = data->hsr_after_rotation;
+ data->vsr_after_stereo = data->vsr_after_rotation;
+ break;
+ }
+ data->hsr[i] = data->hsr_after_stereo;
+ if (data->interlace_mode[i]) {
+ data->vsr[i] = bw_mul(data->vsr_after_stereo, bw_int_to_fixed(2));
+ }
+ else {
+ data->vsr[i] = data->vsr_after_stereo;
+ }
+ if (data->panning_and_bezel_adjustment != bw_def_none) {
+ data->source_width_rounded_up_to_chunks[i] = bw_add(bw_floor2(bw_sub(data->source_width_pixels[i], bw_int_to_fixed(1)), bw_int_to_fixed(128)), bw_int_to_fixed(256));
+ }
+ else {
+ data->source_width_rounded_up_to_chunks[i] = bw_ceil2(data->source_width_pixels[i], bw_int_to_fixed(128));
+ }
+ data->source_height_rounded_up_to_chunks[i] = data->source_height_pixels;
+ }
+ }
+ /*mode support checks:*/
+ /*the number of graphics and underlay pipes is limited by the ip support*/
+ /*maximum horizontal and vertical scale ratio is 4, and should not exceed the number of taps*/
+ /*for downscaling with the pre-downscaler, the horizontal scale ratio must be more than the ceiling of one quarter of the number of taps*/
+ /*the pre-downscaler reduces the line buffer source by the horizontal scale ratio*/
+ /*the number of lines in the line buffer has to exceed the number of vertical taps*/
+ /*the size of the line in the line buffer is the product of the source width and the bits per component, rounded up to a multiple of 48*/
+ /*the size of the line in the line buffer in the case of 10 bit per component is the product of the source width rounded up to multiple of 8 and 30.023438 / 3, rounded up to a multiple of 48*/
+ /*the size of the line in the line buffer in the case of 8 bit per component is the product of the source width rounded up to multiple of 8 and 30.023438 / 3, rounded up to a multiple of 48*/
+ /*frame buffer compression is not supported with stereo mode, rotation, or non- 888 formats*/
+ /*rotation is not supported with linear of stereo modes*/
+ if (dceip->number_of_graphics_pipes >= data->number_of_displays && dceip->number_of_underlay_pipes >= data->number_of_underlay_surfaces && !(dceip->display_write_back_supported == 0 && data->d1_display_write_back_dwb_enable == 1)) {
+ pipe_check = bw_def_ok;
+ }
+ else {
+ pipe_check = bw_def_notok;
+ }
+ hsr_check = bw_def_ok;
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (bw_neq(data->hsr[i], bw_int_to_fixed(1))) {
+ if (bw_mtn(data->hsr[i], bw_int_to_fixed(4))) {
+ hsr_check = bw_def_hsr_mtn_4;
+ }
+ else {
+ if (bw_mtn(data->hsr[i], data->h_taps[i])) {
+ hsr_check = bw_def_hsr_mtn_h_taps;
+ }
+ else {
+ if (dceip->pre_downscaler_enabled == 1 && bw_mtn(data->hsr[i], bw_int_to_fixed(1)) && bw_leq(data->hsr[i], bw_ceil2(bw_div(data->h_taps[i], bw_int_to_fixed(4)), bw_int_to_fixed(1)))) {
+ hsr_check = bw_def_ceiling__h_taps_div_4___meq_hsr;
+ }
+ }
+ }
+ }
+ }
+ }
+ vsr_check = bw_def_ok;
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (bw_neq(data->vsr[i], bw_int_to_fixed(1))) {
+ if (bw_mtn(data->vsr[i], bw_int_to_fixed(4))) {
+ vsr_check = bw_def_vsr_mtn_4;
+ }
+ else {
+ if (bw_mtn(data->vsr[i], data->v_taps[i])) {
+ vsr_check = bw_def_vsr_mtn_v_taps;
+ }
+ }
+ }
+ }
+ }
+ lb_size_check = bw_def_ok;
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if ((dceip->pre_downscaler_enabled && bw_mtn(data->hsr[i], bw_int_to_fixed(1)))) {
+ data->source_width_in_lb = bw_div(data->source_width_pixels[i], data->hsr[i]);
+ }
+ else {
+ data->source_width_in_lb = data->source_width_pixels[i];
+ }
+ switch (data->lb_bpc[i]) {
+ case 8:
+ data->lb_line_pitch = bw_ceil2(bw_mul(bw_div(bw_frc_to_fixed(2401171875ul, 100000000), bw_int_to_fixed(3)), bw_ceil2(data->source_width_in_lb, bw_int_to_fixed(8))), bw_int_to_fixed(48));
+ break;
+ case 10:
+ data->lb_line_pitch = bw_ceil2(bw_mul(bw_div(bw_frc_to_fixed(300234375, 10000000), bw_int_to_fixed(3)), bw_ceil2(data->source_width_in_lb, bw_int_to_fixed(8))), bw_int_to_fixed(48));
+ break;
+ default:
+ data->lb_line_pitch = bw_ceil2(bw_mul(bw_int_to_fixed(data->lb_bpc[i]), data->source_width_in_lb), bw_int_to_fixed(48));
+ break;
+ }
+ data->lb_partitions[i] = bw_floor2(bw_div(data->lb_size_per_component[i], data->lb_line_pitch), bw_int_to_fixed(1));
+ /*clamp the partitions to the maxium number supported by the lb*/
+ if ((surface_type[i] != bw_def_graphics || dceip->graphics_lb_nodownscaling_multi_line_prefetching == 1)) {
+ data->lb_partitions_max[i] = bw_int_to_fixed(10);
+ }
+ else {
+ data->lb_partitions_max[i] = bw_int_to_fixed(7);
+ }
+ data->lb_partitions[i] = bw_min2(data->lb_partitions_max[i], data->lb_partitions[i]);
+ if (bw_mtn(bw_add(data->v_taps[i], bw_int_to_fixed(1)), data->lb_partitions[i])) {
+ lb_size_check = bw_def_notok;
+ }
+ }
+ }
+ fbc_check = bw_def_ok;
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i] && data->fbc_en[i] == 1 && (bw_equ(data->rotation_angle[i], bw_int_to_fixed(90)) || bw_equ(data->rotation_angle[i], bw_int_to_fixed(270)) || data->stereo_mode[i] != bw_def_mono || data->bytes_per_pixel[i] != 4)) {
+ fbc_check = bw_def_invalid_rotation_or_bpp_or_stereo;
+ }
+ }
+ rotation_check = bw_def_ok;
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if ((bw_equ(data->rotation_angle[i], bw_int_to_fixed(90)) || bw_equ(data->rotation_angle[i], bw_int_to_fixed(270))) && (tiling_mode[i] == bw_def_linear || data->stereo_mode[i] != bw_def_mono)) {
+ rotation_check = bw_def_invalid_linear_or_stereo_mode;
+ }
+ }
+ }
+ if (pipe_check == bw_def_ok && hsr_check == bw_def_ok && vsr_check == bw_def_ok && lb_size_check == bw_def_ok && fbc_check == bw_def_ok && rotation_check == bw_def_ok) {
+ mode_check = bw_def_ok;
+ }
+ else {
+ mode_check = bw_def_notok;
+ }
+ /*number of memory channels for write-back client*/
+ data->number_of_dram_wrchannels = vbios->number_of_dram_channels;
+ data->number_of_dram_channels = vbios->number_of_dram_channels;
+ /*modify number of memory channels if lpt mode is enabled*/
+ /* low power tiling mode register*/
+ /* 0 = use channel 0*/
+ /* 1 = use channel 0 and 1*/
+ /* 2 = use channel 0,1,2,3*/
+ if ((fbc_enabled == 1 && lpt_enabled == 1)) {
+ if (vbios->memory_type == bw_def_hbm)
+ data->dram_efficiency = bw_frc_to_fixed(5, 10);
+ else
+ data->dram_efficiency = bw_int_to_fixed(1);
+
+
+ if (dceip->low_power_tiling_mode == 0) {
+ data->number_of_dram_channels = 1;
+ }
+ else if (dceip->low_power_tiling_mode == 1) {
+ data->number_of_dram_channels = 2;
+ }
+ else if (dceip->low_power_tiling_mode == 2) {
+ data->number_of_dram_channels = 4;
+ }
+ else {
+ data->number_of_dram_channels = 1;
+ }
+ }
+ else {
+ if (vbios->memory_type == bw_def_hbm)
+ data->dram_efficiency = bw_frc_to_fixed(5, 10);
+ else
+ data->dram_efficiency = bw_frc_to_fixed(8, 10);
+ }
+ /*memory request size and latency hiding:*/
+ /*request size is normally 64 byte, 2-line interleaved, with full latency hiding*/
+ /*the display write-back requests are single line*/
+ /*for tiled graphics surfaces, or undelay surfaces with width higher than the maximum size for full efficiency, request size is 32 byte in 8 and 16 bpp or if the rotation is orthogonal to the tiling grain. only half is useful of the bytes in the request size in 8 bpp or in 32 bpp if the rotation is orthogonal to the tiling grain.*/
+ /*for undelay surfaces with width lower than the maximum size for full efficiency, requests are 4-line interleaved in 16bpp if the rotation is parallel to the tiling grain, and 8-line interleaved with 4-line latency hiding in 8bpp or if the rotation is orthogonal to the tiling grain.*/
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if ((bw_equ(data->rotation_angle[i], bw_int_to_fixed(90)) || bw_equ(data->rotation_angle[i], bw_int_to_fixed(270)))) {
+ if ((i < 4)) {
+ /*underlay portrait tiling mode is not supported*/
+ data->orthogonal_rotation[i] = 1;
+ }
+ else {
+ /*graphics portrait tiling mode*/
+ if (data->graphics_micro_tile_mode == bw_def_rotated_micro_tiling) {
+ data->orthogonal_rotation[i] = 0;
+ }
+ else {
+ data->orthogonal_rotation[i] = 1;
+ }
+ }
+ }
+ else {
+ if ((i < 4)) {
+ /*underlay landscape tiling mode is only supported*/
+ if (data->underlay_micro_tile_mode == bw_def_display_micro_tiling) {
+ data->orthogonal_rotation[i] = 0;
+ }
+ else {
+ data->orthogonal_rotation[i] = 1;
+ }
+ }
+ else {
+ /*graphics landscape tiling mode*/
+ if (data->graphics_micro_tile_mode == bw_def_display_micro_tiling) {
+ data->orthogonal_rotation[i] = 0;
+ }
+ else {
+ data->orthogonal_rotation[i] = 1;
+ }
+ }
+ }
+ if (bw_equ(data->rotation_angle[i], bw_int_to_fixed(90)) || bw_equ(data->rotation_angle[i], bw_int_to_fixed(270))) {
+ data->underlay_maximum_source_efficient_for_tiling = dceip->underlay_maximum_height_efficient_for_tiling;
+ }
+ else {
+ data->underlay_maximum_source_efficient_for_tiling = dceip->underlay_maximum_width_efficient_for_tiling;
+ }
+ if (surface_type[i] == bw_def_display_write_back420_luma || surface_type[i] == bw_def_display_write_back420_chroma) {
+ data->bytes_per_request[i] = bw_int_to_fixed(64);
+ data->useful_bytes_per_request[i] = bw_int_to_fixed(64);
+ data->lines_interleaved_in_mem_access[i] = bw_int_to_fixed(1);
+ data->latency_hiding_lines[i] = bw_int_to_fixed(1);
+ }
+ else if (tiling_mode[i] == bw_def_linear) {
+ data->bytes_per_request[i] = bw_int_to_fixed(64);
+ data->useful_bytes_per_request[i] = bw_int_to_fixed(64);
+ data->lines_interleaved_in_mem_access[i] = bw_int_to_fixed(2);
+ data->latency_hiding_lines[i] = bw_int_to_fixed(2);
+ }
+ else {
+ if (surface_type[i] == bw_def_graphics || (bw_mtn(data->source_width_rounded_up_to_chunks[i], bw_ceil2(data->underlay_maximum_source_efficient_for_tiling, bw_int_to_fixed(256))))) {
+ switch (data->bytes_per_pixel[i]) {
+ case 8:
+ data->lines_interleaved_in_mem_access[i] = bw_int_to_fixed(2);
+ data->latency_hiding_lines[i] = bw_int_to_fixed(2);
+ if (data->orthogonal_rotation[i]) {
+ data->bytes_per_request[i] = bw_int_to_fixed(32);
+ data->useful_bytes_per_request[i] = bw_int_to_fixed(32);
+ }
+ else {
+ data->bytes_per_request[i] = bw_int_to_fixed(64);
+ data->useful_bytes_per_request[i] = bw_int_to_fixed(64);
+ }
+ break;
+ case 4:
+ if (data->orthogonal_rotation[i]) {
+ data->lines_interleaved_in_mem_access[i] = bw_int_to_fixed(2);
+ data->latency_hiding_lines[i] = bw_int_to_fixed(2);
+ data->bytes_per_request[i] = bw_int_to_fixed(32);
+ data->useful_bytes_per_request[i] = bw_int_to_fixed(16);
+ }
+ else {
+ data->lines_interleaved_in_mem_access[i] = bw_int_to_fixed(2);
+ data->latency_hiding_lines[i] = bw_int_to_fixed(2);
+ data->bytes_per_request[i] = bw_int_to_fixed(64);
+ data->useful_bytes_per_request[i] = bw_int_to_fixed(64);
+ }
+ break;
+ case 2:
+ data->lines_interleaved_in_mem_access[i] = bw_int_to_fixed(2);
+ data->latency_hiding_lines[i] = bw_int_to_fixed(2);
+ data->bytes_per_request[i] = bw_int_to_fixed(32);
+ data->useful_bytes_per_request[i] = bw_int_to_fixed(32);
+ break;
+ default:
+ data->lines_interleaved_in_mem_access[i] = bw_int_to_fixed(2);
+ data->latency_hiding_lines[i] = bw_int_to_fixed(2);
+ data->bytes_per_request[i] = bw_int_to_fixed(32);
+ data->useful_bytes_per_request[i] = bw_int_to_fixed(16);
+ break;
+ }
+ }
+ else {
+ data->bytes_per_request[i] = bw_int_to_fixed(64);
+ data->useful_bytes_per_request[i] = bw_int_to_fixed(64);
+ if (data->orthogonal_rotation[i]) {
+ data->lines_interleaved_in_mem_access[i] = bw_int_to_fixed(8);
+ data->latency_hiding_lines[i] = bw_int_to_fixed(4);
+ }
+ else {
+ switch (data->bytes_per_pixel[i]) {
+ case 4:
+ data->lines_interleaved_in_mem_access[i] = bw_int_to_fixed(2);
+ data->latency_hiding_lines[i] = bw_int_to_fixed(2);
+ break;
+ case 2:
+ data->lines_interleaved_in_mem_access[i] = bw_int_to_fixed(4);
+ data->latency_hiding_lines[i] = bw_int_to_fixed(4);
+ break;
+ default:
+ data->lines_interleaved_in_mem_access[i] = bw_int_to_fixed(8);
+ data->latency_hiding_lines[i] = bw_int_to_fixed(4);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ /*requested peak bandwidth:*/
+ /*the peak request-per-second bandwidth is the product of the maximum source lines in per line out in the beginning*/
+ /*and in the middle of the frame, the ratio of the source width to the line time, the ratio of line interleaving*/
+ /*in memory to lines of latency hiding, and the ratio of bytes per pixel to useful bytes per request.*/
+ /**/
+ /*if the dmif data buffer size holds more than vta_ps worth of source lines, then only vsr is used.*/
+ /*the peak bandwidth is the peak request-per-second bandwidth times the request size.*/
+ /**/
+ /*the line buffer lines in per line out in the beginning of the frame is the vertical filter initialization value*/
+ /*rounded up to even and divided by the line times for initialization, which is normally three.*/
+ /*the line buffer lines in per line out in the middle of the frame is at least one, or the vertical scale ratio,*/
+ /*rounded up to line pairs if not doing line buffer prefetching.*/
+ /**/
+ /*the non-prefetching rounding up of the vertical scale ratio can also be done up to 1 (for a 0,2 pattern), 4/3 (for a 0,2,2 pattern),*/
+ /*6/4 (for a 0,2,2,2 pattern), or 3 (for a 2,4 pattern).*/
+ /**/
+ /*the scaler vertical filter initialization value is calculated by the hardware as the floor of the average of the*/
+ /*vertical scale ratio and the number of vertical taps increased by one. add one more for possible odd line*/
+ /*panning/bezel adjustment mode.*/
+ /**/
+ /*for the bottom interlace field an extra 50% of the vertical scale ratio is considered for this calculation.*/
+ /*in top-bottom stereo mode software has to set the filter initialization value manually and explicitly limit it to 4.*/
+ /*furthermore, there is only one line time for initialization.*/
+ /**/
+ /*line buffer prefetching is done when the number of lines in the line buffer exceeds the number of taps plus*/
+ /*the ceiling of the vertical scale ratio.*/
+ /**/
+ /*multi-line buffer prefetching is only done in the graphics pipe when the scaler is disabled or when upscaling and the vsr <= 0.8.'*/
+ /**/
+ /*the horizontal blank and chunk granularity factor is indirectly used indicate the interval of time required to transfer the source pixels.*/
+ /*the denominator of this term represents the total number of destination output pixels required for the input source pixels.*/
+ /*it applies when the lines in per line out is not 2 or 4. it does not apply when there is a line buffer between the scl and blnd.*/
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ data->v_filter_init[i] = bw_floor2(bw_div((bw_add(bw_add(bw_add(bw_int_to_fixed(1), data->v_taps[i]), data->vsr[i]), bw_mul(bw_mul(bw_int_to_fixed(data->interlace_mode[i]), bw_frc_to_fixed(5, 10)), data->vsr[i]))), bw_int_to_fixed(2)), bw_int_to_fixed(1));
+ if (data->panning_and_bezel_adjustment == bw_def_any_lines) {
+ data->v_filter_init[i] = bw_add(data->v_filter_init[i], bw_int_to_fixed(1));
+ }
+ if (data->stereo_mode[i] == bw_def_top_bottom) {
+ data->v_filter_init[i] = bw_min2(data->v_filter_init[i], bw_int_to_fixed(4));
+ }
+ if (data->stereo_mode[i] == bw_def_top_bottom) {
+ data->num_lines_at_frame_start = bw_int_to_fixed(1);
+ }
+ else {
+ data->num_lines_at_frame_start = bw_int_to_fixed(3);
+ }
+ if ((bw_mtn(data->vsr[i], bw_int_to_fixed(1)) && surface_type[i] == bw_def_graphics) || data->panning_and_bezel_adjustment == bw_def_any_lines) {
+ data->line_buffer_prefetch[i] = 0;
+ }
+ else if ((((dceip->underlay_downscale_prefetch_enabled == 1 && surface_type[i] != bw_def_graphics) || surface_type[i] == bw_def_graphics) && (bw_mtn(data->lb_partitions[i], bw_add(data->v_taps[i], bw_ceil2(data->vsr[i], bw_int_to_fixed(1))))))) {
+ data->line_buffer_prefetch[i] = 1;
+ }
+ else {
+ data->line_buffer_prefetch[i] = 0;
+ }
+ data->lb_lines_in_per_line_out_in_beginning_of_frame[i] = bw_div(bw_ceil2(data->v_filter_init[i], bw_int_to_fixed(dceip->lines_interleaved_into_lb)), data->num_lines_at_frame_start);
+ if (data->line_buffer_prefetch[i] == 1) {
+ data->lb_lines_in_per_line_out_in_middle_of_frame[i] = bw_max2(bw_int_to_fixed(1), data->vsr[i]);
+ }
+ else if (bw_leq(data->vsr[i], bw_int_to_fixed(1))) {
+ data->lb_lines_in_per_line_out_in_middle_of_frame[i] = bw_int_to_fixed(1);
+ } else if (bw_leq(data->vsr[i],
+ bw_frc_to_fixed(4, 3))) {
+ data->lb_lines_in_per_line_out_in_middle_of_frame[i] = bw_div(bw_int_to_fixed(4), bw_int_to_fixed(3));
+ } else if (bw_leq(data->vsr[i],
+ bw_frc_to_fixed(6, 4))) {
+ data->lb_lines_in_per_line_out_in_middle_of_frame[i] = bw_div(bw_int_to_fixed(6), bw_int_to_fixed(4));
+ }
+ else if (bw_leq(data->vsr[i], bw_int_to_fixed(2))) {
+ data->lb_lines_in_per_line_out_in_middle_of_frame[i] = bw_int_to_fixed(2);
+ }
+ else if (bw_leq(data->vsr[i], bw_int_to_fixed(3))) {
+ data->lb_lines_in_per_line_out_in_middle_of_frame[i] = bw_int_to_fixed(3);
+ }
+ else {
+ data->lb_lines_in_per_line_out_in_middle_of_frame[i] = bw_int_to_fixed(4);
+ }
+ if (data->line_buffer_prefetch[i] == 1 || bw_equ(data->lb_lines_in_per_line_out_in_middle_of_frame[i], bw_int_to_fixed(2)) || bw_equ(data->lb_lines_in_per_line_out_in_middle_of_frame[i], bw_int_to_fixed(4))) {
+ data->horizontal_blank_and_chunk_granularity_factor[i] = bw_int_to_fixed(1);
+ }
+ else {
+ data->horizontal_blank_and_chunk_granularity_factor[i] = bw_div(data->h_total[i], (bw_div((bw_add(data->h_total[i], bw_div((bw_sub(data->source_width_pixels[i], bw_int_to_fixed(dceip->chunk_width))), data->hsr[i]))), bw_int_to_fixed(2))));
+ }
+ data->request_bandwidth[i] = bw_div(bw_mul(bw_div(bw_mul(bw_div(bw_mul(bw_max2(data->lb_lines_in_per_line_out_in_beginning_of_frame[i], data->lb_lines_in_per_line_out_in_middle_of_frame[i]), data->source_width_rounded_up_to_chunks[i]), (bw_div(data->h_total[i], data->pixel_rate[i]))), bw_int_to_fixed(data->bytes_per_pixel[i])), data->useful_bytes_per_request[i]), data->lines_interleaved_in_mem_access[i]), data->latency_hiding_lines[i]);
+ data->display_bandwidth[i] = bw_mul(data->request_bandwidth[i], data->bytes_per_request[i]);
+ }
+ }
+ /*outstanding chunk request limit*/
+ /*if underlay buffer sharing is enabled, the data buffer size for underlay in 422 or 444 is the sum of the luma and chroma data buffer sizes.*/
+ /*underlay buffer sharing mode is only permitted in orthogonal rotation modes.*/
+ /**/
+ /*if there is only one display enabled, the dmif data buffer size for the graphics surface is increased by concatenating the adjacent buffers.*/
+ /**/
+ /*the memory chunk size in bytes is 1024 for the writeback, and 256 times the memory line interleaving and the bytes per pixel for graphics*/
+ /*and underlay.*/
+ /**/
+ /*the pipe chunk size uses 2 for line interleaving, except for the write back, in which case it is 1.*/
+ /*graphics and underlay data buffer size is adjusted (limited) using the outstanding chunk request limit if there is more than one*/
+ /*display enabled or if the dmif request buffer is not large enough for the total data buffer size.*/
+ /*the outstanding chunk request limit is the ceiling of the adjusted data buffer size divided by the chunk size in bytes*/
+ /*the adjusted data buffer size is the product of the display bandwidth and the minimum effective data buffer size in terms of time,*/
+ /*rounded up to the chunk size in bytes, but should not exceed the original data buffer size*/
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if ((dceip->dmif_pipe_en_fbc_chunk_tracker + 3 == i && fbc_enabled == 0 && tiling_mode[i] != bw_def_linear)) {
+ data->max_chunks_non_fbc_mode[i] = 128 - dmif_chunk_buff_margin;
+ }
+ else {
+ data->max_chunks_non_fbc_mode[i] = 16 - dmif_chunk_buff_margin;
+ }
+ }
+ if (data->fbc_en[i] == 1) {
+ max_chunks_fbc_mode = 128 - dmif_chunk_buff_margin;
+ }
+ }
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ switch (surface_type[i]) {
+ case bw_def_display_write_back420_luma:
+ data->data_buffer_size[i] = bw_int_to_fixed(dceip->display_write_back420_luma_mcifwr_buffer_size);
+ break;
+ case bw_def_display_write_back420_chroma:
+ data->data_buffer_size[i] = bw_int_to_fixed(dceip->display_write_back420_chroma_mcifwr_buffer_size);
+ break;
+ case bw_def_underlay420_luma:
+ data->data_buffer_size[i] = bw_int_to_fixed(dceip->underlay_luma_dmif_size);
+ break;
+ case bw_def_underlay420_chroma:
+ data->data_buffer_size[i] = bw_div(bw_int_to_fixed(dceip->underlay_chroma_dmif_size), bw_int_to_fixed(2));
+ break;
+ case bw_def_underlay422:case bw_def_underlay444:
+ if (data->orthogonal_rotation[i] == 0) {
+ data->data_buffer_size[i] = bw_int_to_fixed(dceip->underlay_luma_dmif_size);
+ }
+ else {
+ data->data_buffer_size[i] = bw_add(bw_int_to_fixed(dceip->underlay_luma_dmif_size), bw_int_to_fixed(dceip->underlay_chroma_dmif_size));
+ }
+ break;
+ default:
+ if (data->fbc_en[i] == 1) {
+ /*data_buffer_size(i) = max_dmif_buffer_allocated * graphics_dmif_size*/
+ if (data->number_of_displays == 1) {
+ data->data_buffer_size[i] = bw_min2(bw_mul(bw_mul(bw_int_to_fixed(max_chunks_fbc_mode), bw_int_to_fixed(pixels_per_chunk)), bw_int_to_fixed(data->bytes_per_pixel[i])), bw_mul(bw_int_to_fixed(dceip->max_dmif_buffer_allocated), bw_int_to_fixed(dceip->graphics_dmif_size)));
+ }
+ else {
+ data->data_buffer_size[i] = bw_min2(bw_mul(bw_mul(bw_int_to_fixed(max_chunks_fbc_mode), bw_int_to_fixed(pixels_per_chunk)), bw_int_to_fixed(data->bytes_per_pixel[i])), bw_int_to_fixed(dceip->graphics_dmif_size));
+ }
+ }
+ else {
+ /*the effective dmif buffer size in non-fbc mode is limited by the 16 entry chunk tracker*/
+ if (data->number_of_displays == 1) {
+ data->data_buffer_size[i] = bw_min2(bw_mul(bw_mul(bw_int_to_fixed(data->max_chunks_non_fbc_mode[i]), bw_int_to_fixed(pixels_per_chunk)), bw_int_to_fixed(data->bytes_per_pixel[i])), bw_mul(bw_int_to_fixed(dceip->max_dmif_buffer_allocated), bw_int_to_fixed(dceip->graphics_dmif_size)));
+ }
+ else {
+ data->data_buffer_size[i] = bw_min2(bw_mul(bw_mul(bw_int_to_fixed(data->max_chunks_non_fbc_mode[i]), bw_int_to_fixed(pixels_per_chunk)), bw_int_to_fixed(data->bytes_per_pixel[i])), bw_int_to_fixed(dceip->graphics_dmif_size));
+ }
+ }
+ break;
+ }
+ if (surface_type[i] == bw_def_display_write_back420_luma || surface_type[i] == bw_def_display_write_back420_chroma) {
+ data->memory_chunk_size_in_bytes[i] = bw_int_to_fixed(1024);
+ data->pipe_chunk_size_in_bytes[i] = bw_int_to_fixed(1024);
+ }
+ else {
+ data->memory_chunk_size_in_bytes[i] = bw_mul(bw_mul(bw_int_to_fixed(dceip->chunk_width), data->lines_interleaved_in_mem_access[i]), bw_int_to_fixed(data->bytes_per_pixel[i]));
+ data->pipe_chunk_size_in_bytes[i] = bw_mul(bw_mul(bw_int_to_fixed(dceip->chunk_width), bw_int_to_fixed(dceip->lines_interleaved_into_lb)), bw_int_to_fixed(data->bytes_per_pixel[i]));
+ }
+ }
+ }
+ data->min_dmif_size_in_time = bw_int_to_fixed(9999);
+ data->min_mcifwr_size_in_time = bw_int_to_fixed(9999);
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (surface_type[i] != bw_def_display_write_back420_luma && surface_type[i] != bw_def_display_write_back420_chroma) {
+ if (bw_ltn(bw_div(bw_div(bw_mul(data->data_buffer_size[i], data->bytes_per_request[i]), data->useful_bytes_per_request[i]), data->display_bandwidth[i]), data->min_dmif_size_in_time)) {
+ data->min_dmif_size_in_time = bw_div(bw_div(bw_mul(data->data_buffer_size[i], data->bytes_per_request[i]), data->useful_bytes_per_request[i]), data->display_bandwidth[i]);
+ }
+ }
+ else {
+ if (bw_ltn(bw_div(bw_div(bw_mul(data->data_buffer_size[i], data->bytes_per_request[i]), data->useful_bytes_per_request[i]), data->display_bandwidth[i]), data->min_mcifwr_size_in_time)) {
+ data->min_mcifwr_size_in_time = bw_div(bw_div(bw_mul(data->data_buffer_size[i], data->bytes_per_request[i]), data->useful_bytes_per_request[i]), data->display_bandwidth[i]);
+ }
+ }
+ }
+ }
+ data->total_requests_for_dmif_size = bw_int_to_fixed(0);
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i] && surface_type[i] != bw_def_display_write_back420_luma && surface_type[i] != bw_def_display_write_back420_chroma) {
+ data->total_requests_for_dmif_size = bw_add(data->total_requests_for_dmif_size, bw_div(data->data_buffer_size[i], data->useful_bytes_per_request[i]));
+ }
+ }
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (surface_type[i] != bw_def_display_write_back420_luma && surface_type[i] != bw_def_display_write_back420_chroma && dceip->limit_excessive_outstanding_dmif_requests && (data->number_of_displays > 1 || bw_mtn(data->total_requests_for_dmif_size, dceip->dmif_request_buffer_size))) {
+ data->adjusted_data_buffer_size[i] = bw_min2(data->data_buffer_size[i], bw_ceil2(bw_mul(data->min_dmif_size_in_time, data->display_bandwidth[i]), data->memory_chunk_size_in_bytes[i]));
+ }
+ else {
+ data->adjusted_data_buffer_size[i] = data->data_buffer_size[i];
+ }
+ }
+ }
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (data->number_of_displays == 1 && data->number_of_underlay_surfaces == 0) {
+ /*set maximum chunk limit if only one graphic pipe is enabled*/
+ data->outstanding_chunk_request_limit[i] = bw_int_to_fixed(127);
+ }
+ else {
+ data->outstanding_chunk_request_limit[i] = bw_ceil2(bw_div(data->adjusted_data_buffer_size[i], data->pipe_chunk_size_in_bytes[i]), bw_int_to_fixed(1));
+ /*clamp maximum chunk limit in the graphic display pipe*/
+ if (i >= 4) {
+ data->outstanding_chunk_request_limit[i] = bw_max2(bw_int_to_fixed(127), data->outstanding_chunk_request_limit[i]);
+ }
+ }
+ }
+ }
+ /*outstanding pte request limit*/
+ /*in tiling mode with no rotation the sg pte requests are 8 useful pt_es, the sg row height is the page height and the sg page width x height is 64x64 for 8bpp, 64x32 for 16 bpp, 32x32 for 32 bpp*/
+ /*in tiling mode with rotation the sg pte requests are only one useful pte, and the sg row height is also the page height, but the sg page width and height are swapped*/
+ /*in linear mode the pte requests are 8 useful pt_es, the sg page width is 4096 divided by the bytes per pixel, the sg page height is 1, but there is just one row whose height is the lines of pte prefetching*/
+ /*the outstanding pte request limit is obtained by multiplying the outstanding chunk request limit by the peak pte request to eviction limiting ratio, rounding up to integer, multiplying by the pte requests per chunk, and rounding up to integer again*/
+ /*if not using peak pte request to eviction limiting, the outstanding pte request limit is the pte requests in the vblank*/
+ /*the pte requests in the vblank is the product of the number of pte request rows times the number of pte requests in a row*/
+ /*the number of pte requests in a row is the quotient of the source width divided by 256, multiplied by the pte requests per chunk, rounded up to even, multiplied by the scatter-gather row height and divided by the scatter-gather page height*/
+ /*the pte requests per chunk is 256 divided by the scatter-gather page width and the useful pt_es per pte request*/
+ if (data->number_of_displays > 1 || (bw_neq(data->rotation_angle[4], bw_int_to_fixed(0)) && bw_neq(data->rotation_angle[4], bw_int_to_fixed(180)))) {
+ data->peak_pte_request_to_eviction_ratio_limiting = dceip->peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display;
+ }
+ else {
+ data->peak_pte_request_to_eviction_ratio_limiting = dceip->peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation;
+ }
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i] && data->scatter_gather_enable_for_pipe[i] == 1) {
+ if (tiling_mode[i] == bw_def_linear) {
+ data->useful_pte_per_pte_request = bw_int_to_fixed(8);
+ data->scatter_gather_page_width[i] = bw_div(bw_int_to_fixed(4096), bw_int_to_fixed(data->bytes_per_pixel[i]));
+ data->scatter_gather_page_height[i] = bw_int_to_fixed(1);
+ data->scatter_gather_pte_request_rows = bw_int_to_fixed(1);
+ data->scatter_gather_row_height = bw_int_to_fixed(dceip->scatter_gather_lines_of_pte_prefetching_in_linear_mode);
+ }
+ else if (bw_equ(data->rotation_angle[i], bw_int_to_fixed(0)) || bw_equ(data->rotation_angle[i], bw_int_to_fixed(180))) {
+ data->useful_pte_per_pte_request = bw_int_to_fixed(8);
+ switch (data->bytes_per_pixel[i]) {
+ case 4:
+ data->scatter_gather_page_width[i] = bw_int_to_fixed(32);
+ data->scatter_gather_page_height[i] = bw_int_to_fixed(32);
+ break;
+ case 2:
+ data->scatter_gather_page_width[i] = bw_int_to_fixed(64);
+ data->scatter_gather_page_height[i] = bw_int_to_fixed(32);
+ break;
+ default:
+ data->scatter_gather_page_width[i] = bw_int_to_fixed(64);
+ data->scatter_gather_page_height[i] = bw_int_to_fixed(64);
+ break;
+ }
+ data->scatter_gather_pte_request_rows = bw_int_to_fixed(dceip->scatter_gather_pte_request_rows_in_tiling_mode);
+ data->scatter_gather_row_height = data->scatter_gather_page_height[i];
+ }
+ else {
+ data->useful_pte_per_pte_request = bw_int_to_fixed(1);
+ switch (data->bytes_per_pixel[i]) {
+ case 4:
+ data->scatter_gather_page_width[i] = bw_int_to_fixed(32);
+ data->scatter_gather_page_height[i] = bw_int_to_fixed(32);
+ break;
+ case 2:
+ data->scatter_gather_page_width[i] = bw_int_to_fixed(32);
+ data->scatter_gather_page_height[i] = bw_int_to_fixed(64);
+ break;
+ default:
+ data->scatter_gather_page_width[i] = bw_int_to_fixed(64);
+ data->scatter_gather_page_height[i] = bw_int_to_fixed(64);
+ break;
+ }
+ data->scatter_gather_pte_request_rows = bw_int_to_fixed(dceip->scatter_gather_pte_request_rows_in_tiling_mode);
+ data->scatter_gather_row_height = data->scatter_gather_page_height[i];
+ }
+ data->pte_request_per_chunk[i] = bw_div(bw_div(bw_int_to_fixed(dceip->chunk_width), data->scatter_gather_page_width[i]), data->useful_pte_per_pte_request);
+ data->scatter_gather_pte_requests_in_row[i] = bw_div(bw_mul(bw_ceil2(bw_mul(bw_div(data->source_width_rounded_up_to_chunks[i], bw_int_to_fixed(dceip->chunk_width)), data->pte_request_per_chunk[i]), bw_int_to_fixed(1)), data->scatter_gather_row_height), data->scatter_gather_page_height[i]);
+ data->scatter_gather_pte_requests_in_vblank = bw_mul(data->scatter_gather_pte_request_rows, data->scatter_gather_pte_requests_in_row[i]);
+ if (bw_equ(data->peak_pte_request_to_eviction_ratio_limiting, bw_int_to_fixed(0))) {
+ data->scatter_gather_pte_request_limit[i] = data->scatter_gather_pte_requests_in_vblank;
+ }
+ else {
+ data->scatter_gather_pte_request_limit[i] = bw_max2(dceip->minimum_outstanding_pte_request_limit, bw_min2(data->scatter_gather_pte_requests_in_vblank, bw_ceil2(bw_mul(bw_mul(bw_div(bw_ceil2(data->adjusted_data_buffer_size[i], data->memory_chunk_size_in_bytes[i]), data->memory_chunk_size_in_bytes[i]), data->pte_request_per_chunk[i]), data->peak_pte_request_to_eviction_ratio_limiting), bw_int_to_fixed(1))));
+ }
+ }
+ }
+ /*pitch padding recommended for efficiency in linear mode*/
+ /*in linear mode graphics or underlay with scatter gather, a pitch that is a multiple of the channel interleave (256 bytes) times the channel-bank rotation is not efficient*/
+ /*if that is the case it is recommended to pad the pitch by at least 256 pixels*/
+ data->inefficient_linear_pitch_in_bytes = bw_mul(bw_mul(bw_int_to_fixed(256), bw_int_to_fixed(vbios->number_of_dram_banks)), bw_int_to_fixed(data->number_of_dram_channels));
+
+ /*pixel transfer time*/
+ /*the dmif and mcifwr yclk(pclk) required is the one that allows the transfer of all pipe's data buffer size in memory in the time for data transfer*/
+ /*for dmif, pte and cursor requests have to be included.*/
+ /*the dram data requirement is doubled when the data request size in bytes is less than the dram channel width times the burst size (8)*/
+ /*the dram data requirement is also multiplied by the number of channels in the case of low power tiling*/
+ /*the page close-open time is determined by trc and the number of page close-opens*/
+ /*in tiled mode graphics or underlay with scatter-gather enabled the bytes per page close-open is the product of the memory line interleave times the maximum of the scatter-gather page width and the product of the tile width (8 pixels) times the number of channels times the number of banks.*/
+ /*in linear mode graphics or underlay with scatter-gather enabled and inefficient pitch, the bytes per page close-open is the line request alternation slice, because different lines are in completely different 4k address bases.*/
+ /*otherwise, the bytes page close-open is the chunk size because that is the arbitration slice.*/
+ /*pte requests are grouped by pte requests per chunk if that is more than 1. each group costs a page close-open time for dmif reads*/
+ /*cursor requests outstanding are limited to a group of two source lines. each group costs a page close-open time for dmif reads*/
+ /*the display reads and writes time for data transfer is the minimum data or cursor buffer size in time minus the mc urgent latency*/
+ /*the mc urgent latency is experienced more than one time if the number of dmif requests in the data buffer exceeds the request buffer size plus the request slots reserved for dmif in the dram channel arbiter queues*/
+ /*the dispclk required is the maximum for all surfaces of the maximum of the source pixels for first output pixel times the throughput factor, divided by the pixels per dispclk, and divided by the minimum latency hiding minus the dram speed/p-state change latency minus the burst time, and the source pixels for last output pixel, times the throughput factor, divided by the pixels per dispclk, and divided by the minimum latency hiding minus the dram speed/p-state change latency minus the burst time, plus the active time.*/
+ /*the data burst time is the maximum of the total page close-open time, total dmif/mcifwr buffer size in memory divided by the dram bandwidth, and the total dmif/mcifwr buffer size in memory divided by the 32 byte sclk data bus bandwidth, each multiplied by its efficiency.*/
+ /*the source line transfer time is the maximum for all surfaces of the maximum of the burst time plus the urgent latency times the floor of the data required divided by the buffer size for the fist pixel, and the burst time plus the urgent latency times the floor of the data required divided by the buffer size for the last pixel plus the active time.*/
+ /*the source pixels for the first output pixel is 512 if the scaler vertical filter initialization value is greater than 2, and it is 4 times the source width if it is greater than 4.*/
+ /*the source pixels for the last output pixel is the source width times the scaler vertical filter initialization value rounded up to even*/
+ /*the source data for these pixels is the number of pixels times the bytes per pixel times the bytes per request divided by the useful bytes per request.*/
+ data->cursor_total_data = bw_int_to_fixed(0);
+ data->cursor_total_request_groups = bw_int_to_fixed(0);
+ data->scatter_gather_total_pte_requests = bw_int_to_fixed(0);
+ data->scatter_gather_total_pte_request_groups = bw_int_to_fixed(0);
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ data->cursor_total_data = bw_add(data->cursor_total_data, bw_mul(bw_mul(bw_int_to_fixed(2), data->cursor_width_pixels[i]), bw_int_to_fixed(4)));
+ if (dceip->large_cursor == 1) {
+ data->cursor_total_request_groups = bw_add(data->cursor_total_request_groups, bw_int_to_fixed((dceip->cursor_max_outstanding_group_num + 1)));
+ }
+ else {
+ data->cursor_total_request_groups = bw_add(data->cursor_total_request_groups, bw_ceil2(bw_div(data->cursor_width_pixels[i], dceip->cursor_chunk_width), bw_int_to_fixed(1)));
+ }
+ if (data->scatter_gather_enable_for_pipe[i]) {
+ data->scatter_gather_total_pte_requests = bw_add(data->scatter_gather_total_pte_requests, data->scatter_gather_pte_request_limit[i]);
+ data->scatter_gather_total_pte_request_groups = bw_add(data->scatter_gather_total_pte_request_groups, bw_ceil2(bw_div(data->scatter_gather_pte_request_limit[i], bw_ceil2(data->pte_request_per_chunk[i], bw_int_to_fixed(1))), bw_int_to_fixed(1)));
+ }
+ }
+ }
+ data->tile_width_in_pixels = bw_int_to_fixed(8);
+ data->dmif_total_number_of_data_request_page_close_open = bw_int_to_fixed(0);
+ data->mcifwr_total_number_of_data_request_page_close_open = bw_int_to_fixed(0);
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (data->scatter_gather_enable_for_pipe[i] == 1 && tiling_mode[i] != bw_def_linear) {
+ data->bytes_per_page_close_open = bw_mul(data->lines_interleaved_in_mem_access[i], bw_max2(bw_mul(bw_mul(bw_mul(bw_int_to_fixed(data->bytes_per_pixel[i]), data->tile_width_in_pixels), bw_int_to_fixed(vbios->number_of_dram_banks)), bw_int_to_fixed(data->number_of_dram_channels)), bw_mul(bw_int_to_fixed(data->bytes_per_pixel[i]), data->scatter_gather_page_width[i])));
+ }
+ else if (data->scatter_gather_enable_for_pipe[i] == 1 && tiling_mode[i] == bw_def_linear && bw_equ(bw_mod((bw_mul(data->pitch_in_pixels_after_surface_type[i], bw_int_to_fixed(data->bytes_per_pixel[i]))), data->inefficient_linear_pitch_in_bytes), bw_int_to_fixed(0))) {
+ data->bytes_per_page_close_open = dceip->linear_mode_line_request_alternation_slice;
+ }
+ else {
+ data->bytes_per_page_close_open = data->memory_chunk_size_in_bytes[i];
+ }
+ if (surface_type[i] != bw_def_display_write_back420_luma && surface_type[i] != bw_def_display_write_back420_chroma) {
+ data->dmif_total_number_of_data_request_page_close_open = bw_add(data->dmif_total_number_of_data_request_page_close_open, bw_div(bw_ceil2(data->adjusted_data_buffer_size[i], data->memory_chunk_size_in_bytes[i]), data->bytes_per_page_close_open));
+ }
+ else {
+ data->mcifwr_total_number_of_data_request_page_close_open = bw_add(data->mcifwr_total_number_of_data_request_page_close_open, bw_div(bw_ceil2(data->adjusted_data_buffer_size[i], data->memory_chunk_size_in_bytes[i]), data->bytes_per_page_close_open));
+ }
+ }
+ }
+ data->dmif_total_page_close_open_time = bw_div(bw_mul((bw_add(bw_add(data->dmif_total_number_of_data_request_page_close_open, data->scatter_gather_total_pte_request_groups), data->cursor_total_request_groups)), vbios->trc), bw_int_to_fixed(1000));
+ data->mcifwr_total_page_close_open_time = bw_div(bw_mul(data->mcifwr_total_number_of_data_request_page_close_open, vbios->trc), bw_int_to_fixed(1000));
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ data->adjusted_data_buffer_size_in_memory[i] = bw_div(bw_mul(data->adjusted_data_buffer_size[i], data->bytes_per_request[i]), data->useful_bytes_per_request[i]);
+ }
+ }
+ data->total_requests_for_adjusted_dmif_size = bw_int_to_fixed(0);
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (surface_type[i] != bw_def_display_write_back420_luma && surface_type[i] != bw_def_display_write_back420_chroma) {
+ data->total_requests_for_adjusted_dmif_size = bw_add(data->total_requests_for_adjusted_dmif_size, bw_div(data->adjusted_data_buffer_size[i], data->useful_bytes_per_request[i]));
+ }
+ }
+ }
+ data->total_dmifmc_urgent_trips = bw_ceil2(bw_div(data->total_requests_for_adjusted_dmif_size, (bw_add(dceip->dmif_request_buffer_size, bw_int_to_fixed(vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel * data->number_of_dram_channels)))), bw_int_to_fixed(1));
+ data->total_dmifmc_urgent_latency = bw_mul(vbios->dmifmc_urgent_latency, data->total_dmifmc_urgent_trips);
+ data->total_display_reads_required_data = bw_int_to_fixed(0);
+ data->total_display_reads_required_dram_access_data = bw_int_to_fixed(0);
+ data->total_display_writes_required_data = bw_int_to_fixed(0);
+ data->total_display_writes_required_dram_access_data = bw_int_to_fixed(0);
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (surface_type[i] != bw_def_display_write_back420_luma && surface_type[i] != bw_def_display_write_back420_chroma) {
+ data->display_reads_required_data = data->adjusted_data_buffer_size_in_memory[i];
+ /*for hbm memories, each channel is split into 2 pseudo-channels that are each 64 bits in width. each*/
+ /*pseudo-channel may be read independently of one another.*/
+ /*the read burst length (bl) for hbm memories is 4, so each read command will access 32 bytes of data.*/
+ /*the 64 or 32 byte sized data is stored in one pseudo-channel.*/
+ /*it will take 4 memclk cycles or 8 yclk cycles to fetch 64 bytes of data from the hbm memory (2 read commands).*/
+ /*it will take 2 memclk cycles or 4 yclk cycles to fetch 32 bytes of data from the hbm memory (1 read command).*/
+ /*for gddr5/ddr4 memories, there is additional overhead if the size of the request is smaller than 64 bytes.*/
+ /*the read burst length (bl) for gddr5/ddr4 memories is 8, regardless of the size of the data request.*/
+ /*therefore it will require 8 cycles to fetch 64 or 32 bytes of data from the memory.*/
+ /*the memory efficiency will be 50% for the 32 byte sized data.*/
+ if (vbios->memory_type == bw_def_hbm) {
+ data->display_reads_required_dram_access_data = data->adjusted_data_buffer_size_in_memory[i];
+ }
+ else {
+ data->display_reads_required_dram_access_data = bw_mul(data->adjusted_data_buffer_size_in_memory[i], bw_ceil2(bw_div(bw_int_to_fixed((8 * vbios->dram_channel_width_in_bits / 8)), data->bytes_per_request[i]), bw_int_to_fixed(1)));
+ }
+ data->total_display_reads_required_data = bw_add(data->total_display_reads_required_data, data->display_reads_required_data);
+ data->total_display_reads_required_dram_access_data = bw_add(data->total_display_reads_required_dram_access_data, data->display_reads_required_dram_access_data);
+ }
+ else {
+ data->total_display_writes_required_data = bw_add(data->total_display_writes_required_data, data->adjusted_data_buffer_size_in_memory[i]);
+ data->total_display_writes_required_dram_access_data = bw_add(data->total_display_writes_required_dram_access_data, bw_mul(data->adjusted_data_buffer_size_in_memory[i], bw_ceil2(bw_div(bw_int_to_fixed(vbios->dram_channel_width_in_bits), data->bytes_per_request[i]), bw_int_to_fixed(1))));
+ }
+ }
+ }
+ data->total_display_reads_required_data = bw_add(bw_add(data->total_display_reads_required_data, data->cursor_total_data), bw_mul(data->scatter_gather_total_pte_requests, bw_int_to_fixed(64)));
+ data->total_display_reads_required_dram_access_data = bw_add(bw_add(data->total_display_reads_required_dram_access_data, data->cursor_total_data), bw_mul(data->scatter_gather_total_pte_requests, bw_int_to_fixed(64)));
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (bw_mtn(data->v_filter_init[i], bw_int_to_fixed(4))) {
+ data->src_pixels_for_first_output_pixel[i] = bw_mul(bw_int_to_fixed(4), data->source_width_rounded_up_to_chunks[i]);
+ }
+ else {
+ if (bw_mtn(data->v_filter_init[i], bw_int_to_fixed(2))) {
+ data->src_pixels_for_first_output_pixel[i] = bw_int_to_fixed(512);
+ }
+ else {
+ data->src_pixels_for_first_output_pixel[i] = bw_int_to_fixed(0);
+ }
+ }
+ data->src_data_for_first_output_pixel[i] = bw_div(bw_mul(bw_mul(data->src_pixels_for_first_output_pixel[i], bw_int_to_fixed(data->bytes_per_pixel[i])), data->bytes_per_request[i]), data->useful_bytes_per_request[i]);
+ data->src_pixels_for_last_output_pixel[i] = bw_mul(data->source_width_rounded_up_to_chunks[i], bw_max2(bw_ceil2(data->v_filter_init[i], bw_int_to_fixed(dceip->lines_interleaved_into_lb)), bw_mul(bw_ceil2(data->vsr[i], bw_int_to_fixed(dceip->lines_interleaved_into_lb)), data->horizontal_blank_and_chunk_granularity_factor[i])));
+ data->src_data_for_last_output_pixel[i] = bw_div(bw_mul(bw_mul(bw_mul(data->source_width_rounded_up_to_chunks[i], bw_max2(bw_ceil2(data->v_filter_init[i], bw_int_to_fixed(dceip->lines_interleaved_into_lb)), data->lines_interleaved_in_mem_access[i])), bw_int_to_fixed(data->bytes_per_pixel[i])), data->bytes_per_request[i]), data->useful_bytes_per_request[i]);
+ data->active_time[i] = bw_div(bw_div(data->source_width_rounded_up_to_chunks[i], data->hsr[i]), data->pixel_rate[i]);
+ }
+ }
+ for (i = 0; i <= 2; i++) {
+ for (j = 0; j <= 7; j++) {
+ data->dmif_burst_time[i][j] = bw_max3(data->dmif_total_page_close_open_time, bw_div(data->total_display_reads_required_dram_access_data, (bw_mul(bw_div(bw_mul(bw_mul(data->dram_efficiency, yclk[i]), bw_int_to_fixed(vbios->dram_channel_width_in_bits)), bw_int_to_fixed(8)), bw_int_to_fixed(data->number_of_dram_channels)))), bw_div(data->total_display_reads_required_data, (bw_mul(bw_mul(sclk[j], vbios->data_return_bus_width), bw_frc_to_fixed(dceip->percent_of_ideal_port_bw_received_after_urgent_latency, 100)))));
+ if (data->d1_display_write_back_dwb_enable == 1) {
+ data->mcifwr_burst_time[i][j] = bw_max3(data->mcifwr_total_page_close_open_time, bw_div(data->total_display_writes_required_dram_access_data, (bw_mul(bw_div(bw_mul(bw_mul(data->dram_efficiency, yclk[i]), bw_int_to_fixed(vbios->dram_channel_width_in_bits)), bw_int_to_fixed(8)), bw_int_to_fixed(data->number_of_dram_wrchannels)))), bw_div(data->total_display_writes_required_data, (bw_mul(sclk[j], vbios->data_return_bus_width))));
+ }
+ }
+ }
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ for (j = 0; j <= 2; j++) {
+ for (k = 0; k <= 7; k++) {
+ if (data->enable[i]) {
+ if (surface_type[i] != bw_def_display_write_back420_luma && surface_type[i] != bw_def_display_write_back420_chroma) {
+ /*time to transfer data from the dmif buffer to the lb. since the mc to dmif transfer time overlaps*/
+ /*with the dmif to lb transfer time, only time to transfer the last chunk is considered.*/
+ data->dmif_buffer_transfer_time[i] = bw_mul(data->source_width_rounded_up_to_chunks[i], (bw_div(dceip->lb_write_pixels_per_dispclk, (bw_div(vbios->low_voltage_max_dispclk, dceip->display_pipe_throughput_factor)))));
+ data->line_source_transfer_time[i][j][k] = bw_max2(bw_mul((bw_add(data->total_dmifmc_urgent_latency, data->dmif_burst_time[j][k])), bw_floor2(bw_div(data->src_data_for_first_output_pixel[i], data->adjusted_data_buffer_size_in_memory[i]), bw_int_to_fixed(1))), bw_sub(bw_add(bw_mul((bw_add(data->total_dmifmc_urgent_latency, data->dmif_burst_time[j][k])), bw_floor2(bw_div(data->src_data_for_last_output_pixel[i], data->adjusted_data_buffer_size_in_memory[i]), bw_int_to_fixed(1))), data->dmif_buffer_transfer_time[i]), data->active_time[i]));
+ /*during an mclk switch the requests from the dce ip are stored in the gmc/arb. these requests should be serviced immediately*/
+ /*after the mclk switch sequence and not incur an urgent latency penalty. it is assumed that the gmc/arb can hold up to 256 requests*/
+ /*per memory channel. if the dce ip is urgent after the mclk switch sequence, all pending requests and subsequent requests should be*/
+ /*immediately serviced without a gap in the urgent requests.*/
+ /*the latency incurred would be the time to issue the requests and return the data for the first or last output pixel.*/
+ if (surface_type[i] == bw_def_graphics) {
+ switch (data->lb_bpc[i]) {
+ case 6:
+ data->v_scaler_efficiency = dceip->graphics_vscaler_efficiency6_bit_per_component;
+ break;
+ case 8:
+ data->v_scaler_efficiency = dceip->graphics_vscaler_efficiency8_bit_per_component;
+ break;
+ case 10:
+ data->v_scaler_efficiency = dceip->graphics_vscaler_efficiency10_bit_per_component;
+ break;
+ default:
+ data->v_scaler_efficiency = dceip->graphics_vscaler_efficiency12_bit_per_component;
+ break;
+ }
+ if (data->use_alpha[i] == 1) {
+ data->v_scaler_efficiency = bw_min2(data->v_scaler_efficiency, dceip->alpha_vscaler_efficiency);
+ }
+ }
+ else {
+ switch (data->lb_bpc[i]) {
+ case 6:
+ data->v_scaler_efficiency = dceip->underlay_vscaler_efficiency6_bit_per_component;
+ break;
+ case 8:
+ data->v_scaler_efficiency = dceip->underlay_vscaler_efficiency8_bit_per_component;
+ break;
+ case 10:
+ data->v_scaler_efficiency = dceip->underlay_vscaler_efficiency10_bit_per_component;
+ break;
+ default:
+ data->v_scaler_efficiency = bw_int_to_fixed(3);
+ break;
+ }
+ }
+ if (dceip->pre_downscaler_enabled && bw_mtn(data->hsr[i], bw_int_to_fixed(1))) {
+ data->scaler_limits_factor = bw_max2(bw_div(data->v_taps[i], data->v_scaler_efficiency), bw_div(data->source_width_rounded_up_to_chunks[i], data->h_total[i]));
+ }
+ else {
+ data->scaler_limits_factor = bw_max3(bw_int_to_fixed(1), bw_ceil2(bw_div(data->h_taps[i], bw_int_to_fixed(4)), bw_int_to_fixed(1)), bw_mul(data->hsr[i], bw_max2(bw_div(data->v_taps[i], data->v_scaler_efficiency), bw_int_to_fixed(1))));
+ }
+ data->dram_speed_change_line_source_transfer_time[i][j][k] = bw_mul(bw_int_to_fixed(2), bw_max2((bw_add((bw_div(data->src_data_for_first_output_pixel[i], bw_min2(bw_mul(data->bytes_per_request[i], sclk[k]), bw_div(bw_mul(bw_mul(data->bytes_per_request[i], data->pixel_rate[i]), data->scaler_limits_factor), bw_int_to_fixed(2))))), (bw_mul(data->dmif_burst_time[j][k], bw_floor2(bw_div(data->src_data_for_first_output_pixel[i], data->adjusted_data_buffer_size_in_memory[i]), bw_int_to_fixed(1)))))), (bw_add((bw_div(data->src_data_for_last_output_pixel[i], bw_min2(bw_mul(data->bytes_per_request[i], sclk[k]), bw_div(bw_mul(bw_mul(data->bytes_per_request[i], data->pixel_rate[i]), data->scaler_limits_factor), bw_int_to_fixed(2))))), (bw_sub(bw_mul(data->dmif_burst_time[j][k], bw_floor2(bw_div(data->src_data_for_last_output_pixel[i], data->adjusted_data_buffer_size_in_memory[i]), bw_int_to_fixed(1))), data->active_time[i]))))));
+ }
+ else {
+ data->line_source_transfer_time[i][j][k] = bw_max2(bw_mul((bw_add(vbios->mcifwrmc_urgent_latency, data->mcifwr_burst_time[j][k])), bw_floor2(bw_div(data->src_data_for_first_output_pixel[i], data->adjusted_data_buffer_size_in_memory[i]), bw_int_to_fixed(1))), bw_sub(bw_mul((bw_add(vbios->mcifwrmc_urgent_latency, data->mcifwr_burst_time[j][k])), bw_floor2(bw_div(data->src_data_for_last_output_pixel[i], data->adjusted_data_buffer_size_in_memory[i]), bw_int_to_fixed(1))), data->active_time[i]));
+ /*during an mclk switch the requests from the dce ip are stored in the gmc/arb. these requests should be serviced immediately*/
+ /*after the mclk switch sequence and not incur an urgent latency penalty. it is assumed that the gmc/arb can hold up to 256 requests*/
+ /*per memory channel. if the dce ip is urgent after the mclk switch sequence, all pending requests and subsequent requests should be*/
+ /*immediately serviced without a gap in the urgent requests.*/
+ /*the latency incurred would be the time to issue the requests and return the data for the first or last output pixel.*/
+ data->dram_speed_change_line_source_transfer_time[i][j][k] = bw_max2((bw_add((bw_div(data->src_data_for_first_output_pixel[i], bw_min2(bw_mul(data->bytes_per_request[i], sclk[k]), bw_div(bw_mul(data->bytes_per_request[i], vbios->low_voltage_max_dispclk), bw_int_to_fixed(2))))), (bw_mul(data->mcifwr_burst_time[j][k], bw_floor2(bw_div(data->src_data_for_first_output_pixel[i], data->adjusted_data_buffer_size_in_memory[i]), bw_int_to_fixed(1)))))), (bw_add((bw_div(data->src_data_for_last_output_pixel[i], bw_min2(bw_mul(data->bytes_per_request[i], sclk[k]), bw_div(bw_mul(data->bytes_per_request[i], vbios->low_voltage_max_dispclk), bw_int_to_fixed(2))))), (bw_sub(bw_mul(data->mcifwr_burst_time[j][k], bw_floor2(bw_div(data->src_data_for_last_output_pixel[i], data->adjusted_data_buffer_size_in_memory[i]), bw_int_to_fixed(1))), data->active_time[i])))));
+ }
+ }
+ }
+ }
+ }
+ /*cpu c-state and p-state change enable*/
+ /*for cpu p-state change to be possible for a yclk(pclk) and sclk level the dispclk required has to be enough for the blackout duration*/
+ /*for cpu c-state change to be possible for a yclk(pclk) and sclk level the dispclk required has to be enough for the blackout duration and recovery*/
+ /*condition for the blackout duration:*/
+ /* minimum latency hiding > blackout duration + dmif burst time + line source transfer time*/
+ /*condition for the blackout recovery:*/
+ /* recovery time > dmif burst time + 2 * urgent latency*/
+ /* recovery time > (display bw * blackout duration + (2 * urgent latency + dmif burst time)*dispclk - dmif size )*/
+ /* / (dispclk - display bw)*/
+ /*the minimum latency hiding is the minimum for all pipes of one screen line time, plus one more line time if doing lb prefetch, plus the dmif data buffer size equivalent in time, minus the urgent latency.*/
+ /*the minimum latency hiding is further limited by the cursor. the cursor latency hiding is the number of lines of the cursor buffer, minus one if the downscaling is less than two, or minus three if it is more*/
+
+ /*initialize variables*/
+ number_of_displays_enabled = 0;
+ number_of_displays_enabled_with_margin = 0;
+ for (k = 0; k <= maximum_number_of_surfaces - 1; k++) {
+ if (data->enable[k]) {
+ number_of_displays_enabled = number_of_displays_enabled + 1;
+ }
+ data->display_pstate_change_enable[k] = 0;
+ }
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if ((bw_equ(dceip->stutter_and_dram_clock_state_change_gated_before_cursor, bw_int_to_fixed(0)) && bw_mtn(data->cursor_width_pixels[i], bw_int_to_fixed(0)))) {
+ if (bw_ltn(data->vsr[i], bw_int_to_fixed(2))) {
+ data->cursor_latency_hiding[i] = bw_div(bw_div(bw_mul((bw_sub(dceip->cursor_dcp_buffer_lines, bw_int_to_fixed(1))), data->h_total[i]), data->vsr[i]), data->pixel_rate[i]);
+ }
+ else {
+ data->cursor_latency_hiding[i] = bw_div(bw_div(bw_mul((bw_sub(dceip->cursor_dcp_buffer_lines, bw_int_to_fixed(3))), data->h_total[i]), data->vsr[i]), data->pixel_rate[i]);
+ }
+ }
+ else {
+ data->cursor_latency_hiding[i] = bw_int_to_fixed(9999);
+ }
+ }
+ }
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (dceip->graphics_lb_nodownscaling_multi_line_prefetching == 1 && (bw_equ(data->vsr[i], bw_int_to_fixed(1)) || (bw_leq(data->vsr[i], bw_frc_to_fixed(8, 10)) && bw_leq(data->v_taps[i], bw_int_to_fixed(2)) && data->lb_bpc[i] == 8)) && surface_type[i] == bw_def_graphics) {
+ if (number_of_displays_enabled > 2)
+ data->minimum_latency_hiding[i] = bw_sub(bw_div(bw_mul((bw_div((bw_add(bw_sub(data->lb_partitions[i], bw_int_to_fixed(2)), bw_div(bw_div(data->data_buffer_size[i], bw_int_to_fixed(data->bytes_per_pixel[i])), data->source_width_pixels[i]))), data->vsr[i])), data->h_total[i]), data->pixel_rate[i]), data->total_dmifmc_urgent_latency);
+ else
+ data->minimum_latency_hiding[i] = bw_sub(bw_div(bw_mul((bw_div((bw_add(bw_sub(data->lb_partitions[i], bw_int_to_fixed(1)), bw_div(bw_div(data->data_buffer_size[i], bw_int_to_fixed(data->bytes_per_pixel[i])), data->source_width_pixels[i]))), data->vsr[i])), data->h_total[i]), data->pixel_rate[i]), data->total_dmifmc_urgent_latency);
+ }
+ else {
+ data->minimum_latency_hiding[i] = bw_sub(bw_div(bw_mul((bw_div((bw_add(bw_int_to_fixed(1 + data->line_buffer_prefetch[i]), bw_div(bw_div(data->data_buffer_size[i], bw_int_to_fixed(data->bytes_per_pixel[i])), data->source_width_pixels[i]))), data->vsr[i])), data->h_total[i]), data->pixel_rate[i]), data->total_dmifmc_urgent_latency);
+ }
+ data->minimum_latency_hiding_with_cursor[i] = bw_min2(data->minimum_latency_hiding[i], data->cursor_latency_hiding[i]);
+ }
+ }
+ for (i = 0; i <= 2; i++) {
+ for (j = 0; j <= 7; j++) {
+ data->blackout_duration_margin[i][j] = bw_int_to_fixed(9999);
+ data->dispclk_required_for_blackout_duration[i][j] = bw_int_to_fixed(0);
+ data->dispclk_required_for_blackout_recovery[i][j] = bw_int_to_fixed(0);
+ for (k = 0; k <= maximum_number_of_surfaces - 1; k++) {
+ if (data->enable[k] && bw_mtn(vbios->blackout_duration, bw_int_to_fixed(0))) {
+ if (surface_type[k] != bw_def_display_write_back420_luma && surface_type[k] != bw_def_display_write_back420_chroma) {
+ data->blackout_duration_margin[i][j] = bw_min2(data->blackout_duration_margin[i][j], bw_sub(bw_sub(bw_sub(data->minimum_latency_hiding_with_cursor[k], vbios->blackout_duration), data->dmif_burst_time[i][j]), data->line_source_transfer_time[k][i][j]));
+ data->dispclk_required_for_blackout_duration[i][j] = bw_max3(data->dispclk_required_for_blackout_duration[i][j], bw_div(bw_div(bw_mul(data->src_pixels_for_first_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_sub(bw_sub(data->minimum_latency_hiding_with_cursor[k], vbios->blackout_duration), data->dmif_burst_time[i][j]))), bw_div(bw_div(bw_mul(data->src_pixels_for_last_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_add(bw_sub(bw_sub(data->minimum_latency_hiding_with_cursor[k], vbios->blackout_duration), data->dmif_burst_time[i][j]), data->active_time[k]))));
+ if (bw_leq(vbios->maximum_blackout_recovery_time, bw_add(bw_mul(bw_int_to_fixed(2), data->total_dmifmc_urgent_latency), data->dmif_burst_time[i][j]))) {
+ data->dispclk_required_for_blackout_recovery[i][j] = bw_int_to_fixed(9999);
+ }
+ else if (bw_ltn(data->adjusted_data_buffer_size[k], bw_mul(bw_div(bw_mul(data->display_bandwidth[k], data->useful_bytes_per_request[k]), data->bytes_per_request[k]), (bw_add(vbios->blackout_duration, bw_add(bw_mul(bw_int_to_fixed(2), data->total_dmifmc_urgent_latency), data->dmif_burst_time[i][j])))))) {
+ data->dispclk_required_for_blackout_recovery[i][j] = bw_max2(data->dispclk_required_for_blackout_recovery[i][j], bw_div(bw_mul(bw_div(bw_div((bw_sub(bw_mul(bw_div(bw_mul(data->display_bandwidth[k], data->useful_bytes_per_request[k]), data->bytes_per_request[k]), (bw_add(vbios->blackout_duration, vbios->maximum_blackout_recovery_time))), data->adjusted_data_buffer_size[k])), bw_int_to_fixed(data->bytes_per_pixel[k])), (bw_sub(vbios->maximum_blackout_recovery_time, bw_sub(bw_mul(bw_int_to_fixed(2), data->total_dmifmc_urgent_latency), data->dmif_burst_time[i][j])))), data->latency_hiding_lines[k]), data->lines_interleaved_in_mem_access[k]));
+ }
+ }
+ else {
+ data->blackout_duration_margin[i][j] = bw_min2(data->blackout_duration_margin[i][j], bw_sub(bw_sub(bw_sub(bw_sub(data->minimum_latency_hiding_with_cursor[k], vbios->blackout_duration), data->dmif_burst_time[i][j]), data->mcifwr_burst_time[i][j]), data->line_source_transfer_time[k][i][j]));
+ data->dispclk_required_for_blackout_duration[i][j] = bw_max3(data->dispclk_required_for_blackout_duration[i][j], bw_div(bw_div(bw_mul(data->src_pixels_for_first_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_sub(bw_sub(bw_sub(data->minimum_latency_hiding_with_cursor[k], vbios->blackout_duration), data->dmif_burst_time[i][j]), data->mcifwr_burst_time[i][j]))), bw_div(bw_div(bw_mul(data->src_pixels_for_last_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_add(bw_sub(bw_sub(bw_sub(data->minimum_latency_hiding_with_cursor[k], vbios->blackout_duration), data->dmif_burst_time[i][j]), data->mcifwr_burst_time[i][j]), data->active_time[k]))));
+ if (bw_ltn(vbios->maximum_blackout_recovery_time, bw_add(bw_add(bw_mul(bw_int_to_fixed(2), vbios->mcifwrmc_urgent_latency), data->dmif_burst_time[i][j]), data->mcifwr_burst_time[i][j]))) {
+ data->dispclk_required_for_blackout_recovery[i][j] = bw_int_to_fixed(9999);
+ }
+ else if (bw_ltn(data->adjusted_data_buffer_size[k], bw_mul(bw_div(bw_mul(data->display_bandwidth[k], data->useful_bytes_per_request[k]), data->bytes_per_request[k]), (bw_add(vbios->blackout_duration, bw_add(bw_mul(bw_int_to_fixed(2), data->total_dmifmc_urgent_latency), data->dmif_burst_time[i][j])))))) {
+ data->dispclk_required_for_blackout_recovery[i][j] = bw_max2(data->dispclk_required_for_blackout_recovery[i][j], bw_div(bw_mul(bw_div(bw_div((bw_sub(bw_mul(bw_div(bw_mul(data->display_bandwidth[k], data->useful_bytes_per_request[k]), data->bytes_per_request[k]), (bw_add(vbios->blackout_duration, vbios->maximum_blackout_recovery_time))), data->adjusted_data_buffer_size[k])), bw_int_to_fixed(data->bytes_per_pixel[k])), (bw_sub(vbios->maximum_blackout_recovery_time, (bw_add(bw_mul(bw_int_to_fixed(2), data->total_dmifmc_urgent_latency), data->dmif_burst_time[i][j]))))), data->latency_hiding_lines[k]), data->lines_interleaved_in_mem_access[k]));
+ }
+ }
+ }
+ }
+ }
+ }
+ if (bw_mtn(data->blackout_duration_margin[high][s_high], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[high][s_high], vbios->high_voltage_max_dispclk)) {
+ data->cpup_state_change_enable = bw_def_yes;
+ if (bw_ltn(data->dispclk_required_for_blackout_recovery[high][s_high], vbios->high_voltage_max_dispclk)) {
+ data->cpuc_state_change_enable = bw_def_yes;
+ }
+ else {
+ data->cpuc_state_change_enable = bw_def_no;
+ }
+ }
+ else {
+ data->cpup_state_change_enable = bw_def_no;
+ data->cpuc_state_change_enable = bw_def_no;
+ }
+ /*nb p-state change enable*/
+ /*for dram speed/p-state change to be possible for a yclk(pclk) and sclk level there has to be positive margin and the dispclk required has to be*/
+ /*below the maximum.*/
+ /*the dram speed/p-state change margin is the minimum for all surfaces of the maximum latency hiding minus the dram speed/p-state change latency,*/
+ /*minus the dmif burst time, minus the source line transfer time*/
+ /*the maximum latency hiding is the minimum latency hiding plus one source line used for de-tiling in the line buffer, plus half the urgent latency*/
+ /*if stutter and dram clock state change are gated before cursor then the cursor latency hiding does not limit stutter or dram clock state change*/
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ /*maximum_latency_hiding(i) = minimum_latency_hiding(i) + 1 / vsr(i) **/
+ /* h_total(i) / pixel_rate(i) + 0.5 * total_dmifmc_urgent_latency*/
+ data->maximum_latency_hiding[i] = bw_add(data->minimum_latency_hiding[i],
+ bw_mul(bw_frc_to_fixed(5, 10), data->total_dmifmc_urgent_latency));
+ data->maximum_latency_hiding_with_cursor[i] = bw_min2(data->maximum_latency_hiding[i], data->cursor_latency_hiding[i]);
+ }
+ }
+ for (i = 0; i <= 2; i++) {
+ for (j = 0; j <= 7; j++) {
+ data->min_dram_speed_change_margin[i][j] = bw_int_to_fixed(9999);
+ data->dram_speed_change_margin = bw_int_to_fixed(9999);
+ data->dispclk_required_for_dram_speed_change[i][j] = bw_int_to_fixed(0);
+ data->num_displays_with_margin[i][j] = 0;
+ for (k = 0; k <= maximum_number_of_surfaces - 1; k++) {
+ if (data->enable[k]) {
+ if (surface_type[k] != bw_def_display_write_back420_luma && surface_type[k] != bw_def_display_write_back420_chroma) {
+ data->dram_speed_change_margin = bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]);
+ if ((bw_mtn(data->dram_speed_change_margin, bw_int_to_fixed(0)) && bw_ltn(data->dram_speed_change_margin, bw_int_to_fixed(9999)))) {
+ /*determine the minimum dram clock change margin for each set of clock frequencies*/
+ data->min_dram_speed_change_margin[i][j] = bw_min2(data->min_dram_speed_change_margin[i][j], data->dram_speed_change_margin);
+ /*compute the maximum clock frequuency required for the dram clock change at each set of clock frequencies*/
+ data->dispclk_required_for_dram_speed_change_pipe[i][j] = bw_max2(bw_div(bw_div(bw_mul(data->src_pixels_for_first_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]))), bw_div(bw_div(bw_mul(data->src_pixels_for_last_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_add(bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]), data->active_time[k]))));
+ if ((bw_ltn(data->dispclk_required_for_dram_speed_change_pipe[i][j], vbios->high_voltage_max_dispclk))) {
+ data->display_pstate_change_enable[k] = 1;
+ data->num_displays_with_margin[i][j] = data->num_displays_with_margin[i][j] + 1;
+ data->dispclk_required_for_dram_speed_change[i][j] = bw_max2(data->dispclk_required_for_dram_speed_change[i][j], data->dispclk_required_for_dram_speed_change_pipe[i][j]);
+ }
+ }
+ }
+ else {
+ data->dram_speed_change_margin = bw_sub(bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->mcifwr_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]);
+ if ((bw_mtn(data->dram_speed_change_margin, bw_int_to_fixed(0)) && bw_ltn(data->dram_speed_change_margin, bw_int_to_fixed(9999)))) {
+ /*determine the minimum dram clock change margin for each display pipe*/
+ data->min_dram_speed_change_margin[i][j] = bw_min2(data->min_dram_speed_change_margin[i][j], data->dram_speed_change_margin);
+ /*compute the maximum clock frequuency required for the dram clock change at each set of clock frequencies*/
+ data->dispclk_required_for_dram_speed_change_pipe[i][j] = bw_max2(bw_div(bw_div(bw_mul(data->src_pixels_for_first_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_sub(bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]), data->mcifwr_burst_time[i][j]))), bw_div(bw_div(bw_mul(data->src_pixels_for_last_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_add(bw_sub(bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]), data->mcifwr_burst_time[i][j]), data->active_time[k]))));
+ if ((bw_ltn(data->dispclk_required_for_dram_speed_change_pipe[i][j], vbios->high_voltage_max_dispclk))) {
+ data->display_pstate_change_enable[k] = 1;
+ data->num_displays_with_margin[i][j] = data->num_displays_with_margin[i][j] + 1;
+ data->dispclk_required_for_dram_speed_change[i][j] = bw_max2(data->dispclk_required_for_dram_speed_change[i][j], data->dispclk_required_for_dram_speed_change_pipe[i][j]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ /*determine the number of displays with margin to switch in the v_active region*/
+ for (k = 0; k <= maximum_number_of_surfaces - 1; k++) {
+ if (data->enable[k] == 1 && data->display_pstate_change_enable[k] == 1) {
+ number_of_displays_enabled_with_margin = number_of_displays_enabled_with_margin + 1;
+ }
+ }
+ /*determine the number of displays that don't have any dram clock change margin, but*/
+ /*have the same resolution. these displays can switch in a common vblank region if*/
+ /*their frames are aligned.*/
+ data->min_vblank_dram_speed_change_margin = bw_int_to_fixed(9999);
+ for (k = 0; k <= maximum_number_of_surfaces - 1; k++) {
+ if (data->enable[k]) {
+ if (surface_type[k] != bw_def_display_write_back420_luma && surface_type[k] != bw_def_display_write_back420_chroma) {
+ data->v_blank_dram_speed_change_margin[k] = bw_sub(bw_sub(bw_sub(bw_div(bw_mul((bw_sub(data->v_total[k], bw_sub(bw_div(data->src_height[k], data->v_scale_ratio[k]), bw_int_to_fixed(4)))), data->h_total[k]), data->pixel_rate[k]), vbios->nbp_state_change_latency), data->dmif_burst_time[low][s_low]), data->dram_speed_change_line_source_transfer_time[k][low][s_low]);
+ data->min_vblank_dram_speed_change_margin = bw_min2(data->min_vblank_dram_speed_change_margin, data->v_blank_dram_speed_change_margin[k]);
+ }
+ else {
+ data->v_blank_dram_speed_change_margin[k] = bw_sub(bw_sub(bw_sub(bw_sub(bw_div(bw_mul((bw_sub(data->v_total[k], bw_sub(bw_div(data->src_height[k], data->v_scale_ratio[k]), bw_int_to_fixed(4)))), data->h_total[k]), data->pixel_rate[k]), vbios->nbp_state_change_latency), data->dmif_burst_time[low][s_low]), data->mcifwr_burst_time[low][s_low]), data->dram_speed_change_line_source_transfer_time[k][low][s_low]);
+ data->min_vblank_dram_speed_change_margin = bw_min2(data->min_vblank_dram_speed_change_margin, data->v_blank_dram_speed_change_margin[k]);
+ }
+ }
+ }
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ data->displays_with_same_mode[i] = bw_int_to_fixed(0);
+ if (data->enable[i] == 1 && data->display_pstate_change_enable[i] == 0 && bw_mtn(data->v_blank_dram_speed_change_margin[i], bw_int_to_fixed(0))) {
+ for (j = 0; j <= maximum_number_of_surfaces - 1; j++) {
+ if ((i == j || data->display_synchronization_enabled) && (data->enable[j] == 1 && bw_equ(data->source_width_rounded_up_to_chunks[i], data->source_width_rounded_up_to_chunks[j]) && bw_equ(data->source_height_rounded_up_to_chunks[i], data->source_height_rounded_up_to_chunks[j]) && bw_equ(data->vsr[i], data->vsr[j]) && bw_equ(data->hsr[i], data->hsr[j]) && bw_equ(data->pixel_rate[i], data->pixel_rate[j]))) {
+ data->displays_with_same_mode[i] = bw_add(data->displays_with_same_mode[i], bw_int_to_fixed(1));
+ }
+ }
+ }
+ }
+ /*compute the maximum number of aligned displays with no margin*/
+ number_of_aligned_displays_with_no_margin = 0;
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ number_of_aligned_displays_with_no_margin = bw_fixed_to_int(bw_max2(bw_int_to_fixed(number_of_aligned_displays_with_no_margin), data->displays_with_same_mode[i]));
+ }
+ /*dram clock change is possible, if all displays have positive margin except for one display or a group of*/
+ /*aligned displays with the same timing.*/
+ /*the display(s) with the negative margin can be switched in the v_blank region while the other*/
+ /*displays are in v_blank or v_active.*/
+ if (number_of_displays_enabled_with_margin > 0 && (number_of_displays_enabled_with_margin + number_of_aligned_displays_with_no_margin) == number_of_displays_enabled && bw_mtn(data->min_dram_speed_change_margin[high][s_high], bw_int_to_fixed(0)) && bw_ltn(data->min_dram_speed_change_margin[high][s_high], bw_int_to_fixed(9999)) && bw_ltn(data->dispclk_required_for_dram_speed_change[high][s_high], vbios->high_voltage_max_dispclk)) {
+ data->nbp_state_change_enable = bw_def_yes;
+ }
+ else {
+ data->nbp_state_change_enable = bw_def_no;
+ }
+ /*dram clock change is possible only in vblank if all displays are aligned and have no margin*/
+ if (number_of_aligned_displays_with_no_margin == number_of_displays_enabled) {
+ nbp_state_change_enable_blank = bw_def_yes;
+ }
+ else {
+ nbp_state_change_enable_blank = bw_def_no;
+ }
+
+ /*average bandwidth*/
+ /*the average bandwidth with no compression is the vertical active time is the source width times the bytes per pixel divided by the line time, multiplied by the vertical scale ratio and the ratio of bytes per request divided by the useful bytes per request.*/
+ /*the average bandwidth with compression is the same, divided by the compression ratio*/
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ data->average_bandwidth_no_compression[i] = bw_div(bw_mul(bw_mul(bw_div(bw_mul(data->source_width_rounded_up_to_chunks[i], bw_int_to_fixed(data->bytes_per_pixel[i])), (bw_div(data->h_total[i], data->pixel_rate[i]))), data->vsr[i]), data->bytes_per_request[i]), data->useful_bytes_per_request[i]);
+ data->average_bandwidth[i] = bw_div(data->average_bandwidth_no_compression[i], data->compression_rate[i]);
+ }
+ }
+ data->total_average_bandwidth_no_compression = bw_int_to_fixed(0);
+ data->total_average_bandwidth = bw_int_to_fixed(0);
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ data->total_average_bandwidth_no_compression = bw_add(data->total_average_bandwidth_no_compression, data->average_bandwidth_no_compression[i]);
+ data->total_average_bandwidth = bw_add(data->total_average_bandwidth, data->average_bandwidth[i]);
+ }
+ }
+
+ /*required yclk(pclk)*/
+ /*yclk requirement only makes sense if the dmif and mcifwr data total page close-open time is less than the time for data transfer and the total pte requests fit in the scatter-gather saw queque size*/
+ /*if that is the case, the yclk requirement is the maximum of the ones required by dmif and mcifwr, and the high/low yclk(pclk) is chosen accordingly*/
+ /*high yclk(pclk) has to be selected when dram speed/p-state change is not possible.*/
+ data->min_cursor_memory_interface_buffer_size_in_time = bw_int_to_fixed(9999);
+ /* number of cursor lines stored in the cursor data return buffer*/
+ num_cursor_lines = 0;
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (bw_mtn(data->cursor_width_pixels[i], bw_int_to_fixed(0))) {
+ /*compute number of cursor lines stored in data return buffer*/
+ if (bw_leq(data->cursor_width_pixels[i], bw_int_to_fixed(64)) && dceip->large_cursor == 1) {
+ num_cursor_lines = 4;
+ }
+ else {
+ num_cursor_lines = 2;
+ }
+ data->min_cursor_memory_interface_buffer_size_in_time = bw_min2(data->min_cursor_memory_interface_buffer_size_in_time, bw_div(bw_mul(bw_div(bw_int_to_fixed(num_cursor_lines), data->vsr[i]), data->h_total[i]), data->pixel_rate[i]));
+ }
+ }
+ }
+ /*compute minimum time to read one chunk from the dmif buffer*/
+ if (number_of_displays_enabled > 2) {
+ data->chunk_request_delay = 0;
+ }
+ else {
+ data->chunk_request_delay = bw_fixed_to_int(bw_div(bw_int_to_fixed(512), vbios->high_voltage_max_dispclk));
+ }
+ data->min_read_buffer_size_in_time = bw_min2(data->min_cursor_memory_interface_buffer_size_in_time, data->min_dmif_size_in_time);
+ data->display_reads_time_for_data_transfer = bw_sub(bw_sub(data->min_read_buffer_size_in_time, data->total_dmifmc_urgent_latency), bw_int_to_fixed(data->chunk_request_delay));
+ data->display_writes_time_for_data_transfer = bw_sub(data->min_mcifwr_size_in_time, vbios->mcifwrmc_urgent_latency);
+ data->dmif_required_dram_bandwidth = bw_div(data->total_display_reads_required_dram_access_data, data->display_reads_time_for_data_transfer);
+ data->mcifwr_required_dram_bandwidth = bw_div(data->total_display_writes_required_dram_access_data, data->display_writes_time_for_data_transfer);
+ data->required_dmifmc_urgent_latency_for_page_close_open = bw_div((bw_sub(data->min_read_buffer_size_in_time, data->dmif_total_page_close_open_time)), data->total_dmifmc_urgent_trips);
+ data->required_mcifmcwr_urgent_latency = bw_sub(data->min_mcifwr_size_in_time, data->mcifwr_total_page_close_open_time);
+ if (bw_mtn(data->scatter_gather_total_pte_requests, dceip->maximum_total_outstanding_pte_requests_allowed_by_saw)) {
+ data->required_dram_bandwidth_gbyte_per_second = bw_int_to_fixed(9999);
+ yclk_message = bw_def_exceeded_allowed_outstanding_pte_req_queue_size;
+ data->y_clk_level = high;
+ data->dram_bandwidth = bw_mul(bw_div(bw_mul(bw_mul(data->dram_efficiency, yclk[high]), bw_int_to_fixed(vbios->dram_channel_width_in_bits)), bw_int_to_fixed(8)), bw_int_to_fixed(data->number_of_dram_channels));
+ }
+ else if (bw_mtn(vbios->dmifmc_urgent_latency, data->required_dmifmc_urgent_latency_for_page_close_open) || bw_mtn(vbios->mcifwrmc_urgent_latency, data->required_mcifmcwr_urgent_latency)) {
+ data->required_dram_bandwidth_gbyte_per_second = bw_int_to_fixed(9999);
+ yclk_message = bw_def_exceeded_allowed_page_close_open;
+ data->y_clk_level = high;
+ data->dram_bandwidth = bw_mul(bw_div(bw_mul(bw_mul(data->dram_efficiency, yclk[high]), bw_int_to_fixed(vbios->dram_channel_width_in_bits)), bw_int_to_fixed(8)), bw_int_to_fixed(data->number_of_dram_channels));
+ }
+ else {
+ data->required_dram_bandwidth_gbyte_per_second = bw_div(bw_max2(data->dmif_required_dram_bandwidth, data->mcifwr_required_dram_bandwidth), bw_int_to_fixed(1000));
+ if (bw_ltn(data->total_average_bandwidth_no_compression, bw_mul(bw_mul(bw_mul(bw_frc_to_fixed(dceip->max_average_percent_of_ideal_drambw_display_can_use_in_normal_system_operation, 100),yclk[low]),bw_div(bw_int_to_fixed(vbios->dram_channel_width_in_bits),bw_int_to_fixed(8))),bw_int_to_fixed(vbios->number_of_dram_channels)))
+ && bw_ltn(bw_mul(data->required_dram_bandwidth_gbyte_per_second, bw_int_to_fixed(1000)), bw_mul(bw_div(bw_mul(bw_mul(data->dram_efficiency, yclk[low]), bw_int_to_fixed(vbios->dram_channel_width_in_bits)), bw_int_to_fixed(8)), bw_int_to_fixed(data->number_of_dram_channels))) && (data->cpup_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[low][s_high], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[low][s_high], vbios->high_voltage_max_dispclk))) && (data->cpuc_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[low][s_high], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[low][s_high], vbios->high_voltage_max_dispclk) && bw_ltn(data->dispclk_required_for_blackout_recovery[low][s_high], vbios->high_voltage_max_dispclk))) && (!data->increase_voltage_to_support_mclk_switch || data->nbp_state_change_enable == bw_def_no || (bw_mtn(data->min_dram_speed_change_margin[low][s_high], bw_int_to_fixed(0)) && bw_ltn(data->min_dram_speed_change_margin[low][s_high], bw_int_to_fixed(9999)) && bw_leq(data->dispclk_required_for_dram_speed_change[low][s_high], vbios->high_voltage_max_dispclk) && data->num_displays_with_margin[low][s_high] == number_of_displays_enabled_with_margin))) {
+ yclk_message = bw_fixed_to_int(vbios->low_yclk);
+ data->y_clk_level = low;
+ data->dram_bandwidth = bw_mul(bw_div(bw_mul(bw_mul(data->dram_efficiency, yclk[low]), bw_int_to_fixed(vbios->dram_channel_width_in_bits)), bw_int_to_fixed(8)), bw_int_to_fixed(data->number_of_dram_channels));
+ }
+ else if (bw_ltn(data->total_average_bandwidth_no_compression, bw_mul(bw_mul(bw_mul(bw_frc_to_fixed(dceip->max_average_percent_of_ideal_drambw_display_can_use_in_normal_system_operation, 100),yclk[mid]),bw_div(bw_int_to_fixed(vbios->dram_channel_width_in_bits),bw_int_to_fixed(8))),bw_int_to_fixed(vbios->number_of_dram_channels)))
+ && bw_ltn(bw_mul(data->required_dram_bandwidth_gbyte_per_second, bw_int_to_fixed(1000)), bw_mul(bw_div(bw_mul(bw_mul(data->dram_efficiency, yclk[mid]), bw_int_to_fixed(vbios->dram_channel_width_in_bits)), bw_int_to_fixed(8)), bw_int_to_fixed(data->number_of_dram_channels))) && (data->cpup_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[mid][s_high], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[mid][s_high], vbios->high_voltage_max_dispclk))) && (data->cpuc_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[mid][s_high], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[mid][s_high], vbios->high_voltage_max_dispclk) && bw_ltn(data->dispclk_required_for_blackout_recovery[mid][s_high], vbios->high_voltage_max_dispclk))) && (!data->increase_voltage_to_support_mclk_switch || data->nbp_state_change_enable == bw_def_no || (bw_mtn(data->min_dram_speed_change_margin[mid][s_high], bw_int_to_fixed(0)) && bw_ltn(data->min_dram_speed_change_margin[mid][s_high], bw_int_to_fixed(9999)) && bw_leq(data->dispclk_required_for_dram_speed_change[mid][s_high], vbios->high_voltage_max_dispclk) && data->num_displays_with_margin[mid][s_high] == number_of_displays_enabled_with_margin))) {
+ yclk_message = bw_fixed_to_int(vbios->mid_yclk);
+ data->y_clk_level = mid;
+ data->dram_bandwidth = bw_mul(bw_div(bw_mul(bw_mul(data->dram_efficiency, yclk[mid]), bw_int_to_fixed(vbios->dram_channel_width_in_bits)), bw_int_to_fixed(8)), bw_int_to_fixed(data->number_of_dram_channels));
+ }
+ else if (bw_ltn(data->total_average_bandwidth_no_compression, bw_mul(bw_mul(bw_mul(bw_frc_to_fixed(dceip->max_average_percent_of_ideal_drambw_display_can_use_in_normal_system_operation, 100),yclk[high]),bw_div(bw_int_to_fixed(vbios->dram_channel_width_in_bits),bw_int_to_fixed(8))),bw_int_to_fixed(vbios->number_of_dram_channels)))
+ && bw_ltn(bw_mul(data->required_dram_bandwidth_gbyte_per_second, bw_int_to_fixed(1000)), bw_mul(bw_div(bw_mul(bw_mul(data->dram_efficiency, yclk[high]), bw_int_to_fixed(vbios->dram_channel_width_in_bits)), bw_int_to_fixed(8)), bw_int_to_fixed(data->number_of_dram_channels)))) {
+ yclk_message = bw_fixed_to_int(vbios->high_yclk);
+ data->y_clk_level = high;
+ data->dram_bandwidth = bw_mul(bw_div(bw_mul(bw_mul(data->dram_efficiency, yclk[high]), bw_int_to_fixed(vbios->dram_channel_width_in_bits)), bw_int_to_fixed(8)), bw_int_to_fixed(data->number_of_dram_channels));
+ }
+ else {
+ yclk_message = bw_def_exceeded_allowed_maximum_bw;
+ data->y_clk_level = high;
+ data->dram_bandwidth = bw_mul(bw_div(bw_mul(bw_mul(data->dram_efficiency, yclk[high]), bw_int_to_fixed(vbios->dram_channel_width_in_bits)), bw_int_to_fixed(8)), bw_int_to_fixed(data->number_of_dram_channels));
+ }
+ }
+ /*required sclk*/
+ /*sclk requirement only makes sense if the total pte requests fit in the scatter-gather saw queque size*/
+ /*if that is the case, the sclk requirement is the maximum of the ones required by dmif and mcifwr, and the high/mid/low sclk is chosen accordingly, unless that choice results in foresaking dram speed/nb p-state change.*/
+ /*the dmif and mcifwr sclk required is the one that allows the transfer of all pipe's data buffer size through the sclk bus in the time for data transfer*/
+ /*for dmif, pte and cursor requests have to be included.*/
+ data->dmif_required_sclk = bw_div(bw_div(data->total_display_reads_required_data, data->display_reads_time_for_data_transfer), (bw_mul(vbios->data_return_bus_width, bw_frc_to_fixed(dceip->percent_of_ideal_port_bw_received_after_urgent_latency, 100))));
+ data->mcifwr_required_sclk = bw_div(bw_div(data->total_display_writes_required_data, data->display_writes_time_for_data_transfer), vbios->data_return_bus_width);
+ if (bw_mtn(data->scatter_gather_total_pte_requests, dceip->maximum_total_outstanding_pte_requests_allowed_by_saw)) {
+ data->required_sclk = bw_int_to_fixed(9999);
+ sclk_message = bw_def_exceeded_allowed_outstanding_pte_req_queue_size;
+ data->sclk_level = s_high;
+ }
+ else if (bw_mtn(vbios->dmifmc_urgent_latency, data->required_dmifmc_urgent_latency_for_page_close_open) || bw_mtn(vbios->mcifwrmc_urgent_latency, data->required_mcifmcwr_urgent_latency)) {
+ data->required_sclk = bw_int_to_fixed(9999);
+ sclk_message = bw_def_exceeded_allowed_page_close_open;
+ data->sclk_level = s_high;
+ }
+ else {
+ data->required_sclk = bw_max2(data->dmif_required_sclk, data->mcifwr_required_sclk);
+ if (bw_ltn(data->total_average_bandwidth_no_compression, bw_mul(bw_mul(bw_frc_to_fixed(dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation, 100),sclk[low]),vbios->data_return_bus_width))
+ && bw_ltn(data->required_sclk, sclk[s_low]) && (data->cpup_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_low], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_low], vbios->high_voltage_max_dispclk))) && (data->cpuc_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_low], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_low], vbios->high_voltage_max_dispclk) && bw_ltn(data->dispclk_required_for_blackout_recovery[data->y_clk_level][s_low], vbios->high_voltage_max_dispclk))) && (!data->increase_voltage_to_support_mclk_switch || data->nbp_state_change_enable == bw_def_no || (bw_mtn(data->min_dram_speed_change_margin[data->y_clk_level][s_low], bw_int_to_fixed(0)) && bw_ltn(data->min_dram_speed_change_margin[data->y_clk_level][s_low], bw_int_to_fixed(9999)) && bw_leq(data->dispclk_required_for_dram_speed_change[data->y_clk_level][s_low], vbios->low_voltage_max_dispclk) && data->num_displays_with_margin[data->y_clk_level][s_low] == number_of_displays_enabled_with_margin))) {
+ sclk_message = bw_def_low;
+ data->sclk_level = s_low;
+ data->required_sclk = vbios->low_sclk;
+ }
+ else if (bw_ltn(data->total_average_bandwidth_no_compression, bw_mul(bw_mul(bw_frc_to_fixed(dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation, 100),sclk[mid]),vbios->data_return_bus_width))
+ && bw_ltn(data->required_sclk, sclk[s_mid1]) && (data->cpup_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_mid1], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_mid1], vbios->high_voltage_max_dispclk))) && (data->cpuc_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_mid1], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_mid1], vbios->high_voltage_max_dispclk) && bw_ltn(data->dispclk_required_for_blackout_recovery[data->y_clk_level][s_mid1], vbios->high_voltage_max_dispclk))) && (!data->increase_voltage_to_support_mclk_switch || data->nbp_state_change_enable == bw_def_no || (bw_mtn(data->min_dram_speed_change_margin[data->y_clk_level][s_mid1], bw_int_to_fixed(0)) && bw_ltn(data->min_dram_speed_change_margin[data->y_clk_level][s_mid1], bw_int_to_fixed(9999)) && bw_leq(data->dispclk_required_for_dram_speed_change[data->y_clk_level][s_mid1], vbios->mid_voltage_max_dispclk) && data->num_displays_with_margin[data->y_clk_level][s_mid1] == number_of_displays_enabled_with_margin))) {
+ sclk_message = bw_def_mid;
+ data->sclk_level = s_mid1;
+ data->required_sclk = vbios->mid1_sclk;
+ }
+ else if (bw_ltn(data->total_average_bandwidth_no_compression, bw_mul(bw_mul(bw_frc_to_fixed(dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation, 100),sclk[s_mid2]),vbios->data_return_bus_width))
+ && bw_ltn(data->required_sclk, sclk[s_mid2]) && (data->cpup_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_mid2], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_mid2], vbios->high_voltage_max_dispclk))) && (data->cpuc_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_mid2], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_mid2], vbios->high_voltage_max_dispclk) && bw_ltn(data->dispclk_required_for_blackout_recovery[data->y_clk_level][s_mid2], vbios->high_voltage_max_dispclk))) && (!data->increase_voltage_to_support_mclk_switch || data->nbp_state_change_enable == bw_def_no || (bw_mtn(data->min_dram_speed_change_margin[data->y_clk_level][s_mid2], bw_int_to_fixed(0)) && bw_ltn(data->min_dram_speed_change_margin[data->y_clk_level][s_mid2], bw_int_to_fixed(9999)) && bw_leq(data->dispclk_required_for_dram_speed_change[data->y_clk_level][s_mid2], vbios->mid_voltage_max_dispclk) && data->num_displays_with_margin[data->y_clk_level][s_mid2] == number_of_displays_enabled_with_margin))) {
+ sclk_message = bw_def_mid;
+ data->sclk_level = s_mid2;
+ data->required_sclk = vbios->mid2_sclk;
+ }
+ else if (bw_ltn(data->total_average_bandwidth_no_compression, bw_mul(bw_mul(bw_frc_to_fixed(dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation, 100),sclk[s_mid3]),vbios->data_return_bus_width))
+ && bw_ltn(data->required_sclk, sclk[s_mid3]) && (data->cpup_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_mid3], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_mid3], vbios->high_voltage_max_dispclk))) && (data->cpuc_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_mid3], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_mid3], vbios->high_voltage_max_dispclk) && bw_ltn(data->dispclk_required_for_blackout_recovery[data->y_clk_level][s_mid3], vbios->high_voltage_max_dispclk))) && (!data->increase_voltage_to_support_mclk_switch || data->nbp_state_change_enable == bw_def_no || (bw_mtn(data->min_dram_speed_change_margin[data->y_clk_level][s_mid3], bw_int_to_fixed(0)) && bw_ltn(data->min_dram_speed_change_margin[data->y_clk_level][s_mid3], bw_int_to_fixed(9999)) && bw_leq(data->dispclk_required_for_dram_speed_change[data->y_clk_level][s_mid3], vbios->mid_voltage_max_dispclk) && data->num_displays_with_margin[data->y_clk_level][s_mid3] == number_of_displays_enabled_with_margin))) {
+ sclk_message = bw_def_mid;
+ data->sclk_level = s_mid3;
+ data->required_sclk = vbios->mid3_sclk;
+ }
+ else if (bw_ltn(data->total_average_bandwidth_no_compression, bw_mul(bw_mul(bw_frc_to_fixed(dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation, 100),sclk[s_mid4]),vbios->data_return_bus_width))
+ && bw_ltn(data->required_sclk, sclk[s_mid4]) && (data->cpup_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_mid4], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_mid4], vbios->high_voltage_max_dispclk))) && (data->cpuc_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_mid4], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_mid4], vbios->high_voltage_max_dispclk) && bw_ltn(data->dispclk_required_for_blackout_recovery[data->y_clk_level][s_mid4], vbios->high_voltage_max_dispclk))) && (!data->increase_voltage_to_support_mclk_switch || data->nbp_state_change_enable == bw_def_no || (bw_mtn(data->min_dram_speed_change_margin[data->y_clk_level][s_mid4], bw_int_to_fixed(0)) && bw_ltn(data->min_dram_speed_change_margin[data->y_clk_level][s_mid4], bw_int_to_fixed(9999)) && bw_leq(data->dispclk_required_for_dram_speed_change[data->y_clk_level][s_mid4], vbios->mid_voltage_max_dispclk) && data->num_displays_with_margin[data->y_clk_level][s_mid4] == number_of_displays_enabled_with_margin))) {
+ sclk_message = bw_def_mid;
+ data->sclk_level = s_mid4;
+ data->required_sclk = vbios->mid4_sclk;
+ }
+ else if (bw_ltn(data->total_average_bandwidth_no_compression, bw_mul(bw_mul(bw_frc_to_fixed(dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation, 100),sclk[s_mid5]),vbios->data_return_bus_width))
+ && bw_ltn(data->required_sclk, sclk[s_mid5]) && (data->cpup_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_mid5], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_mid5], vbios->high_voltage_max_dispclk))) && (data->cpuc_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_mid5], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_mid5], vbios->high_voltage_max_dispclk) && bw_ltn(data->dispclk_required_for_blackout_recovery[data->y_clk_level][s_mid5], vbios->high_voltage_max_dispclk))) && (!data->increase_voltage_to_support_mclk_switch || data->nbp_state_change_enable == bw_def_no || (bw_mtn(data->min_dram_speed_change_margin[data->y_clk_level][s_mid5], bw_int_to_fixed(0)) && bw_ltn(data->min_dram_speed_change_margin[data->y_clk_level][s_mid5], bw_int_to_fixed(9999)) && bw_leq(data->dispclk_required_for_dram_speed_change[data->y_clk_level][s_mid5], vbios->mid_voltage_max_dispclk) && data->num_displays_with_margin[data->y_clk_level][s_mid5] == number_of_displays_enabled_with_margin))) {
+ sclk_message = bw_def_mid;
+ data->sclk_level = s_mid5;
+ data->required_sclk = vbios->mid5_sclk;
+ }
+ else if (bw_ltn(data->total_average_bandwidth_no_compression, bw_mul(bw_mul(bw_frc_to_fixed(dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation, 100),sclk[s_mid6]),vbios->data_return_bus_width))
+ && bw_ltn(data->required_sclk, sclk[s_mid6]) && (data->cpup_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_mid6], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_mid6], vbios->high_voltage_max_dispclk))) && (data->cpuc_state_change_enable == bw_def_no || (bw_mtn(data->blackout_duration_margin[data->y_clk_level][s_mid6], bw_int_to_fixed(0)) && bw_ltn(data->dispclk_required_for_blackout_duration[data->y_clk_level][s_mid6], vbios->high_voltage_max_dispclk) && bw_ltn(data->dispclk_required_for_blackout_recovery[data->y_clk_level][s_mid6], vbios->high_voltage_max_dispclk))) && (!data->increase_voltage_to_support_mclk_switch || data->nbp_state_change_enable == bw_def_no || (bw_mtn(data->min_dram_speed_change_margin[data->y_clk_level][s_mid6], bw_int_to_fixed(0)) && bw_ltn(data->min_dram_speed_change_margin[data->y_clk_level][s_mid6], bw_int_to_fixed(9999)) && bw_leq(data->dispclk_required_for_dram_speed_change[data->y_clk_level][s_mid6], vbios->high_voltage_max_dispclk) && data->num_displays_with_margin[data->y_clk_level][s_mid6] == number_of_displays_enabled_with_margin))) {
+ sclk_message = bw_def_mid;
+ data->sclk_level = s_mid6;
+ data->required_sclk = vbios->mid6_sclk;
+ }
+ else if (bw_ltn(data->total_average_bandwidth_no_compression, bw_mul(bw_mul(bw_frc_to_fixed(dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation, 100),sclk[s_high]),vbios->data_return_bus_width))
+ && bw_ltn(data->required_sclk, sclk[s_high])) {
+ sclk_message = bw_def_high;
+ data->sclk_level = s_high;
+ data->required_sclk = vbios->high_sclk;
+ }
+ else if (bw_meq(data->total_average_bandwidth_no_compression, bw_mul(bw_mul(bw_frc_to_fixed(dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation, 100),sclk[s_high]),vbios->data_return_bus_width))
+ && bw_ltn(data->required_sclk, sclk[s_high])) {
+ sclk_message = bw_def_high;
+ data->sclk_level = s_high;
+ data->required_sclk = vbios->high_sclk;
+ }
+ else {
+ sclk_message = bw_def_exceeded_allowed_maximum_sclk;
+ data->sclk_level = s_high;
+ /*required_sclk = high_sclk*/
+ }
+ }
+ /*dispclk*/
+ /*if dispclk is set to the maximum, ramping is not required. dispclk required without ramping is less than the dispclk required with ramping.*/
+ /*if dispclk required without ramping is more than the maximum dispclk, that is the dispclk required, and the mode is not supported*/
+ /*if that does not happen, but dispclk required with ramping is more than the maximum dispclk, dispclk required is just the maximum dispclk*/
+ /*if that does not happen either, dispclk required is the dispclk required with ramping.*/
+ /*dispclk required without ramping is the maximum of the one required for display pipe pixel throughput, for scaler throughput, for total read request thrrougput and for dram/np p-state change if enabled.*/
+ /*the display pipe pixel throughput is the maximum of lines in per line out in the beginning of the frame and lines in per line out in the middle of the frame multiplied by the horizontal blank and chunk granularity factor, altogether multiplied by the ratio of the source width to the line time, divided by the line buffer pixels per dispclk throughput, and multiplied by the display pipe throughput factor.*/
+ /*the horizontal blank and chunk granularity factor is the ratio of the line time divided by the line time minus half the horizontal blank and chunk time. it applies when the lines in per line out is not 2 or 4.*/
+ /*the dispclk required for scaler throughput is the product of the pixel rate and the scaling limits factor.*/
+ /*the dispclk required for total read request throughput is the product of the peak request-per-second bandwidth and the dispclk cycles per request, divided by the request efficiency.*/
+ /*for the dispclk required with ramping, instead of multiplying just the pipe throughput by the display pipe throughput factor, we multiply the scaler and pipe throughput by the ramping factor.*/
+ /*the scaling limits factor is the product of the horizontal scale ratio, and the ratio of the vertical taps divided by the scaler efficiency clamped to at least 1.*/
+ /*the scaling limits factor itself it also clamped to at least 1*/
+ /*if doing downscaling with the pre-downscaler enabled, the horizontal scale ratio should not be considered above (use "1")*/
+ data->downspread_factor = bw_add(bw_int_to_fixed(1), bw_div(vbios->down_spread_percentage, bw_int_to_fixed(100)));
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (surface_type[i] == bw_def_graphics) {
+ switch (data->lb_bpc[i]) {
+ case 6:
+ data->v_scaler_efficiency = dceip->graphics_vscaler_efficiency6_bit_per_component;
+ break;
+ case 8:
+ data->v_scaler_efficiency = dceip->graphics_vscaler_efficiency8_bit_per_component;
+ break;
+ case 10:
+ data->v_scaler_efficiency = dceip->graphics_vscaler_efficiency10_bit_per_component;
+ break;
+ default:
+ data->v_scaler_efficiency = dceip->graphics_vscaler_efficiency12_bit_per_component;
+ break;
+ }
+ if (data->use_alpha[i] == 1) {
+ data->v_scaler_efficiency = bw_min2(data->v_scaler_efficiency, dceip->alpha_vscaler_efficiency);
+ }
+ }
+ else {
+ switch (data->lb_bpc[i]) {
+ case 6:
+ data->v_scaler_efficiency = dceip->underlay_vscaler_efficiency6_bit_per_component;
+ break;
+ case 8:
+ data->v_scaler_efficiency = dceip->underlay_vscaler_efficiency8_bit_per_component;
+ break;
+ case 10:
+ data->v_scaler_efficiency = dceip->underlay_vscaler_efficiency10_bit_per_component;
+ break;
+ default:
+ data->v_scaler_efficiency = dceip->underlay_vscaler_efficiency12_bit_per_component;
+ break;
+ }
+ }
+ if (dceip->pre_downscaler_enabled && bw_mtn(data->hsr[i], bw_int_to_fixed(1))) {
+ data->scaler_limits_factor = bw_max2(bw_div(data->v_taps[i], data->v_scaler_efficiency), bw_div(data->source_width_rounded_up_to_chunks[i], data->h_total[i]));
+ }
+ else {
+ data->scaler_limits_factor = bw_max3(bw_int_to_fixed(1), bw_ceil2(bw_div(data->h_taps[i], bw_int_to_fixed(4)), bw_int_to_fixed(1)), bw_mul(data->hsr[i], bw_max2(bw_div(data->v_taps[i], data->v_scaler_efficiency), bw_int_to_fixed(1))));
+ }
+ data->display_pipe_pixel_throughput = bw_div(bw_div(bw_mul(bw_max2(data->lb_lines_in_per_line_out_in_beginning_of_frame[i], bw_mul(data->lb_lines_in_per_line_out_in_middle_of_frame[i], data->horizontal_blank_and_chunk_granularity_factor[i])), data->source_width_rounded_up_to_chunks[i]), (bw_div(data->h_total[i], data->pixel_rate[i]))), dceip->lb_write_pixels_per_dispclk);
+ data->dispclk_required_without_ramping[i] = bw_mul(data->downspread_factor, bw_max2(bw_mul(data->pixel_rate[i], data->scaler_limits_factor), bw_mul(dceip->display_pipe_throughput_factor, data->display_pipe_pixel_throughput)));
+ data->dispclk_required_with_ramping[i] = bw_mul(dceip->dispclk_ramping_factor, bw_max2(bw_mul(data->pixel_rate[i], data->scaler_limits_factor), data->display_pipe_pixel_throughput));
+ }
+ }
+ data->total_dispclk_required_with_ramping = bw_int_to_fixed(0);
+ data->total_dispclk_required_without_ramping = bw_int_to_fixed(0);
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (bw_ltn(data->total_dispclk_required_with_ramping, data->dispclk_required_with_ramping[i])) {
+ data->total_dispclk_required_with_ramping = data->dispclk_required_with_ramping[i];
+ }
+ if (bw_ltn(data->total_dispclk_required_without_ramping, data->dispclk_required_without_ramping[i])) {
+ data->total_dispclk_required_without_ramping = data->dispclk_required_without_ramping[i];
+ }
+ }
+ }
+ data->total_read_request_bandwidth = bw_int_to_fixed(0);
+ data->total_write_request_bandwidth = bw_int_to_fixed(0);
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (surface_type[i] != bw_def_display_write_back420_luma && surface_type[i] != bw_def_display_write_back420_chroma) {
+ data->total_read_request_bandwidth = bw_add(data->total_read_request_bandwidth, data->request_bandwidth[i]);
+ }
+ else {
+ data->total_write_request_bandwidth = bw_add(data->total_write_request_bandwidth, data->request_bandwidth[i]);
+ }
+ }
+ }
+ data->dispclk_required_for_total_read_request_bandwidth = bw_div(bw_mul(data->total_read_request_bandwidth, dceip->dispclk_per_request), dceip->request_efficiency);
+ data->total_dispclk_required_with_ramping_with_request_bandwidth = bw_max2(data->total_dispclk_required_with_ramping, data->dispclk_required_for_total_read_request_bandwidth);
+ data->total_dispclk_required_without_ramping_with_request_bandwidth = bw_max2(data->total_dispclk_required_without_ramping, data->dispclk_required_for_total_read_request_bandwidth);
+ if (data->cpuc_state_change_enable == bw_def_yes) {
+ data->total_dispclk_required_with_ramping_with_request_bandwidth = bw_max3(data->total_dispclk_required_with_ramping_with_request_bandwidth, data->dispclk_required_for_blackout_duration[data->y_clk_level][data->sclk_level], data->dispclk_required_for_blackout_recovery[data->y_clk_level][data->sclk_level]);
+ data->total_dispclk_required_without_ramping_with_request_bandwidth = bw_max3(data->total_dispclk_required_without_ramping_with_request_bandwidth, data->dispclk_required_for_blackout_duration[data->y_clk_level][data->sclk_level], data->dispclk_required_for_blackout_recovery[data->y_clk_level][data->sclk_level]);
+ }
+ if (data->cpup_state_change_enable == bw_def_yes) {
+ data->total_dispclk_required_with_ramping_with_request_bandwidth = bw_max2(data->total_dispclk_required_with_ramping_with_request_bandwidth, data->dispclk_required_for_blackout_duration[data->y_clk_level][data->sclk_level]);
+ data->total_dispclk_required_without_ramping_with_request_bandwidth = bw_max2(data->total_dispclk_required_without_ramping_with_request_bandwidth, data->dispclk_required_for_blackout_duration[data->y_clk_level][data->sclk_level]);
+ }
+ if (data->nbp_state_change_enable == bw_def_yes && data->increase_voltage_to_support_mclk_switch) {
+ data->total_dispclk_required_with_ramping_with_request_bandwidth = bw_max2(data->total_dispclk_required_with_ramping_with_request_bandwidth, data->dispclk_required_for_dram_speed_change[data->y_clk_level][data->sclk_level]);
+ data->total_dispclk_required_without_ramping_with_request_bandwidth = bw_max2(data->total_dispclk_required_without_ramping_with_request_bandwidth, data->dispclk_required_for_dram_speed_change[data->y_clk_level][data->sclk_level]);
+ }
+ if (bw_ltn(data->total_dispclk_required_with_ramping_with_request_bandwidth, vbios->high_voltage_max_dispclk)) {
+ data->dispclk = data->total_dispclk_required_with_ramping_with_request_bandwidth;
+ }
+ else if (bw_ltn(data->total_dispclk_required_without_ramping_with_request_bandwidth, vbios->high_voltage_max_dispclk)) {
+ data->dispclk = vbios->high_voltage_max_dispclk;
+ }
+ else {
+ data->dispclk = data->total_dispclk_required_without_ramping_with_request_bandwidth;
+ }
+ /* required core voltage*/
+ /* the core voltage required is low if sclk, yclk(pclk)and dispclk are within the low limits*/
+ /* otherwise, the core voltage required is medium if yclk (pclk) is within the low limit and sclk and dispclk are within the medium limit*/
+ /* otherwise, the core voltage required is high if the three clocks are within the high limits*/
+ /* otherwise, or if the mode is not supported, core voltage requirement is not applicable*/
+ if (pipe_check == bw_def_notok) {
+ voltage = bw_def_na;
+ }
+ else if (mode_check == bw_def_notok) {
+ voltage = bw_def_notok;
+ }
+ else if (bw_equ(bw_int_to_fixed(yclk_message), vbios->low_yclk) && sclk_message == bw_def_low && bw_ltn(data->dispclk, vbios->low_voltage_max_dispclk)) {
+ voltage = bw_def_0_72;
+ }
+ else if ((bw_equ(bw_int_to_fixed(yclk_message), vbios->low_yclk) || bw_equ(bw_int_to_fixed(yclk_message), vbios->mid_yclk)) && (sclk_message == bw_def_low || sclk_message == bw_def_mid) && bw_ltn(data->dispclk, vbios->mid_voltage_max_dispclk)) {
+ voltage = bw_def_0_8;
+ }
+ else if ((bw_equ(bw_int_to_fixed(yclk_message), vbios->low_yclk) || bw_equ(bw_int_to_fixed(yclk_message), vbios->mid_yclk) || bw_equ(bw_int_to_fixed(yclk_message), vbios->high_yclk)) && (sclk_message == bw_def_low || sclk_message == bw_def_mid || sclk_message == bw_def_high) && bw_leq(data->dispclk, vbios->high_voltage_max_dispclk)) {
+ if ((data->nbp_state_change_enable == bw_def_no && nbp_state_change_enable_blank == bw_def_no)) {
+ voltage = bw_def_high_no_nbp_state_change;
+ }
+ else {
+ voltage = bw_def_0_9;
+ }
+ }
+ else {
+ voltage = bw_def_notok;
+ }
+ if (voltage == bw_def_0_72) {
+ data->max_phyclk = vbios->low_voltage_max_phyclk;
+ }
+ else if (voltage == bw_def_0_8) {
+ data->max_phyclk = vbios->mid_voltage_max_phyclk;
+ }
+ else {
+ data->max_phyclk = vbios->high_voltage_max_phyclk;
+ }
+ /*required blackout recovery time*/
+ data->blackout_recovery_time = bw_int_to_fixed(0);
+ for (k = 0; k <= maximum_number_of_surfaces - 1; k++) {
+ if (data->enable[k] && bw_mtn(vbios->blackout_duration, bw_int_to_fixed(0)) && data->cpup_state_change_enable == bw_def_yes) {
+ if (surface_type[k] != bw_def_display_write_back420_luma && surface_type[k] != bw_def_display_write_back420_chroma) {
+ data->blackout_recovery_time = bw_max2(data->blackout_recovery_time, bw_add(bw_mul(bw_int_to_fixed(2), data->total_dmifmc_urgent_latency), data->dmif_burst_time[data->y_clk_level][data->sclk_level]));
+ if (bw_ltn(data->adjusted_data_buffer_size[k], bw_mul(bw_div(bw_mul(data->display_bandwidth[k], data->useful_bytes_per_request[k]), data->bytes_per_request[k]), (bw_add(vbios->blackout_duration, bw_add(bw_mul(bw_int_to_fixed(2), data->total_dmifmc_urgent_latency), data->dmif_burst_time[data->y_clk_level][data->sclk_level])))))) {
+ data->blackout_recovery_time = bw_max2(data->blackout_recovery_time, bw_div((bw_add(bw_mul(bw_div(bw_mul(data->display_bandwidth[k], data->useful_bytes_per_request[k]), data->bytes_per_request[k]), vbios->blackout_duration), bw_sub(bw_div(bw_mul(bw_mul(bw_mul((bw_add(bw_mul(bw_int_to_fixed(2), data->total_dmifmc_urgent_latency), data->dmif_burst_time[data->y_clk_level][data->sclk_level])), data->dispclk), bw_int_to_fixed(data->bytes_per_pixel[k])), data->lines_interleaved_in_mem_access[k]), data->latency_hiding_lines[k]), data->adjusted_data_buffer_size[k]))), (bw_sub(bw_div(bw_mul(bw_mul(data->dispclk, bw_int_to_fixed(data->bytes_per_pixel[k])), data->lines_interleaved_in_mem_access[k]), data->latency_hiding_lines[k]), bw_div(bw_mul(data->display_bandwidth[k], data->useful_bytes_per_request[k]), data->bytes_per_request[k])))));
+ }
+ }
+ else {
+ data->blackout_recovery_time = bw_max2(data->blackout_recovery_time, bw_add(bw_mul(bw_int_to_fixed(2), vbios->mcifwrmc_urgent_latency), data->mcifwr_burst_time[data->y_clk_level][data->sclk_level]));
+ if (bw_ltn(data->adjusted_data_buffer_size[k], bw_mul(bw_div(bw_mul(data->display_bandwidth[k], data->useful_bytes_per_request[k]), data->bytes_per_request[k]), (bw_add(vbios->blackout_duration, bw_add(bw_mul(bw_int_to_fixed(2), vbios->mcifwrmc_urgent_latency), data->mcifwr_burst_time[data->y_clk_level][data->sclk_level])))))) {
+ data->blackout_recovery_time = bw_max2(data->blackout_recovery_time, bw_div((bw_add(bw_mul(bw_div(bw_mul(data->display_bandwidth[k], data->useful_bytes_per_request[k]), data->bytes_per_request[k]), vbios->blackout_duration), bw_sub(bw_div(bw_mul(bw_mul(bw_mul((bw_add(bw_add(bw_mul(bw_int_to_fixed(2), vbios->mcifwrmc_urgent_latency), data->dmif_burst_time[data->y_clk_level][data->sclk_level]), data->mcifwr_burst_time[data->y_clk_level][data->sclk_level])), data->dispclk), bw_int_to_fixed(data->bytes_per_pixel[k])), data->lines_interleaved_in_mem_access[k]), data->latency_hiding_lines[k]), data->adjusted_data_buffer_size[k]))), (bw_sub(bw_div(bw_mul(bw_mul(data->dispclk, bw_int_to_fixed(data->bytes_per_pixel[k])), data->lines_interleaved_in_mem_access[k]), data->latency_hiding_lines[k]), bw_div(bw_mul(data->display_bandwidth[k], data->useful_bytes_per_request[k]), data->bytes_per_request[k])))));
+ }
+ }
+ }
+ }
+ /*sclk deep sleep*/
+ /*during self-refresh, sclk can be reduced to dispclk divided by the minimum pixels in the data fifo entry, with 15% margin, but shoudl not be set to less than the request bandwidth.*/
+ /*the data fifo entry is 16 pixels for the writeback, 64 bytes/bytes_per_pixel for the graphics, 16 pixels for the parallel rotation underlay,*/
+ /*and 16 bytes/bytes_per_pixel for the orthogonal rotation underlay.*/
+ /*in parallel mode (underlay pipe), the data read from the dmifv buffer is variable and based on the pixel depth (8bbp - 16 bytes, 16 bpp - 32 bytes, 32 bpp - 64 bytes)*/
+ /*in orthogonal mode (underlay pipe), the data read from the dmifv buffer is fixed at 16 bytes.*/
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (surface_type[i] == bw_def_display_write_back420_luma || surface_type[i] == bw_def_display_write_back420_chroma) {
+ data->pixels_per_data_fifo_entry[i] = bw_int_to_fixed(16);
+ }
+ else if (surface_type[i] == bw_def_graphics) {
+ data->pixels_per_data_fifo_entry[i] = bw_div(bw_int_to_fixed(64), bw_int_to_fixed(data->bytes_per_pixel[i]));
+ }
+ else if (data->orthogonal_rotation[i] == 0) {
+ data->pixels_per_data_fifo_entry[i] = bw_int_to_fixed(16);
+ }
+ else {
+ data->pixels_per_data_fifo_entry[i] = bw_div(bw_int_to_fixed(16), bw_int_to_fixed(data->bytes_per_pixel[i]));
+ }
+ }
+ }
+ data->min_pixels_per_data_fifo_entry = bw_int_to_fixed(9999);
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (bw_mtn(data->min_pixels_per_data_fifo_entry, data->pixels_per_data_fifo_entry[i])) {
+ data->min_pixels_per_data_fifo_entry = data->pixels_per_data_fifo_entry[i];
+ }
+ }
+ }
+ data->sclk_deep_sleep = bw_max2(bw_div(bw_mul(data->dispclk, bw_frc_to_fixed(115, 100)), data->min_pixels_per_data_fifo_entry), data->total_read_request_bandwidth);
+ /*urgent, stutter and nb-p_state watermark*/
+ /*the urgent watermark is the maximum of the urgent trip time plus the pixel transfer time, the urgent trip times to get data for the first pixel, and the urgent trip times to get data for the last pixel.*/
+ /*the stutter exit watermark is the self refresh exit time plus the maximum of the data burst time plus the pixel transfer time, the data burst times to get data for the first pixel, and the data burst times to get data for the last pixel. it does not apply to the writeback.*/
+ /*the nb p-state change watermark is the dram speed/p-state change time plus the maximum of the data burst time plus the pixel transfer time, the data burst times to get data for the first pixel, and the data burst times to get data for the last pixel.*/
+ /*the pixel transfer time is the maximum of the time to transfer the source pixels required for the first output pixel, and the time to transfer the pixels for the last output pixel minus the active line time.*/
+ /*blackout_duration is added to the urgent watermark*/
+ data->chunk_request_time = bw_int_to_fixed(0);
+ data->cursor_request_time = bw_int_to_fixed(0);
+ /*compute total time to request one chunk from each active display pipe*/
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ data->chunk_request_time = bw_add(data->chunk_request_time, (bw_div((bw_div(bw_int_to_fixed(pixels_per_chunk * data->bytes_per_pixel[i]), data->useful_bytes_per_request[i])), bw_min2(sclk[data->sclk_level], bw_div(data->dispclk, bw_int_to_fixed(2))))));
+ }
+ }
+ /*compute total time to request cursor data*/
+ data->cursor_request_time = (bw_div(data->cursor_total_data, (bw_mul(bw_int_to_fixed(32), sclk[data->sclk_level]))));
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ data->line_source_pixels_transfer_time = bw_max2(bw_div(bw_div(data->src_pixels_for_first_output_pixel[i], dceip->lb_write_pixels_per_dispclk), (bw_div(data->dispclk, dceip->display_pipe_throughput_factor))), bw_sub(bw_div(bw_div(data->src_pixels_for_last_output_pixel[i], dceip->lb_write_pixels_per_dispclk), (bw_div(data->dispclk, dceip->display_pipe_throughput_factor))), data->active_time[i]));
+ if (surface_type[i] != bw_def_display_write_back420_luma && surface_type[i] != bw_def_display_write_back420_chroma) {
+ data->urgent_watermark[i] = bw_add(bw_add(bw_add(bw_add(bw_add(data->total_dmifmc_urgent_latency, data->dmif_burst_time[data->y_clk_level][data->sclk_level]), bw_max2(data->line_source_pixels_transfer_time, data->line_source_transfer_time[i][data->y_clk_level][data->sclk_level])), vbios->blackout_duration), data->chunk_request_time), data->cursor_request_time);
+ data->stutter_exit_watermark[i] = bw_add(bw_sub(vbios->stutter_self_refresh_exit_latency, data->total_dmifmc_urgent_latency), data->urgent_watermark[i]);
+ data->stutter_entry_watermark[i] = bw_add(bw_sub(bw_add(vbios->stutter_self_refresh_exit_latency, vbios->stutter_self_refresh_entry_latency), data->total_dmifmc_urgent_latency), data->urgent_watermark[i]);
+ /*unconditionally remove black out time from the nb p_state watermark*/
+ if (data->display_pstate_change_enable[i] == 1) {
+ data->nbp_state_change_watermark[i] = bw_add(bw_add(vbios->nbp_state_change_latency, data->dmif_burst_time[data->y_clk_level][data->sclk_level]), bw_max2(data->line_source_pixels_transfer_time, data->dram_speed_change_line_source_transfer_time[i][data->y_clk_level][data->sclk_level]));
+ }
+ else {
+ /*maximize the watermark to force the switch in the vb_lank region of the frame*/
+ data->nbp_state_change_watermark[i] = bw_int_to_fixed(131000);
+ }
+ }
+ else {
+ data->urgent_watermark[i] = bw_add(bw_add(bw_add(bw_add(bw_add(vbios->mcifwrmc_urgent_latency, data->mcifwr_burst_time[data->y_clk_level][data->sclk_level]), bw_max2(data->line_source_pixels_transfer_time, data->line_source_transfer_time[i][data->y_clk_level][data->sclk_level])), vbios->blackout_duration), data->chunk_request_time), data->cursor_request_time);
+ data->stutter_exit_watermark[i] = bw_int_to_fixed(0);
+ data->stutter_entry_watermark[i] = bw_int_to_fixed(0);
+ if (data->display_pstate_change_enable[i] == 1) {
+ data->nbp_state_change_watermark[i] = bw_add(bw_add(vbios->nbp_state_change_latency, data->mcifwr_burst_time[data->y_clk_level][data->sclk_level]), bw_max2(data->line_source_pixels_transfer_time, data->dram_speed_change_line_source_transfer_time[i][data->y_clk_level][data->sclk_level]));
+ }
+ else {
+ /*maximize the watermark to force the switch in the vb_lank region of the frame*/
+ data->nbp_state_change_watermark[i] = bw_int_to_fixed(131000);
+ }
+ }
+ }
+ }
+ /*stutter mode enable*/
+ /*in the multi-display case the stutter exit or entry watermark cannot exceed the minimum latency hiding capabilities of the*/
+ /*display pipe.*/
+ data->stutter_mode_enable = data->cpuc_state_change_enable;
+ if (data->number_of_displays > 1) {
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if ((bw_mtn(data->stutter_exit_watermark[i], data->minimum_latency_hiding[i]) || bw_mtn(data->stutter_entry_watermark[i], data->minimum_latency_hiding[i]))) {
+ data->stutter_mode_enable = bw_def_no;
+ }
+ }
+ }
+ }
+ /*performance metrics*/
+ /* display read access efficiency (%)*/
+ /* display write back access efficiency (%)*/
+ /* stutter efficiency (%)*/
+ /* extra underlay pitch recommended for efficiency (pixels)*/
+ /* immediate flip time (us)*/
+ /* latency for other clients due to urgent display read (us)*/
+ /* latency for other clients due to urgent display write (us)*/
+ /* average bandwidth consumed by display (no compression) (gb/s)*/
+ /* required dram bandwidth (gb/s)*/
+ /* required sclk (m_hz)*/
+ /* required rd urgent latency (us)*/
+ /* nb p-state change margin (us)*/
+ /*dmif and mcifwr dram access efficiency*/
+ /*is the ratio between the ideal dram access time (which is the data buffer size in memory divided by the dram bandwidth), and the actual time which is the total page close-open time. but it cannot exceed the dram efficiency provided by the memory subsystem*/
+ data->dmifdram_access_efficiency = bw_min2(bw_div(bw_div(data->total_display_reads_required_dram_access_data, data->dram_bandwidth), data->dmif_total_page_close_open_time), bw_int_to_fixed(1));
+ if (bw_mtn(data->total_display_writes_required_dram_access_data, bw_int_to_fixed(0))) {
+ data->mcifwrdram_access_efficiency = bw_min2(bw_div(bw_div(data->total_display_writes_required_dram_access_data, data->dram_bandwidth), data->mcifwr_total_page_close_open_time), bw_int_to_fixed(1));
+ }
+ else {
+ data->mcifwrdram_access_efficiency = bw_int_to_fixed(0);
+ }
+ /*stutter efficiency*/
+ /*the stutter efficiency is the frame-average time in self-refresh divided by the frame-average stutter cycle duration. only applies if the display write-back is not enabled.*/
+ /*the frame-average stutter cycle used is the minimum for all pipes of the frame-average data buffer size in time, times the compression rate*/
+ /*the frame-average time in self-refresh is the stutter cycle minus the self refresh exit latency and the burst time*/
+ /*the stutter cycle is the dmif buffer size reduced by the excess of the stutter exit watermark over the lb size in time.*/
+ /*the burst time is the data needed during the stutter cycle divided by the available bandwidth*/
+ /*compute the time read all the data from the dmif buffer to the lb (dram refresh period)*/
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ data->stutter_refresh_duration[i] = bw_sub(bw_mul(bw_div(bw_div(bw_mul(bw_div(bw_div(data->adjusted_data_buffer_size[i], bw_int_to_fixed(data->bytes_per_pixel[i])), data->source_width_rounded_up_to_chunks[i]), data->h_total[i]), data->vsr[i]), data->pixel_rate[i]), data->compression_rate[i]), bw_max2(bw_int_to_fixed(0), bw_sub(data->stutter_exit_watermark[i], bw_div(bw_mul((bw_sub(data->lb_partitions[i], bw_int_to_fixed(1))), data->h_total[i]), data->pixel_rate[i]))));
+ data->stutter_dmif_buffer_size[i] = bw_div(bw_mul(bw_mul(bw_div(bw_mul(bw_mul(data->stutter_refresh_duration[i], bw_int_to_fixed(data->bytes_per_pixel[i])), data->source_width_rounded_up_to_chunks[i]), data->h_total[i]), data->vsr[i]), data->pixel_rate[i]), data->compression_rate[i]);
+ }
+ }
+ data->min_stutter_refresh_duration = bw_int_to_fixed(9999);
+ data->total_stutter_dmif_buffer_size = 0;
+ data->total_bytes_requested = 0;
+ data->min_stutter_dmif_buffer_size = 9999;
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ if (bw_mtn(data->min_stutter_refresh_duration, data->stutter_refresh_duration[i])) {
+ data->min_stutter_refresh_duration = data->stutter_refresh_duration[i];
+ data->total_bytes_requested = bw_fixed_to_int(bw_add(bw_int_to_fixed(data->total_bytes_requested), (bw_mul(bw_mul(data->source_height_rounded_up_to_chunks[i], data->source_width_rounded_up_to_chunks[i]), bw_int_to_fixed(data->bytes_per_pixel[i])))));
+ data->min_stutter_dmif_buffer_size = bw_fixed_to_int(data->stutter_dmif_buffer_size[i]);
+ }
+ data->total_stutter_dmif_buffer_size = bw_fixed_to_int(bw_add(data->stutter_dmif_buffer_size[i], bw_int_to_fixed(data->total_stutter_dmif_buffer_size)));
+ }
+ }
+ data->stutter_burst_time = bw_div(bw_int_to_fixed(data->total_stutter_dmif_buffer_size), bw_mul(sclk[data->sclk_level], vbios->data_return_bus_width));
+ data->num_stutter_bursts = data->total_bytes_requested / data->min_stutter_dmif_buffer_size;
+ data->total_stutter_cycle_duration = bw_add(bw_add(data->min_stutter_refresh_duration, vbios->stutter_self_refresh_exit_latency), data->stutter_burst_time);
+ data->time_in_self_refresh = data->min_stutter_refresh_duration;
+ if (data->d1_display_write_back_dwb_enable == 1) {
+ data->stutter_efficiency = bw_int_to_fixed(0);
+ }
+ else if (bw_ltn(data->time_in_self_refresh, bw_int_to_fixed(0))) {
+ data->stutter_efficiency = bw_int_to_fixed(0);
+ }
+ else {
+ /*compute stutter efficiency assuming 60 hz refresh rate*/
+ data->stutter_efficiency = bw_max2(bw_int_to_fixed(0), bw_mul((bw_sub(bw_int_to_fixed(1), (bw_div(bw_mul((bw_add(vbios->stutter_self_refresh_exit_latency, data->stutter_burst_time)), bw_int_to_fixed(data->num_stutter_bursts)), bw_frc_to_fixed(166666667, 10000))))), bw_int_to_fixed(100)));
+ }
+ /*immediate flip time*/
+ /*if scatter gather is enabled, the immediate flip takes a number of urgent memory trips equivalent to the pte requests in a row divided by the pte request limit.*/
+ /*otherwise, it may take just one urgenr memory trip*/
+ data->worst_number_of_trips_to_memory = bw_int_to_fixed(1);
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i] && data->scatter_gather_enable_for_pipe[i] == 1) {
+ data->number_of_trips_to_memory_for_getting_apte_row[i] = bw_ceil2(bw_div(data->scatter_gather_pte_requests_in_row[i], data->scatter_gather_pte_request_limit[i]), bw_int_to_fixed(1));
+ if (bw_ltn(data->worst_number_of_trips_to_memory, data->number_of_trips_to_memory_for_getting_apte_row[i])) {
+ data->worst_number_of_trips_to_memory = data->number_of_trips_to_memory_for_getting_apte_row[i];
+ }
+ }
+ }
+ data->immediate_flip_time = bw_mul(data->worst_number_of_trips_to_memory, data->total_dmifmc_urgent_latency);
+ /*worst latency for other clients*/
+ /*it is the urgent latency plus the urgent burst time*/
+ data->latency_for_non_dmif_clients = bw_add(data->total_dmifmc_urgent_latency, data->dmif_burst_time[data->y_clk_level][data->sclk_level]);
+ if (data->d1_display_write_back_dwb_enable == 1) {
+ data->latency_for_non_mcifwr_clients = bw_add(vbios->mcifwrmc_urgent_latency, dceip->mcifwr_all_surfaces_burst_time);
+ }
+ else {
+ data->latency_for_non_mcifwr_clients = bw_int_to_fixed(0);
+ }
+ /*dmif mc urgent latency supported in high sclk and yclk*/
+ data->dmifmc_urgent_latency_supported_in_high_sclk_and_yclk = bw_div((bw_sub(data->min_read_buffer_size_in_time, data->dmif_burst_time[high][s_high])), data->total_dmifmc_urgent_trips);
+ /*dram speed/p-state change margin*/
+ /*in the multi-display case the nb p-state change watermark cannot exceed the average lb size plus the dmif size or the cursor dcp buffer size*/
+ data->v_blank_nbp_state_dram_speed_change_latency_supported = bw_int_to_fixed(99999);
+ data->nbp_state_dram_speed_change_latency_supported = bw_int_to_fixed(99999);
+ for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
+ if (data->enable[i]) {
+ data->nbp_state_dram_speed_change_latency_supported = bw_min2(data->nbp_state_dram_speed_change_latency_supported, bw_add(bw_sub(data->maximum_latency_hiding_with_cursor[i], data->nbp_state_change_watermark[i]), vbios->nbp_state_change_latency));
+ data->v_blank_nbp_state_dram_speed_change_latency_supported = bw_min2(data->v_blank_nbp_state_dram_speed_change_latency_supported, bw_add(bw_sub(bw_div(bw_mul((bw_sub(data->v_total[i], bw_sub(bw_div(data->src_height[i], data->v_scale_ratio[i]), bw_int_to_fixed(4)))), data->h_total[i]), data->pixel_rate[i]), data->nbp_state_change_watermark[i]), vbios->nbp_state_change_latency));
+ }
+ }
+ /*sclk required vs urgent latency*/
+ for (i = 1; i <= 5; i++) {
+ data->display_reads_time_for_data_transfer_and_urgent_latency = bw_sub(data->min_read_buffer_size_in_time, bw_mul(data->total_dmifmc_urgent_trips, bw_int_to_fixed(i)));
+ if (pipe_check == bw_def_ok && (bw_mtn(data->display_reads_time_for_data_transfer_and_urgent_latency, data->dmif_total_page_close_open_time))) {
+ data->dmif_required_sclk_for_urgent_latency[i] = bw_div(bw_div(data->total_display_reads_required_data, data->display_reads_time_for_data_transfer_and_urgent_latency), (bw_mul(vbios->data_return_bus_width, bw_frc_to_fixed(dceip->percent_of_ideal_port_bw_received_after_urgent_latency, 100))));
+ }
+ else {
+ data->dmif_required_sclk_for_urgent_latency[i] = bw_int_to_fixed(bw_def_na);
+ }
+ }
+ /*output link bit per pixel supported*/
+ for (k = 0; k <= maximum_number_of_surfaces - 1; k++) {
+ data->output_bpphdmi[k] = bw_def_na;
+ data->output_bppdp4_lane_hbr[k] = bw_def_na;
+ data->output_bppdp4_lane_hbr2[k] = bw_def_na;
+ data->output_bppdp4_lane_hbr3[k] = bw_def_na;
+ if (data->enable[k]) {
+ data->output_bpphdmi[k] = bw_fixed_to_int(bw_mul(bw_div(bw_min2(bw_int_to_fixed(600), data->max_phyclk), data->pixel_rate[k]), bw_int_to_fixed(24)));
+ if (bw_meq(data->max_phyclk, bw_int_to_fixed(270))) {
+ data->output_bppdp4_lane_hbr[k] = bw_fixed_to_int(bw_mul(bw_div(bw_mul(bw_int_to_fixed(270), bw_int_to_fixed(4)), data->pixel_rate[k]), bw_int_to_fixed(8)));
+ }
+ if (bw_meq(data->max_phyclk, bw_int_to_fixed(540))) {
+ data->output_bppdp4_lane_hbr2[k] = bw_fixed_to_int(bw_mul(bw_div(bw_mul(bw_int_to_fixed(540), bw_int_to_fixed(4)), data->pixel_rate[k]), bw_int_to_fixed(8)));
+ }
+ if (bw_meq(data->max_phyclk, bw_int_to_fixed(810))) {
+ data->output_bppdp4_lane_hbr3[k] = bw_fixed_to_int(bw_mul(bw_div(bw_mul(bw_int_to_fixed(810), bw_int_to_fixed(4)), data->pixel_rate[k]), bw_int_to_fixed(8)));
+ }
+ }
+ }
+
+ kfree(surface_type);
+free_tiling_mode:
+ kfree(tiling_mode);
+free_sclk:
+ kfree(sclk);
+free_yclk:
+ kfree(yclk);
+}
+
+/*******************************************************************************
+ * Public functions
+ ******************************************************************************/
+void bw_calcs_init(struct bw_calcs_dceip *bw_dceip,
+ struct bw_calcs_vbios *bw_vbios,
+ struct hw_asic_id asic_id)
+{
+ struct bw_calcs_dceip *dceip;
+ struct bw_calcs_vbios *vbios;
+
+ enum bw_calcs_version version = bw_calcs_version_from_asic_id(asic_id);
+
+ dceip = kzalloc(sizeof(*dceip), GFP_KERNEL);
+ if (!dceip)
+ return;
+
+ vbios = kzalloc(sizeof(*vbios), GFP_KERNEL);
+ if (!vbios) {
+ kfree(dceip);
+ return;
+ }
+
+ dceip->version = version;
+
+ switch (version) {
+ case BW_CALCS_VERSION_CARRIZO:
+ vbios->memory_type = bw_def_gddr5;
+ vbios->dram_channel_width_in_bits = 64;
+ vbios->number_of_dram_channels = asic_id.vram_width / vbios->dram_channel_width_in_bits;
+ vbios->number_of_dram_banks = 8;
+ vbios->high_yclk = bw_int_to_fixed(1600);
+ vbios->mid_yclk = bw_int_to_fixed(1600);
+ vbios->low_yclk = bw_frc_to_fixed(66666, 100);
+ vbios->low_sclk = bw_int_to_fixed(200);
+ vbios->mid1_sclk = bw_int_to_fixed(300);
+ vbios->mid2_sclk = bw_int_to_fixed(300);
+ vbios->mid3_sclk = bw_int_to_fixed(300);
+ vbios->mid4_sclk = bw_int_to_fixed(300);
+ vbios->mid5_sclk = bw_int_to_fixed(300);
+ vbios->mid6_sclk = bw_int_to_fixed(300);
+ vbios->high_sclk = bw_frc_to_fixed(62609, 100);
+ vbios->low_voltage_max_dispclk = bw_int_to_fixed(352);
+ vbios->mid_voltage_max_dispclk = bw_int_to_fixed(467);
+ vbios->high_voltage_max_dispclk = bw_int_to_fixed(643);
+ vbios->low_voltage_max_phyclk = bw_int_to_fixed(540);
+ vbios->mid_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios->high_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios->data_return_bus_width = bw_int_to_fixed(32);
+ vbios->trc = bw_int_to_fixed(50);
+ vbios->dmifmc_urgent_latency = bw_int_to_fixed(4);
+ vbios->stutter_self_refresh_exit_latency = bw_frc_to_fixed(153, 10);
+ vbios->stutter_self_refresh_entry_latency = bw_int_to_fixed(0);
+ vbios->nbp_state_change_latency = bw_frc_to_fixed(19649, 1000);
+ vbios->mcifwrmc_urgent_latency = bw_int_to_fixed(10);
+ vbios->scatter_gather_enable = true;
+ vbios->down_spread_percentage = bw_frc_to_fixed(5, 10);
+ vbios->cursor_width = 32;
+ vbios->average_compression_rate = 4;
+ vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel = 256;
+ vbios->blackout_duration = bw_int_to_fixed(0); /* us */
+ vbios->maximum_blackout_recovery_time = bw_int_to_fixed(0);
+
+ dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation = 100;
+ dceip->max_average_percent_of_ideal_drambw_display_can_use_in_normal_system_operation = 100;
+ dceip->percent_of_ideal_port_bw_received_after_urgent_latency = 100;
+ dceip->large_cursor = false;
+ dceip->dmif_request_buffer_size = bw_int_to_fixed(768);
+ dceip->dmif_pipe_en_fbc_chunk_tracker = false;
+ dceip->cursor_max_outstanding_group_num = 1;
+ dceip->lines_interleaved_into_lb = 2;
+ dceip->chunk_width = 256;
+ dceip->number_of_graphics_pipes = 3;
+ dceip->number_of_underlay_pipes = 1;
+ dceip->low_power_tiling_mode = 0;
+ dceip->display_write_back_supported = false;
+ dceip->argb_compression_support = false;
+ dceip->underlay_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35556, 10000);
+ dceip->underlay_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip->underlay_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip->underlay_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip->graphics_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35, 10);
+ dceip->graphics_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip->graphics_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip->graphics_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip->alpha_vscaler_efficiency = bw_int_to_fixed(3);
+ dceip->max_dmif_buffer_allocated = 2;
+ dceip->graphics_dmif_size = 12288;
+ dceip->underlay_luma_dmif_size = 19456;
+ dceip->underlay_chroma_dmif_size = 23552;
+ dceip->pre_downscaler_enabled = true;
+ dceip->underlay_downscale_prefetch_enabled = true;
+ dceip->lb_write_pixels_per_dispclk = bw_int_to_fixed(1);
+ dceip->lb_size_per_component444 = bw_int_to_fixed(82176);
+ dceip->graphics_lb_nodownscaling_multi_line_prefetching = false;
+ dceip->stutter_and_dram_clock_state_change_gated_before_cursor =
+ bw_int_to_fixed(0);
+ dceip->underlay420_luma_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip->underlay420_chroma_lb_size_per_component =
+ bw_int_to_fixed(164352);
+ dceip->underlay422_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip->cursor_chunk_width = bw_int_to_fixed(64);
+ dceip->cursor_dcp_buffer_lines = bw_int_to_fixed(4);
+ dceip->underlay_maximum_width_efficient_for_tiling =
+ bw_int_to_fixed(1920);
+ dceip->underlay_maximum_height_efficient_for_tiling =
+ bw_int_to_fixed(1080);
+ dceip->peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display =
+ bw_frc_to_fixed(3, 10);
+ dceip->peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation =
+ bw_int_to_fixed(25);
+ dceip->minimum_outstanding_pte_request_limit = bw_int_to_fixed(
+ 2);
+ dceip->maximum_total_outstanding_pte_requests_allowed_by_saw =
+ bw_int_to_fixed(128);
+ dceip->limit_excessive_outstanding_dmif_requests = true;
+ dceip->linear_mode_line_request_alternation_slice =
+ bw_int_to_fixed(64);
+ dceip->scatter_gather_lines_of_pte_prefetching_in_linear_mode =
+ 32;
+ dceip->display_write_back420_luma_mcifwr_buffer_size = 12288;
+ dceip->display_write_back420_chroma_mcifwr_buffer_size = 8192;
+ dceip->request_efficiency = bw_frc_to_fixed(8, 10);
+ dceip->dispclk_per_request = bw_int_to_fixed(2);
+ dceip->dispclk_ramping_factor = bw_frc_to_fixed(105, 100);
+ dceip->display_pipe_throughput_factor = bw_frc_to_fixed(105, 100);
+ dceip->scatter_gather_pte_request_rows_in_tiling_mode = 2;
+ dceip->mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0); /* todo: this is a bug*/
+ break;
+ case BW_CALCS_VERSION_POLARIS10:
+ /* TODO: Treat VEGAM the same as P10 for now
+ * Need to tune the para for VEGAM if needed */
+ case BW_CALCS_VERSION_VEGAM:
+ vbios->memory_type = bw_def_gddr5;
+ vbios->dram_channel_width_in_bits = 32;
+ vbios->number_of_dram_channels = asic_id.vram_width / vbios->dram_channel_width_in_bits;
+ vbios->number_of_dram_banks = 8;
+ vbios->high_yclk = bw_int_to_fixed(6000);
+ vbios->mid_yclk = bw_int_to_fixed(3200);
+ vbios->low_yclk = bw_int_to_fixed(1000);
+ vbios->low_sclk = bw_int_to_fixed(300);
+ vbios->mid1_sclk = bw_int_to_fixed(400);
+ vbios->mid2_sclk = bw_int_to_fixed(500);
+ vbios->mid3_sclk = bw_int_to_fixed(600);
+ vbios->mid4_sclk = bw_int_to_fixed(700);
+ vbios->mid5_sclk = bw_int_to_fixed(800);
+ vbios->mid6_sclk = bw_int_to_fixed(974);
+ vbios->high_sclk = bw_int_to_fixed(1154);
+ vbios->low_voltage_max_dispclk = bw_int_to_fixed(459);
+ vbios->mid_voltage_max_dispclk = bw_int_to_fixed(654);
+ vbios->high_voltage_max_dispclk = bw_int_to_fixed(1108);
+ vbios->low_voltage_max_phyclk = bw_int_to_fixed(540);
+ vbios->mid_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios->high_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios->data_return_bus_width = bw_int_to_fixed(32);
+ vbios->trc = bw_int_to_fixed(48);
+ vbios->dmifmc_urgent_latency = bw_int_to_fixed(3);
+ vbios->stutter_self_refresh_exit_latency = bw_int_to_fixed(5);
+ vbios->stutter_self_refresh_entry_latency = bw_int_to_fixed(0);
+ vbios->nbp_state_change_latency = bw_int_to_fixed(45);
+ vbios->mcifwrmc_urgent_latency = bw_int_to_fixed(10);
+ vbios->scatter_gather_enable = true;
+ vbios->down_spread_percentage = bw_frc_to_fixed(5, 10);
+ vbios->cursor_width = 32;
+ vbios->average_compression_rate = 4;
+ vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel = 256;
+ vbios->blackout_duration = bw_int_to_fixed(0); /* us */
+ vbios->maximum_blackout_recovery_time = bw_int_to_fixed(0);
+
+ dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation = 100;
+ dceip->max_average_percent_of_ideal_drambw_display_can_use_in_normal_system_operation = 100;
+ dceip->percent_of_ideal_port_bw_received_after_urgent_latency = 100;
+ dceip->large_cursor = false;
+ dceip->dmif_request_buffer_size = bw_int_to_fixed(768);
+ dceip->dmif_pipe_en_fbc_chunk_tracker = false;
+ dceip->cursor_max_outstanding_group_num = 1;
+ dceip->lines_interleaved_into_lb = 2;
+ dceip->chunk_width = 256;
+ dceip->number_of_graphics_pipes = 6;
+ dceip->number_of_underlay_pipes = 0;
+ dceip->low_power_tiling_mode = 0;
+ dceip->display_write_back_supported = false;
+ dceip->argb_compression_support = true;
+ dceip->underlay_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35556, 10000);
+ dceip->underlay_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip->underlay_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip->underlay_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip->graphics_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35, 10);
+ dceip->graphics_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip->graphics_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip->graphics_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip->alpha_vscaler_efficiency = bw_int_to_fixed(3);
+ dceip->max_dmif_buffer_allocated = 4;
+ dceip->graphics_dmif_size = 12288;
+ dceip->underlay_luma_dmif_size = 19456;
+ dceip->underlay_chroma_dmif_size = 23552;
+ dceip->pre_downscaler_enabled = true;
+ dceip->underlay_downscale_prefetch_enabled = true;
+ dceip->lb_write_pixels_per_dispclk = bw_int_to_fixed(1);
+ dceip->lb_size_per_component444 = bw_int_to_fixed(245952);
+ dceip->graphics_lb_nodownscaling_multi_line_prefetching = true;
+ dceip->stutter_and_dram_clock_state_change_gated_before_cursor =
+ bw_int_to_fixed(1);
+ dceip->underlay420_luma_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip->underlay420_chroma_lb_size_per_component =
+ bw_int_to_fixed(164352);
+ dceip->underlay422_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip->cursor_chunk_width = bw_int_to_fixed(64);
+ dceip->cursor_dcp_buffer_lines = bw_int_to_fixed(4);
+ dceip->underlay_maximum_width_efficient_for_tiling =
+ bw_int_to_fixed(1920);
+ dceip->underlay_maximum_height_efficient_for_tiling =
+ bw_int_to_fixed(1080);
+ dceip->peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display =
+ bw_frc_to_fixed(3, 10);
+ dceip->peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation =
+ bw_int_to_fixed(25);
+ dceip->minimum_outstanding_pte_request_limit = bw_int_to_fixed(
+ 2);
+ dceip->maximum_total_outstanding_pte_requests_allowed_by_saw =
+ bw_int_to_fixed(128);
+ dceip->limit_excessive_outstanding_dmif_requests = true;
+ dceip->linear_mode_line_request_alternation_slice =
+ bw_int_to_fixed(64);
+ dceip->scatter_gather_lines_of_pte_prefetching_in_linear_mode =
+ 32;
+ dceip->display_write_back420_luma_mcifwr_buffer_size = 12288;
+ dceip->display_write_back420_chroma_mcifwr_buffer_size = 8192;
+ dceip->request_efficiency = bw_frc_to_fixed(8, 10);
+ dceip->dispclk_per_request = bw_int_to_fixed(2);
+ dceip->dispclk_ramping_factor = bw_frc_to_fixed(105, 100);
+ dceip->display_pipe_throughput_factor = bw_frc_to_fixed(105, 100);
+ dceip->scatter_gather_pte_request_rows_in_tiling_mode = 2;
+ dceip->mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0);
+ break;
+ case BW_CALCS_VERSION_POLARIS11:
+ vbios->memory_type = bw_def_gddr5;
+ vbios->dram_channel_width_in_bits = 32;
+ vbios->number_of_dram_channels = asic_id.vram_width / vbios->dram_channel_width_in_bits;
+ vbios->number_of_dram_banks = 8;
+ vbios->high_yclk = bw_int_to_fixed(6000);
+ vbios->mid_yclk = bw_int_to_fixed(3200);
+ vbios->low_yclk = bw_int_to_fixed(1000);
+ vbios->low_sclk = bw_int_to_fixed(300);
+ vbios->mid1_sclk = bw_int_to_fixed(400);
+ vbios->mid2_sclk = bw_int_to_fixed(500);
+ vbios->mid3_sclk = bw_int_to_fixed(600);
+ vbios->mid4_sclk = bw_int_to_fixed(700);
+ vbios->mid5_sclk = bw_int_to_fixed(800);
+ vbios->mid6_sclk = bw_int_to_fixed(974);
+ vbios->high_sclk = bw_int_to_fixed(1154);
+ vbios->low_voltage_max_dispclk = bw_int_to_fixed(459);
+ vbios->mid_voltage_max_dispclk = bw_int_to_fixed(654);
+ vbios->high_voltage_max_dispclk = bw_int_to_fixed(1108);
+ vbios->low_voltage_max_phyclk = bw_int_to_fixed(540);
+ vbios->mid_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios->high_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios->data_return_bus_width = bw_int_to_fixed(32);
+ vbios->trc = bw_int_to_fixed(48);
+ if (vbios->number_of_dram_channels == 2) // 64-bit
+ vbios->dmifmc_urgent_latency = bw_int_to_fixed(4);
+ else
+ vbios->dmifmc_urgent_latency = bw_int_to_fixed(3);
+ vbios->stutter_self_refresh_exit_latency = bw_int_to_fixed(5);
+ vbios->stutter_self_refresh_entry_latency = bw_int_to_fixed(0);
+ vbios->nbp_state_change_latency = bw_int_to_fixed(45);
+ vbios->mcifwrmc_urgent_latency = bw_int_to_fixed(10);
+ vbios->scatter_gather_enable = true;
+ vbios->down_spread_percentage = bw_frc_to_fixed(5, 10);
+ vbios->cursor_width = 32;
+ vbios->average_compression_rate = 4;
+ vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel = 256;
+ vbios->blackout_duration = bw_int_to_fixed(0); /* us */
+ vbios->maximum_blackout_recovery_time = bw_int_to_fixed(0);
+
+ dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation = 100;
+ dceip->max_average_percent_of_ideal_drambw_display_can_use_in_normal_system_operation = 100;
+ dceip->percent_of_ideal_port_bw_received_after_urgent_latency = 100;
+ dceip->large_cursor = false;
+ dceip->dmif_request_buffer_size = bw_int_to_fixed(768);
+ dceip->dmif_pipe_en_fbc_chunk_tracker = false;
+ dceip->cursor_max_outstanding_group_num = 1;
+ dceip->lines_interleaved_into_lb = 2;
+ dceip->chunk_width = 256;
+ dceip->number_of_graphics_pipes = 5;
+ dceip->number_of_underlay_pipes = 0;
+ dceip->low_power_tiling_mode = 0;
+ dceip->display_write_back_supported = false;
+ dceip->argb_compression_support = true;
+ dceip->underlay_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35556, 10000);
+ dceip->underlay_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip->underlay_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip->underlay_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip->graphics_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35, 10);
+ dceip->graphics_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip->graphics_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip->graphics_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip->alpha_vscaler_efficiency = bw_int_to_fixed(3);
+ dceip->max_dmif_buffer_allocated = 4;
+ dceip->graphics_dmif_size = 12288;
+ dceip->underlay_luma_dmif_size = 19456;
+ dceip->underlay_chroma_dmif_size = 23552;
+ dceip->pre_downscaler_enabled = true;
+ dceip->underlay_downscale_prefetch_enabled = true;
+ dceip->lb_write_pixels_per_dispclk = bw_int_to_fixed(1);
+ dceip->lb_size_per_component444 = bw_int_to_fixed(245952);
+ dceip->graphics_lb_nodownscaling_multi_line_prefetching = true;
+ dceip->stutter_and_dram_clock_state_change_gated_before_cursor =
+ bw_int_to_fixed(1);
+ dceip->underlay420_luma_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip->underlay420_chroma_lb_size_per_component =
+ bw_int_to_fixed(164352);
+ dceip->underlay422_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip->cursor_chunk_width = bw_int_to_fixed(64);
+ dceip->cursor_dcp_buffer_lines = bw_int_to_fixed(4);
+ dceip->underlay_maximum_width_efficient_for_tiling =
+ bw_int_to_fixed(1920);
+ dceip->underlay_maximum_height_efficient_for_tiling =
+ bw_int_to_fixed(1080);
+ dceip->peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display =
+ bw_frc_to_fixed(3, 10);
+ dceip->peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation =
+ bw_int_to_fixed(25);
+ dceip->minimum_outstanding_pte_request_limit = bw_int_to_fixed(
+ 2);
+ dceip->maximum_total_outstanding_pte_requests_allowed_by_saw =
+ bw_int_to_fixed(128);
+ dceip->limit_excessive_outstanding_dmif_requests = true;
+ dceip->linear_mode_line_request_alternation_slice =
+ bw_int_to_fixed(64);
+ dceip->scatter_gather_lines_of_pte_prefetching_in_linear_mode =
+ 32;
+ dceip->display_write_back420_luma_mcifwr_buffer_size = 12288;
+ dceip->display_write_back420_chroma_mcifwr_buffer_size = 8192;
+ dceip->request_efficiency = bw_frc_to_fixed(8, 10);
+ dceip->dispclk_per_request = bw_int_to_fixed(2);
+ dceip->dispclk_ramping_factor = bw_frc_to_fixed(105, 100);
+ dceip->display_pipe_throughput_factor = bw_frc_to_fixed(105, 100);
+ dceip->scatter_gather_pte_request_rows_in_tiling_mode = 2;
+ dceip->mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0);
+ break;
+ case BW_CALCS_VERSION_POLARIS12:
+ vbios->memory_type = bw_def_gddr5;
+ vbios->dram_channel_width_in_bits = 32;
+ vbios->number_of_dram_channels = asic_id.vram_width / vbios->dram_channel_width_in_bits;
+ vbios->number_of_dram_banks = 8;
+ vbios->high_yclk = bw_int_to_fixed(6000);
+ vbios->mid_yclk = bw_int_to_fixed(3200);
+ vbios->low_yclk = bw_int_to_fixed(1000);
+ vbios->low_sclk = bw_int_to_fixed(678);
+ vbios->mid1_sclk = bw_int_to_fixed(864);
+ vbios->mid2_sclk = bw_int_to_fixed(900);
+ vbios->mid3_sclk = bw_int_to_fixed(920);
+ vbios->mid4_sclk = bw_int_to_fixed(940);
+ vbios->mid5_sclk = bw_int_to_fixed(960);
+ vbios->mid6_sclk = bw_int_to_fixed(980);
+ vbios->high_sclk = bw_int_to_fixed(1049);
+ vbios->low_voltage_max_dispclk = bw_int_to_fixed(459);
+ vbios->mid_voltage_max_dispclk = bw_int_to_fixed(654);
+ vbios->high_voltage_max_dispclk = bw_int_to_fixed(1108);
+ vbios->low_voltage_max_phyclk = bw_int_to_fixed(540);
+ vbios->mid_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios->high_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios->data_return_bus_width = bw_int_to_fixed(32);
+ vbios->trc = bw_int_to_fixed(48);
+ if (vbios->number_of_dram_channels == 2) // 64-bit
+ vbios->dmifmc_urgent_latency = bw_int_to_fixed(4);
+ else
+ vbios->dmifmc_urgent_latency = bw_int_to_fixed(3);
+ vbios->stutter_self_refresh_exit_latency = bw_int_to_fixed(5);
+ vbios->stutter_self_refresh_entry_latency = bw_int_to_fixed(0);
+ vbios->nbp_state_change_latency = bw_int_to_fixed(250);
+ vbios->mcifwrmc_urgent_latency = bw_int_to_fixed(10);
+ vbios->scatter_gather_enable = false;
+ vbios->down_spread_percentage = bw_frc_to_fixed(5, 10);
+ vbios->cursor_width = 32;
+ vbios->average_compression_rate = 4;
+ vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel = 256;
+ vbios->blackout_duration = bw_int_to_fixed(0); /* us */
+ vbios->maximum_blackout_recovery_time = bw_int_to_fixed(0);
+
+ dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation = 100;
+ dceip->max_average_percent_of_ideal_drambw_display_can_use_in_normal_system_operation = 100;
+ dceip->percent_of_ideal_port_bw_received_after_urgent_latency = 100;
+ dceip->large_cursor = false;
+ dceip->dmif_request_buffer_size = bw_int_to_fixed(768);
+ dceip->dmif_pipe_en_fbc_chunk_tracker = false;
+ dceip->cursor_max_outstanding_group_num = 1;
+ dceip->lines_interleaved_into_lb = 2;
+ dceip->chunk_width = 256;
+ dceip->number_of_graphics_pipes = 5;
+ dceip->number_of_underlay_pipes = 0;
+ dceip->low_power_tiling_mode = 0;
+ dceip->display_write_back_supported = true;
+ dceip->argb_compression_support = true;
+ dceip->underlay_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35556, 10000);
+ dceip->underlay_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip->underlay_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip->underlay_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip->graphics_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35, 10);
+ dceip->graphics_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip->graphics_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip->graphics_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip->alpha_vscaler_efficiency = bw_int_to_fixed(3);
+ dceip->max_dmif_buffer_allocated = 4;
+ dceip->graphics_dmif_size = 12288;
+ dceip->underlay_luma_dmif_size = 19456;
+ dceip->underlay_chroma_dmif_size = 23552;
+ dceip->pre_downscaler_enabled = true;
+ dceip->underlay_downscale_prefetch_enabled = true;
+ dceip->lb_write_pixels_per_dispclk = bw_int_to_fixed(1);
+ dceip->lb_size_per_component444 = bw_int_to_fixed(245952);
+ dceip->graphics_lb_nodownscaling_multi_line_prefetching = true;
+ dceip->stutter_and_dram_clock_state_change_gated_before_cursor =
+ bw_int_to_fixed(1);
+ dceip->underlay420_luma_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip->underlay420_chroma_lb_size_per_component =
+ bw_int_to_fixed(164352);
+ dceip->underlay422_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip->cursor_chunk_width = bw_int_to_fixed(64);
+ dceip->cursor_dcp_buffer_lines = bw_int_to_fixed(4);
+ dceip->underlay_maximum_width_efficient_for_tiling =
+ bw_int_to_fixed(1920);
+ dceip->underlay_maximum_height_efficient_for_tiling =
+ bw_int_to_fixed(1080);
+ dceip->peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display =
+ bw_frc_to_fixed(3, 10);
+ dceip->peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation =
+ bw_int_to_fixed(25);
+ dceip->minimum_outstanding_pte_request_limit = bw_int_to_fixed(
+ 2);
+ dceip->maximum_total_outstanding_pte_requests_allowed_by_saw =
+ bw_int_to_fixed(128);
+ dceip->limit_excessive_outstanding_dmif_requests = true;
+ dceip->linear_mode_line_request_alternation_slice =
+ bw_int_to_fixed(64);
+ dceip->scatter_gather_lines_of_pte_prefetching_in_linear_mode =
+ 32;
+ dceip->display_write_back420_luma_mcifwr_buffer_size = 12288;
+ dceip->display_write_back420_chroma_mcifwr_buffer_size = 8192;
+ dceip->request_efficiency = bw_frc_to_fixed(8, 10);
+ dceip->dispclk_per_request = bw_int_to_fixed(2);
+ dceip->dispclk_ramping_factor = bw_frc_to_fixed(105, 100);
+ dceip->display_pipe_throughput_factor = bw_frc_to_fixed(105, 100);
+ dceip->scatter_gather_pte_request_rows_in_tiling_mode = 2;
+ dceip->mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0);
+ break;
+ case BW_CALCS_VERSION_STONEY:
+ vbios->memory_type = bw_def_gddr5;
+ vbios->dram_channel_width_in_bits = 64;
+ vbios->number_of_dram_channels = asic_id.vram_width / vbios->dram_channel_width_in_bits;
+ vbios->number_of_dram_banks = 8;
+ vbios->high_yclk = bw_int_to_fixed(1866);
+ vbios->mid_yclk = bw_int_to_fixed(1866);
+ vbios->low_yclk = bw_int_to_fixed(1333);
+ vbios->low_sclk = bw_int_to_fixed(200);
+ vbios->mid1_sclk = bw_int_to_fixed(600);
+ vbios->mid2_sclk = bw_int_to_fixed(600);
+ vbios->mid3_sclk = bw_int_to_fixed(600);
+ vbios->mid4_sclk = bw_int_to_fixed(600);
+ vbios->mid5_sclk = bw_int_to_fixed(600);
+ vbios->mid6_sclk = bw_int_to_fixed(600);
+ vbios->high_sclk = bw_int_to_fixed(800);
+ vbios->low_voltage_max_dispclk = bw_int_to_fixed(352);
+ vbios->mid_voltage_max_dispclk = bw_int_to_fixed(467);
+ vbios->high_voltage_max_dispclk = bw_int_to_fixed(643);
+ vbios->low_voltage_max_phyclk = bw_int_to_fixed(540);
+ vbios->mid_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios->high_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios->data_return_bus_width = bw_int_to_fixed(32);
+ vbios->trc = bw_int_to_fixed(50);
+ vbios->dmifmc_urgent_latency = bw_int_to_fixed(4);
+ vbios->stutter_self_refresh_exit_latency = bw_frc_to_fixed(158, 10);
+ vbios->stutter_self_refresh_entry_latency = bw_int_to_fixed(0);
+ vbios->nbp_state_change_latency = bw_frc_to_fixed(2008, 100);
+ vbios->mcifwrmc_urgent_latency = bw_int_to_fixed(10);
+ vbios->scatter_gather_enable = true;
+ vbios->down_spread_percentage = bw_frc_to_fixed(5, 10);
+ vbios->cursor_width = 32;
+ vbios->average_compression_rate = 4;
+ vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel = 256;
+ vbios->blackout_duration = bw_int_to_fixed(0); /* us */
+ vbios->maximum_blackout_recovery_time = bw_int_to_fixed(0);
+
+ dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation = 100;
+ dceip->max_average_percent_of_ideal_drambw_display_can_use_in_normal_system_operation = 100;
+ dceip->percent_of_ideal_port_bw_received_after_urgent_latency = 100;
+ dceip->large_cursor = false;
+ dceip->dmif_request_buffer_size = bw_int_to_fixed(768);
+ dceip->dmif_pipe_en_fbc_chunk_tracker = false;
+ dceip->cursor_max_outstanding_group_num = 1;
+ dceip->lines_interleaved_into_lb = 2;
+ dceip->chunk_width = 256;
+ dceip->number_of_graphics_pipes = 2;
+ dceip->number_of_underlay_pipes = 1;
+ dceip->low_power_tiling_mode = 0;
+ dceip->display_write_back_supported = false;
+ dceip->argb_compression_support = true;
+ dceip->underlay_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35556, 10000);
+ dceip->underlay_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip->underlay_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip->underlay_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip->graphics_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35, 10);
+ dceip->graphics_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip->graphics_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip->graphics_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip->alpha_vscaler_efficiency = bw_int_to_fixed(3);
+ dceip->max_dmif_buffer_allocated = 2;
+ dceip->graphics_dmif_size = 12288;
+ dceip->underlay_luma_dmif_size = 19456;
+ dceip->underlay_chroma_dmif_size = 23552;
+ dceip->pre_downscaler_enabled = true;
+ dceip->underlay_downscale_prefetch_enabled = true;
+ dceip->lb_write_pixels_per_dispclk = bw_int_to_fixed(1);
+ dceip->lb_size_per_component444 = bw_int_to_fixed(82176);
+ dceip->graphics_lb_nodownscaling_multi_line_prefetching = false;
+ dceip->stutter_and_dram_clock_state_change_gated_before_cursor =
+ bw_int_to_fixed(0);
+ dceip->underlay420_luma_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip->underlay420_chroma_lb_size_per_component =
+ bw_int_to_fixed(164352);
+ dceip->underlay422_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip->cursor_chunk_width = bw_int_to_fixed(64);
+ dceip->cursor_dcp_buffer_lines = bw_int_to_fixed(4);
+ dceip->underlay_maximum_width_efficient_for_tiling =
+ bw_int_to_fixed(1920);
+ dceip->underlay_maximum_height_efficient_for_tiling =
+ bw_int_to_fixed(1080);
+ dceip->peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display =
+ bw_frc_to_fixed(3, 10);
+ dceip->peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation =
+ bw_int_to_fixed(25);
+ dceip->minimum_outstanding_pte_request_limit = bw_int_to_fixed(
+ 2);
+ dceip->maximum_total_outstanding_pte_requests_allowed_by_saw =
+ bw_int_to_fixed(128);
+ dceip->limit_excessive_outstanding_dmif_requests = true;
+ dceip->linear_mode_line_request_alternation_slice =
+ bw_int_to_fixed(64);
+ dceip->scatter_gather_lines_of_pte_prefetching_in_linear_mode =
+ 32;
+ dceip->display_write_back420_luma_mcifwr_buffer_size = 12288;
+ dceip->display_write_back420_chroma_mcifwr_buffer_size = 8192;
+ dceip->request_efficiency = bw_frc_to_fixed(8, 10);
+ dceip->dispclk_per_request = bw_int_to_fixed(2);
+ dceip->dispclk_ramping_factor = bw_frc_to_fixed(105, 100);
+ dceip->display_pipe_throughput_factor = bw_frc_to_fixed(105, 100);
+ dceip->scatter_gather_pte_request_rows_in_tiling_mode = 2;
+ dceip->mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0);
+ break;
+ case BW_CALCS_VERSION_VEGA10:
+ vbios->memory_type = bw_def_hbm;
+ vbios->dram_channel_width_in_bits = 128;
+ vbios->number_of_dram_channels = asic_id.vram_width / vbios->dram_channel_width_in_bits;
+ vbios->number_of_dram_banks = 16;
+ vbios->high_yclk = bw_int_to_fixed(2400);
+ vbios->mid_yclk = bw_int_to_fixed(1700);
+ vbios->low_yclk = bw_int_to_fixed(1000);
+ vbios->low_sclk = bw_int_to_fixed(300);
+ vbios->mid1_sclk = bw_int_to_fixed(350);
+ vbios->mid2_sclk = bw_int_to_fixed(400);
+ vbios->mid3_sclk = bw_int_to_fixed(500);
+ vbios->mid4_sclk = bw_int_to_fixed(600);
+ vbios->mid5_sclk = bw_int_to_fixed(700);
+ vbios->mid6_sclk = bw_int_to_fixed(760);
+ vbios->high_sclk = bw_int_to_fixed(776);
+ vbios->low_voltage_max_dispclk = bw_int_to_fixed(460);
+ vbios->mid_voltage_max_dispclk = bw_int_to_fixed(670);
+ vbios->high_voltage_max_dispclk = bw_int_to_fixed(1133);
+ vbios->low_voltage_max_phyclk = bw_int_to_fixed(540);
+ vbios->mid_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios->high_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios->data_return_bus_width = bw_int_to_fixed(32);
+ vbios->trc = bw_int_to_fixed(48);
+ vbios->dmifmc_urgent_latency = bw_int_to_fixed(3);
+ vbios->stutter_self_refresh_exit_latency = bw_frc_to_fixed(75, 10);
+ vbios->stutter_self_refresh_entry_latency = bw_frc_to_fixed(19, 10);
+ vbios->nbp_state_change_latency = bw_int_to_fixed(39);
+ vbios->mcifwrmc_urgent_latency = bw_int_to_fixed(10);
+ vbios->scatter_gather_enable = false;
+ vbios->down_spread_percentage = bw_frc_to_fixed(5, 10);
+ vbios->cursor_width = 32;
+ vbios->average_compression_rate = 4;
+ vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel = 8;
+ vbios->blackout_duration = bw_int_to_fixed(0); /* us */
+ vbios->maximum_blackout_recovery_time = bw_int_to_fixed(0);
+
+ dceip->max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation = 100;
+ dceip->max_average_percent_of_ideal_drambw_display_can_use_in_normal_system_operation = 100;
+ dceip->percent_of_ideal_port_bw_received_after_urgent_latency = 100;
+ dceip->large_cursor = false;
+ dceip->dmif_request_buffer_size = bw_int_to_fixed(2304);
+ dceip->dmif_pipe_en_fbc_chunk_tracker = true;
+ dceip->cursor_max_outstanding_group_num = 1;
+ dceip->lines_interleaved_into_lb = 2;
+ dceip->chunk_width = 256;
+ dceip->number_of_graphics_pipes = 6;
+ dceip->number_of_underlay_pipes = 0;
+ dceip->low_power_tiling_mode = 0;
+ dceip->display_write_back_supported = true;
+ dceip->argb_compression_support = true;
+ dceip->underlay_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35556, 10000);
+ dceip->underlay_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip->underlay_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip->underlay_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip->graphics_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35, 10);
+ dceip->graphics_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip->graphics_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip->graphics_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip->alpha_vscaler_efficiency = bw_int_to_fixed(3);
+ dceip->max_dmif_buffer_allocated = 4;
+ dceip->graphics_dmif_size = 24576;
+ dceip->underlay_luma_dmif_size = 19456;
+ dceip->underlay_chroma_dmif_size = 23552;
+ dceip->pre_downscaler_enabled = true;
+ dceip->underlay_downscale_prefetch_enabled = false;
+ dceip->lb_write_pixels_per_dispclk = bw_int_to_fixed(1);
+ dceip->lb_size_per_component444 = bw_int_to_fixed(245952);
+ dceip->graphics_lb_nodownscaling_multi_line_prefetching = true;
+ dceip->stutter_and_dram_clock_state_change_gated_before_cursor =
+ bw_int_to_fixed(1);
+ dceip->underlay420_luma_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip->underlay420_chroma_lb_size_per_component =
+ bw_int_to_fixed(164352);
+ dceip->underlay422_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip->cursor_chunk_width = bw_int_to_fixed(64);
+ dceip->cursor_dcp_buffer_lines = bw_int_to_fixed(4);
+ dceip->underlay_maximum_width_efficient_for_tiling =
+ bw_int_to_fixed(1920);
+ dceip->underlay_maximum_height_efficient_for_tiling =
+ bw_int_to_fixed(1080);
+ dceip->peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display =
+ bw_frc_to_fixed(3, 10);
+ dceip->peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation =
+ bw_int_to_fixed(25);
+ dceip->minimum_outstanding_pte_request_limit = bw_int_to_fixed(
+ 2);
+ dceip->maximum_total_outstanding_pte_requests_allowed_by_saw =
+ bw_int_to_fixed(128);
+ dceip->limit_excessive_outstanding_dmif_requests = true;
+ dceip->linear_mode_line_request_alternation_slice =
+ bw_int_to_fixed(64);
+ dceip->scatter_gather_lines_of_pte_prefetching_in_linear_mode =
+ 32;
+ dceip->display_write_back420_luma_mcifwr_buffer_size = 12288;
+ dceip->display_write_back420_chroma_mcifwr_buffer_size = 8192;
+ dceip->request_efficiency = bw_frc_to_fixed(8, 10);
+ dceip->dispclk_per_request = bw_int_to_fixed(2);
+ dceip->dispclk_ramping_factor = bw_frc_to_fixed(105, 100);
+ dceip->display_pipe_throughput_factor = bw_frc_to_fixed(105, 100);
+ dceip->scatter_gather_pte_request_rows_in_tiling_mode = 2;
+ dceip->mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0);
+ break;
+ default:
+ break;
+ }
+ *bw_dceip = *dceip;
+ *bw_vbios = *vbios;
+
+ kfree(dceip);
+ kfree(vbios);
+}
+
+/*
+ * Compare calculated (required) clocks against the clocks available at
+ * maximum voltage (max Performance Level).
+ */
+static bool is_display_configuration_supported(
+ const struct bw_calcs_vbios *vbios,
+ const struct dce_bw_output *calcs_output)
+{
+ uint32_t int_max_clk;
+
+ int_max_clk = bw_fixed_to_int(vbios->high_voltage_max_dispclk);
+ int_max_clk *= 1000; /* MHz to kHz */
+ if (calcs_output->dispclk_khz > int_max_clk)
+ return false;
+
+ int_max_clk = bw_fixed_to_int(vbios->high_sclk);
+ int_max_clk *= 1000; /* MHz to kHz */
+ if (calcs_output->sclk_khz > int_max_clk)
+ return false;
+
+ return true;
+}
+
+static void populate_initial_data(
+ const struct pipe_ctx pipe[], int pipe_count, struct bw_calcs_data *data)
+{
+ int i, j;
+ int num_displays = 0;
+
+ data->underlay_surface_type = bw_def_420;
+ data->panning_and_bezel_adjustment = bw_def_none;
+ data->graphics_lb_bpc = 10;
+ data->underlay_lb_bpc = 8;
+ data->underlay_tiling_mode = bw_def_tiled;
+ data->graphics_tiling_mode = bw_def_tiled;
+ data->underlay_micro_tile_mode = bw_def_display_micro_tiling;
+ data->graphics_micro_tile_mode = bw_def_display_micro_tiling;
+ data->increase_voltage_to_support_mclk_switch = true;
+
+ /* Pipes with underlay first */
+ for (i = 0; i < pipe_count; i++) {
+ if (!pipe[i].stream || !pipe[i].bottom_pipe)
+ continue;
+
+ ASSERT(pipe[i].plane_state);
+
+ if (num_displays == 0) {
+ if (!pipe[i].plane_state->visible)
+ data->d0_underlay_mode = bw_def_underlay_only;
+ else
+ data->d0_underlay_mode = bw_def_blend;
+ } else {
+ if (!pipe[i].plane_state->visible)
+ data->d1_underlay_mode = bw_def_underlay_only;
+ else
+ data->d1_underlay_mode = bw_def_blend;
+ }
+
+ data->fbc_en[num_displays + 4] = false;
+ data->lpt_en[num_displays + 4] = false;
+ data->h_total[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.h_total);
+ data->v_total[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.v_total);
+ data->pixel_rate[num_displays + 4] = bw_frc_to_fixed(pipe[i].stream->timing.pix_clk_100hz, 10000);
+ data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].plane_res.scl_data.viewport.width);
+ data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4];
+ data->src_height[num_displays + 4] = bw_int_to_fixed(pipe[i].plane_res.scl_data.viewport.height);
+ data->h_taps[num_displays + 4] = bw_int_to_fixed(pipe[i].plane_res.scl_data.taps.h_taps);
+ data->v_taps[num_displays + 4] = bw_int_to_fixed(pipe[i].plane_res.scl_data.taps.v_taps);
+ data->h_scale_ratio[num_displays + 4] = fixed31_32_to_bw_fixed(pipe[i].plane_res.scl_data.ratios.horz.value);
+ data->v_scale_ratio[num_displays + 4] = fixed31_32_to_bw_fixed(pipe[i].plane_res.scl_data.ratios.vert.value);
+ switch (pipe[i].plane_state->rotation) {
+ case ROTATION_ANGLE_0:
+ data->rotation_angle[num_displays + 4] = bw_int_to_fixed(0);
+ break;
+ case ROTATION_ANGLE_90:
+ data->rotation_angle[num_displays + 4] = bw_int_to_fixed(90);
+ break;
+ case ROTATION_ANGLE_180:
+ data->rotation_angle[num_displays + 4] = bw_int_to_fixed(180);
+ break;
+ case ROTATION_ANGLE_270:
+ data->rotation_angle[num_displays + 4] = bw_int_to_fixed(270);
+ break;
+ default:
+ break;
+ }
+ switch (pipe[i].plane_state->format) {
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
+ case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+ data->bytes_per_pixel[num_displays + 4] = 2;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010_XR_BIAS:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+ data->bytes_per_pixel[num_displays + 4] = 4;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
+ data->bytes_per_pixel[num_displays + 4] = 8;
+ break;
+ default:
+ data->bytes_per_pixel[num_displays + 4] = 4;
+ break;
+ }
+ data->interlace_mode[num_displays + 4] = false;
+ data->stereo_mode[num_displays + 4] = bw_def_mono;
+
+
+ for (j = 0; j < 2; j++) {
+ data->fbc_en[num_displays * 2 + j] = false;
+ data->lpt_en[num_displays * 2 + j] = false;
+
+ data->src_height[num_displays * 2 + j] = bw_int_to_fixed(pipe[i].bottom_pipe->plane_res.scl_data.viewport.height);
+ data->src_width[num_displays * 2 + j] = bw_int_to_fixed(pipe[i].bottom_pipe->plane_res.scl_data.viewport.width);
+ data->pitch_in_pixels[num_displays * 2 + j] = bw_int_to_fixed(
+ pipe[i].bottom_pipe->plane_state->plane_size.surface_pitch);
+ data->h_taps[num_displays * 2 + j] = bw_int_to_fixed(pipe[i].bottom_pipe->plane_res.scl_data.taps.h_taps);
+ data->v_taps[num_displays * 2 + j] = bw_int_to_fixed(pipe[i].bottom_pipe->plane_res.scl_data.taps.v_taps);
+ data->h_scale_ratio[num_displays * 2 + j] = fixed31_32_to_bw_fixed(
+ pipe[i].bottom_pipe->plane_res.scl_data.ratios.horz.value);
+ data->v_scale_ratio[num_displays * 2 + j] = fixed31_32_to_bw_fixed(
+ pipe[i].bottom_pipe->plane_res.scl_data.ratios.vert.value);
+ switch (pipe[i].bottom_pipe->plane_state->rotation) {
+ case ROTATION_ANGLE_0:
+ data->rotation_angle[num_displays * 2 + j] = bw_int_to_fixed(0);
+ break;
+ case ROTATION_ANGLE_90:
+ data->rotation_angle[num_displays * 2 + j] = bw_int_to_fixed(90);
+ break;
+ case ROTATION_ANGLE_180:
+ data->rotation_angle[num_displays * 2 + j] = bw_int_to_fixed(180);
+ break;
+ case ROTATION_ANGLE_270:
+ data->rotation_angle[num_displays * 2 + j] = bw_int_to_fixed(270);
+ break;
+ default:
+ break;
+ }
+ data->stereo_mode[num_displays * 2 + j] = bw_def_mono;
+ }
+
+ num_displays++;
+ }
+
+ /* Pipes without underlay after */
+ for (i = 0; i < pipe_count; i++) {
+ unsigned int pixel_clock_100hz;
+ if (!pipe[i].stream || pipe[i].bottom_pipe)
+ continue;
+
+
+ data->fbc_en[num_displays + 4] = false;
+ data->lpt_en[num_displays + 4] = false;
+ data->h_total[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.h_total);
+ data->v_total[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.v_total);
+ pixel_clock_100hz = pipe[i].stream->timing.pix_clk_100hz;
+ if (pipe[i].stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
+ pixel_clock_100hz *= 2;
+ data->pixel_rate[num_displays + 4] = bw_frc_to_fixed(pixel_clock_100hz, 10000);
+ if (pipe[i].plane_state) {
+ data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].plane_res.scl_data.viewport.width);
+ data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4];
+ data->src_height[num_displays + 4] = bw_int_to_fixed(pipe[i].plane_res.scl_data.viewport.height);
+ data->h_taps[num_displays + 4] = bw_int_to_fixed(pipe[i].plane_res.scl_data.taps.h_taps);
+ data->v_taps[num_displays + 4] = bw_int_to_fixed(pipe[i].plane_res.scl_data.taps.v_taps);
+ data->h_scale_ratio[num_displays + 4] = fixed31_32_to_bw_fixed(pipe[i].plane_res.scl_data.ratios.horz.value);
+ data->v_scale_ratio[num_displays + 4] = fixed31_32_to_bw_fixed(pipe[i].plane_res.scl_data.ratios.vert.value);
+ switch (pipe[i].plane_state->rotation) {
+ case ROTATION_ANGLE_0:
+ data->rotation_angle[num_displays + 4] = bw_int_to_fixed(0);
+ break;
+ case ROTATION_ANGLE_90:
+ data->rotation_angle[num_displays + 4] = bw_int_to_fixed(90);
+ break;
+ case ROTATION_ANGLE_180:
+ data->rotation_angle[num_displays + 4] = bw_int_to_fixed(180);
+ break;
+ case ROTATION_ANGLE_270:
+ data->rotation_angle[num_displays + 4] = bw_int_to_fixed(270);
+ break;
+ default:
+ break;
+ }
+ switch (pipe[i].plane_state->format) {
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
+ case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+ data->bytes_per_pixel[num_displays + 4] = 2;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010_XR_BIAS:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+ data->bytes_per_pixel[num_displays + 4] = 4;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
+ data->bytes_per_pixel[num_displays + 4] = 8;
+ break;
+ default:
+ data->bytes_per_pixel[num_displays + 4] = 4;
+ break;
+ }
+ } else if (pipe[i].stream->dst.width != 0 &&
+ pipe[i].stream->dst.height != 0 &&
+ pipe[i].stream->src.width != 0 &&
+ pipe[i].stream->src.height != 0) {
+ data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->src.width);
+ data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4];
+ data->src_height[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->src.height);
+ data->h_taps[num_displays + 4] = pipe[i].stream->src.width == pipe[i].stream->dst.width ? bw_int_to_fixed(1) : bw_int_to_fixed(2);
+ data->v_taps[num_displays + 4] = pipe[i].stream->src.height == pipe[i].stream->dst.height ? bw_int_to_fixed(1) : bw_int_to_fixed(2);
+ data->h_scale_ratio[num_displays + 4] = bw_frc_to_fixed(pipe[i].stream->src.width, pipe[i].stream->dst.width);
+ data->v_scale_ratio[num_displays + 4] = bw_frc_to_fixed(pipe[i].stream->src.height, pipe[i].stream->dst.height);
+ data->rotation_angle[num_displays + 4] = bw_int_to_fixed(0);
+ data->bytes_per_pixel[num_displays + 4] = 4;
+ } else {
+ data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.h_addressable);
+ data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4];
+ data->src_height[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.v_addressable);
+ data->h_taps[num_displays + 4] = bw_int_to_fixed(1);
+ data->v_taps[num_displays + 4] = bw_int_to_fixed(1);
+ data->h_scale_ratio[num_displays + 4] = bw_int_to_fixed(1);
+ data->v_scale_ratio[num_displays + 4] = bw_int_to_fixed(1);
+ data->rotation_angle[num_displays + 4] = bw_int_to_fixed(0);
+ data->bytes_per_pixel[num_displays + 4] = 4;
+ }
+
+ data->interlace_mode[num_displays + 4] = false;
+ data->stereo_mode[num_displays + 4] = bw_def_mono;
+ num_displays++;
+ }
+
+ data->number_of_displays = num_displays;
+}
+
+static bool all_displays_in_sync(const struct pipe_ctx pipe[],
+ int pipe_count)
+{
+ const struct pipe_ctx *active_pipes[MAX_PIPES];
+ int i, num_active_pipes = 0;
+
+ for (i = 0; i < pipe_count; i++) {
+ if (!pipe[i].stream || pipe[i].top_pipe)
+ continue;
+
+ active_pipes[num_active_pipes++] = &pipe[i];
+ }
+
+ if (!num_active_pipes)
+ return false;
+
+ for (i = 1; i < num_active_pipes; ++i) {
+ if (!resource_are_streams_timing_synchronizable(
+ active_pipes[0]->stream, active_pipes[i]->stream)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * Return:
+ * true - Display(s) configuration supported.
+ * In this case 'calcs_output' contains data for HW programming
+ * false - Display(s) configuration not supported (not enough bandwidth).
+ */
+bool bw_calcs(struct dc_context *ctx,
+ const struct bw_calcs_dceip *dceip,
+ const struct bw_calcs_vbios *vbios,
+ const struct pipe_ctx pipe[],
+ int pipe_count,
+ struct dce_bw_output *calcs_output)
+{
+ struct bw_calcs_data *data = kzalloc(sizeof(struct bw_calcs_data),
+ GFP_KERNEL);
+ if (!data)
+ return false;
+
+ populate_initial_data(pipe, pipe_count, data);
+
+ if (ctx->dc->config.multi_mon_pp_mclk_switch)
+ calcs_output->all_displays_in_sync = all_displays_in_sync(pipe, pipe_count);
+ else
+ calcs_output->all_displays_in_sync = false;
+
+ if (data->number_of_displays != 0) {
+ uint8_t yclk_lvl;
+ struct bw_fixed high_sclk = vbios->high_sclk;
+ struct bw_fixed mid1_sclk = vbios->mid1_sclk;
+ struct bw_fixed mid2_sclk = vbios->mid2_sclk;
+ struct bw_fixed mid3_sclk = vbios->mid3_sclk;
+ struct bw_fixed mid4_sclk = vbios->mid4_sclk;
+ struct bw_fixed mid5_sclk = vbios->mid5_sclk;
+ struct bw_fixed mid6_sclk = vbios->mid6_sclk;
+ struct bw_fixed low_sclk = vbios->low_sclk;
+ struct bw_fixed high_yclk = vbios->high_yclk;
+ struct bw_fixed mid_yclk = vbios->mid_yclk;
+ struct bw_fixed low_yclk = vbios->low_yclk;
+
+ if (ctx->dc->debug.bandwidth_calcs_trace) {
+ print_bw_calcs_dceip(ctx, dceip);
+ print_bw_calcs_vbios(ctx, vbios);
+ print_bw_calcs_data(ctx, data);
+ }
+ calculate_bandwidth(dceip, vbios, data);
+
+ yclk_lvl = data->y_clk_level;
+
+ calcs_output->nbp_state_change_enable =
+ data->nbp_state_change_enable;
+ calcs_output->cpuc_state_change_enable =
+ data->cpuc_state_change_enable;
+ calcs_output->cpup_state_change_enable =
+ data->cpup_state_change_enable;
+ calcs_output->stutter_mode_enable =
+ data->stutter_mode_enable;
+ calcs_output->dispclk_khz =
+ bw_fixed_to_int(bw_mul(data->dispclk,
+ bw_int_to_fixed(1000)));
+ calcs_output->blackout_recovery_time_us =
+ bw_fixed_to_int(data->blackout_recovery_time);
+ calcs_output->sclk_khz =
+ bw_fixed_to_int(bw_mul(data->required_sclk,
+ bw_int_to_fixed(1000)));
+ calcs_output->sclk_deep_sleep_khz =
+ bw_fixed_to_int(bw_mul(data->sclk_deep_sleep,
+ bw_int_to_fixed(1000)));
+ if (yclk_lvl == 0)
+ calcs_output->yclk_khz = bw_fixed_to_int(
+ bw_mul(low_yclk, bw_int_to_fixed(1000)));
+ else if (yclk_lvl == 1)
+ calcs_output->yclk_khz = bw_fixed_to_int(
+ bw_mul(mid_yclk, bw_int_to_fixed(1000)));
+ else
+ calcs_output->yclk_khz = bw_fixed_to_int(
+ bw_mul(high_yclk, bw_int_to_fixed(1000)));
+
+ /* units: nanosecond, 16bit storage. */
+
+ calcs_output->nbp_state_change_wm_ns[0].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[1].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[2].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[6], bw_int_to_fixed(1000)));
+
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->nbp_state_change_wm_ns[3].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[4].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->nbp_state_change_wm_ns[3].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[4].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->nbp_state_change_wm_ns[5].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[9], bw_int_to_fixed(1000)));
+
+
+
+ calcs_output->stutter_exit_wm_ns[0].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[1].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[2].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->stutter_exit_wm_ns[3].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[4].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->stutter_exit_wm_ns[3].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[4].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->stutter_exit_wm_ns[5].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[9], bw_int_to_fixed(1000)));
+
+ calcs_output->stutter_entry_wm_ns[0].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[1].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[2].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->stutter_entry_wm_ns[3].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->stutter_entry_wm_ns[3].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->stutter_entry_wm_ns[5].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[9], bw_int_to_fixed(1000)));
+
+ calcs_output->urgent_wm_ns[0].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[1].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[2].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->urgent_wm_ns[3].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[4].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->urgent_wm_ns[3].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[4].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->urgent_wm_ns[5].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[9], bw_int_to_fixed(1000)));
+
+ if (dceip->version != BW_CALCS_VERSION_CARRIZO) {
+ ((struct bw_calcs_vbios *)vbios)->low_sclk = mid3_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid1_sclk = mid3_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid2_sclk = mid3_sclk;
+ calculate_bandwidth(dceip, vbios, data);
+
+ calcs_output->nbp_state_change_wm_ns[0].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[4],bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[1].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[2].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[6], bw_int_to_fixed(1000)));
+
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->nbp_state_change_wm_ns[3].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[4].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->nbp_state_change_wm_ns[3].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[4].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->nbp_state_change_wm_ns[5].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[9], bw_int_to_fixed(1000)));
+
+
+
+ calcs_output->stutter_exit_wm_ns[0].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[1].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[2].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->stutter_exit_wm_ns[3].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[4].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->stutter_exit_wm_ns[3].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[4].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->stutter_exit_wm_ns[5].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[9], bw_int_to_fixed(1000)));
+
+ calcs_output->stutter_entry_wm_ns[0].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[1].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[2].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->stutter_entry_wm_ns[3].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->stutter_entry_wm_ns[3].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->stutter_entry_wm_ns[5].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[9], bw_int_to_fixed(1000)));
+
+ calcs_output->urgent_wm_ns[0].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[1].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[2].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->urgent_wm_ns[3].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[4].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->urgent_wm_ns[3].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[4].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->urgent_wm_ns[5].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[9], bw_int_to_fixed(1000)));
+
+ ((struct bw_calcs_vbios *)vbios)->low_sclk = low_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid1_sclk = mid1_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid2_sclk = mid2_sclk;
+ ((struct bw_calcs_vbios *)vbios)->low_yclk = mid_yclk;
+ calculate_bandwidth(dceip, vbios, data);
+
+ calcs_output->nbp_state_change_wm_ns[0].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[1].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[2].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->nbp_state_change_wm_ns[3].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[4].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->nbp_state_change_wm_ns[3].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[4].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->nbp_state_change_wm_ns[5].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[9], bw_int_to_fixed(1000)));
+
+
+ calcs_output->stutter_exit_wm_ns[0].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[1].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[2].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->stutter_exit_wm_ns[3].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[4].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->stutter_exit_wm_ns[3].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[4].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->stutter_exit_wm_ns[5].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[9], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[0].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[1].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[2].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->stutter_entry_wm_ns[3].c_mark =
+ bw_fixed_to_int(bw_mul(data->stutter_entry_watermark[0],
+ bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].c_mark =
+ bw_fixed_to_int(bw_mul(data->stutter_entry_watermark[1],
+ bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->stutter_entry_wm_ns[3].c_mark =
+ bw_fixed_to_int(bw_mul(data->stutter_entry_watermark[7],
+ bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].c_mark =
+ bw_fixed_to_int(bw_mul(data->stutter_entry_watermark[8],
+ bw_int_to_fixed(1000)));
+ }
+ calcs_output->stutter_entry_wm_ns[5].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[9], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[0].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[1].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[2].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->urgent_wm_ns[3].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[4].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->urgent_wm_ns[3].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[4].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->urgent_wm_ns[5].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[9], bw_int_to_fixed(1000)));
+ }
+
+ if (dceip->version == BW_CALCS_VERSION_CARRIZO) {
+ ((struct bw_calcs_vbios *)vbios)->low_yclk = high_yclk;
+ ((struct bw_calcs_vbios *)vbios)->mid_yclk = high_yclk;
+ ((struct bw_calcs_vbios *)vbios)->low_sclk = high_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid1_sclk = high_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid2_sclk = high_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid3_sclk = high_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid4_sclk = high_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid5_sclk = high_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid6_sclk = high_sclk;
+ } else {
+ ((struct bw_calcs_vbios *)vbios)->low_yclk = mid_yclk;
+ ((struct bw_calcs_vbios *)vbios)->low_sclk = mid3_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid1_sclk = mid3_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid2_sclk = mid3_sclk;
+ }
+
+ calculate_bandwidth(dceip, vbios, data);
+
+ calcs_output->nbp_state_change_wm_ns[0].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[1].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[2].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->nbp_state_change_wm_ns[3].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[4].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->nbp_state_change_wm_ns[3].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->nbp_state_change_wm_ns[4].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->nbp_state_change_wm_ns[5].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ nbp_state_change_watermark[9], bw_int_to_fixed(1000)));
+
+ calcs_output->stutter_exit_wm_ns[0].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[1].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[2].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->stutter_exit_wm_ns[3].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[4].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->stutter_exit_wm_ns[3].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->stutter_exit_wm_ns[4].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->stutter_exit_wm_ns[5].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_exit_watermark[9], bw_int_to_fixed(1000)));
+
+ calcs_output->stutter_entry_wm_ns[0].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[1].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[2].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->stutter_entry_wm_ns[3].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->stutter_entry_wm_ns[3].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->stutter_entry_wm_ns[5].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[9], bw_int_to_fixed(1000)));
+
+ calcs_output->urgent_wm_ns[0].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[1].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[2].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->urgent_wm_ns[3].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[4].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->urgent_wm_ns[3].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->urgent_wm_ns[4].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->urgent_wm_ns[5].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ urgent_watermark[9], bw_int_to_fixed(1000)));
+
+ ((struct bw_calcs_vbios *)vbios)->low_yclk = low_yclk;
+ ((struct bw_calcs_vbios *)vbios)->mid_yclk = mid_yclk;
+ ((struct bw_calcs_vbios *)vbios)->low_sclk = low_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid1_sclk = mid1_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid2_sclk = mid2_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid3_sclk = mid3_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid4_sclk = mid4_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid5_sclk = mid5_sclk;
+ ((struct bw_calcs_vbios *)vbios)->mid6_sclk = mid6_sclk;
+ ((struct bw_calcs_vbios *)vbios)->high_sclk = high_sclk;
+ } else {
+ calcs_output->nbp_state_change_enable = true;
+ calcs_output->cpuc_state_change_enable = true;
+ calcs_output->cpup_state_change_enable = true;
+ calcs_output->stutter_mode_enable = true;
+ calcs_output->dispclk_khz = 0;
+ calcs_output->sclk_khz = 0;
+ }
+
+ kfree(data);
+
+ return is_display_configuration_supported(vbios, calcs_output);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c
new file mode 100644
index 000000000000..288d22a16cf2
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c
@@ -0,0 +1,1933 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services.h"
+#include "dcn_calc_auto.h"
+#include "dcn_calc_math.h"
+
+/*
+ * NOTE:
+ * This file is gcc-parseable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
+/*REVISION#250*/
+void scaler_settings_calculation(struct dcn_bw_internal_vars *v)
+{
+ int k;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->allow_different_hratio_vratio == dcn_bw_yes) {
+ if (v->source_scan[k] == dcn_bw_hor) {
+ v->h_ratio[k] = v->viewport_width[k] / v->scaler_rec_out_width[k];
+ v->v_ratio[k] = v->viewport_height[k] / v->scaler_recout_height[k];
+ }
+ else {
+ v->h_ratio[k] = v->viewport_height[k] / v->scaler_rec_out_width[k];
+ v->v_ratio[k] = v->viewport_width[k] / v->scaler_recout_height[k];
+ }
+ }
+ else {
+ if (v->source_scan[k] == dcn_bw_hor) {
+ v->h_ratio[k] =dcn_bw_max2(v->viewport_width[k] / v->scaler_rec_out_width[k], v->viewport_height[k] / v->scaler_recout_height[k]);
+ }
+ else {
+ v->h_ratio[k] =dcn_bw_max2(v->viewport_height[k] / v->scaler_rec_out_width[k], v->viewport_width[k] / v->scaler_recout_height[k]);
+ }
+ v->v_ratio[k] = v->h_ratio[k];
+ }
+ if (v->interlace_output[k] == 1.0) {
+ v->v_ratio[k] = 2.0 * v->v_ratio[k];
+ }
+ if (v->underscan_output[k] == 1.0) {
+ v->h_ratio[k] = v->h_ratio[k] * v->under_scan_factor;
+ v->v_ratio[k] = v->v_ratio[k] * v->under_scan_factor;
+ }
+ }
+ /*scaler taps calculation*/
+
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->h_ratio[k] > 1.0) {
+ v->acceptable_quality_hta_ps =dcn_bw_min2(v->max_hscl_taps, 2.0 *dcn_bw_ceil2(v->h_ratio[k], 1.0));
+ }
+ else if (v->h_ratio[k] < 1.0) {
+ v->acceptable_quality_hta_ps = 4.0;
+ }
+ else {
+ v->acceptable_quality_hta_ps = 1.0;
+ }
+ if (v->ta_pscalculation == dcn_bw_override) {
+ v->htaps[k] = v->override_hta_ps[k];
+ }
+ else {
+ v->htaps[k] = v->acceptable_quality_hta_ps;
+ }
+ if (v->v_ratio[k] > 1.0) {
+ v->acceptable_quality_vta_ps =dcn_bw_min2(v->max_vscl_taps, 2.0 *dcn_bw_ceil2(v->v_ratio[k], 1.0));
+ }
+ else if (v->v_ratio[k] < 1.0) {
+ v->acceptable_quality_vta_ps = 4.0;
+ }
+ else {
+ v->acceptable_quality_vta_ps = 1.0;
+ }
+ if (v->ta_pscalculation == dcn_bw_override) {
+ v->vtaps[k] = v->override_vta_ps[k];
+ }
+ else {
+ v->vtaps[k] = v->acceptable_quality_vta_ps;
+ }
+ if (v->source_pixel_format[k] == dcn_bw_rgb_sub_64 || v->source_pixel_format[k] == dcn_bw_rgb_sub_32 || v->source_pixel_format[k] == dcn_bw_rgb_sub_16) {
+ v->vta_pschroma[k] = 0.0;
+ v->hta_pschroma[k] = 0.0;
+ }
+ else {
+ if (v->ta_pscalculation == dcn_bw_override) {
+ v->vta_pschroma[k] = v->override_vta_pschroma[k];
+ v->hta_pschroma[k] = v->override_hta_pschroma[k];
+ }
+ else {
+ v->vta_pschroma[k] = v->acceptable_quality_vta_ps;
+ v->hta_pschroma[k] = v->acceptable_quality_hta_ps;
+ }
+ }
+ }
+}
+
+void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v)
+{
+ int i;
+ int j;
+ int k;
+ /*mode support, voltage state and soc configuration*/
+
+ /*scale ratio support check*/
+
+ v->scale_ratio_support = dcn_bw_yes;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->h_ratio[k] > v->max_hscl_ratio || v->v_ratio[k] > v->max_vscl_ratio || v->h_ratio[k] > v->htaps[k] || v->v_ratio[k] > v->vtaps[k] || (v->source_pixel_format[k] != dcn_bw_rgb_sub_64 && v->source_pixel_format[k] != dcn_bw_rgb_sub_32 && v->source_pixel_format[k] != dcn_bw_rgb_sub_16 && (v->h_ratio[k] / 2.0 > v->hta_pschroma[k] || v->v_ratio[k] / 2.0 > v->vta_pschroma[k]))) {
+ v->scale_ratio_support = dcn_bw_no;
+ }
+ }
+ /*source format, pixel format and scan support check*/
+
+ v->source_format_pixel_and_scan_support = dcn_bw_yes;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if ((v->source_surface_mode[k] == dcn_bw_sw_linear && v->source_scan[k] != dcn_bw_hor) || ((v->source_surface_mode[k] == dcn_bw_sw_4_kb_d || v->source_surface_mode[k] == dcn_bw_sw_4_kb_d_x || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d_t || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d_x || v->source_surface_mode[k] == dcn_bw_sw_var_d || v->source_surface_mode[k] == dcn_bw_sw_var_d_x) && v->source_pixel_format[k] != dcn_bw_rgb_sub_64)) {
+ v->source_format_pixel_and_scan_support = dcn_bw_no;
+ }
+ }
+ /*bandwidth support check*/
+
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->source_scan[k] == dcn_bw_hor) {
+ v->swath_width_ysingle_dpp[k] = v->viewport_width[k];
+ }
+ else {
+ v->swath_width_ysingle_dpp[k] = v->viewport_height[k];
+ }
+ if (v->source_pixel_format[k] == dcn_bw_rgb_sub_64) {
+ v->byte_per_pixel_in_dety[k] = 8.0;
+ v->byte_per_pixel_in_detc[k] = 0.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_32) {
+ v->byte_per_pixel_in_dety[k] = 4.0;
+ v->byte_per_pixel_in_detc[k] = 0.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_16) {
+ v->byte_per_pixel_in_dety[k] = 2.0;
+ v->byte_per_pixel_in_detc[k] = 0.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_8) {
+ v->byte_per_pixel_in_dety[k] = 1.0;
+ v->byte_per_pixel_in_detc[k] = 2.0;
+ }
+ else {
+ v->byte_per_pixel_in_dety[k] = 4.0f / 3.0f;
+ v->byte_per_pixel_in_detc[k] = 8.0f / 3.0f;
+ }
+ }
+ v->total_read_bandwidth_consumed_gbyte_per_second = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->read_bandwidth[k] = v->swath_width_ysingle_dpp[k] * (dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) * v->v_ratio[k] +dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / 2.0 * v->v_ratio[k] / 2) / (v->htotal[k] / v->pixel_clock[k]);
+ if (v->dcc_enable[k] == dcn_bw_yes) {
+ v->read_bandwidth[k] = v->read_bandwidth[k] * (1 + 1 / 256);
+ }
+ if (v->pte_enable == dcn_bw_yes && v->source_scan[k] != dcn_bw_hor && (v->source_surface_mode[k] == dcn_bw_sw_4_kb_s || v->source_surface_mode[k] == dcn_bw_sw_4_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_4_kb_d || v->source_surface_mode[k] == dcn_bw_sw_4_kb_d_x)) {
+ v->read_bandwidth[k] = v->read_bandwidth[k] * (1 + 1 / 64);
+ }
+ else if (v->pte_enable == dcn_bw_yes && v->source_scan[k] == dcn_bw_hor && (v->source_pixel_format[k] == dcn_bw_rgb_sub_64 || v->source_pixel_format[k] == dcn_bw_rgb_sub_32) && (v->source_surface_mode[k] == dcn_bw_sw_64_kb_s || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_t || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d_t || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d_x)) {
+ v->read_bandwidth[k] = v->read_bandwidth[k] * (1 + 1 / 256);
+ }
+ else if (v->pte_enable == dcn_bw_yes) {
+ v->read_bandwidth[k] = v->read_bandwidth[k] * (1 + 1 / 512);
+ }
+ v->total_read_bandwidth_consumed_gbyte_per_second = v->total_read_bandwidth_consumed_gbyte_per_second + v->read_bandwidth[k] / 1000.0;
+ }
+ v->total_write_bandwidth_consumed_gbyte_per_second = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->output[k] == dcn_bw_writeback && v->output_format[k] == dcn_bw_444) {
+ v->write_bandwidth[k] = v->scaler_rec_out_width[k] / (v->htotal[k] / v->pixel_clock[k]) * 4.0;
+ }
+ else if (v->output[k] == dcn_bw_writeback) {
+ v->write_bandwidth[k] = v->scaler_rec_out_width[k] / (v->htotal[k] / v->pixel_clock[k]) * 1.5;
+ }
+ else {
+ v->write_bandwidth[k] = 0.0;
+ }
+ v->total_write_bandwidth_consumed_gbyte_per_second = v->total_write_bandwidth_consumed_gbyte_per_second + v->write_bandwidth[k] / 1000.0;
+ }
+ v->total_bandwidth_consumed_gbyte_per_second = v->total_read_bandwidth_consumed_gbyte_per_second + v->total_write_bandwidth_consumed_gbyte_per_second;
+ v->dcc_enabled_in_any_plane = dcn_bw_no;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->dcc_enable[k] == dcn_bw_yes) {
+ v->dcc_enabled_in_any_plane = dcn_bw_yes;
+ }
+ }
+ for (i = 0; i <= number_of_states_plus_one; i++) {
+ v->return_bw_todcn_per_state =dcn_bw_min2(v->return_bus_width * v->dcfclk_per_state[i], v->fabric_and_dram_bandwidth_per_state[i] * 1000.0 * v->percent_of_ideal_drambw_received_after_urg_latency / 100.0);
+ v->return_bw_per_state[i] = v->return_bw_todcn_per_state;
+ if (v->dcc_enabled_in_any_plane == dcn_bw_yes && v->return_bw_todcn_per_state > v->dcfclk_per_state[i] * v->return_bus_width / 4.0) {
+ v->return_bw_per_state[i] =dcn_bw_min2(v->return_bw_per_state[i], v->return_bw_todcn_per_state * 4.0 * (1.0 - v->urgent_latency / ((v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0 / (v->return_bw_todcn_per_state - v->dcfclk_per_state[i] * v->return_bus_width / 4.0) + v->urgent_latency)));
+ }
+ v->critical_point = 2.0 * v->return_bus_width * v->dcfclk_per_state[i] * v->urgent_latency / (v->return_bw_todcn_per_state * v->urgent_latency + (v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0);
+ if (v->dcc_enabled_in_any_plane == dcn_bw_yes && v->critical_point > 1.0 && v->critical_point < 4.0) {
+ v->return_bw_per_state[i] =dcn_bw_min2(v->return_bw_per_state[i], dcn_bw_pow(4.0 * v->return_bw_todcn_per_state * (v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0 * v->return_bus_width * v->dcfclk_per_state[i] * v->urgent_latency / (v->return_bw_todcn_per_state * v->urgent_latency + (v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0), 2));
+ }
+ v->return_bw_todcn_per_state =dcn_bw_min2(v->return_bus_width * v->dcfclk_per_state[i], v->fabric_and_dram_bandwidth_per_state[i] * 1000.0);
+ if (v->dcc_enabled_in_any_plane == dcn_bw_yes && v->return_bw_todcn_per_state > v->dcfclk_per_state[i] * v->return_bus_width / 4.0) {
+ v->return_bw_per_state[i] =dcn_bw_min2(v->return_bw_per_state[i], v->return_bw_todcn_per_state * 4.0 * (1.0 - v->urgent_latency / ((v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0 / (v->return_bw_todcn_per_state - v->dcfclk_per_state[i] * v->return_bus_width / 4.0) + v->urgent_latency)));
+ }
+ v->critical_point = 2.0 * v->return_bus_width * v->dcfclk_per_state[i] * v->urgent_latency / (v->return_bw_todcn_per_state * v->urgent_latency + (v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0);
+ if (v->dcc_enabled_in_any_plane == dcn_bw_yes && v->critical_point > 1.0 && v->critical_point < 4.0) {
+ v->return_bw_per_state[i] =dcn_bw_min2(v->return_bw_per_state[i], dcn_bw_pow(4.0 * v->return_bw_todcn_per_state * (v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0 * v->return_bus_width * v->dcfclk_per_state[i] * v->urgent_latency / (v->return_bw_todcn_per_state * v->urgent_latency + (v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0), 2));
+ }
+ }
+ for (i = 0; i <= number_of_states_plus_one; i++) {
+ if ((v->total_read_bandwidth_consumed_gbyte_per_second * 1000.0 <= v->return_bw_per_state[i]) && (v->total_bandwidth_consumed_gbyte_per_second * 1000.0 <= v->fabric_and_dram_bandwidth_per_state[i] * 1000.0 * v->percent_of_ideal_drambw_received_after_urg_latency / 100.0)) {
+ v->bandwidth_support[i] = dcn_bw_yes;
+ }
+ else {
+ v->bandwidth_support[i] = dcn_bw_no;
+ }
+ }
+ /*writeback latency support check*/
+
+ v->writeback_latency_support = dcn_bw_yes;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->output[k] == dcn_bw_writeback && v->output_format[k] == dcn_bw_444 && v->scaler_rec_out_width[k] / (v->htotal[k] / v->pixel_clock[k]) * 4.0 > (v->writeback_luma_buffer_size + v->writeback_chroma_buffer_size) * 1024.0 / v->write_back_latency) {
+ v->writeback_latency_support = dcn_bw_no;
+ }
+ else if (v->output[k] == dcn_bw_writeback && v->scaler_rec_out_width[k] / (v->htotal[k] / v->pixel_clock[k]) >dcn_bw_min2(v->writeback_luma_buffer_size, 2.0 * v->writeback_chroma_buffer_size) * 1024.0 / v->write_back_latency) {
+ v->writeback_latency_support = dcn_bw_no;
+ }
+ }
+ /*re-ordering buffer support check*/
+
+ for (i = 0; i <= number_of_states_plus_one; i++) {
+ v->urgent_round_trip_and_out_of_order_latency_per_state[i] = (v->round_trip_ping_latency_cycles + 32.0) / v->dcfclk_per_state[i] + v->urgent_out_of_order_return_per_channel * v->number_of_channels / v->return_bw_per_state[i];
+ if ((v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0 / v->return_bw_per_state[i] > v->urgent_round_trip_and_out_of_order_latency_per_state[i]) {
+ v->rob_support[i] = dcn_bw_yes;
+ }
+ else {
+ v->rob_support[i] = dcn_bw_no;
+ }
+ }
+ /*display io support check*/
+
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->output[k] == dcn_bw_dp && v->dsc_capability == dcn_bw_yes) {
+ if (v->output_format[k] == dcn_bw_420) {
+ v->required_output_bw = v->pixel_clock[k] / 2.0;
+ }
+ else {
+ v->required_output_bw = v->pixel_clock[k];
+ }
+ }
+ else if (v->output_format[k] == dcn_bw_420) {
+ v->required_output_bw = v->pixel_clock[k] * 3.0 / 2.0;
+ }
+ else {
+ v->required_output_bw = v->pixel_clock[k] * 3.0;
+ }
+ if (v->output[k] == dcn_bw_hdmi) {
+ v->required_phyclk[k] = v->required_output_bw;
+ switch (v->output_deep_color[k]) {
+ case dcn_bw_encoder_10bpc:
+ v->required_phyclk[k] = v->required_phyclk[k] * 5.0 / 4;
+ break;
+ case dcn_bw_encoder_12bpc:
+ v->required_phyclk[k] = v->required_phyclk[k] * 3.0 / 2;
+ break;
+ default:
+ break;
+ }
+ v->required_phyclk[k] = v->required_phyclk[k] / 3.0;
+ }
+ else if (v->output[k] == dcn_bw_dp) {
+ v->required_phyclk[k] = v->required_output_bw / 4.0;
+ }
+ else {
+ v->required_phyclk[k] = 0.0;
+ }
+ }
+ for (i = 0; i <= number_of_states_plus_one; i++) {
+ v->dio_support[i] = dcn_bw_yes;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->required_phyclk[k] > v->phyclk_per_state[i] || (v->output[k] == dcn_bw_hdmi && v->required_phyclk[k] > 600.0)) {
+ v->dio_support[i] = dcn_bw_no;
+ }
+ }
+ }
+ /*total available writeback support check*/
+
+ v->total_number_of_active_writeback = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->output[k] == dcn_bw_writeback) {
+ v->total_number_of_active_writeback = v->total_number_of_active_writeback + 1.0;
+ }
+ }
+ if (v->total_number_of_active_writeback <= v->max_num_writeback) {
+ v->total_available_writeback_support = dcn_bw_yes;
+ }
+ else {
+ v->total_available_writeback_support = dcn_bw_no;
+ }
+ /*maximum dispclk/dppclk support check*/
+
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->h_ratio[k] > 1.0) {
+ v->pscl_factor[k] =dcn_bw_min2(v->max_dchub_topscl_throughput, v->max_pscl_tolb_throughput * v->h_ratio[k] /dcn_bw_ceil2(v->htaps[k] / 6.0, 1.0));
+ }
+ else {
+ v->pscl_factor[k] =dcn_bw_min2(v->max_dchub_topscl_throughput, v->max_pscl_tolb_throughput);
+ }
+ if (v->byte_per_pixel_in_detc[k] == 0.0) {
+ v->pscl_factor_chroma[k] = 0.0;
+ v->min_dppclk_using_single_dpp[k] = v->pixel_clock[k] *dcn_bw_max3(v->vtaps[k] / 6.0 *dcn_bw_min2(1.0, v->h_ratio[k]), v->h_ratio[k] * v->v_ratio[k] / v->pscl_factor[k], 1.0);
+ }
+ else {
+ if (v->h_ratio[k] / 2.0 > 1.0) {
+ v->pscl_factor_chroma[k] =dcn_bw_min2(v->max_dchub_topscl_throughput, v->max_pscl_tolb_throughput * v->h_ratio[k] / 2.0 /dcn_bw_ceil2(v->hta_pschroma[k] / 6.0, 1.0));
+ }
+ else {
+ v->pscl_factor_chroma[k] =dcn_bw_min2(v->max_dchub_topscl_throughput, v->max_pscl_tolb_throughput);
+ }
+ v->min_dppclk_using_single_dpp[k] = v->pixel_clock[k] *dcn_bw_max5(v->vtaps[k] / 6.0 *dcn_bw_min2(1.0, v->h_ratio[k]), v->h_ratio[k] * v->v_ratio[k] / v->pscl_factor[k], v->vta_pschroma[k] / 6.0 *dcn_bw_min2(1.0, v->h_ratio[k] / 2.0), v->h_ratio[k] * v->v_ratio[k] / 4.0 / v->pscl_factor_chroma[k], 1.0);
+ }
+ }
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if ((v->source_pixel_format[k] == dcn_bw_rgb_sub_64 || v->source_pixel_format[k] == dcn_bw_rgb_sub_32 || v->source_pixel_format[k] == dcn_bw_rgb_sub_16)) {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->read256_block_height_y[k] = 1.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_64) {
+ v->read256_block_height_y[k] = 4.0;
+ }
+ else {
+ v->read256_block_height_y[k] = 8.0;
+ }
+ v->read256_block_width_y[k] = 256.0 /dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / v->read256_block_height_y[k];
+ v->read256_block_height_c[k] = 0.0;
+ v->read256_block_width_c[k] = 0.0;
+ }
+ else {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->read256_block_height_y[k] = 1.0;
+ v->read256_block_height_c[k] = 1.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_8) {
+ v->read256_block_height_y[k] = 16.0;
+ v->read256_block_height_c[k] = 8.0;
+ }
+ else {
+ v->read256_block_height_y[k] = 8.0;
+ v->read256_block_height_c[k] = 8.0;
+ }
+ v->read256_block_width_y[k] = 256.0 /dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / v->read256_block_height_y[k];
+ v->read256_block_width_c[k] = 256.0 /dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / v->read256_block_height_c[k];
+ }
+ if (v->source_scan[k] == dcn_bw_hor) {
+ v->max_swath_height_y[k] = v->read256_block_height_y[k];
+ v->max_swath_height_c[k] = v->read256_block_height_c[k];
+ }
+ else {
+ v->max_swath_height_y[k] = v->read256_block_width_y[k];
+ v->max_swath_height_c[k] = v->read256_block_width_c[k];
+ }
+ if ((v->source_pixel_format[k] == dcn_bw_rgb_sub_64 || v->source_pixel_format[k] == dcn_bw_rgb_sub_32 || v->source_pixel_format[k] == dcn_bw_rgb_sub_16)) {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear || (v->source_pixel_format[k] == dcn_bw_rgb_sub_64 && (v->source_surface_mode[k] == dcn_bw_sw_4_kb_s || v->source_surface_mode[k] == dcn_bw_sw_4_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_t || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_var_s || v->source_surface_mode[k] == dcn_bw_sw_var_s_x) && v->source_scan[k] == dcn_bw_hor)) {
+ v->min_swath_height_y[k] = v->max_swath_height_y[k];
+ }
+ else {
+ v->min_swath_height_y[k] = v->max_swath_height_y[k] / 2.0;
+ }
+ v->min_swath_height_c[k] = v->max_swath_height_c[k];
+ }
+ else {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->min_swath_height_y[k] = v->max_swath_height_y[k];
+ v->min_swath_height_c[k] = v->max_swath_height_c[k];
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_8 && v->source_scan[k] == dcn_bw_hor) {
+ v->min_swath_height_y[k] = v->max_swath_height_y[k] / 2.0;
+ if (v->bug_forcing_luma_and_chroma_request_to_same_size_fixed == dcn_bw_yes) {
+ v->min_swath_height_c[k] = v->max_swath_height_c[k];
+ }
+ else {
+ v->min_swath_height_c[k] = v->max_swath_height_c[k] / 2.0;
+ }
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10 && v->source_scan[k] == dcn_bw_hor) {
+ v->min_swath_height_c[k] = v->max_swath_height_c[k] / 2.0;
+ if (v->bug_forcing_luma_and_chroma_request_to_same_size_fixed == dcn_bw_yes) {
+ v->min_swath_height_y[k] = v->max_swath_height_y[k];
+ }
+ else {
+ v->min_swath_height_y[k] = v->max_swath_height_y[k] / 2.0;
+ }
+ }
+ else {
+ v->min_swath_height_y[k] = v->max_swath_height_y[k];
+ v->min_swath_height_c[k] = v->max_swath_height_c[k];
+ }
+ }
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->maximum_swath_width = 8192.0;
+ }
+ else {
+ v->maximum_swath_width = 5120.0;
+ }
+ v->number_of_dpp_required_for_det_size =dcn_bw_ceil2(v->swath_width_ysingle_dpp[k] /dcn_bw_min2(v->maximum_swath_width, v->det_buffer_size_in_kbyte * 1024.0 / 2.0 / (v->byte_per_pixel_in_dety[k] * v->min_swath_height_y[k] + v->byte_per_pixel_in_detc[k] / 2.0 * v->min_swath_height_c[k])), 1.0);
+ if (v->byte_per_pixel_in_detc[k] == 0.0) {
+ v->number_of_dpp_required_for_lb_size =dcn_bw_ceil2((v->vtaps[k] +dcn_bw_max2(dcn_bw_ceil2(v->v_ratio[k], 1.0) - 2, 0.0)) * v->swath_width_ysingle_dpp[k] /dcn_bw_max2(v->h_ratio[k], 1.0) * v->lb_bit_per_pixel[k] / v->line_buffer_size, 1.0);
+ }
+ else {
+ v->number_of_dpp_required_for_lb_size =dcn_bw_max2(dcn_bw_ceil2((v->vtaps[k] +dcn_bw_max2(dcn_bw_ceil2(v->v_ratio[k], 1.0) - 2, 0.0)) * v->swath_width_ysingle_dpp[k] /dcn_bw_max2(v->h_ratio[k], 1.0) * v->lb_bit_per_pixel[k] / v->line_buffer_size, 1.0),dcn_bw_ceil2((v->vta_pschroma[k] +dcn_bw_max2(dcn_bw_ceil2(v->v_ratio[k] / 2.0, 1.0) - 2, 0.0)) * v->swath_width_ysingle_dpp[k] / 2.0 /dcn_bw_max2(v->h_ratio[k] / 2.0, 1.0) * v->lb_bit_per_pixel[k] / v->line_buffer_size, 1.0));
+ }
+ v->number_of_dpp_required_for_det_and_lb_size[k] =dcn_bw_max2(v->number_of_dpp_required_for_det_size, v->number_of_dpp_required_for_lb_size);
+ }
+ for (i = 0; i <= number_of_states_plus_one; i++) {
+ for (j = 0; j <= 1; j++) {
+ v->total_number_of_active_dpp[i][j] = 0.0;
+ v->required_dispclk[i][j] = 0.0;
+ v->dispclk_dppclk_support[i][j] = dcn_bw_yes;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->min_dispclk_using_single_dpp =dcn_bw_max2(v->pixel_clock[k], v->min_dppclk_using_single_dpp[k] * (j + 1)) * (1.0 + v->downspreading / 100.0);
+ if (v->odm_capability == dcn_bw_yes) {
+ v->min_dispclk_using_dual_dpp =dcn_bw_max2(v->pixel_clock[k] / 2.0, v->min_dppclk_using_single_dpp[k] / 2.0 * (j + 1)) * (1.0 + v->downspreading / 100.0);
+ }
+ else {
+ v->min_dispclk_using_dual_dpp =dcn_bw_max2(v->pixel_clock[k], v->min_dppclk_using_single_dpp[k] / 2.0 * (j + 1)) * (1.0 + v->downspreading / 100.0);
+ }
+ if (i < number_of_states) {
+ v->min_dispclk_using_single_dpp = v->min_dispclk_using_single_dpp * (1.0 + v->dispclk_ramping_margin / 100.0);
+ v->min_dispclk_using_dual_dpp = v->min_dispclk_using_dual_dpp * (1.0 + v->dispclk_ramping_margin / 100.0);
+ }
+ if (v->min_dispclk_using_single_dpp <=dcn_bw_min2(v->max_dispclk[i], (j + 1) * v->max_dppclk[i]) && v->number_of_dpp_required_for_det_and_lb_size[k] <= 1.0) {
+ v->no_of_dpp[i][j][k] = 1.0;
+ v->required_dispclk[i][j] =dcn_bw_max2(v->required_dispclk[i][j], v->min_dispclk_using_single_dpp);
+ }
+ else if (v->min_dispclk_using_dual_dpp <=dcn_bw_min2(v->max_dispclk[i], (j + 1) * v->max_dppclk[i])) {
+ v->no_of_dpp[i][j][k] = 2.0;
+ v->required_dispclk[i][j] =dcn_bw_max2(v->required_dispclk[i][j], v->min_dispclk_using_dual_dpp);
+ }
+ else {
+ v->no_of_dpp[i][j][k] = 2.0;
+ v->required_dispclk[i][j] =dcn_bw_max2(v->required_dispclk[i][j], v->min_dispclk_using_dual_dpp);
+ v->dispclk_dppclk_support[i][j] = dcn_bw_no;
+ }
+ v->total_number_of_active_dpp[i][j] = v->total_number_of_active_dpp[i][j] + v->no_of_dpp[i][j][k];
+ }
+ if (v->total_number_of_active_dpp[i][j] > v->max_num_dpp) {
+ v->total_number_of_active_dpp[i][j] = 0.0;
+ v->required_dispclk[i][j] = 0.0;
+ v->dispclk_dppclk_support[i][j] = dcn_bw_yes;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->min_dispclk_using_single_dpp =dcn_bw_max2(v->pixel_clock[k], v->min_dppclk_using_single_dpp[k] * (j + 1)) * (1.0 + v->downspreading / 100.0);
+ v->min_dispclk_using_dual_dpp =dcn_bw_max2(v->pixel_clock[k], v->min_dppclk_using_single_dpp[k] / 2.0 * (j + 1)) * (1.0 + v->downspreading / 100.0);
+ if (i < number_of_states) {
+ v->min_dispclk_using_single_dpp = v->min_dispclk_using_single_dpp * (1.0 + v->dispclk_ramping_margin / 100.0);
+ v->min_dispclk_using_dual_dpp = v->min_dispclk_using_dual_dpp * (1.0 + v->dispclk_ramping_margin / 100.0);
+ }
+ if (v->number_of_dpp_required_for_det_and_lb_size[k] <= 1.0) {
+ v->no_of_dpp[i][j][k] = 1.0;
+ v->required_dispclk[i][j] =dcn_bw_max2(v->required_dispclk[i][j], v->min_dispclk_using_single_dpp);
+ if (v->min_dispclk_using_single_dpp >dcn_bw_min2(v->max_dispclk[i], (j + 1) * v->max_dppclk[i])) {
+ v->dispclk_dppclk_support[i][j] = dcn_bw_no;
+ }
+ }
+ else {
+ v->no_of_dpp[i][j][k] = 2.0;
+ v->required_dispclk[i][j] =dcn_bw_max2(v->required_dispclk[i][j], v->min_dispclk_using_dual_dpp);
+ if (v->min_dispclk_using_dual_dpp >dcn_bw_min2(v->max_dispclk[i], (j + 1) * v->max_dppclk[i])) {
+ v->dispclk_dppclk_support[i][j] = dcn_bw_no;
+ }
+ }
+ v->total_number_of_active_dpp[i][j] = v->total_number_of_active_dpp[i][j] + v->no_of_dpp[i][j][k];
+ }
+ }
+ }
+ }
+ /*viewport size check*/
+
+ v->viewport_size_support = dcn_bw_yes;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->number_of_dpp_required_for_det_and_lb_size[k] > 2.0) {
+ v->viewport_size_support = dcn_bw_no;
+ }
+ }
+ /*total available pipes support check*/
+
+ for (i = 0; i <= number_of_states_plus_one; i++) {
+ for (j = 0; j <= 1; j++) {
+ if (v->total_number_of_active_dpp[i][j] <= v->max_num_dpp) {
+ v->total_available_pipes_support[i][j] = dcn_bw_yes;
+ }
+ else {
+ v->total_available_pipes_support[i][j] = dcn_bw_no;
+ }
+ }
+ }
+ /*urgent latency support check*/
+
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ for (i = 0; i <= number_of_states_plus_one; i++) {
+ for (j = 0; j <= 1; j++) {
+ v->swath_width_yper_state[i][j][k] = v->swath_width_ysingle_dpp[k] / v->no_of_dpp[i][j][k];
+ v->swath_width_granularity_y = 256.0 /dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / v->max_swath_height_y[k];
+ v->rounded_up_max_swath_size_bytes_y = (dcn_bw_ceil2(v->swath_width_yper_state[i][j][k] - 1.0, v->swath_width_granularity_y) + v->swath_width_granularity_y) * v->byte_per_pixel_in_dety[k] * v->max_swath_height_y[k];
+ if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
+ v->rounded_up_max_swath_size_bytes_y =dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_y, 256.0) + 256;
+ }
+ if (v->max_swath_height_c[k] > 0.0) {
+ v->swath_width_granularity_c = 256.0 /dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / v->max_swath_height_c[k];
+ v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width_yper_state[i][j][k] / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pixel_in_detc[k] * v->max_swath_height_c[k];
+ if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
+ v->rounded_up_max_swath_size_bytes_c = dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
+ }
+ }
+ if (v->rounded_up_max_swath_size_bytes_y + v->rounded_up_max_swath_size_bytes_c <= v->det_buffer_size_in_kbyte * 1024.0 / 2.0) {
+ v->swath_height_yper_state[i][j][k] = v->max_swath_height_y[k];
+ v->swath_height_cper_state[i][j][k] = v->max_swath_height_c[k];
+ }
+ else {
+ v->swath_height_yper_state[i][j][k] = v->min_swath_height_y[k];
+ v->swath_height_cper_state[i][j][k] = v->min_swath_height_c[k];
+ }
+ if (v->byte_per_pixel_in_detc[k] == 0.0) {
+ v->lines_in_det_luma = v->det_buffer_size_in_kbyte * 1024.0 / v->byte_per_pixel_in_dety[k] / v->swath_width_yper_state[i][j][k];
+ v->lines_in_det_chroma = 0.0;
+ }
+ else if (v->swath_height_yper_state[i][j][k] <= v->swath_height_cper_state[i][j][k]) {
+ v->lines_in_det_luma = v->det_buffer_size_in_kbyte * 1024.0 / 2.0 / v->byte_per_pixel_in_dety[k] / v->swath_width_yper_state[i][j][k];
+ v->lines_in_det_chroma = v->det_buffer_size_in_kbyte * 1024.0 / 2.0 / v->byte_per_pixel_in_detc[k] / (v->swath_width_yper_state[i][j][k] / 2.0);
+ }
+ else {
+ v->lines_in_det_luma = v->det_buffer_size_in_kbyte * 1024.0 * 2.0 / 3.0 / v->byte_per_pixel_in_dety[k] / v->swath_width_yper_state[i][j][k];
+ v->lines_in_det_chroma = v->det_buffer_size_in_kbyte * 1024.0 / 3.0 / v->byte_per_pixel_in_dety[k] / (v->swath_width_yper_state[i][j][k] / 2.0);
+ }
+ v->effective_lb_latency_hiding_source_lines_luma =dcn_bw_min2(v->max_line_buffer_lines,dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] /dcn_bw_max2(v->h_ratio[k], 1.0)), 1.0)) - (v->vtaps[k] - 1.0);
+ v->effective_detlb_lines_luma =dcn_bw_floor2(v->lines_in_det_luma +dcn_bw_min2(v->lines_in_det_luma * v->required_dispclk[i][j] * v->byte_per_pixel_in_dety[k] * v->pscl_factor[k] / v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_luma), v->swath_height_yper_state[i][j][k]);
+ if (v->byte_per_pixel_in_detc[k] == 0.0) {
+ v->urgent_latency_support_us_per_state[i][j][k] = v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]);
+ }
+ else {
+ v->effective_lb_latency_hiding_source_lines_chroma = dcn_bw_min2(v->max_line_buffer_lines, dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] / 2.0 / dcn_bw_max2(v->h_ratio[k] / 2.0, 1.0)), 1.0)) - (v->vta_pschroma[k] - 1.0);
+ v->effective_detlb_lines_chroma = dcn_bw_floor2(v->lines_in_det_chroma + dcn_bw_min2(v->lines_in_det_chroma * v->required_dispclk[i][j] * v->byte_per_pixel_in_detc[k] * v->pscl_factor_chroma[k] / v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_chroma), v->swath_height_cper_state[i][j][k]);
+ v->urgent_latency_support_us_per_state[i][j][k] = dcn_bw_min2(v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] * dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]), v->effective_detlb_lines_chroma * (v->htotal[k] / v->pixel_clock[k]) / (v->v_ratio[k] / 2.0) - v->effective_detlb_lines_chroma * v->swath_width_yper_state[i][j][k] / 2.0 * dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]));
+ }
+ }
+ }
+ }
+ for (i = 0; i <= number_of_states_plus_one; i++) {
+ for (j = 0; j <= 1; j++) {
+ v->urgent_latency_support[i][j] = dcn_bw_yes;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->urgent_latency_support_us_per_state[i][j][k] < v->urgent_latency / 1.0) {
+ v->urgent_latency_support[i][j] = dcn_bw_no;
+ }
+ }
+ }
+ }
+ /*prefetch check*/
+
+ for (i = 0; i <= number_of_states_plus_one; i++) {
+ for (j = 0; j <= 1; j++) {
+ v->total_number_of_dcc_active_dpp[i][j] = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->dcc_enable[k] == dcn_bw_yes) {
+ v->total_number_of_dcc_active_dpp[i][j] = v->total_number_of_dcc_active_dpp[i][j] + v->no_of_dpp[i][j][k];
+ }
+ }
+ }
+ }
+ for (i = 0; i <= number_of_states_plus_one; i++) {
+ for (j = 0; j <= 1; j++) {
+ v->projected_dcfclk_deep_sleep = 8.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->projected_dcfclk_deep_sleep =dcn_bw_max2(v->projected_dcfclk_deep_sleep, v->pixel_clock[k] / 16.0);
+ if (v->byte_per_pixel_in_detc[k] == 0.0) {
+ if (v->v_ratio[k] <= 1.0) {
+ v->projected_dcfclk_deep_sleep =dcn_bw_max2(v->projected_dcfclk_deep_sleep, 1.1 *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / 64.0 * v->h_ratio[k] * v->pixel_clock[k] / v->no_of_dpp[i][j][k]);
+ }
+ else {
+ v->projected_dcfclk_deep_sleep =dcn_bw_max2(v->projected_dcfclk_deep_sleep, 1.1 *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / 64.0 * v->pscl_factor[k] * v->required_dispclk[i][j] / (1 + j));
+ }
+ }
+ else {
+ if (v->v_ratio[k] <= 1.0) {
+ v->projected_dcfclk_deep_sleep =dcn_bw_max2(v->projected_dcfclk_deep_sleep, 1.1 *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / 32.0 * v->h_ratio[k] * v->pixel_clock[k] / v->no_of_dpp[i][j][k]);
+ }
+ else {
+ v->projected_dcfclk_deep_sleep =dcn_bw_max2(v->projected_dcfclk_deep_sleep, 1.1 *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / 32.0 * v->pscl_factor[k] * v->required_dispclk[i][j] / (1 + j));
+ }
+ if (v->v_ratio[k] / 2.0 <= 1.0) {
+ v->projected_dcfclk_deep_sleep =dcn_bw_max2(v->projected_dcfclk_deep_sleep, 1.1 *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / 32.0 * v->h_ratio[k] / 2.0 * v->pixel_clock[k] / v->no_of_dpp[i][j][k]);
+ }
+ else {
+ v->projected_dcfclk_deep_sleep =dcn_bw_max2(v->projected_dcfclk_deep_sleep, 1.1 *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / 32.0 * v->pscl_factor_chroma[k] * v->required_dispclk[i][j] / (1 + j));
+ }
+ }
+ }
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->dcc_enable[k] == dcn_bw_yes) {
+ v->meta_req_height_y = 8.0 * v->read256_block_height_y[k];
+ v->meta_req_width_y = 64.0 * 256.0 /dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / v->meta_req_height_y;
+ v->meta_surface_width_y =dcn_bw_ceil2(v->viewport_width[k] / v->no_of_dpp[i][j][k] - 1.0, v->meta_req_width_y) + v->meta_req_width_y;
+ v->meta_surface_height_y =dcn_bw_ceil2(v->viewport_height[k] - 1.0, v->meta_req_height_y) + v->meta_req_height_y;
+ if (v->pte_enable == dcn_bw_yes) {
+ v->meta_pte_bytes_per_frame_y = (dcn_bw_ceil2((v->meta_surface_width_y * v->meta_surface_height_y *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / 256.0 - 4096.0) / 8.0 / 4096.0, 1.0) + 1) * 64.0;
+ }
+ else {
+ v->meta_pte_bytes_per_frame_y = 0.0;
+ }
+ if (v->source_scan[k] == dcn_bw_hor) {
+ v->meta_row_bytes_y = v->meta_surface_width_y * v->meta_req_height_y *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / 256.0;
+ }
+ else {
+ v->meta_row_bytes_y = v->meta_surface_height_y * v->meta_req_width_y *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / 256.0;
+ }
+ }
+ else {
+ v->meta_pte_bytes_per_frame_y = 0.0;
+ v->meta_row_bytes_y = 0.0;
+ }
+ if (v->pte_enable == dcn_bw_yes) {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->macro_tile_block_size_bytes_y = 256.0;
+ v->macro_tile_block_height_y = 1.0;
+ }
+ else if (v->source_surface_mode[k] == dcn_bw_sw_4_kb_s || v->source_surface_mode[k] == dcn_bw_sw_4_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_4_kb_d || v->source_surface_mode[k] == dcn_bw_sw_4_kb_d_x) {
+ v->macro_tile_block_size_bytes_y = 4096.0;
+ v->macro_tile_block_height_y = 4.0 * v->read256_block_height_y[k];
+ }
+ else if (v->source_surface_mode[k] == dcn_bw_sw_64_kb_s || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_t || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d_t || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d_x) {
+ v->macro_tile_block_size_bytes_y = 64.0 * 1024;
+ v->macro_tile_block_height_y = 16.0 * v->read256_block_height_y[k];
+ }
+ else {
+ v->macro_tile_block_size_bytes_y = 256.0 * 1024;
+ v->macro_tile_block_height_y = 32.0 * v->read256_block_height_y[k];
+ }
+ if (v->macro_tile_block_size_bytes_y <= 65536.0) {
+ v->data_pte_req_height_y = v->macro_tile_block_height_y;
+ }
+ else {
+ v->data_pte_req_height_y = 16.0 * v->read256_block_height_y[k];
+ }
+ v->data_pte_req_width_y = 4096.0 /dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / v->data_pte_req_height_y * 8;
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->dpte_bytes_per_row_y = 64.0 * (dcn_bw_ceil2((v->viewport_width[k] / v->no_of_dpp[i][j][k] *dcn_bw_min2(128.0, dcn_bw_pow(2.0,dcn_bw_floor2(dcn_bw_log(v->pte_buffer_size_in_requests * v->data_pte_req_width_y / (v->viewport_width[k] / v->no_of_dpp[i][j][k]), 2.0), 1.0))) - 1.0) / v->data_pte_req_width_y, 1.0) + 1);
+ }
+ else if (v->source_scan[k] == dcn_bw_hor) {
+ v->dpte_bytes_per_row_y = 64.0 * (dcn_bw_ceil2((v->viewport_width[k] / v->no_of_dpp[i][j][k] - 1.0) / v->data_pte_req_width_y, 1.0) + 1);
+ }
+ else {
+ v->dpte_bytes_per_row_y = 64.0 * (dcn_bw_ceil2((v->viewport_height[k] - 1.0) / v->data_pte_req_height_y, 1.0) + 1);
+ }
+ }
+ else {
+ v->dpte_bytes_per_row_y = 0.0;
+ }
+ if ((v->source_pixel_format[k] != dcn_bw_rgb_sub_64 && v->source_pixel_format[k] != dcn_bw_rgb_sub_32 && v->source_pixel_format[k] != dcn_bw_rgb_sub_16)) {
+ if (v->dcc_enable[k] == dcn_bw_yes) {
+ v->meta_req_height_c = 8.0 * v->read256_block_height_c[k];
+ v->meta_req_width_c = 64.0 * 256.0 /dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / v->meta_req_height_c;
+ v->meta_surface_width_c =dcn_bw_ceil2(v->viewport_width[k] / v->no_of_dpp[i][j][k] / 2.0 - 1.0, v->meta_req_width_c) + v->meta_req_width_c;
+ v->meta_surface_height_c =dcn_bw_ceil2(v->viewport_height[k] / 2.0 - 1.0, v->meta_req_height_c) + v->meta_req_height_c;
+ if (v->pte_enable == dcn_bw_yes) {
+ v->meta_pte_bytes_per_frame_c = (dcn_bw_ceil2((v->meta_surface_width_c * v->meta_surface_height_c *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / 256.0 - 4096.0) / 8.0 / 4096.0, 1.0) + 1) * 64.0;
+ }
+ else {
+ v->meta_pte_bytes_per_frame_c = 0.0;
+ }
+ if (v->source_scan[k] == dcn_bw_hor) {
+ v->meta_row_bytes_c = v->meta_surface_width_c * v->meta_req_height_c *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / 256.0;
+ }
+ else {
+ v->meta_row_bytes_c = v->meta_surface_height_c * v->meta_req_width_c *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / 256.0;
+ }
+ }
+ else {
+ v->meta_pte_bytes_per_frame_c = 0.0;
+ v->meta_row_bytes_c = 0.0;
+ }
+ if (v->pte_enable == dcn_bw_yes) {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->macro_tile_block_size_bytes_c = 256.0;
+ v->macro_tile_block_height_c = 1.0;
+ }
+ else if (v->source_surface_mode[k] == dcn_bw_sw_4_kb_s || v->source_surface_mode[k] == dcn_bw_sw_4_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_4_kb_d || v->source_surface_mode[k] == dcn_bw_sw_4_kb_d_x) {
+ v->macro_tile_block_size_bytes_c = 4096.0;
+ v->macro_tile_block_height_c = 4.0 * v->read256_block_height_c[k];
+ }
+ else if (v->source_surface_mode[k] == dcn_bw_sw_64_kb_s || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_t || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d_t || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d_x) {
+ v->macro_tile_block_size_bytes_c = 64.0 * 1024;
+ v->macro_tile_block_height_c = 16.0 * v->read256_block_height_c[k];
+ }
+ else {
+ v->macro_tile_block_size_bytes_c = 256.0 * 1024;
+ v->macro_tile_block_height_c = 32.0 * v->read256_block_height_c[k];
+ }
+ v->macro_tile_block_width_c = v->macro_tile_block_size_bytes_c /dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / v->macro_tile_block_height_c;
+ if (v->macro_tile_block_size_bytes_c <= 65536.0) {
+ v->data_pte_req_height_c = v->macro_tile_block_height_c;
+ }
+ else {
+ v->data_pte_req_height_c = 16.0 * v->read256_block_height_c[k];
+ }
+ v->data_pte_req_width_c = 4096.0 /dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / v->data_pte_req_height_c * 8;
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->dpte_bytes_per_row_c = 64.0 * (dcn_bw_ceil2((v->viewport_width[k] / v->no_of_dpp[i][j][k] / 2.0 * dcn_bw_min2(128.0, dcn_bw_pow(2.0,dcn_bw_floor2(dcn_bw_log(v->pte_buffer_size_in_requests * v->data_pte_req_width_c / (v->viewport_width[k] / v->no_of_dpp[i][j][k] / 2.0), 2.0), 1.0))) - 1.0) / v->data_pte_req_width_c, 1.0) + 1);
+ }
+ else if (v->source_scan[k] == dcn_bw_hor) {
+ v->dpte_bytes_per_row_c = 64.0 * (dcn_bw_ceil2((v->viewport_width[k] / v->no_of_dpp[i][j][k] / 2.0 - 1.0) / v->data_pte_req_width_c, 1.0) + 1);
+ }
+ else {
+ v->dpte_bytes_per_row_c = 64.0 * (dcn_bw_ceil2((v->viewport_height[k] / 2.0 - 1.0) / v->data_pte_req_height_c, 1.0) + 1);
+ }
+ }
+ else {
+ v->dpte_bytes_per_row_c = 0.0;
+ }
+ }
+ else {
+ v->dpte_bytes_per_row_c = 0.0;
+ v->meta_pte_bytes_per_frame_c = 0.0;
+ v->meta_row_bytes_c = 0.0;
+ }
+ v->dpte_bytes_per_row[k] = v->dpte_bytes_per_row_y + v->dpte_bytes_per_row_c;
+ v->meta_pte_bytes_per_frame[k] = v->meta_pte_bytes_per_frame_y + v->meta_pte_bytes_per_frame_c;
+ v->meta_row_bytes[k] = v->meta_row_bytes_y + v->meta_row_bytes_c;
+ v->v_init_y = (v->v_ratio[k] + v->vtaps[k] + 1.0 + v->interlace_output[k] * 0.5 * v->v_ratio[k]) / 2.0;
+ v->prefill_y[k] =dcn_bw_floor2(v->v_init_y, 1.0);
+ v->max_num_sw_y[k] =dcn_bw_ceil2((v->prefill_y[k] - 1.0) / v->swath_height_yper_state[i][j][k], 1.0) + 1;
+ if (v->prefill_y[k] > 1.0) {
+ v->max_partial_sw_y =dcn_bw_mod((v->prefill_y[k] - 2.0), v->swath_height_yper_state[i][j][k]);
+ }
+ else {
+ v->max_partial_sw_y =dcn_bw_mod((v->prefill_y[k] + v->swath_height_yper_state[i][j][k] - 2.0), v->swath_height_yper_state[i][j][k]);
+ }
+ v->max_partial_sw_y =dcn_bw_max2(1.0, v->max_partial_sw_y);
+ v->prefetch_lines_y[k] = v->max_num_sw_y[k] * v->swath_height_yper_state[i][j][k] + v->max_partial_sw_y;
+ if ((v->source_pixel_format[k] != dcn_bw_rgb_sub_64 && v->source_pixel_format[k] != dcn_bw_rgb_sub_32 && v->source_pixel_format[k] != dcn_bw_rgb_sub_16)) {
+ v->v_init_c = (v->v_ratio[k] / 2.0 + v->vtaps[k] + 1.0 + v->interlace_output[k] * 0.5 * v->v_ratio[k] / 2.0) / 2.0;
+ v->prefill_c[k] =dcn_bw_floor2(v->v_init_c, 1.0);
+ v->max_num_sw_c[k] =dcn_bw_ceil2((v->prefill_c[k] - 1.0) / v->swath_height_cper_state[i][j][k], 1.0) + 1;
+ if (v->prefill_c[k] > 1.0) {
+ v->max_partial_sw_c =dcn_bw_mod((v->prefill_c[k] - 2.0), v->swath_height_cper_state[i][j][k]);
+ }
+ else {
+ v->max_partial_sw_c =dcn_bw_mod((v->prefill_c[k] + v->swath_height_cper_state[i][j][k] - 2.0), v->swath_height_cper_state[i][j][k]);
+ }
+ v->max_partial_sw_c =dcn_bw_max2(1.0, v->max_partial_sw_c);
+ v->prefetch_lines_c[k] = v->max_num_sw_c[k] * v->swath_height_cper_state[i][j][k] + v->max_partial_sw_c;
+ }
+ else {
+ v->prefetch_lines_c[k] = 0.0;
+ }
+ v->dst_x_after_scaler = 90.0 * v->pixel_clock[k] / (v->required_dispclk[i][j] / (j + 1)) + 42.0 * v->pixel_clock[k] / v->required_dispclk[i][j];
+ if (v->no_of_dpp[i][j][k] > 1.0) {
+ v->dst_x_after_scaler = v->dst_x_after_scaler + v->scaler_rec_out_width[k] / 2.0;
+ }
+ if (v->output_format[k] == dcn_bw_420) {
+ v->dst_y_after_scaler = 1.0;
+ }
+ else {
+ v->dst_y_after_scaler = 0.0;
+ }
+ v->time_calc = 24.0 / v->projected_dcfclk_deep_sleep;
+ v->v_update_offset[k][j] = dcn_bw_ceil2(v->htotal[k] / 4.0, 1.0);
+ v->total_repeater_delay = v->max_inter_dcn_tile_repeaters * (2.0 / (v->required_dispclk[i][j] / (j + 1)) + 3.0 / v->required_dispclk[i][j]);
+ v->v_update_width[k][j] = (14.0 / v->projected_dcfclk_deep_sleep + 12.0 / (v->required_dispclk[i][j] / (j + 1)) + v->total_repeater_delay) * v->pixel_clock[k];
+ v->v_ready_offset[k][j] = dcn_bw_max2(150.0 / (v->required_dispclk[i][j] / (j + 1)), v->total_repeater_delay + 20.0 / v->projected_dcfclk_deep_sleep + 10.0 / (v->required_dispclk[i][j] / (j + 1))) * v->pixel_clock[k];
+ v->time_setup = (v->v_update_offset[k][j] + v->v_update_width[k][j] + v->v_ready_offset[k][j]) / v->pixel_clock[k];
+ v->extra_latency = v->urgent_round_trip_and_out_of_order_latency_per_state[i] + (v->total_number_of_active_dpp[i][j] * v->pixel_chunk_size_in_kbyte + v->total_number_of_dcc_active_dpp[i][j] * v->meta_chunk_size) * 1024.0 / v->return_bw_per_state[i];
+ if (v->pte_enable == dcn_bw_yes) {
+ v->extra_latency = v->extra_latency + v->total_number_of_active_dpp[i][j] * v->pte_chunk_size * 1024.0 / v->return_bw_per_state[i];
+ }
+ if (v->can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one == dcn_bw_yes) {
+ v->maximum_vstartup = v->vtotal[k] - v->vactive[k] - 1.0;
+ }
+ else {
+ v->maximum_vstartup = v->v_sync_plus_back_porch[k] - 1.0;
+ }
+
+ do {
+ v->line_times_for_prefetch[k] = v->maximum_vstartup - v->urgent_latency / (v->htotal[k] / v->pixel_clock[k]) - (v->time_calc + v->time_setup) / (v->htotal[k] / v->pixel_clock[k]) - (v->dst_y_after_scaler + v->dst_x_after_scaler / v->htotal[k]);
+ v->line_times_for_prefetch[k] =dcn_bw_floor2(4.0 * (v->line_times_for_prefetch[k] + 0.125), 1.0) / 4;
+ v->prefetch_bw[k] = (v->meta_pte_bytes_per_frame[k] + 2.0 * v->meta_row_bytes[k] + 2.0 * v->dpte_bytes_per_row[k] + v->prefetch_lines_y[k] * v->swath_width_yper_state[i][j][k] *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) + v->prefetch_lines_c[k] * v->swath_width_yper_state[i][j][k] / 2.0 *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0)) / (v->line_times_for_prefetch[k] * v->htotal[k] / v->pixel_clock[k]);
+
+ if (v->pte_enable == dcn_bw_yes && v->dcc_enable[k] == dcn_bw_yes) {
+ v->time_for_meta_pte_without_immediate_flip = dcn_bw_max3(
+ v->meta_pte_bytes_frame[k] / v->prefetch_bw[k],
+ v->extra_latency,
+ v->htotal[k] / v->pixel_clock[k] / 4.0);
+ } else {
+ v->time_for_meta_pte_without_immediate_flip = v->htotal[k] / v->pixel_clock[k] / 4.0;
+ }
+
+ if (v->pte_enable == dcn_bw_yes || v->dcc_enable[k] == dcn_bw_yes) {
+ v->time_for_meta_and_dpte_row_without_immediate_flip = dcn_bw_max3((
+ v->meta_row_bytes[k] + v->dpte_bytes_per_row[k]) / v->prefetch_bw[k],
+ v->htotal[k] / v->pixel_clock[k] - v->time_for_meta_pte_without_immediate_flip,
+ v->extra_latency);
+ } else {
+ v->time_for_meta_and_dpte_row_without_immediate_flip = dcn_bw_max2(
+ v->htotal[k] / v->pixel_clock[k] - v->time_for_meta_pte_without_immediate_flip,
+ v->extra_latency - v->time_for_meta_pte_with_immediate_flip);
+ }
+
+ v->lines_for_meta_pte_without_immediate_flip[k] =dcn_bw_floor2(4.0 * (v->time_for_meta_pte_without_immediate_flip / (v->htotal[k] / v->pixel_clock[k]) + 0.125), 1.0) / 4;
+ v->lines_for_meta_and_dpte_row_without_immediate_flip[k] =dcn_bw_floor2(4.0 * (v->time_for_meta_and_dpte_row_without_immediate_flip / (v->htotal[k] / v->pixel_clock[k]) + 0.125), 1.0) / 4;
+ v->maximum_vstartup = v->maximum_vstartup - 1;
+
+ if (v->lines_for_meta_pte_without_immediate_flip[k] < 32.0 && v->lines_for_meta_and_dpte_row_without_immediate_flip[k] < 16.0)
+ break;
+
+ } while(1);
+ }
+ v->bw_available_for_immediate_flip = v->return_bw_per_state[i];
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->bw_available_for_immediate_flip = v->bw_available_for_immediate_flip -dcn_bw_max2(v->read_bandwidth[k], v->prefetch_bw[k]);
+ }
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->total_immediate_flip_bytes[k] = 0.0;
+ if ((v->source_pixel_format[k] != dcn_bw_yuv420_sub_8 && v->source_pixel_format[k] != dcn_bw_yuv420_sub_10)) {
+ v->total_immediate_flip_bytes[k] = v->total_immediate_flip_bytes[k] + v->meta_pte_bytes_per_frame[k] + v->meta_row_bytes[k] + v->dpte_bytes_per_row[k];
+ }
+ }
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->pte_enable == dcn_bw_yes && v->dcc_enable[k] == dcn_bw_yes) {
+ v->time_for_meta_pte_with_immediate_flip =dcn_bw_max5(v->meta_pte_bytes_per_frame[k] / v->prefetch_bw[k], v->meta_pte_bytes_per_frame[k] * v->total_immediate_flip_bytes[k] / (v->bw_available_for_immediate_flip * (v->meta_pte_bytes_per_frame[k] + v->meta_row_bytes[k] + v->dpte_bytes_per_row[k])), v->extra_latency, v->urgent_latency, v->htotal[k] / v->pixel_clock[k] / 4.0);
+ }
+ else {
+ v->time_for_meta_pte_with_immediate_flip = v->htotal[k] / v->pixel_clock[k] / 4.0;
+ }
+ if (v->pte_enable == dcn_bw_yes || v->dcc_enable[k] == dcn_bw_yes) {
+ v->time_for_meta_and_dpte_row_with_immediate_flip =dcn_bw_max5((v->meta_row_bytes[k] + v->dpte_bytes_per_row[k]) / v->prefetch_bw[k], (v->meta_row_bytes[k] + v->dpte_bytes_per_row[k]) * v->total_immediate_flip_bytes[k] / (v->bw_available_for_immediate_flip * (v->meta_pte_bytes_per_frame[k] + v->meta_row_bytes[k] + v->dpte_bytes_per_row[k])), v->htotal[k] / v->pixel_clock[k] - v->time_for_meta_pte_with_immediate_flip, v->extra_latency, 2.0 * v->urgent_latency);
+ }
+ else {
+ v->time_for_meta_and_dpte_row_with_immediate_flip =dcn_bw_max2(v->htotal[k] / v->pixel_clock[k] - v->time_for_meta_pte_with_immediate_flip, v->extra_latency - v->time_for_meta_pte_with_immediate_flip);
+ }
+ v->lines_for_meta_pte_with_immediate_flip[k] =dcn_bw_floor2(4.0 * (v->time_for_meta_pte_with_immediate_flip / (v->htotal[k] / v->pixel_clock[k]) + 0.125), 1.0) / 4;
+ v->lines_for_meta_and_dpte_row_with_immediate_flip[k] =dcn_bw_floor2(4.0 * (v->time_for_meta_and_dpte_row_with_immediate_flip / (v->htotal[k] / v->pixel_clock[k]) + 0.125), 1.0) / 4;
+ v->line_times_to_request_prefetch_pixel_data_with_immediate_flip = v->line_times_for_prefetch[k] - v->lines_for_meta_pte_with_immediate_flip[k] - v->lines_for_meta_and_dpte_row_with_immediate_flip[k];
+ v->line_times_to_request_prefetch_pixel_data_without_immediate_flip = v->line_times_for_prefetch[k] - v->lines_for_meta_pte_without_immediate_flip[k] - v->lines_for_meta_and_dpte_row_without_immediate_flip[k];
+ if (v->line_times_to_request_prefetch_pixel_data_with_immediate_flip > 0.0) {
+ v->v_ratio_pre_ywith_immediate_flip[i][j][k] = v->prefetch_lines_y[k] / v->line_times_to_request_prefetch_pixel_data_with_immediate_flip;
+ if ((v->swath_height_yper_state[i][j][k] > 4.0)) {
+ if (v->line_times_to_request_prefetch_pixel_data_with_immediate_flip - (v->prefill_y[k] - 3.0) / 2.0 > 0.0) {
+ v->v_ratio_pre_ywith_immediate_flip[i][j][k] =dcn_bw_max2(v->v_ratio_pre_ywith_immediate_flip[i][j][k], (v->max_num_sw_y[k] * v->swath_height_yper_state[i][j][k]) / (v->line_times_to_request_prefetch_pixel_data_with_immediate_flip - (v->prefill_y[k] - 3.0) / 2.0));
+ }
+ else {
+ v->v_ratio_pre_ywith_immediate_flip[i][j][k] = 999999.0;
+ }
+ }
+ v->v_ratio_pre_cwith_immediate_flip[i][j][k] = v->prefetch_lines_c[k] / v->line_times_to_request_prefetch_pixel_data_with_immediate_flip;
+ if ((v->swath_height_cper_state[i][j][k] > 4.0)) {
+ if (v->line_times_to_request_prefetch_pixel_data_with_immediate_flip - (v->prefill_c[k] - 3.0) / 2.0 > 0.0) {
+ v->v_ratio_pre_cwith_immediate_flip[i][j][k] =dcn_bw_max2(v->v_ratio_pre_cwith_immediate_flip[i][j][k], (v->max_num_sw_c[k] * v->swath_height_cper_state[i][j][k]) / (v->line_times_to_request_prefetch_pixel_data_with_immediate_flip - (v->prefill_c[k] - 3.0) / 2.0));
+ }
+ else {
+ v->v_ratio_pre_cwith_immediate_flip[i][j][k] = 999999.0;
+ }
+ }
+ v->required_prefetch_pixel_data_bw_with_immediate_flip[i][j][k] = v->no_of_dpp[i][j][k] * (v->prefetch_lines_y[k] / v->line_times_to_request_prefetch_pixel_data_with_immediate_flip *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) + v->prefetch_lines_c[k] / v->line_times_to_request_prefetch_pixel_data_with_immediate_flip *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / 2.0) * v->swath_width_yper_state[i][j][k] / (v->htotal[k] / v->pixel_clock[k]);
+ }
+ else {
+ v->v_ratio_pre_ywith_immediate_flip[i][j][k] = 999999.0;
+ v->v_ratio_pre_cwith_immediate_flip[i][j][k] = 999999.0;
+ v->required_prefetch_pixel_data_bw_with_immediate_flip[i][j][k] = 999999.0;
+ }
+ if (v->line_times_to_request_prefetch_pixel_data_without_immediate_flip > 0.0) {
+ v->v_ratio_pre_ywithout_immediate_flip[i][j][k] = v->prefetch_lines_y[k] / v->line_times_to_request_prefetch_pixel_data_without_immediate_flip;
+ if ((v->swath_height_yper_state[i][j][k] > 4.0)) {
+ if (v->line_times_to_request_prefetch_pixel_data_without_immediate_flip - (v->prefill_y[k] - 3.0) / 2.0 > 0.0) {
+ v->v_ratio_pre_ywithout_immediate_flip[i][j][k] =dcn_bw_max2(v->v_ratio_pre_ywithout_immediate_flip[i][j][k], (v->max_num_sw_y[k] * v->swath_height_yper_state[i][j][k]) / (v->line_times_to_request_prefetch_pixel_data_without_immediate_flip - (v->prefill_y[k] - 3.0) / 2.0));
+ }
+ else {
+ v->v_ratio_pre_ywithout_immediate_flip[i][j][k] = 999999.0;
+ }
+ }
+ v->v_ratio_pre_cwithout_immediate_flip[i][j][k] = v->prefetch_lines_c[k] / v->line_times_to_request_prefetch_pixel_data_without_immediate_flip;
+ if ((v->swath_height_cper_state[i][j][k] > 4.0)) {
+ if (v->line_times_to_request_prefetch_pixel_data_without_immediate_flip - (v->prefill_c[k] - 3.0) / 2.0 > 0.0) {
+ v->v_ratio_pre_cwithout_immediate_flip[i][j][k] =dcn_bw_max2(v->v_ratio_pre_cwithout_immediate_flip[i][j][k], (v->max_num_sw_c[k] * v->swath_height_cper_state[i][j][k]) / (v->line_times_to_request_prefetch_pixel_data_without_immediate_flip - (v->prefill_c[k] - 3.0) / 2.0));
+ }
+ else {
+ v->v_ratio_pre_cwithout_immediate_flip[i][j][k] = 999999.0;
+ }
+ }
+ v->required_prefetch_pixel_data_bw_without_immediate_flip[i][j][k] = v->no_of_dpp[i][j][k] * (v->prefetch_lines_y[k] / v->line_times_to_request_prefetch_pixel_data_without_immediate_flip *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) + v->prefetch_lines_c[k] / v->line_times_to_request_prefetch_pixel_data_without_immediate_flip *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / 2.0) * v->swath_width_yper_state[i][j][k] / (v->htotal[k] / v->pixel_clock[k]);
+ }
+ else {
+ v->v_ratio_pre_ywithout_immediate_flip[i][j][k] = 999999.0;
+ v->v_ratio_pre_cwithout_immediate_flip[i][j][k] = 999999.0;
+ v->required_prefetch_pixel_data_bw_without_immediate_flip[i][j][k] = 999999.0;
+ }
+ }
+ v->maximum_read_bandwidth_with_prefetch_with_immediate_flip = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if ((v->source_pixel_format[k] != dcn_bw_yuv420_sub_8 && v->source_pixel_format[k] != dcn_bw_yuv420_sub_10)) {
+ v->maximum_read_bandwidth_with_prefetch_with_immediate_flip = v->maximum_read_bandwidth_with_prefetch_with_immediate_flip +dcn_bw_max2(v->read_bandwidth[k], v->required_prefetch_pixel_data_bw_with_immediate_flip[i][j][k]) +dcn_bw_max2(v->meta_pte_bytes_per_frame[k] / (v->lines_for_meta_pte_with_immediate_flip[k] * v->htotal[k] / v->pixel_clock[k]), (v->meta_row_bytes[k] + v->dpte_bytes_per_row[k]) / (v->lines_for_meta_and_dpte_row_with_immediate_flip[k] * v->htotal[k] / v->pixel_clock[k]));
+ }
+ else {
+ v->maximum_read_bandwidth_with_prefetch_with_immediate_flip = v->maximum_read_bandwidth_with_prefetch_with_immediate_flip +dcn_bw_max2(v->read_bandwidth[k], v->required_prefetch_pixel_data_bw_without_immediate_flip[i][j][k]);
+ }
+ }
+ v->maximum_read_bandwidth_with_prefetch_without_immediate_flip = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->maximum_read_bandwidth_with_prefetch_without_immediate_flip = v->maximum_read_bandwidth_with_prefetch_without_immediate_flip +dcn_bw_max2(v->read_bandwidth[k], v->required_prefetch_pixel_data_bw_without_immediate_flip[i][j][k]);
+ }
+ v->prefetch_supported_with_immediate_flip[i][j] = dcn_bw_yes;
+ if (v->maximum_read_bandwidth_with_prefetch_with_immediate_flip > v->return_bw_per_state[i]) {
+ v->prefetch_supported_with_immediate_flip[i][j] = dcn_bw_no;
+ }
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->line_times_for_prefetch[k] < 2.0 || v->lines_for_meta_pte_with_immediate_flip[k] >= 8.0 || v->lines_for_meta_and_dpte_row_with_immediate_flip[k] >= 16.0) {
+ v->prefetch_supported_with_immediate_flip[i][j] = dcn_bw_no;
+ }
+ }
+ v->prefetch_supported_without_immediate_flip[i][j] = dcn_bw_yes;
+ if (v->maximum_read_bandwidth_with_prefetch_without_immediate_flip > v->return_bw_per_state[i]) {
+ v->prefetch_supported_without_immediate_flip[i][j] = dcn_bw_no;
+ }
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->line_times_for_prefetch[k] < 2.0 || v->lines_for_meta_pte_without_immediate_flip[k] >= 8.0 || v->lines_for_meta_and_dpte_row_without_immediate_flip[k] >= 16.0) {
+ v->prefetch_supported_without_immediate_flip[i][j] = dcn_bw_no;
+ }
+ }
+ }
+ }
+ for (i = 0; i <= number_of_states_plus_one; i++) {
+ for (j = 0; j <= 1; j++) {
+ v->v_ratio_in_prefetch_supported_with_immediate_flip[i][j] = dcn_bw_yes;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if ((((v->source_pixel_format[k] != dcn_bw_yuv420_sub_8 && v->source_pixel_format[k] != dcn_bw_yuv420_sub_10) && (v->v_ratio_pre_ywith_immediate_flip[i][j][k] > 4.0 || v->v_ratio_pre_cwith_immediate_flip[i][j][k] > 4.0)) || ((v->source_pixel_format[k] == dcn_bw_yuv420_sub_8 || v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) && (v->v_ratio_pre_ywithout_immediate_flip[i][j][k] > 4.0 || v->v_ratio_pre_cwithout_immediate_flip[i][j][k] > 4.0)))) {
+ v->v_ratio_in_prefetch_supported_with_immediate_flip[i][j] = dcn_bw_no;
+ }
+ }
+ v->v_ratio_in_prefetch_supported_without_immediate_flip[i][j] = dcn_bw_yes;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if ((v->v_ratio_pre_ywithout_immediate_flip[i][j][k] > 4.0 || v->v_ratio_pre_cwithout_immediate_flip[i][j][k] > 4.0)) {
+ v->v_ratio_in_prefetch_supported_without_immediate_flip[i][j] = dcn_bw_no;
+ }
+ }
+ }
+ }
+ /*mode support, voltage state and soc configuration*/
+
+ for (i = number_of_states_plus_one; i >= 0; i--) {
+ for (j = 0; j <= 1; j++) {
+ if (v->scale_ratio_support == dcn_bw_yes && v->source_format_pixel_and_scan_support == dcn_bw_yes && v->viewport_size_support == dcn_bw_yes && v->bandwidth_support[i] == dcn_bw_yes && v->dio_support[i] == dcn_bw_yes && v->urgent_latency_support[i][j] == dcn_bw_yes && v->rob_support[i] == dcn_bw_yes && v->dispclk_dppclk_support[i][j] == dcn_bw_yes && v->total_available_pipes_support[i][j] == dcn_bw_yes && v->total_available_writeback_support == dcn_bw_yes && v->writeback_latency_support == dcn_bw_yes) {
+ if (v->prefetch_supported_with_immediate_flip[i][j] == dcn_bw_yes && v->v_ratio_in_prefetch_supported_with_immediate_flip[i][j] == dcn_bw_yes) {
+ v->mode_support_with_immediate_flip[i][j] = dcn_bw_yes;
+ }
+ else {
+ v->mode_support_with_immediate_flip[i][j] = dcn_bw_no;
+ }
+ if (v->prefetch_supported_without_immediate_flip[i][j] == dcn_bw_yes && v->v_ratio_in_prefetch_supported_without_immediate_flip[i][j] == dcn_bw_yes) {
+ v->mode_support_without_immediate_flip[i][j] = dcn_bw_yes;
+ }
+ else {
+ v->mode_support_without_immediate_flip[i][j] = dcn_bw_no;
+ }
+ }
+ else {
+ v->mode_support_with_immediate_flip[i][j] = dcn_bw_no;
+ v->mode_support_without_immediate_flip[i][j] = dcn_bw_no;
+ }
+ }
+ }
+ for (i = number_of_states_plus_one; i >= 0; i--) {
+ if ((i == number_of_states_plus_one || v->mode_support_with_immediate_flip[i][1] == dcn_bw_yes || v->mode_support_with_immediate_flip[i][0] == dcn_bw_yes) && i >= v->voltage_override_level) {
+ v->voltage_level_with_immediate_flip = i;
+ }
+ }
+ for (i = number_of_states_plus_one; i >= 0; i--) {
+ if ((i == number_of_states_plus_one || v->mode_support_without_immediate_flip[i][1] == dcn_bw_yes || v->mode_support_without_immediate_flip[i][0] == dcn_bw_yes) && i >= v->voltage_override_level) {
+ v->voltage_level_without_immediate_flip = i;
+ }
+ }
+ if (v->voltage_level_with_immediate_flip == number_of_states_plus_one) {
+ v->immediate_flip_supported = dcn_bw_no;
+ v->voltage_level = v->voltage_level_without_immediate_flip;
+ }
+ else {
+ v->immediate_flip_supported = dcn_bw_yes;
+ v->voltage_level = v->voltage_level_with_immediate_flip;
+ }
+ v->dcfclk = v->dcfclk_per_state[v->voltage_level];
+ v->fabric_and_dram_bandwidth = v->fabric_and_dram_bandwidth_per_state[v->voltage_level];
+ for (j = 0; j <= 1; j++) {
+ v->required_dispclk_per_ratio[j] = v->required_dispclk[v->voltage_level][j];
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->dpp_per_plane_per_ratio[j][k] = v->no_of_dpp[v->voltage_level][j][k];
+ }
+ v->dispclk_dppclk_support_per_ratio[j] = v->dispclk_dppclk_support[v->voltage_level][j];
+ }
+ v->max_phyclk = v->phyclk_per_state[v->voltage_level];
+}
+void display_pipe_configuration(struct dcn_bw_internal_vars *v)
+{
+ int j;
+ int k;
+ /*display pipe configuration*/
+
+ for (j = 0; j <= 1; j++) {
+ v->total_number_of_active_dpp_per_ratio[j] = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->total_number_of_active_dpp_per_ratio[j] = v->total_number_of_active_dpp_per_ratio[j] + v->dpp_per_plane_per_ratio[j][k];
+ }
+ }
+ if ((v->dispclk_dppclk_support_per_ratio[0] == dcn_bw_yes && v->dispclk_dppclk_support_per_ratio[1] == dcn_bw_no) || (v->dispclk_dppclk_support_per_ratio[0] == v->dispclk_dppclk_support_per_ratio[1] && (v->total_number_of_active_dpp_per_ratio[0] < v->total_number_of_active_dpp_per_ratio[1] || (((v->total_number_of_active_dpp_per_ratio[0] == v->total_number_of_active_dpp_per_ratio[1]) && v->required_dispclk_per_ratio[0] <= 0.5 * v->required_dispclk_per_ratio[1]))))) {
+ v->dispclk_dppclk_ratio = 1;
+ v->final_error_message = v->error_message[0];
+ }
+ else {
+ v->dispclk_dppclk_ratio = 2;
+ v->final_error_message = v->error_message[1];
+ }
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->dpp_per_plane[k] = v->dpp_per_plane_per_ratio[v->dispclk_dppclk_ratio - 1][k];
+ }
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->source_pixel_format[k] == dcn_bw_rgb_sub_64) {
+ v->byte_per_pix_dety = 8.0;
+ v->byte_per_pix_detc = 0.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_32) {
+ v->byte_per_pix_dety = 4.0;
+ v->byte_per_pix_detc = 0.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_16) {
+ v->byte_per_pix_dety = 2.0;
+ v->byte_per_pix_detc = 0.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_8) {
+ v->byte_per_pix_dety = 1.0;
+ v->byte_per_pix_detc = 2.0;
+ }
+ else {
+ v->byte_per_pix_dety = 4.0f / 3.0f;
+ v->byte_per_pix_detc = 8.0f / 3.0f;
+ }
+ if ((v->source_pixel_format[k] == dcn_bw_rgb_sub_64 || v->source_pixel_format[k] == dcn_bw_rgb_sub_32 || v->source_pixel_format[k] == dcn_bw_rgb_sub_16)) {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->read256_bytes_block_height_y = 1.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_64) {
+ v->read256_bytes_block_height_y = 4.0;
+ }
+ else {
+ v->read256_bytes_block_height_y = 8.0;
+ }
+ v->read256_bytes_block_width_y = 256.0 /dcn_bw_ceil2(v->byte_per_pix_dety, 1.0) / v->read256_bytes_block_height_y;
+ v->read256_bytes_block_height_c = 0.0;
+ v->read256_bytes_block_width_c = 0.0;
+ }
+ else {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->read256_bytes_block_height_y = 1.0;
+ v->read256_bytes_block_height_c = 1.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_8) {
+ v->read256_bytes_block_height_y = 16.0;
+ v->read256_bytes_block_height_c = 8.0;
+ }
+ else {
+ v->read256_bytes_block_height_y = 8.0;
+ v->read256_bytes_block_height_c = 8.0;
+ }
+ v->read256_bytes_block_width_y = 256.0 /dcn_bw_ceil2(v->byte_per_pix_dety, 1.0) / v->read256_bytes_block_height_y;
+ v->read256_bytes_block_width_c = 256.0 /dcn_bw_ceil2(v->byte_per_pix_detc, 2.0) / v->read256_bytes_block_height_c;
+ }
+ if (v->source_scan[k] == dcn_bw_hor) {
+ v->maximum_swath_height_y = v->read256_bytes_block_height_y;
+ v->maximum_swath_height_c = v->read256_bytes_block_height_c;
+ }
+ else {
+ v->maximum_swath_height_y = v->read256_bytes_block_width_y;
+ v->maximum_swath_height_c = v->read256_bytes_block_width_c;
+ }
+ if ((v->source_pixel_format[k] == dcn_bw_rgb_sub_64 || v->source_pixel_format[k] == dcn_bw_rgb_sub_32 || v->source_pixel_format[k] == dcn_bw_rgb_sub_16)) {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear || (v->source_pixel_format[k] == dcn_bw_rgb_sub_64 && (v->source_surface_mode[k] == dcn_bw_sw_4_kb_s || v->source_surface_mode[k] == dcn_bw_sw_4_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_t || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_var_s || v->source_surface_mode[k] == dcn_bw_sw_var_s_x) && v->source_scan[k] == dcn_bw_hor)) {
+ v->minimum_swath_height_y = v->maximum_swath_height_y;
+ }
+ else {
+ v->minimum_swath_height_y = v->maximum_swath_height_y / 2.0;
+ }
+ v->minimum_swath_height_c = v->maximum_swath_height_c;
+ }
+ else {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->minimum_swath_height_y = v->maximum_swath_height_y;
+ v->minimum_swath_height_c = v->maximum_swath_height_c;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_8 && v->source_scan[k] == dcn_bw_hor) {
+ v->minimum_swath_height_y = v->maximum_swath_height_y / 2.0;
+ if (v->bug_forcing_luma_and_chroma_request_to_same_size_fixed == dcn_bw_yes) {
+ v->minimum_swath_height_c = v->maximum_swath_height_c;
+ }
+ else {
+ v->minimum_swath_height_c = v->maximum_swath_height_c / 2.0;
+ }
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10 && v->source_scan[k] == dcn_bw_hor) {
+ v->minimum_swath_height_c = v->maximum_swath_height_c / 2.0;
+ if (v->bug_forcing_luma_and_chroma_request_to_same_size_fixed == dcn_bw_yes) {
+ v->minimum_swath_height_y = v->maximum_swath_height_y;
+ }
+ else {
+ v->minimum_swath_height_y = v->maximum_swath_height_y / 2.0;
+ }
+ }
+ else {
+ v->minimum_swath_height_y = v->maximum_swath_height_y;
+ v->minimum_swath_height_c = v->maximum_swath_height_c;
+ }
+ }
+ if (v->source_scan[k] == dcn_bw_hor) {
+ v->swath_width = v->viewport_width[k] / v->dpp_per_plane[k];
+ }
+ else {
+ v->swath_width = v->viewport_height[k] / v->dpp_per_plane[k];
+ }
+ v->swath_width_granularity_y = 256.0 /dcn_bw_ceil2(v->byte_per_pix_dety, 1.0) / v->maximum_swath_height_y;
+ v->rounded_up_max_swath_size_bytes_y = (dcn_bw_ceil2(v->swath_width - 1.0, v->swath_width_granularity_y) + v->swath_width_granularity_y) * v->byte_per_pix_dety * v->maximum_swath_height_y;
+ if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
+ v->rounded_up_max_swath_size_bytes_y =dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_y, 256.0) + 256;
+ }
+ if (v->maximum_swath_height_c > 0.0) {
+ v->swath_width_granularity_c = 256.0 /dcn_bw_ceil2(v->byte_per_pix_detc, 2.0) / v->maximum_swath_height_c;
+ v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pix_detc * v->maximum_swath_height_c;
+ if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
+ v->rounded_up_max_swath_size_bytes_c = dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
+ }
+ }
+ if (v->rounded_up_max_swath_size_bytes_y + v->rounded_up_max_swath_size_bytes_c <= v->det_buffer_size_in_kbyte * 1024.0 / 2.0) {
+ v->swath_height_y[k] = v->maximum_swath_height_y;
+ v->swath_height_c[k] = v->maximum_swath_height_c;
+ }
+ else {
+ v->swath_height_y[k] = v->minimum_swath_height_y;
+ v->swath_height_c[k] = v->minimum_swath_height_c;
+ }
+ if (v->swath_height_c[k] == 0.0) {
+ v->det_buffer_size_y[k] = v->det_buffer_size_in_kbyte * 1024.0;
+ v->det_buffer_size_c[k] = 0.0;
+ }
+ else if (v->swath_height_y[k] <= v->swath_height_c[k]) {
+ v->det_buffer_size_y[k] = v->det_buffer_size_in_kbyte * 1024.0 / 2.0;
+ v->det_buffer_size_c[k] = v->det_buffer_size_in_kbyte * 1024.0 / 2.0;
+ }
+ else {
+ v->det_buffer_size_y[k] = v->det_buffer_size_in_kbyte * 1024.0 * 2.0 / 3.0;
+ v->det_buffer_size_c[k] = v->det_buffer_size_in_kbyte * 1024.0 / 3.0;
+ }
+ }
+}
+void dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(struct dcn_bw_internal_vars *v)
+{
+ int k;
+ /*dispclk and dppclk calculation*/
+
+ v->dispclk_with_ramping = 0.0;
+ v->dispclk_without_ramping = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->h_ratio[k] > 1.0) {
+ v->pscl_throughput[k] =dcn_bw_min2(v->max_dchub_topscl_throughput, v->max_pscl_tolb_throughput * v->h_ratio[k] /dcn_bw_ceil2(v->htaps[k] / 6.0, 1.0));
+ }
+ else {
+ v->pscl_throughput[k] =dcn_bw_min2(v->max_dchub_topscl_throughput, v->max_pscl_tolb_throughput);
+ }
+ v->dppclk_using_single_dpp_luma = v->pixel_clock[k] *dcn_bw_max3(v->vtaps[k] / 6.0 *dcn_bw_min2(1.0, v->h_ratio[k]), v->h_ratio[k] * v->v_ratio[k] / v->pscl_throughput[k], 1.0);
+ if ((v->source_pixel_format[k] != dcn_bw_yuv420_sub_8 && v->source_pixel_format[k] != dcn_bw_yuv420_sub_10)) {
+ v->pscl_throughput_chroma[k] = 0.0;
+ v->dppclk_using_single_dpp = v->dppclk_using_single_dpp_luma;
+ }
+ else {
+ if (v->h_ratio[k] > 1.0) {
+ v->pscl_throughput_chroma[k] =dcn_bw_min2(v->max_dchub_topscl_throughput, v->max_pscl_tolb_throughput * v->h_ratio[k] / 2.0 /dcn_bw_ceil2(v->hta_pschroma[k] / 6.0, 1.0));
+ }
+ else {
+ v->pscl_throughput_chroma[k] =dcn_bw_min2(v->max_dchub_topscl_throughput, v->max_pscl_tolb_throughput);
+ }
+ v->dppclk_using_single_dpp_chroma = v->pixel_clock[k] *dcn_bw_max3(v->vta_pschroma[k] / 6.0 *dcn_bw_min2(1.0, v->h_ratio[k] / 2.0), v->h_ratio[k] * v->v_ratio[k] / 4.0 / v->pscl_throughput_chroma[k], 1.0);
+ v->dppclk_using_single_dpp =dcn_bw_max2(v->dppclk_using_single_dpp_luma, v->dppclk_using_single_dpp_chroma);
+ }
+ if (v->odm_capable == dcn_bw_yes) {
+ v->dispclk_with_ramping =dcn_bw_max2(v->dispclk_with_ramping,dcn_bw_max2(v->dppclk_using_single_dpp / v->dpp_per_plane[k] * v->dispclk_dppclk_ratio, v->pixel_clock[k] / v->dpp_per_plane[k]) * (1.0 + v->downspreading / 100.0) * (1.0 + v->dispclk_ramping_margin / 100.0));
+ v->dispclk_without_ramping =dcn_bw_max2(v->dispclk_without_ramping,dcn_bw_max2(v->dppclk_using_single_dpp / v->dpp_per_plane[k] * v->dispclk_dppclk_ratio, v->pixel_clock[k] / v->dpp_per_plane[k]) * (1.0 + v->downspreading / 100.0));
+ }
+ else {
+ v->dispclk_with_ramping =dcn_bw_max2(v->dispclk_with_ramping,dcn_bw_max2(v->dppclk_using_single_dpp / v->dpp_per_plane[k] * v->dispclk_dppclk_ratio, v->pixel_clock[k]) * (1.0 + v->downspreading / 100.0) * (1.0 + v->dispclk_ramping_margin / 100.0));
+ v->dispclk_without_ramping =dcn_bw_max2(v->dispclk_without_ramping,dcn_bw_max2(v->dppclk_using_single_dpp / v->dpp_per_plane[k] * v->dispclk_dppclk_ratio, v->pixel_clock[k]) * (1.0 + v->downspreading / 100.0));
+ }
+ }
+ if (v->dispclk_without_ramping > v->max_dispclk[number_of_states]) {
+ v->dispclk = v->dispclk_without_ramping;
+ }
+ else if (v->dispclk_with_ramping > v->max_dispclk[number_of_states]) {
+ v->dispclk = v->max_dispclk[number_of_states];
+ }
+ else {
+ v->dispclk = v->dispclk_with_ramping;
+ }
+ v->dppclk = v->dispclk / v->dispclk_dppclk_ratio;
+ /*urgent watermark*/
+
+ v->return_bandwidth_to_dcn =dcn_bw_min2(v->return_bus_width * v->dcfclk, v->fabric_and_dram_bandwidth * 1000.0 * v->percent_of_ideal_drambw_received_after_urg_latency / 100.0);
+ v->dcc_enabled_any_plane = dcn_bw_no;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->dcc_enable[k] == dcn_bw_yes) {
+ v->dcc_enabled_any_plane = dcn_bw_yes;
+ }
+ }
+ v->return_bw = v->return_bandwidth_to_dcn;
+ if (v->dcc_enabled_any_plane == dcn_bw_yes && v->return_bandwidth_to_dcn > v->dcfclk * v->return_bus_width / 4.0) {
+ v->return_bw =dcn_bw_min2(v->return_bw, v->return_bandwidth_to_dcn * 4.0 * (1.0 - v->urgent_latency / ((v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0 / (v->return_bandwidth_to_dcn - v->dcfclk * v->return_bus_width / 4.0) + v->urgent_latency)));
+ }
+ v->critical_compression = 2.0 * v->return_bus_width * v->dcfclk * v->urgent_latency / (v->return_bandwidth_to_dcn * v->urgent_latency + (v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0);
+ if (v->dcc_enabled_any_plane == dcn_bw_yes && v->critical_compression > 1.0 && v->critical_compression < 4.0) {
+ v->return_bw =dcn_bw_min2(v->return_bw, dcn_bw_pow(4.0 * v->return_bandwidth_to_dcn * (v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0 * v->return_bus_width * v->dcfclk * v->urgent_latency / (v->return_bandwidth_to_dcn * v->urgent_latency + (v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0), 2));
+ }
+ v->return_bandwidth_to_dcn =dcn_bw_min2(v->return_bus_width * v->dcfclk, v->fabric_and_dram_bandwidth * 1000.0);
+ if (v->dcc_enabled_any_plane == dcn_bw_yes && v->return_bandwidth_to_dcn > v->dcfclk * v->return_bus_width / 4.0) {
+ v->return_bw =dcn_bw_min2(v->return_bw, v->return_bandwidth_to_dcn * 4.0 * (1.0 - v->urgent_latency / ((v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0 / (v->return_bandwidth_to_dcn - v->dcfclk * v->return_bus_width / 4.0) + v->urgent_latency)));
+ }
+ v->critical_compression = 2.0 * v->return_bus_width * v->dcfclk * v->urgent_latency / (v->return_bandwidth_to_dcn * v->urgent_latency + (v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0);
+ if (v->dcc_enabled_any_plane == dcn_bw_yes && v->critical_compression > 1.0 && v->critical_compression < 4.0) {
+ v->return_bw =dcn_bw_min2(v->return_bw, dcn_bw_pow(4.0 * v->return_bandwidth_to_dcn * (v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0 * v->return_bus_width * v->dcfclk * v->urgent_latency / (v->return_bandwidth_to_dcn * v->urgent_latency + (v->rob_buffer_size_in_kbyte - v->pixel_chunk_size_in_kbyte) * 1024.0), 2));
+ }
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->source_scan[k] == dcn_bw_hor) {
+ v->swath_width_y[k] = v->viewport_width[k] / v->dpp_per_plane[k];
+ }
+ else {
+ v->swath_width_y[k] = v->viewport_height[k] / v->dpp_per_plane[k];
+ }
+ }
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->source_pixel_format[k] == dcn_bw_rgb_sub_64) {
+ v->byte_per_pixel_dety[k] = 8.0;
+ v->byte_per_pixel_detc[k] = 0.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_32) {
+ v->byte_per_pixel_dety[k] = 4.0;
+ v->byte_per_pixel_detc[k] = 0.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_16) {
+ v->byte_per_pixel_dety[k] = 2.0;
+ v->byte_per_pixel_detc[k] = 0.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_8) {
+ v->byte_per_pixel_dety[k] = 1.0;
+ v->byte_per_pixel_detc[k] = 2.0;
+ }
+ else {
+ v->byte_per_pixel_dety[k] = 4.0f / 3.0f;
+ v->byte_per_pixel_detc[k] = 8.0f / 3.0f;
+ }
+ }
+ v->total_data_read_bandwidth = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->read_bandwidth_plane_luma[k] = v->swath_width_y[k] * v->dpp_per_plane[k] *dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) / (v->htotal[k] / v->pixel_clock[k]) * v->v_ratio[k];
+ v->read_bandwidth_plane_chroma[k] = v->swath_width_y[k] / 2.0 * v->dpp_per_plane[k] *dcn_bw_ceil2(v->byte_per_pixel_detc[k], 2.0) / (v->htotal[k] / v->pixel_clock[k]) * v->v_ratio[k] / 2.0;
+ v->total_data_read_bandwidth = v->total_data_read_bandwidth + v->read_bandwidth_plane_luma[k] + v->read_bandwidth_plane_chroma[k];
+ }
+ v->total_active_dpp = 0.0;
+ v->total_dcc_active_dpp = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->total_active_dpp = v->total_active_dpp + v->dpp_per_plane[k];
+ if (v->dcc_enable[k] == dcn_bw_yes) {
+ v->total_dcc_active_dpp = v->total_dcc_active_dpp + v->dpp_per_plane[k];
+ }
+ }
+ v->urgent_round_trip_and_out_of_order_latency = (v->round_trip_ping_latency_cycles + 32.0) / v->dcfclk + v->urgent_out_of_order_return_per_channel * v->number_of_channels / v->return_bw;
+ v->last_pixel_of_line_extra_watermark = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->v_ratio[k] <= 1.0) {
+ v->display_pipe_line_delivery_time_luma[k] = v->swath_width_y[k] * v->dpp_per_plane[k] / v->h_ratio[k] / v->pixel_clock[k];
+ }
+ else {
+ v->display_pipe_line_delivery_time_luma[k] = v->swath_width_y[k] / v->pscl_throughput[k] / v->dppclk;
+ }
+ v->data_fabric_line_delivery_time_luma = v->swath_width_y[k] * v->swath_height_y[k] *dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) / (v->return_bw * v->read_bandwidth_plane_luma[k] / v->dpp_per_plane[k] / v->total_data_read_bandwidth);
+ v->last_pixel_of_line_extra_watermark =dcn_bw_max2(v->last_pixel_of_line_extra_watermark, v->data_fabric_line_delivery_time_luma - v->display_pipe_line_delivery_time_luma[k]);
+ if (v->byte_per_pixel_detc[k] == 0.0) {
+ v->display_pipe_line_delivery_time_chroma[k] = 0.0;
+ }
+ else {
+ if (v->v_ratio[k] / 2.0 <= 1.0) {
+ v->display_pipe_line_delivery_time_chroma[k] = v->swath_width_y[k] / 2.0 * v->dpp_per_plane[k] / (v->h_ratio[k] / 2.0) / v->pixel_clock[k];
+ }
+ else {
+ v->display_pipe_line_delivery_time_chroma[k] = v->swath_width_y[k] / 2.0 / v->pscl_throughput_chroma[k] / v->dppclk;
+ }
+ v->data_fabric_line_delivery_time_chroma = v->swath_width_y[k] / 2.0 * v->swath_height_c[k] *dcn_bw_ceil2(v->byte_per_pixel_detc[k], 2.0) / (v->return_bw * v->read_bandwidth_plane_chroma[k] / v->dpp_per_plane[k] / v->total_data_read_bandwidth);
+ v->last_pixel_of_line_extra_watermark =dcn_bw_max2(v->last_pixel_of_line_extra_watermark, v->data_fabric_line_delivery_time_chroma - v->display_pipe_line_delivery_time_chroma[k]);
+ }
+ }
+ v->urgent_extra_latency = v->urgent_round_trip_and_out_of_order_latency + (v->total_active_dpp * v->pixel_chunk_size_in_kbyte + v->total_dcc_active_dpp * v->meta_chunk_size) * 1024.0 / v->return_bw;
+ if (v->pte_enable == dcn_bw_yes) {
+ v->urgent_extra_latency = v->urgent_extra_latency + v->total_active_dpp * v->pte_chunk_size * 1024.0 / v->return_bw;
+ }
+ v->urgent_watermark = v->urgent_latency + v->last_pixel_of_line_extra_watermark + v->urgent_extra_latency;
+ v->ptemeta_urgent_watermark = v->urgent_watermark + 2.0 * v->urgent_latency;
+ /*nb p-state/dram clock change watermark*/
+
+ v->dram_clock_change_watermark = v->dram_clock_change_latency + v->urgent_watermark;
+ v->total_active_writeback = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->output[k] == dcn_bw_writeback) {
+ v->total_active_writeback = v->total_active_writeback + 1.0;
+ }
+ }
+ if (v->total_active_writeback <= 1.0) {
+ v->writeback_dram_clock_change_watermark = v->dram_clock_change_latency + v->write_back_latency;
+ }
+ else {
+ v->writeback_dram_clock_change_watermark = v->dram_clock_change_latency + v->write_back_latency + v->writeback_chunk_size * 1024.0 / 32.0 / v->socclk;
+ }
+ /*stutter efficiency*/
+
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->lines_in_dety[k] = v->det_buffer_size_y[k] / v->byte_per_pixel_dety[k] / v->swath_width_y[k];
+ v->lines_in_dety_rounded_down_to_swath[k] =dcn_bw_floor2(v->lines_in_dety[k], v->swath_height_y[k]);
+ v->full_det_buffering_time_y[k] = v->lines_in_dety_rounded_down_to_swath[k] * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k];
+ if (v->byte_per_pixel_detc[k] > 0.0) {
+ v->lines_in_detc[k] = v->det_buffer_size_c[k] / v->byte_per_pixel_detc[k] / (v->swath_width_y[k] / 2.0);
+ v->lines_in_detc_rounded_down_to_swath[k] =dcn_bw_floor2(v->lines_in_detc[k], v->swath_height_c[k]);
+ v->full_det_buffering_time_c[k] = v->lines_in_detc_rounded_down_to_swath[k] * (v->htotal[k] / v->pixel_clock[k]) / (v->v_ratio[k] / 2.0);
+ }
+ else {
+ v->lines_in_detc[k] = 0.0;
+ v->lines_in_detc_rounded_down_to_swath[k] = 0.0;
+ v->full_det_buffering_time_c[k] = 999999.0;
+ }
+ }
+ v->min_full_det_buffering_time = 999999.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->full_det_buffering_time_y[k] < v->min_full_det_buffering_time) {
+ v->min_full_det_buffering_time = v->full_det_buffering_time_y[k];
+ v->frame_time_for_min_full_det_buffering_time = v->vtotal[k] * v->htotal[k] / v->pixel_clock[k];
+ }
+ if (v->full_det_buffering_time_c[k] < v->min_full_det_buffering_time) {
+ v->min_full_det_buffering_time = v->full_det_buffering_time_c[k];
+ v->frame_time_for_min_full_det_buffering_time = v->vtotal[k] * v->htotal[k] / v->pixel_clock[k];
+ }
+ }
+ v->average_read_bandwidth_gbyte_per_second = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->dcc_enable[k] == dcn_bw_yes) {
+ v->average_read_bandwidth_gbyte_per_second = v->average_read_bandwidth_gbyte_per_second + v->read_bandwidth_plane_luma[k] / v->dcc_rate[k] / 1000.0 + v->read_bandwidth_plane_chroma[k] / v->dcc_rate[k] / 1000.0;
+ }
+ else {
+ v->average_read_bandwidth_gbyte_per_second = v->average_read_bandwidth_gbyte_per_second + v->read_bandwidth_plane_luma[k] / 1000.0 + v->read_bandwidth_plane_chroma[k] / 1000.0;
+ }
+ if (v->dcc_enable[k] == dcn_bw_yes) {
+ v->average_read_bandwidth_gbyte_per_second = v->average_read_bandwidth_gbyte_per_second + v->read_bandwidth_plane_luma[k] / 1000.0 / 256.0 + v->read_bandwidth_plane_chroma[k] / 1000.0 / 256.0;
+ }
+ if (v->pte_enable == dcn_bw_yes) {
+ v->average_read_bandwidth_gbyte_per_second = v->average_read_bandwidth_gbyte_per_second + v->read_bandwidth_plane_luma[k] / 1000.0 / 512.0 + v->read_bandwidth_plane_chroma[k] / 1000.0 / 512.0;
+ }
+ }
+ v->part_of_burst_that_fits_in_rob =dcn_bw_min2(v->min_full_det_buffering_time * v->total_data_read_bandwidth, v->rob_buffer_size_in_kbyte * 1024.0 * v->total_data_read_bandwidth / (v->average_read_bandwidth_gbyte_per_second * 1000.0));
+ v->stutter_burst_time = v->part_of_burst_that_fits_in_rob * (v->average_read_bandwidth_gbyte_per_second * 1000.0) / v->total_data_read_bandwidth / v->return_bw + (v->min_full_det_buffering_time * v->total_data_read_bandwidth - v->part_of_burst_that_fits_in_rob) / (v->dcfclk * 64.0);
+ if (v->total_active_writeback == 0.0) {
+ v->stutter_efficiency_not_including_vblank = (1.0 - (v->sr_exit_time + v->stutter_burst_time) / v->min_full_det_buffering_time) * 100.0;
+ }
+ else {
+ v->stutter_efficiency_not_including_vblank = 0.0;
+ }
+ v->smallest_vblank = 999999.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->synchronized_vblank == dcn_bw_yes || v->number_of_active_planes == 1) {
+ v->v_blank_time = (v->vtotal[k] - v->vactive[k]) * v->htotal[k] / v->pixel_clock[k];
+ }
+ else {
+ v->v_blank_time = 0.0;
+ }
+ v->smallest_vblank =dcn_bw_min2(v->smallest_vblank, v->v_blank_time);
+ }
+ v->stutter_efficiency = (v->stutter_efficiency_not_including_vblank / 100.0 * (v->frame_time_for_min_full_det_buffering_time - v->smallest_vblank) + v->smallest_vblank) / v->frame_time_for_min_full_det_buffering_time * 100.0;
+ /*dcfclk deep sleep*/
+
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->byte_per_pixel_detc[k] > 0.0) {
+ v->dcfclk_deep_sleep_per_plane[k] =dcn_bw_max2(1.1 * v->swath_width_y[k] *dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) / 32.0 / v->display_pipe_line_delivery_time_luma[k], 1.1 * v->swath_width_y[k] / 2.0 *dcn_bw_ceil2(v->byte_per_pixel_detc[k], 2.0) / 32.0 / v->display_pipe_line_delivery_time_chroma[k]);
+ }
+ else {
+ v->dcfclk_deep_sleep_per_plane[k] = 1.1 * v->swath_width_y[k] *dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) / 64.0 / v->display_pipe_line_delivery_time_luma[k];
+ }
+ v->dcfclk_deep_sleep_per_plane[k] =dcn_bw_max2(v->dcfclk_deep_sleep_per_plane[k], v->pixel_clock[k] / 16.0);
+ }
+ v->dcf_clk_deep_sleep = 8.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->dcf_clk_deep_sleep =dcn_bw_max2(v->dcf_clk_deep_sleep, v->dcfclk_deep_sleep_per_plane[k]);
+ }
+ /*stutter watermark*/
+
+ v->stutter_exit_watermark = v->sr_exit_time + v->last_pixel_of_line_extra_watermark + v->urgent_extra_latency + 10.0 / v->dcf_clk_deep_sleep;
+ v->stutter_enter_plus_exit_watermark = v->sr_enter_plus_exit_time + v->last_pixel_of_line_extra_watermark + v->urgent_extra_latency;
+ /*urgent latency supported*/
+
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->effective_det_plus_lb_lines_luma =dcn_bw_floor2(v->lines_in_dety[k] +dcn_bw_min2(v->lines_in_dety[k] * v->dppclk * v->byte_per_pixel_dety[k] * v->pscl_throughput[k] / (v->return_bw / v->dpp_per_plane[k]), v->effective_lb_latency_hiding_source_lines_luma), v->swath_height_y[k]);
+ v->urgent_latency_support_us_luma = v->effective_det_plus_lb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_det_plus_lb_lines_luma * v->swath_width_y[k] * v->byte_per_pixel_dety[k] / (v->return_bw / v->dpp_per_plane[k]);
+ if (v->byte_per_pixel_detc[k] > 0.0) {
+ v->effective_det_plus_lb_lines_chroma =dcn_bw_floor2(v->lines_in_detc[k] +dcn_bw_min2(v->lines_in_detc[k] * v->dppclk * v->byte_per_pixel_detc[k] * v->pscl_throughput_chroma[k] / (v->return_bw / v->dpp_per_plane[k]), v->effective_lb_latency_hiding_source_lines_chroma), v->swath_height_c[k]);
+ v->urgent_latency_support_us_chroma = v->effective_det_plus_lb_lines_chroma * (v->htotal[k] / v->pixel_clock[k]) / (v->v_ratio[k] / 2.0) - v->effective_det_plus_lb_lines_chroma * (v->swath_width_y[k] / 2.0) * v->byte_per_pixel_detc[k] / (v->return_bw / v->dpp_per_plane[k]);
+ v->urgent_latency_support_us[k] =dcn_bw_min2(v->urgent_latency_support_us_luma, v->urgent_latency_support_us_chroma);
+ }
+ else {
+ v->urgent_latency_support_us[k] = v->urgent_latency_support_us_luma;
+ }
+ }
+ v->min_urgent_latency_support_us = 999999.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->min_urgent_latency_support_us =dcn_bw_min2(v->min_urgent_latency_support_us, v->urgent_latency_support_us[k]);
+ }
+ /*non-urgent latency tolerance*/
+
+ v->non_urgent_latency_tolerance = v->min_urgent_latency_support_us - v->urgent_watermark;
+ /*prefetch*/
+
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if ((v->source_pixel_format[k] == dcn_bw_rgb_sub_64 || v->source_pixel_format[k] == dcn_bw_rgb_sub_32 || v->source_pixel_format[k] == dcn_bw_rgb_sub_16)) {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->block_height256_bytes_y = 1.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_64) {
+ v->block_height256_bytes_y = 4.0;
+ }
+ else {
+ v->block_height256_bytes_y = 8.0;
+ }
+ v->block_height256_bytes_c = 0.0;
+ }
+ else {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->block_height256_bytes_y = 1.0;
+ v->block_height256_bytes_c = 1.0;
+ }
+ else if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_8) {
+ v->block_height256_bytes_y = 16.0;
+ v->block_height256_bytes_c = 8.0;
+ }
+ else {
+ v->block_height256_bytes_y = 8.0;
+ v->block_height256_bytes_c = 8.0;
+ }
+ }
+ if (v->dcc_enable[k] == dcn_bw_yes) {
+ v->meta_request_width_y = 64.0 * 256.0 /dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) / (8.0 * v->block_height256_bytes_y);
+ v->meta_surf_width_y =dcn_bw_ceil2(v->swath_width_y[k] - 1.0, v->meta_request_width_y) + v->meta_request_width_y;
+ v->meta_surf_height_y =dcn_bw_ceil2(v->viewport_height[k] - 1.0, 8.0 * v->block_height256_bytes_y) + 8.0 * v->block_height256_bytes_y;
+ if (v->pte_enable == dcn_bw_yes) {
+ v->meta_pte_bytes_frame_y = (dcn_bw_ceil2((v->meta_surf_width_y * v->meta_surf_height_y *dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) / 256.0 - 4096.0) / 8.0 / 4096.0, 1.0) + 1) * 64.0;
+ }
+ else {
+ v->meta_pte_bytes_frame_y = 0.0;
+ }
+ if (v->source_scan[k] == dcn_bw_hor) {
+ v->meta_row_byte_y = v->meta_surf_width_y * 8.0 * v->block_height256_bytes_y *dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) / 256.0;
+ }
+ else {
+ v->meta_row_byte_y = v->meta_surf_height_y * v->meta_request_width_y *dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) / 256.0;
+ }
+ }
+ else {
+ v->meta_pte_bytes_frame_y = 0.0;
+ v->meta_row_byte_y = 0.0;
+ }
+ if (v->pte_enable == dcn_bw_yes) {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->macro_tile_size_byte_y = 256.0;
+ v->macro_tile_height_y = 1.0;
+ }
+ else if (v->source_surface_mode[k] == dcn_bw_sw_4_kb_s || v->source_surface_mode[k] == dcn_bw_sw_4_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_4_kb_d || v->source_surface_mode[k] == dcn_bw_sw_4_kb_d_x) {
+ v->macro_tile_size_byte_y = 4096.0;
+ v->macro_tile_height_y = 4.0 * v->block_height256_bytes_y;
+ }
+ else if (v->source_surface_mode[k] == dcn_bw_sw_64_kb_s || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_t || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d_t || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d_x) {
+ v->macro_tile_size_byte_y = 64.0 * 1024;
+ v->macro_tile_height_y = 16.0 * v->block_height256_bytes_y;
+ }
+ else {
+ v->macro_tile_size_byte_y = 256.0 * 1024;
+ v->macro_tile_height_y = 32.0 * v->block_height256_bytes_y;
+ }
+ if (v->macro_tile_size_byte_y <= 65536.0) {
+ v->pixel_pte_req_height_y = v->macro_tile_height_y;
+ }
+ else {
+ v->pixel_pte_req_height_y = 16.0 * v->block_height256_bytes_y;
+ }
+ v->pixel_pte_req_width_y = 4096.0 /dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) / v->pixel_pte_req_height_y * 8;
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->pixel_pte_bytes_per_row_y = 64.0 * (dcn_bw_ceil2((v->swath_width_y[k] *dcn_bw_min2(128.0, dcn_bw_pow(2.0,dcn_bw_floor2(dcn_bw_log(v->pte_buffer_size_in_requests * v->pixel_pte_req_width_y / v->swath_width_y[k], 2.0), 1.0))) - 1.0) / v->pixel_pte_req_width_y, 1.0) + 1);
+ }
+ else if (v->source_scan[k] == dcn_bw_hor) {
+ v->pixel_pte_bytes_per_row_y = 64.0 * (dcn_bw_ceil2((v->swath_width_y[k] - 1.0) / v->pixel_pte_req_width_y, 1.0) + 1);
+ }
+ else {
+ v->pixel_pte_bytes_per_row_y = 64.0 * (dcn_bw_ceil2((v->viewport_height[k] - 1.0) / v->pixel_pte_req_height_y, 1.0) + 1);
+ }
+ }
+ else {
+ v->pixel_pte_bytes_per_row_y = 0.0;
+ }
+ if ((v->source_pixel_format[k] != dcn_bw_rgb_sub_64 && v->source_pixel_format[k] != dcn_bw_rgb_sub_32 && v->source_pixel_format[k] != dcn_bw_rgb_sub_16)) {
+ if (v->dcc_enable[k] == dcn_bw_yes) {
+ v->meta_request_width_c = 64.0 * 256.0 /dcn_bw_ceil2(v->byte_per_pixel_detc[k], 2.0) / (8.0 * v->block_height256_bytes_c);
+ v->meta_surf_width_c =dcn_bw_ceil2(v->swath_width_y[k] / 2.0 - 1.0, v->meta_request_width_c) + v->meta_request_width_c;
+ v->meta_surf_height_c =dcn_bw_ceil2(v->viewport_height[k] / 2.0 - 1.0, 8.0 * v->block_height256_bytes_c) + 8.0 * v->block_height256_bytes_c;
+ if (v->pte_enable == dcn_bw_yes) {
+ v->meta_pte_bytes_frame_c = (dcn_bw_ceil2((v->meta_surf_width_c * v->meta_surf_height_c *dcn_bw_ceil2(v->byte_per_pixel_detc[k], 2.0) / 256.0 - 4096.0) / 8.0 / 4096.0, 1.0) + 1) * 64.0;
+ }
+ else {
+ v->meta_pte_bytes_frame_c = 0.0;
+ }
+ if (v->source_scan[k] == dcn_bw_hor) {
+ v->meta_row_byte_c = v->meta_surf_width_c * 8.0 * v->block_height256_bytes_c *dcn_bw_ceil2(v->byte_per_pixel_detc[k], 2.0) / 256.0;
+ }
+ else {
+ v->meta_row_byte_c = v->meta_surf_height_c * v->meta_request_width_c *dcn_bw_ceil2(v->byte_per_pixel_detc[k], 2.0) / 256.0;
+ }
+ }
+ else {
+ v->meta_pte_bytes_frame_c = 0.0;
+ v->meta_row_byte_c = 0.0;
+ }
+ if (v->pte_enable == dcn_bw_yes) {
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->macro_tile_size_bytes_c = 256.0;
+ v->macro_tile_height_c = 1.0;
+ }
+ else if (v->source_surface_mode[k] == dcn_bw_sw_4_kb_s || v->source_surface_mode[k] == dcn_bw_sw_4_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_4_kb_d || v->source_surface_mode[k] == dcn_bw_sw_4_kb_d_x) {
+ v->macro_tile_size_bytes_c = 4096.0;
+ v->macro_tile_height_c = 4.0 * v->block_height256_bytes_c;
+ }
+ else if (v->source_surface_mode[k] == dcn_bw_sw_64_kb_s || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_t || v->source_surface_mode[k] == dcn_bw_sw_64_kb_s_x || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d_t || v->source_surface_mode[k] == dcn_bw_sw_64_kb_d_x) {
+ v->macro_tile_size_bytes_c = 64.0 * 1024;
+ v->macro_tile_height_c = 16.0 * v->block_height256_bytes_c;
+ }
+ else {
+ v->macro_tile_size_bytes_c = 256.0 * 1024;
+ v->macro_tile_height_c = 32.0 * v->block_height256_bytes_c;
+ }
+ if (v->macro_tile_size_bytes_c <= 65536.0) {
+ v->pixel_pte_req_height_c = v->macro_tile_height_c;
+ }
+ else {
+ v->pixel_pte_req_height_c = 16.0 * v->block_height256_bytes_c;
+ }
+ v->pixel_pte_req_width_c = 4096.0 /dcn_bw_ceil2(v->byte_per_pixel_detc[k], 2.0) / v->pixel_pte_req_height_c * 8;
+ if (v->source_surface_mode[k] == dcn_bw_sw_linear) {
+ v->pixel_pte_bytes_per_row_c = 64.0 * (dcn_bw_ceil2((v->swath_width_y[k] / 2.0 * dcn_bw_min2(128.0, dcn_bw_pow(2.0,dcn_bw_floor2(dcn_bw_log(v->pte_buffer_size_in_requests * v->pixel_pte_req_width_c / (v->swath_width_y[k] / 2.0), 2.0), 1.0))) - 1.0) / v->pixel_pte_req_width_c, 1.0) + 1);
+ }
+ else if (v->source_scan[k] == dcn_bw_hor) {
+ v->pixel_pte_bytes_per_row_c = 64.0 * (dcn_bw_ceil2((v->swath_width_y[k] / 2.0 - 1.0) / v->pixel_pte_req_width_c, 1.0) + 1);
+ }
+ else {
+ v->pixel_pte_bytes_per_row_c = 64.0 * (dcn_bw_ceil2((v->viewport_height[k] / 2.0 - 1.0) / v->pixel_pte_req_height_c, 1.0) + 1);
+ }
+ }
+ else {
+ v->pixel_pte_bytes_per_row_c = 0.0;
+ }
+ }
+ else {
+ v->pixel_pte_bytes_per_row_c = 0.0;
+ v->meta_pte_bytes_frame_c = 0.0;
+ v->meta_row_byte_c = 0.0;
+ }
+ v->pixel_pte_bytes_per_row[k] = v->pixel_pte_bytes_per_row_y + v->pixel_pte_bytes_per_row_c;
+ v->meta_pte_bytes_frame[k] = v->meta_pte_bytes_frame_y + v->meta_pte_bytes_frame_c;
+ v->meta_row_byte[k] = v->meta_row_byte_y + v->meta_row_byte_c;
+ v->v_init_pre_fill_y[k] =dcn_bw_floor2((v->v_ratio[k] + v->vtaps[k] + 1.0 + v->interlace_output[k] * 0.5 * v->v_ratio[k]) / 2.0, 1.0);
+ v->max_num_swath_y[k] =dcn_bw_ceil2((v->v_init_pre_fill_y[k] - 1.0) / v->swath_height_y[k], 1.0) + 1;
+ if (v->v_init_pre_fill_y[k] > 1.0) {
+ v->max_partial_swath_y =dcn_bw_mod((v->v_init_pre_fill_y[k] - 2.0), v->swath_height_y[k]);
+ }
+ else {
+ v->max_partial_swath_y =dcn_bw_mod((v->v_init_pre_fill_y[k] + v->swath_height_y[k] - 2.0), v->swath_height_y[k]);
+ }
+ v->max_partial_swath_y =dcn_bw_max2(1.0, v->max_partial_swath_y);
+ v->prefetch_source_lines_y[k] = v->max_num_swath_y[k] * v->swath_height_y[k] + v->max_partial_swath_y;
+ if ((v->source_pixel_format[k] != dcn_bw_rgb_sub_64 && v->source_pixel_format[k] != dcn_bw_rgb_sub_32 && v->source_pixel_format[k] != dcn_bw_rgb_sub_16)) {
+ v->v_init_pre_fill_c[k] =dcn_bw_floor2((v->v_ratio[k] / 2.0 + v->vtaps[k] + 1.0 + v->interlace_output[k] * 0.5 * v->v_ratio[k] / 2.0) / 2.0, 1.0);
+ v->max_num_swath_c[k] =dcn_bw_ceil2((v->v_init_pre_fill_c[k] - 1.0) / v->swath_height_c[k], 1.0) + 1;
+ if (v->v_init_pre_fill_c[k] > 1.0) {
+ v->max_partial_swath_c =dcn_bw_mod((v->v_init_pre_fill_c[k] - 2.0), v->swath_height_c[k]);
+ }
+ else {
+ v->max_partial_swath_c =dcn_bw_mod((v->v_init_pre_fill_c[k] + v->swath_height_c[k] - 2.0), v->swath_height_c[k]);
+ }
+ v->max_partial_swath_c =dcn_bw_max2(1.0, v->max_partial_swath_c);
+ }
+ else {
+ v->max_num_swath_c[k] = 0.0;
+ v->max_partial_swath_c = 0.0;
+ }
+ v->prefetch_source_lines_c[k] = v->max_num_swath_c[k] * v->swath_height_c[k] + v->max_partial_swath_c;
+ }
+ v->t_calc = 24.0 / v->dcf_clk_deep_sleep;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one == dcn_bw_yes) {
+ v->max_vstartup_lines[k] = v->vtotal[k] - v->vactive[k] - 1.0;
+ }
+ else {
+ v->max_vstartup_lines[k] = v->v_sync_plus_back_porch[k] - 1.0;
+ }
+ }
+ v->next_prefetch_mode = 0.0;
+ do {
+ v->v_startup_lines = 13.0;
+ do {
+ v->planes_with_room_to_increase_vstartup_prefetch_bw_less_than_active_bw = dcn_bw_yes;
+ v->planes_with_room_to_increase_vstartup_vratio_prefetch_more_than4 = dcn_bw_no;
+ v->planes_with_room_to_increase_vstartup_destination_line_times_for_prefetch_less_than2 = dcn_bw_no;
+ v->v_ratio_prefetch_more_than4 = dcn_bw_no;
+ v->destination_line_times_for_prefetch_less_than2 = dcn_bw_no;
+ v->prefetch_mode = v->next_prefetch_mode;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->dstx_after_scaler = 90.0 * v->pixel_clock[k] / v->dppclk + 42.0 * v->pixel_clock[k] / v->dispclk;
+ if (v->dpp_per_plane[k] > 1.0) {
+ v->dstx_after_scaler = v->dstx_after_scaler + v->scaler_rec_out_width[k] / 2.0;
+ }
+ if (v->output_format[k] == dcn_bw_420) {
+ v->dsty_after_scaler = 1.0;
+ }
+ else {
+ v->dsty_after_scaler = 0.0;
+ }
+ v->v_update_offset_pix[k] = dcn_bw_ceil2(v->htotal[k] / 4.0, 1.0);
+ v->total_repeater_delay_time = v->max_inter_dcn_tile_repeaters * (2.0 / v->dppclk + 3.0 / v->dispclk);
+ v->v_update_width_pix[k] = (14.0 / v->dcf_clk_deep_sleep + 12.0 / v->dppclk + v->total_repeater_delay_time) * v->pixel_clock[k];
+ v->v_ready_offset_pix[k] = dcn_bw_max2(150.0 / v->dppclk, v->total_repeater_delay_time + 20.0 / v->dcf_clk_deep_sleep + 10.0 / v->dppclk) * v->pixel_clock[k];
+ v->t_setup = (v->v_update_offset_pix[k] + v->v_update_width_pix[k] + v->v_ready_offset_pix[k]) / v->pixel_clock[k];
+ v->v_startup[k] =dcn_bw_min2(v->v_startup_lines, v->max_vstartup_lines[k]);
+ if (v->prefetch_mode == 0.0) {
+ v->t_wait =dcn_bw_max3(v->dram_clock_change_latency + v->urgent_latency, v->sr_enter_plus_exit_time, v->urgent_latency);
+ }
+ else if (v->prefetch_mode == 1.0) {
+ v->t_wait =dcn_bw_max2(v->sr_enter_plus_exit_time, v->urgent_latency);
+ }
+ else {
+ v->t_wait = v->urgent_latency;
+ }
+ v->destination_lines_for_prefetch[k] =dcn_bw_floor2(4.0 * (v->v_startup[k] - v->t_wait / (v->htotal[k] / v->pixel_clock[k]) - (v->t_calc + v->t_setup) / (v->htotal[k] / v->pixel_clock[k]) - (v->dsty_after_scaler + v->dstx_after_scaler / v->htotal[k]) + 0.125), 1.0) / 4;
+ if (v->destination_lines_for_prefetch[k] > 0.0) {
+ v->prefetch_bandwidth[k] = (v->meta_pte_bytes_frame[k] + 2.0 * v->meta_row_byte[k] + 2.0 * v->pixel_pte_bytes_per_row[k] + v->prefetch_source_lines_y[k] * v->swath_width_y[k] *dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) + v->prefetch_source_lines_c[k] * v->swath_width_y[k] / 2.0 *dcn_bw_ceil2(v->byte_per_pixel_detc[k], 2.0)) / (v->destination_lines_for_prefetch[k] * v->htotal[k] / v->pixel_clock[k]);
+ }
+ else {
+ v->prefetch_bandwidth[k] = 999999.0;
+ }
+ }
+ v->bandwidth_available_for_immediate_flip = v->return_bw;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->bandwidth_available_for_immediate_flip = v->bandwidth_available_for_immediate_flip -dcn_bw_max2(v->read_bandwidth_plane_luma[k] + v->read_bandwidth_plane_chroma[k], v->prefetch_bandwidth[k]);
+ }
+ v->tot_immediate_flip_bytes = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->immediate_flip_supported == dcn_bw_yes && (v->source_pixel_format[k] != dcn_bw_yuv420_sub_8 && v->source_pixel_format[k] != dcn_bw_yuv420_sub_10)) {
+ v->tot_immediate_flip_bytes = v->tot_immediate_flip_bytes + v->meta_pte_bytes_frame[k] + v->meta_row_byte[k] + v->pixel_pte_bytes_per_row[k];
+ }
+ }
+ v->max_rd_bandwidth = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->pte_enable == dcn_bw_yes && v->dcc_enable[k] == dcn_bw_yes) {
+ if (v->immediate_flip_supported == dcn_bw_yes && (v->source_pixel_format[k] != dcn_bw_yuv420_sub_8 && v->source_pixel_format[k] != dcn_bw_yuv420_sub_10)) {
+ v->time_for_fetching_meta_pte =dcn_bw_max5(v->meta_pte_bytes_frame[k] / v->prefetch_bandwidth[k], v->meta_pte_bytes_frame[k] * v->tot_immediate_flip_bytes / (v->bandwidth_available_for_immediate_flip * (v->meta_pte_bytes_frame[k] + v->meta_row_byte[k] + v->pixel_pte_bytes_per_row[k])), v->urgent_extra_latency, v->urgent_latency, v->htotal[k] / v->pixel_clock[k] / 4.0);
+ }
+ else {
+ v->time_for_fetching_meta_pte =dcn_bw_max3(v->meta_pte_bytes_frame[k] / v->prefetch_bandwidth[k], v->urgent_extra_latency, v->htotal[k] / v->pixel_clock[k] / 4.0);
+ }
+ }
+ else {
+ v->time_for_fetching_meta_pte = v->htotal[k] / v->pixel_clock[k] / 4.0;
+ }
+ v->destination_lines_to_request_vm_inv_blank[k] =dcn_bw_floor2(4.0 * (v->time_for_fetching_meta_pte / (v->htotal[k] / v->pixel_clock[k]) + 0.125), 1.0) / 4;
+ if ((v->pte_enable == dcn_bw_yes || v->dcc_enable[k] == dcn_bw_yes)) {
+ if (v->immediate_flip_supported == dcn_bw_yes && (v->source_pixel_format[k] != dcn_bw_yuv420_sub_8 && v->source_pixel_format[k] != dcn_bw_yuv420_sub_10)) {
+ v->time_for_fetching_row_in_vblank =dcn_bw_max5((v->meta_row_byte[k] + v->pixel_pte_bytes_per_row[k]) / v->prefetch_bandwidth[k], (v->meta_row_byte[k] + v->pixel_pte_bytes_per_row[k]) * v->tot_immediate_flip_bytes / (v->bandwidth_available_for_immediate_flip * (v->meta_pte_bytes_frame[k] + v->meta_row_byte[k] + v->pixel_pte_bytes_per_row[k])), v->urgent_extra_latency, 2.0 * v->urgent_latency, v->htotal[k] / v->pixel_clock[k] - v->time_for_fetching_meta_pte);
+ }
+ else {
+ v->time_for_fetching_row_in_vblank =dcn_bw_max3((v->meta_row_byte[k] + v->pixel_pte_bytes_per_row[k]) / v->prefetch_bandwidth[k], v->urgent_extra_latency, v->htotal[k] / v->pixel_clock[k] - v->time_for_fetching_meta_pte);
+ }
+ }
+ else {
+ v->time_for_fetching_row_in_vblank =dcn_bw_max2(v->urgent_extra_latency - v->time_for_fetching_meta_pte, v->htotal[k] / v->pixel_clock[k] - v->time_for_fetching_meta_pte);
+ }
+ v->destination_lines_to_request_row_in_vblank[k] =dcn_bw_floor2(4.0 * (v->time_for_fetching_row_in_vblank / (v->htotal[k] / v->pixel_clock[k]) + 0.125), 1.0) / 4;
+ v->lines_to_request_prefetch_pixel_data = v->destination_lines_for_prefetch[k] - v->destination_lines_to_request_vm_inv_blank[k] - v->destination_lines_to_request_row_in_vblank[k];
+ if (v->lines_to_request_prefetch_pixel_data > 0.0) {
+ v->v_ratio_prefetch_y[k] = v->prefetch_source_lines_y[k] / v->lines_to_request_prefetch_pixel_data;
+ if ((v->swath_height_y[k] > 4.0)) {
+ if (v->lines_to_request_prefetch_pixel_data > (v->v_init_pre_fill_y[k] - 3.0) / 2.0) {
+ v->v_ratio_prefetch_y[k] =dcn_bw_max2(v->v_ratio_prefetch_y[k], v->max_num_swath_y[k] * v->swath_height_y[k] / (v->lines_to_request_prefetch_pixel_data - (v->v_init_pre_fill_y[k] - 3.0) / 2.0));
+ }
+ else {
+ v->v_ratio_prefetch_y[k] = 999999.0;
+ }
+ }
+ }
+ else {
+ v->v_ratio_prefetch_y[k] = 999999.0;
+ }
+ v->v_ratio_prefetch_y[k] =dcn_bw_max2(v->v_ratio_prefetch_y[k], 1.0);
+ if (v->lines_to_request_prefetch_pixel_data > 0.0) {
+ v->v_ratio_prefetch_c[k] = v->prefetch_source_lines_c[k] / v->lines_to_request_prefetch_pixel_data;
+ if ((v->swath_height_c[k] > 4.0)) {
+ if (v->lines_to_request_prefetch_pixel_data > (v->v_init_pre_fill_c[k] - 3.0) / 2.0) {
+ v->v_ratio_prefetch_c[k] =dcn_bw_max2(v->v_ratio_prefetch_c[k], v->max_num_swath_c[k] * v->swath_height_c[k] / (v->lines_to_request_prefetch_pixel_data - (v->v_init_pre_fill_c[k] - 3.0) / 2.0));
+ }
+ else {
+ v->v_ratio_prefetch_c[k] = 999999.0;
+ }
+ }
+ }
+ else {
+ v->v_ratio_prefetch_c[k] = 999999.0;
+ }
+ v->v_ratio_prefetch_c[k] =dcn_bw_max2(v->v_ratio_prefetch_c[k], 1.0);
+ if (v->lines_to_request_prefetch_pixel_data > 0.0) {
+ v->required_prefetch_pix_data_bw = v->dpp_per_plane[k] * (v->prefetch_source_lines_y[k] / v->lines_to_request_prefetch_pixel_data *dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) + v->prefetch_source_lines_c[k] / v->lines_to_request_prefetch_pixel_data *dcn_bw_ceil2(v->byte_per_pixel_detc[k], 2.0) / 2.0) * v->swath_width_y[k] / (v->htotal[k] / v->pixel_clock[k]);
+ }
+ else {
+ v->required_prefetch_pix_data_bw = 999999.0;
+ }
+ v->max_rd_bandwidth = v->max_rd_bandwidth +dcn_bw_max2(v->read_bandwidth_plane_luma[k] + v->read_bandwidth_plane_chroma[k], v->required_prefetch_pix_data_bw);
+ if (v->immediate_flip_supported == dcn_bw_yes && (v->source_pixel_format[k] != dcn_bw_yuv420_sub_8 && v->source_pixel_format[k] != dcn_bw_yuv420_sub_10)) {
+ v->max_rd_bandwidth = v->max_rd_bandwidth +dcn_bw_max2(v->meta_pte_bytes_frame[k] / (v->destination_lines_to_request_vm_inv_blank[k] * v->htotal[k] / v->pixel_clock[k]), (v->meta_row_byte[k] + v->pixel_pte_bytes_per_row[k]) / (v->destination_lines_to_request_row_in_vblank[k] * v->htotal[k] / v->pixel_clock[k]));
+ }
+ if (v->v_ratio_prefetch_y[k] > 4.0 || v->v_ratio_prefetch_c[k] > 4.0) {
+ v->v_ratio_prefetch_more_than4 = dcn_bw_yes;
+ }
+ if (v->destination_lines_for_prefetch[k] < 2.0) {
+ v->destination_line_times_for_prefetch_less_than2 = dcn_bw_yes;
+ }
+ if (v->max_vstartup_lines[k] > v->v_startup_lines) {
+ if (v->required_prefetch_pix_data_bw > (v->read_bandwidth_plane_luma[k] + v->read_bandwidth_plane_chroma[k])) {
+ v->planes_with_room_to_increase_vstartup_prefetch_bw_less_than_active_bw = dcn_bw_no;
+ }
+ if (v->v_ratio_prefetch_y[k] > 4.0 || v->v_ratio_prefetch_c[k] > 4.0) {
+ v->planes_with_room_to_increase_vstartup_vratio_prefetch_more_than4 = dcn_bw_yes;
+ }
+ if (v->destination_lines_for_prefetch[k] < 2.0) {
+ v->planes_with_room_to_increase_vstartup_destination_line_times_for_prefetch_less_than2 = dcn_bw_yes;
+ }
+ }
+ }
+ if (v->max_rd_bandwidth <= v->return_bw && v->v_ratio_prefetch_more_than4 == dcn_bw_no && v->destination_line_times_for_prefetch_less_than2 == dcn_bw_no) {
+ v->prefetch_mode_supported = dcn_bw_yes;
+ }
+ else {
+ v->prefetch_mode_supported = dcn_bw_no;
+ }
+ v->v_startup_lines = v->v_startup_lines + 1.0;
+ } while (!(v->prefetch_mode_supported == dcn_bw_yes || (v->planes_with_room_to_increase_vstartup_prefetch_bw_less_than_active_bw == dcn_bw_yes && v->planes_with_room_to_increase_vstartup_vratio_prefetch_more_than4 == dcn_bw_no && v->planes_with_room_to_increase_vstartup_destination_line_times_for_prefetch_less_than2 == dcn_bw_no)));
+ v->next_prefetch_mode = v->next_prefetch_mode + 1.0;
+ } while (!(v->prefetch_mode_supported == dcn_bw_yes || v->prefetch_mode == 2.0));
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->v_ratio_prefetch_y[k] <= 1.0) {
+ v->display_pipe_line_delivery_time_luma_prefetch[k] = v->swath_width_y[k] * v->dpp_per_plane[k] / v->h_ratio[k] / v->pixel_clock[k];
+ }
+ else {
+ v->display_pipe_line_delivery_time_luma_prefetch[k] = v->swath_width_y[k] / v->pscl_throughput[k] / v->dppclk;
+ }
+ if (v->byte_per_pixel_detc[k] == 0.0) {
+ v->display_pipe_line_delivery_time_chroma_prefetch[k] = 0.0;
+ }
+ else {
+ if (v->v_ratio_prefetch_c[k] <= 1.0) {
+ v->display_pipe_line_delivery_time_chroma_prefetch[k] = v->swath_width_y[k] * v->dpp_per_plane[k] / v->h_ratio[k] / v->pixel_clock[k];
+ }
+ else {
+ v->display_pipe_line_delivery_time_chroma_prefetch[k] = v->swath_width_y[k] / v->pscl_throughput[k] / v->dppclk;
+ }
+ }
+ }
+ /*min ttuv_blank*/
+
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->prefetch_mode == 0.0) {
+ v->allow_dram_clock_change_during_vblank[k] = dcn_bw_yes;
+ v->allow_dram_self_refresh_during_vblank[k] = dcn_bw_yes;
+ v->min_ttuv_blank[k] = v->t_calc +dcn_bw_max3(v->dram_clock_change_watermark, v->stutter_enter_plus_exit_watermark, v->urgent_watermark);
+ }
+ else if (v->prefetch_mode == 1.0) {
+ v->allow_dram_clock_change_during_vblank[k] = dcn_bw_no;
+ v->allow_dram_self_refresh_during_vblank[k] = dcn_bw_yes;
+ v->min_ttuv_blank[k] = v->t_calc +dcn_bw_max2(v->stutter_enter_plus_exit_watermark, v->urgent_watermark);
+ }
+ else {
+ v->allow_dram_clock_change_during_vblank[k] = dcn_bw_no;
+ v->allow_dram_self_refresh_during_vblank[k] = dcn_bw_no;
+ v->min_ttuv_blank[k] = v->t_calc + v->urgent_watermark;
+ }
+ }
+ /*nb p-state/dram clock change support*/
+
+ v->active_dp_ps = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->active_dp_ps = v->active_dp_ps + v->dpp_per_plane[k];
+ }
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->lb_latency_hiding_source_lines_y =dcn_bw_min2(v->max_line_buffer_lines,dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_y[k] /dcn_bw_max2(v->h_ratio[k], 1.0)), 1.0)) - (v->vtaps[k] - 1.0);
+ v->lb_latency_hiding_source_lines_c =dcn_bw_min2(v->max_line_buffer_lines,dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_y[k] / 2.0 /dcn_bw_max2(v->h_ratio[k] / 2.0, 1.0)), 1.0)) - (v->vta_pschroma[k] - 1.0);
+ v->effective_lb_latency_hiding_y = v->lb_latency_hiding_source_lines_y / v->v_ratio[k] * (v->htotal[k] / v->pixel_clock[k]);
+ v->effective_lb_latency_hiding_c = v->lb_latency_hiding_source_lines_c / (v->v_ratio[k] / 2.0) * (v->htotal[k] / v->pixel_clock[k]);
+ if (v->swath_width_y[k] > 2.0 * v->dpp_output_buffer_pixels) {
+ v->dpp_output_buffer_lines_y = v->dpp_output_buffer_pixels / v->swath_width_y[k];
+ }
+ else if (v->swath_width_y[k] > v->dpp_output_buffer_pixels) {
+ v->dpp_output_buffer_lines_y = 0.5;
+ }
+ else {
+ v->dpp_output_buffer_lines_y = 1.0;
+ }
+ if (v->swath_width_y[k] / 2.0 > 2.0 * v->dpp_output_buffer_pixels) {
+ v->dpp_output_buffer_lines_c = v->dpp_output_buffer_pixels / (v->swath_width_y[k] / 2.0);
+ }
+ else if (v->swath_width_y[k] / 2.0 > v->dpp_output_buffer_pixels) {
+ v->dpp_output_buffer_lines_c = 0.5;
+ }
+ else {
+ v->dpp_output_buffer_lines_c = 1.0;
+ }
+ v->dppopp_buffering_y = (v->htotal[k] / v->pixel_clock[k]) * (v->dpp_output_buffer_lines_y + v->opp_output_buffer_lines);
+ v->max_det_buffering_time_y = v->full_det_buffering_time_y[k] + (v->lines_in_dety[k] - v->lines_in_dety_rounded_down_to_swath[k]) / v->swath_height_y[k] * (v->htotal[k] / v->pixel_clock[k]);
+ v->active_dram_clock_change_latency_margin_y = v->dppopp_buffering_y + v->effective_lb_latency_hiding_y + v->max_det_buffering_time_y - v->dram_clock_change_watermark;
+ if (v->active_dp_ps > 1.0) {
+ v->active_dram_clock_change_latency_margin_y = v->active_dram_clock_change_latency_margin_y - (1.0 - 1.0 / (v->active_dp_ps - 1.0)) * v->swath_height_y[k] * (v->htotal[k] / v->pixel_clock[k]);
+ }
+ if (v->byte_per_pixel_detc[k] > 0.0) {
+ v->dppopp_buffering_c = (v->htotal[k] / v->pixel_clock[k]) * (v->dpp_output_buffer_lines_c + v->opp_output_buffer_lines);
+ v->max_det_buffering_time_c = v->full_det_buffering_time_c[k] + (v->lines_in_detc[k] - v->lines_in_detc_rounded_down_to_swath[k]) / v->swath_height_c[k] * (v->htotal[k] / v->pixel_clock[k]);
+ v->active_dram_clock_change_latency_margin_c = v->dppopp_buffering_c + v->effective_lb_latency_hiding_c + v->max_det_buffering_time_c - v->dram_clock_change_watermark;
+ if (v->active_dp_ps > 1.0) {
+ v->active_dram_clock_change_latency_margin_c = v->active_dram_clock_change_latency_margin_c - (1.0 - 1.0 / (v->active_dp_ps - 1.0)) * v->swath_height_c[k] * (v->htotal[k] / v->pixel_clock[k]);
+ }
+ v->active_dram_clock_change_latency_margin[k] =dcn_bw_min2(v->active_dram_clock_change_latency_margin_y, v->active_dram_clock_change_latency_margin_c);
+ }
+ else {
+ v->active_dram_clock_change_latency_margin[k] = v->active_dram_clock_change_latency_margin_y;
+ }
+ if (v->output_format[k] == dcn_bw_444) {
+ v->writeback_dram_clock_change_latency_margin = (v->writeback_luma_buffer_size + v->writeback_chroma_buffer_size) * 1024.0 / (v->scaler_rec_out_width[k] / (v->htotal[k] / v->pixel_clock[k]) * 4.0) - v->writeback_dram_clock_change_watermark;
+ }
+ else {
+ v->writeback_dram_clock_change_latency_margin =dcn_bw_min2(v->writeback_luma_buffer_size, 2.0 * v->writeback_chroma_buffer_size) * 1024.0 / (v->scaler_rec_out_width[k] / (v->htotal[k] / v->pixel_clock[k])) - v->writeback_dram_clock_change_watermark;
+ }
+ if (v->output[k] == dcn_bw_writeback) {
+ v->active_dram_clock_change_latency_margin[k] =dcn_bw_min2(v->active_dram_clock_change_latency_margin[k], v->writeback_dram_clock_change_latency_margin);
+ }
+ }
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->allow_dram_clock_change_during_vblank[k] == dcn_bw_yes) {
+ v->v_blank_dram_clock_change_latency_margin[k] = (v->vtotal[k] - v->scaler_recout_height[k]) * (v->htotal[k] / v->pixel_clock[k]) -dcn_bw_max2(v->dram_clock_change_watermark, v->writeback_dram_clock_change_watermark);
+ }
+ else {
+ v->v_blank_dram_clock_change_latency_margin[k] = 0.0;
+ }
+ }
+ v->min_active_dram_clock_change_margin = 999999.0;
+ v->v_blank_of_min_active_dram_clock_change_margin = 999999.0;
+ v->second_min_active_dram_clock_change_margin = 999999.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->active_dram_clock_change_latency_margin[k] < v->min_active_dram_clock_change_margin) {
+ v->second_min_active_dram_clock_change_margin = v->min_active_dram_clock_change_margin;
+ v->min_active_dram_clock_change_margin = v->active_dram_clock_change_latency_margin[k];
+ v->v_blank_of_min_active_dram_clock_change_margin = v->v_blank_dram_clock_change_latency_margin[k];
+ }
+ else if (v->active_dram_clock_change_latency_margin[k] < v->second_min_active_dram_clock_change_margin) {
+ v->second_min_active_dram_clock_change_margin = v->active_dram_clock_change_latency_margin[k];
+ }
+ }
+ v->min_vblank_dram_clock_change_margin = 999999.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->min_vblank_dram_clock_change_margin > v->v_blank_dram_clock_change_latency_margin[k]) {
+ v->min_vblank_dram_clock_change_margin = v->v_blank_dram_clock_change_latency_margin[k];
+ }
+ }
+ if (v->synchronized_vblank == dcn_bw_yes || v->number_of_active_planes == 1) {
+ v->dram_clock_change_margin =dcn_bw_max2(v->min_active_dram_clock_change_margin, v->min_vblank_dram_clock_change_margin);
+ }
+ else if (v->v_blank_of_min_active_dram_clock_change_margin > v->min_active_dram_clock_change_margin) {
+ v->dram_clock_change_margin =dcn_bw_min2(v->second_min_active_dram_clock_change_margin, v->v_blank_of_min_active_dram_clock_change_margin);
+ }
+ else {
+ v->dram_clock_change_margin = v->min_active_dram_clock_change_margin;
+ }
+ if (v->min_active_dram_clock_change_margin > 0.0) {
+ v->dram_clock_change_support = dcn_bw_supported_in_v_active;
+ }
+ else if (v->dram_clock_change_margin > 0.0) {
+ v->dram_clock_change_support = dcn_bw_supported_in_v_blank;
+ }
+ else {
+ v->dram_clock_change_support = dcn_bw_not_supported;
+ }
+ /*maximum bandwidth used*/
+
+ v->wr_bandwidth = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->output[k] == dcn_bw_writeback && v->output_format[k] == dcn_bw_444) {
+ v->wr_bandwidth = v->wr_bandwidth + v->scaler_rec_out_width[k] / (v->htotal[k] / v->pixel_clock[k]) * 4.0;
+ }
+ else if (v->output[k] == dcn_bw_writeback) {
+ v->wr_bandwidth = v->wr_bandwidth + v->scaler_rec_out_width[k] / (v->htotal[k] / v->pixel_clock[k]) * 1.5;
+ }
+ }
+ v->max_used_bw = v->max_rd_bandwidth + v->wr_bandwidth;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.h b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.h
new file mode 100644
index 000000000000..ce35de79a6c7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DCN_CALC_AUTO_H_
+#define _DCN_CALC_AUTO_H_
+
+#include "dc.h"
+#include "dcn_calcs.h"
+
+void scaler_settings_calculation(struct dcn_bw_internal_vars *v);
+void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v);
+void display_pipe_configuration(struct dcn_bw_internal_vars *v);
+void dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(
+ struct dcn_bw_internal_vars *v);
+
+#endif /* _DCN_CALC_AUTO_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c
new file mode 100644
index 000000000000..cac72413a097
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "os_types.h"
+#include "dcn_calc_math.h"
+
+#define isNaN(number) ((number) != (number))
+
+/*
+ * NOTE:
+ * This file is gcc-parseable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
+float dcn_bw_mod(const float arg1, const float arg2)
+{
+ if (isNaN(arg1))
+ return arg2;
+ if (isNaN(arg2))
+ return arg1;
+ return arg1 - arg1 * ((int) (arg1 / arg2));
+}
+
+float dcn_bw_min2(const float arg1, const float arg2)
+{
+ if (isNaN(arg1))
+ return arg2;
+ if (isNaN(arg2))
+ return arg1;
+ return arg1 < arg2 ? arg1 : arg2;
+}
+
+unsigned int dcn_bw_max(const unsigned int arg1, const unsigned int arg2)
+{
+ return arg1 > arg2 ? arg1 : arg2;
+}
+float dcn_bw_max2(const float arg1, const float arg2)
+{
+ if (isNaN(arg1))
+ return arg2;
+ if (isNaN(arg2))
+ return arg1;
+ return arg1 > arg2 ? arg1 : arg2;
+}
+
+float dcn_bw_floor2(const float arg, const float significance)
+{
+ ASSERT(significance != 0);
+
+ return ((int) (arg / significance)) * significance;
+}
+float dcn_bw_floor(const float arg)
+{
+ return ((int) (arg));
+}
+
+float dcn_bw_ceil(const float arg)
+{
+ return (int) (arg + 0.99999);
+}
+
+float dcn_bw_ceil2(const float arg, const float significance)
+{
+ ASSERT(significance != 0);
+
+ return ((int) (arg / significance + 0.99999)) * significance;
+}
+
+float dcn_bw_max3(float v1, float v2, float v3)
+{
+ return v3 > dcn_bw_max2(v1, v2) ? v3 : dcn_bw_max2(v1, v2);
+}
+
+float dcn_bw_max5(float v1, float v2, float v3, float v4, float v5)
+{
+ return dcn_bw_max3(v1, v2, v3) > dcn_bw_max2(v4, v5) ? dcn_bw_max3(v1, v2, v3) : dcn_bw_max2(v4, v5);
+}
+
+float dcn_bw_pow(float a, float exp)
+{
+ float temp;
+ /*ASSERT(exp == (int)exp);*/
+ if ((int)exp == 0)
+ return 1;
+ temp = dcn_bw_pow(a, (int)(exp / 2));
+ if (((int)exp % 2) == 0) {
+ return temp * temp;
+ } else {
+ if ((int)exp > 0)
+ return a * temp * temp;
+ else
+ return (temp * temp) / a;
+ }
+}
+
+double dcn_bw_fabs(double a)
+{
+ if (a > 0)
+ return (a);
+ else
+ return (-a);
+}
+
+
+float dcn_bw_log(float a, float b)
+{
+ int * const exp_ptr = (int *)(&a);
+ int x = *exp_ptr;
+ const int log_2 = ((x >> 23) & 255) - 128;
+ x &= ~(255 << 23);
+ x += 127 << 23;
+ *exp_ptr = x;
+
+ a = ((-1.0f / 3) * a + 2) * a - 2.0f / 3;
+
+ if (b > 2.00001 || b < 1.99999)
+ return (a + log_2) / dcn_bw_log(b, 2);
+ else
+ return (a + log_2);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
new file mode 100644
index 000000000000..e73f089c84bb
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
@@ -0,0 +1,1752 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * Copyright 2019 Raptor Engineering, LLC
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services.h"
+#include "dc.h"
+#include "dcn_calcs.h"
+#include "dcn_calc_auto.h"
+#include "dal_asic_id.h"
+#include "resource.h"
+#include "dcn10/dcn10_resource.h"
+#include "dcn10/dcn10_hubbub.h"
+#include "dml/dml1_display_rq_dlg_calc.h"
+
+#include "dcn_calc_math.h"
+
+#define DC_LOGGER \
+ dc->ctx->logger
+
+#define WM_SET_COUNT 4
+#define WM_A 0
+#define WM_B 1
+#define WM_C 2
+#define WM_D 3
+
+/*
+ * NOTE:
+ * This file is gcc-parseable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
+/* Defaults from spreadsheet rev#247.
+ * RV2 delta: dram_clock_change_latency, max_num_dpp
+ */
+const struct dcn_soc_bounding_box dcn10_soc_defaults = {
+ /* latencies */
+ .sr_exit_time = 17, /*us*/
+ .sr_enter_plus_exit_time = 19, /*us*/
+ .urgent_latency = 4, /*us*/
+ .dram_clock_change_latency = 17, /*us*/
+ .write_back_latency = 12, /*us*/
+ .percent_of_ideal_drambw_received_after_urg_latency = 80, /*%*/
+
+ /* below default clocks derived from STA target base on
+ * slow-slow corner + 10% margin with voltages aligned to FCLK.
+ *
+ * Use these value if fused value doesn't make sense as earlier
+ * part don't have correct value fused */
+ /* default DCF CLK DPM on RV*/
+ .dcfclkv_max0p9 = 655, /* MHz, = 3600/5.5 */
+ .dcfclkv_nom0p8 = 626, /* MHz, = 3600/5.75 */
+ .dcfclkv_mid0p72 = 600, /* MHz, = 3600/6, bypass */
+ .dcfclkv_min0p65 = 300, /* MHz, = 3600/12, bypass */
+
+ /* default DISP CLK voltage state on RV */
+ .max_dispclk_vmax0p9 = 1108, /* MHz, = 3600/3.25 */
+ .max_dispclk_vnom0p8 = 1029, /* MHz, = 3600/3.5 */
+ .max_dispclk_vmid0p72 = 960, /* MHz, = 3600/3.75 */
+ .max_dispclk_vmin0p65 = 626, /* MHz, = 3600/5.75 */
+
+ /* default DPP CLK voltage state on RV */
+ .max_dppclk_vmax0p9 = 720, /* MHz, = 3600/5 */
+ .max_dppclk_vnom0p8 = 686, /* MHz, = 3600/5.25 */
+ .max_dppclk_vmid0p72 = 626, /* MHz, = 3600/5.75 */
+ .max_dppclk_vmin0p65 = 400, /* MHz, = 3600/9 */
+
+ /* default PHY CLK voltage state on RV */
+ .phyclkv_max0p9 = 900, /*MHz*/
+ .phyclkv_nom0p8 = 847, /*MHz*/
+ .phyclkv_mid0p72 = 800, /*MHz*/
+ .phyclkv_min0p65 = 600, /*MHz*/
+
+ /* BW depend on FCLK, MCLK, # of channels */
+ /* dual channel BW */
+ .fabric_and_dram_bandwidth_vmax0p9 = 38.4f, /*GB/s*/
+ .fabric_and_dram_bandwidth_vnom0p8 = 34.133f, /*GB/s*/
+ .fabric_and_dram_bandwidth_vmid0p72 = 29.866f, /*GB/s*/
+ .fabric_and_dram_bandwidth_vmin0p65 = 12.8f, /*GB/s*/
+ /* single channel BW
+ .fabric_and_dram_bandwidth_vmax0p9 = 19.2f,
+ .fabric_and_dram_bandwidth_vnom0p8 = 17.066f,
+ .fabric_and_dram_bandwidth_vmid0p72 = 14.933f,
+ .fabric_and_dram_bandwidth_vmin0p65 = 12.8f,
+ */
+
+ .number_of_channels = 2,
+
+ .socclk = 208, /*MHz*/
+ .downspreading = 0.5f, /*%*/
+ .round_trip_ping_latency_cycles = 128, /*DCFCLK Cycles*/
+ .urgent_out_of_order_return_per_channel = 256, /*bytes*/
+ .vmm_page_size = 4096, /*bytes*/
+ .return_bus_width = 64, /*bytes*/
+ .max_request_size = 256, /*bytes*/
+
+ /* Depends on user class (client vs embedded, workstation, etc) */
+ .percent_disp_bw_limit = 0.3f /*%*/
+};
+
+const struct dcn_ip_params dcn10_ip_defaults = {
+ .rob_buffer_size_in_kbyte = 64,
+ .det_buffer_size_in_kbyte = 164,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_in_kbyte = 8,
+ .pte_enable = dcn_bw_yes,
+ .pte_chunk_size = 2, /*kbytes*/
+ .meta_chunk_size = 2, /*kbytes*/
+ .writeback_chunk_size = 2, /*kbytes*/
+ .odm_capability = dcn_bw_no,
+ .dsc_capability = dcn_bw_no,
+ .line_buffer_size = 589824, /*bit*/
+ .max_line_buffer_lines = 12,
+ .is_line_buffer_bpp_fixed = dcn_bw_no,
+ .line_buffer_fixed_bpp = dcn_bw_na,
+ .writeback_luma_buffer_size = 12, /*kbytes*/
+ .writeback_chroma_buffer_size = 8, /*kbytes*/
+ .max_num_dpp = 4,
+ .max_num_writeback = 2,
+ .max_dchub_topscl_throughput = 4, /*pixels/dppclk*/
+ .max_pscl_tolb_throughput = 2, /*pixels/dppclk*/
+ .max_lb_tovscl_throughput = 4, /*pixels/dppclk*/
+ .max_vscl_tohscl_throughput = 4, /*pixels/dppclk*/
+ .max_hscl_ratio = 4,
+ .max_vscl_ratio = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .pte_buffer_size_in_requests = 42,
+ .dispclk_ramping_margin = 1, /*%*/
+ .under_scan_factor = 1.11f,
+ .max_inter_dcn_tile_repeaters = 8,
+ .can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one = dcn_bw_no,
+ .bug_forcing_luma_and_chroma_request_to_same_size_fixed = dcn_bw_no,
+ .dcfclk_cstate_latency = 10 /*TODO clone of something else? sr_enter_plus_exit_time?*/
+};
+
+static enum dcn_bw_defs tl_sw_mode_to_bw_defs(enum swizzle_mode_values sw_mode)
+{
+ switch (sw_mode) {
+ case DC_SW_LINEAR:
+ return dcn_bw_sw_linear;
+ case DC_SW_4KB_S:
+ return dcn_bw_sw_4_kb_s;
+ case DC_SW_4KB_D:
+ return dcn_bw_sw_4_kb_d;
+ case DC_SW_64KB_S:
+ return dcn_bw_sw_64_kb_s;
+ case DC_SW_64KB_D:
+ return dcn_bw_sw_64_kb_d;
+ case DC_SW_VAR_S:
+ return dcn_bw_sw_var_s;
+ case DC_SW_VAR_D:
+ return dcn_bw_sw_var_d;
+ case DC_SW_64KB_S_T:
+ return dcn_bw_sw_64_kb_s_t;
+ case DC_SW_64KB_D_T:
+ return dcn_bw_sw_64_kb_d_t;
+ case DC_SW_4KB_S_X:
+ return dcn_bw_sw_4_kb_s_x;
+ case DC_SW_4KB_D_X:
+ return dcn_bw_sw_4_kb_d_x;
+ case DC_SW_64KB_S_X:
+ return dcn_bw_sw_64_kb_s_x;
+ case DC_SW_64KB_D_X:
+ return dcn_bw_sw_64_kb_d_x;
+ case DC_SW_VAR_S_X:
+ return dcn_bw_sw_var_s_x;
+ case DC_SW_VAR_D_X:
+ return dcn_bw_sw_var_d_x;
+ case DC_SW_256B_S:
+ case DC_SW_256_D:
+ case DC_SW_256_R:
+ case DC_SW_4KB_R:
+ case DC_SW_64KB_R:
+ case DC_SW_VAR_R:
+ case DC_SW_4KB_R_X:
+ case DC_SW_64KB_R_X:
+ case DC_SW_VAR_R_X:
+ default:
+ BREAK_TO_DEBUGGER(); /*not in formula*/
+ return dcn_bw_sw_4_kb_s;
+ }
+}
+
+static int tl_lb_bpp_to_int(enum lb_pixel_depth depth)
+{
+ switch (depth) {
+ case LB_PIXEL_DEPTH_18BPP:
+ return 18;
+ case LB_PIXEL_DEPTH_24BPP:
+ return 24;
+ case LB_PIXEL_DEPTH_30BPP:
+ return 30;
+ case LB_PIXEL_DEPTH_36BPP:
+ return 36;
+ default:
+ return 30;
+ }
+}
+
+static enum dcn_bw_defs tl_pixel_format_to_bw_defs(enum surface_pixel_format format)
+{
+ switch (format) {
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
+ case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+ return dcn_bw_rgb_sub_16;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010_XR_BIAS:
+ return dcn_bw_rgb_sub_32;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
+ return dcn_bw_rgb_sub_64;
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+ return dcn_bw_yuv420_sub_8;
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+ return dcn_bw_yuv420_sub_10;
+ default:
+ return dcn_bw_rgb_sub_32;
+ }
+}
+
+enum source_macro_tile_size swizzle_mode_to_macro_tile_size(enum swizzle_mode_values sw_mode)
+{
+ switch (sw_mode) {
+ /* for 4/8/16 high tiles */
+ case DC_SW_LINEAR:
+ return dm_4k_tile;
+ case DC_SW_4KB_S:
+ case DC_SW_4KB_S_X:
+ return dm_4k_tile;
+ case DC_SW_64KB_S:
+ case DC_SW_64KB_S_X:
+ case DC_SW_64KB_S_T:
+ return dm_64k_tile;
+ case DC_SW_VAR_S:
+ case DC_SW_VAR_S_X:
+ return dm_256k_tile;
+
+ /* For 64bpp 2 high tiles */
+ case DC_SW_4KB_D:
+ case DC_SW_4KB_D_X:
+ return dm_4k_tile;
+ case DC_SW_64KB_D:
+ case DC_SW_64KB_D_X:
+ case DC_SW_64KB_D_T:
+ return dm_64k_tile;
+ case DC_SW_VAR_D:
+ case DC_SW_VAR_D_X:
+ return dm_256k_tile;
+
+ case DC_SW_4KB_R:
+ case DC_SW_4KB_R_X:
+ return dm_4k_tile;
+ case DC_SW_64KB_R:
+ case DC_SW_64KB_R_X:
+ return dm_64k_tile;
+ case DC_SW_VAR_R:
+ case DC_SW_VAR_R_X:
+ return dm_256k_tile;
+
+ /* Unsupported swizzle modes for dcn */
+ case DC_SW_256B_S:
+ default:
+ ASSERT(0); /* Not supported */
+ return 0;
+ }
+}
+
+static void pipe_ctx_to_e2e_pipe_params (
+ const struct pipe_ctx *pipe,
+ struct _vcs_dpi_display_pipe_params_st *input)
+{
+ input->src.is_hsplit = false;
+
+ /* stereo can never be split */
+ if (pipe->plane_state->stereo_format == PLANE_STEREO_FORMAT_SIDE_BY_SIDE ||
+ pipe->plane_state->stereo_format == PLANE_STEREO_FORMAT_TOP_AND_BOTTOM) {
+ /* reset the split group if it was already considered split. */
+ input->src.hsplit_grp = pipe->pipe_idx;
+ } else if (pipe->top_pipe != NULL && pipe->top_pipe->plane_state == pipe->plane_state) {
+ input->src.is_hsplit = true;
+ } else if (pipe->bottom_pipe != NULL && pipe->bottom_pipe->plane_state == pipe->plane_state) {
+ input->src.is_hsplit = true;
+ }
+
+ if (pipe->plane_res.dpp->ctx->dc->debug.optimized_watermark) {
+ /*
+ * this method requires us to always re-calculate watermark when dcc change
+ * between flip.
+ */
+ input->src.dcc = pipe->plane_state->dcc.enable ? 1 : 0;
+ } else {
+ /*
+ * allow us to disable dcc on the fly without re-calculating WM
+ *
+ * extra overhead for DCC is quite small. for 1080p WM without
+ * DCC is only 0.417us lower (urgent goes from 6.979us to 6.562us)
+ */
+ unsigned int bpe;
+
+ input->src.dcc = pipe->plane_res.dpp->ctx->dc->res_pool->hubbub->funcs->
+ dcc_support_pixel_format(pipe->plane_state->format, &bpe) ? 1 : 0;
+ }
+ input->src.dcc_rate = 1;
+ input->src.meta_pitch = pipe->plane_state->dcc.meta_pitch;
+ input->src.source_scan = dm_horz;
+ input->src.sw_mode = pipe->plane_state->tiling_info.gfx9.swizzle;
+
+ input->src.viewport_width = pipe->plane_res.scl_data.viewport.width;
+ input->src.viewport_height = pipe->plane_res.scl_data.viewport.height;
+ input->src.data_pitch = pipe->plane_res.scl_data.viewport.width;
+ input->src.data_pitch_c = pipe->plane_res.scl_data.viewport.width;
+ input->src.cur0_src_width = 128; /* TODO: Cursor calcs, not curently stored */
+ input->src.cur0_bpp = 32;
+
+ input->src.macro_tile_size = swizzle_mode_to_macro_tile_size(pipe->plane_state->tiling_info.gfx9.swizzle);
+
+ switch (pipe->plane_state->rotation) {
+ case ROTATION_ANGLE_0:
+ case ROTATION_ANGLE_180:
+ input->src.source_scan = dm_horz;
+ break;
+ case ROTATION_ANGLE_90:
+ case ROTATION_ANGLE_270:
+ input->src.source_scan = dm_vert;
+ break;
+ default:
+ ASSERT(0); /* Not supported */
+ break;
+ }
+
+ /* TODO: Fix pixel format mappings */
+ switch (pipe->plane_state->format) {
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+ input->src.source_format = dm_420_8;
+ input->src.viewport_width_c = input->src.viewport_width / 2;
+ input->src.viewport_height_c = input->src.viewport_height / 2;
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+ input->src.source_format = dm_420_10;
+ input->src.viewport_width_c = input->src.viewport_width / 2;
+ input->src.viewport_height_c = input->src.viewport_height / 2;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
+ input->src.source_format = dm_444_64;
+ input->src.viewport_width_c = input->src.viewport_width;
+ input->src.viewport_height_c = input->src.viewport_height;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
+ input->src.source_format = dm_rgbe_alpha;
+ input->src.viewport_width_c = input->src.viewport_width;
+ input->src.viewport_height_c = input->src.viewport_height;
+ break;
+ default:
+ input->src.source_format = dm_444_32;
+ input->src.viewport_width_c = input->src.viewport_width;
+ input->src.viewport_height_c = input->src.viewport_height;
+ break;
+ }
+
+ input->scale_taps.htaps = pipe->plane_res.scl_data.taps.h_taps;
+ input->scale_ratio_depth.hscl_ratio = pipe->plane_res.scl_data.ratios.horz.value/4294967296.0;
+ input->scale_ratio_depth.vscl_ratio = pipe->plane_res.scl_data.ratios.vert.value/4294967296.0;
+ input->scale_ratio_depth.vinit = pipe->plane_res.scl_data.inits.v.value/4294967296.0;
+ if (input->scale_ratio_depth.vinit < 1.0)
+ input->scale_ratio_depth.vinit = 1;
+ input->scale_taps.vtaps = pipe->plane_res.scl_data.taps.v_taps;
+ input->scale_taps.vtaps_c = pipe->plane_res.scl_data.taps.v_taps_c;
+ input->scale_taps.htaps_c = pipe->plane_res.scl_data.taps.h_taps_c;
+ input->scale_ratio_depth.hscl_ratio_c = pipe->plane_res.scl_data.ratios.horz_c.value/4294967296.0;
+ input->scale_ratio_depth.vscl_ratio_c = pipe->plane_res.scl_data.ratios.vert_c.value/4294967296.0;
+ input->scale_ratio_depth.vinit_c = pipe->plane_res.scl_data.inits.v_c.value/4294967296.0;
+ if (input->scale_ratio_depth.vinit_c < 1.0)
+ input->scale_ratio_depth.vinit_c = 1;
+ switch (pipe->plane_res.scl_data.lb_params.depth) {
+ case LB_PIXEL_DEPTH_30BPP:
+ input->scale_ratio_depth.lb_depth = 30; break;
+ case LB_PIXEL_DEPTH_36BPP:
+ input->scale_ratio_depth.lb_depth = 36; break;
+ default:
+ input->scale_ratio_depth.lb_depth = 24; break;
+ }
+
+
+ input->dest.vactive = pipe->stream->timing.v_addressable + pipe->stream->timing.v_border_top
+ + pipe->stream->timing.v_border_bottom;
+
+ input->dest.recout_width = pipe->plane_res.scl_data.recout.width;
+ input->dest.recout_height = pipe->plane_res.scl_data.recout.height;
+
+ input->dest.full_recout_width = pipe->plane_res.scl_data.recout.width;
+ input->dest.full_recout_height = pipe->plane_res.scl_data.recout.height;
+
+ input->dest.htotal = pipe->stream->timing.h_total;
+ input->dest.hblank_start = input->dest.htotal - pipe->stream->timing.h_front_porch;
+ input->dest.hblank_end = input->dest.hblank_start
+ - pipe->stream->timing.h_addressable
+ - pipe->stream->timing.h_border_left
+ - pipe->stream->timing.h_border_right;
+
+ input->dest.vtotal = pipe->stream->timing.v_total;
+ input->dest.vblank_start = input->dest.vtotal - pipe->stream->timing.v_front_porch;
+ input->dest.vblank_end = input->dest.vblank_start
+ - pipe->stream->timing.v_addressable
+ - pipe->stream->timing.v_border_bottom
+ - pipe->stream->timing.v_border_top;
+ input->dest.pixel_rate_mhz = pipe->stream->timing.pix_clk_100hz/10000.0;
+ input->dest.vstartup_start = pipe->pipe_dlg_param.vstartup_start;
+ input->dest.vupdate_offset = pipe->pipe_dlg_param.vupdate_offset;
+ input->dest.vupdate_offset = pipe->pipe_dlg_param.vupdate_offset;
+ input->dest.vupdate_width = pipe->pipe_dlg_param.vupdate_width;
+
+}
+
+static void dcn_bw_calc_rq_dlg_ttu(
+ const struct dc *dc,
+ const struct dcn_bw_internal_vars *v,
+ struct pipe_ctx *pipe,
+ int in_idx)
+{
+ struct display_mode_lib *dml = (struct display_mode_lib *)(&dc->dml);
+ struct _vcs_dpi_display_dlg_regs_st *dlg_regs = &pipe->dlg_regs;
+ struct _vcs_dpi_display_ttu_regs_st *ttu_regs = &pipe->ttu_regs;
+ struct _vcs_dpi_display_rq_regs_st *rq_regs = &pipe->rq_regs;
+ struct _vcs_dpi_display_rq_params_st *rq_param = &pipe->dml_rq_param;
+ struct _vcs_dpi_display_dlg_sys_params_st *dlg_sys_param = &pipe->dml_dlg_sys_param;
+ struct _vcs_dpi_display_e2e_pipe_params_st *input = &pipe->dml_input;
+ float total_active_bw = 0;
+ float total_prefetch_bw = 0;
+ int total_flip_bytes = 0;
+ int i;
+
+ memset(dlg_regs, 0, sizeof(*dlg_regs));
+ memset(ttu_regs, 0, sizeof(*ttu_regs));
+ memset(rq_regs, 0, sizeof(*rq_regs));
+ memset(rq_param, 0, sizeof(*rq_param));
+ memset(dlg_sys_param, 0, sizeof(*dlg_sys_param));
+ memset(input, 0, sizeof(*input));
+
+ for (i = 0; i < number_of_planes; i++) {
+ total_active_bw += v->read_bandwidth[i];
+ total_prefetch_bw += v->prefetch_bandwidth[i];
+ total_flip_bytes += v->total_immediate_flip_bytes[i];
+ }
+ dlg_sys_param->total_flip_bw = v->return_bw - dcn_bw_max2(total_active_bw, total_prefetch_bw);
+ if (dlg_sys_param->total_flip_bw < 0.0)
+ dlg_sys_param->total_flip_bw = 0;
+
+ dlg_sys_param->t_mclk_wm_us = v->dram_clock_change_watermark;
+ dlg_sys_param->t_sr_wm_us = v->stutter_enter_plus_exit_watermark;
+ dlg_sys_param->t_urg_wm_us = v->urgent_watermark;
+ dlg_sys_param->t_extra_us = v->urgent_extra_latency;
+ dlg_sys_param->deepsleep_dcfclk_mhz = v->dcf_clk_deep_sleep;
+ dlg_sys_param->total_flip_bytes = total_flip_bytes;
+
+ pipe_ctx_to_e2e_pipe_params(pipe, &input->pipe);
+ input->clks_cfg.dcfclk_mhz = v->dcfclk;
+ input->clks_cfg.dispclk_mhz = v->dispclk;
+ input->clks_cfg.dppclk_mhz = v->dppclk;
+ input->clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
+ input->clks_cfg.socclk_mhz = v->socclk;
+ input->clks_cfg.voltage = v->voltage_level;
+// dc->dml.logger = pool->base.logger;
+ input->dout.output_format = (v->output_format[in_idx] == dcn_bw_420) ? dm_420 : dm_444;
+ input->dout.output_type = (v->output[in_idx] == dcn_bw_hdmi) ? dm_hdmi : dm_dp;
+ //input[in_idx].dout.output_standard;
+
+ /*todo: soc->sr_enter_plus_exit_time??*/
+
+ dml1_rq_dlg_get_rq_params(dml, rq_param, &input->pipe.src);
+ dml1_extract_rq_regs(dml, rq_regs, rq_param);
+ dml1_rq_dlg_get_dlg_params(
+ dml,
+ dlg_regs,
+ ttu_regs,
+ &rq_param->dlg,
+ dlg_sys_param,
+ input,
+ true,
+ true,
+ v->pte_enable == dcn_bw_yes,
+ pipe->plane_state->flip_immediate);
+}
+
+static void split_stream_across_pipes(
+ struct resource_context *res_ctx,
+ const struct resource_pool *pool,
+ struct pipe_ctx *primary_pipe,
+ struct pipe_ctx *secondary_pipe)
+{
+ int pipe_idx = secondary_pipe->pipe_idx;
+
+ if (!primary_pipe->plane_state)
+ return;
+
+ *secondary_pipe = *primary_pipe;
+
+ secondary_pipe->pipe_idx = pipe_idx;
+ secondary_pipe->plane_res.mi = pool->mis[secondary_pipe->pipe_idx];
+ secondary_pipe->plane_res.hubp = pool->hubps[secondary_pipe->pipe_idx];
+ secondary_pipe->plane_res.ipp = pool->ipps[secondary_pipe->pipe_idx];
+ secondary_pipe->plane_res.xfm = pool->transforms[secondary_pipe->pipe_idx];
+ secondary_pipe->plane_res.dpp = pool->dpps[secondary_pipe->pipe_idx];
+ secondary_pipe->plane_res.mpcc_inst = pool->dpps[secondary_pipe->pipe_idx]->inst;
+ if (primary_pipe->bottom_pipe) {
+ ASSERT(primary_pipe->bottom_pipe != secondary_pipe);
+ secondary_pipe->bottom_pipe = primary_pipe->bottom_pipe;
+ secondary_pipe->bottom_pipe->top_pipe = secondary_pipe;
+ }
+ primary_pipe->bottom_pipe = secondary_pipe;
+ secondary_pipe->top_pipe = primary_pipe;
+
+ resource_build_scaling_params(primary_pipe);
+ resource_build_scaling_params(secondary_pipe);
+}
+
+#if 0
+static void calc_wm_sets_and_perf_params(
+ struct dc_state *context,
+ struct dcn_bw_internal_vars *v)
+{
+ /* Calculate set A last to keep internal var state consistent for required config */
+ if (v->voltage_level < 2) {
+ v->fabric_and_dram_bandwidth_per_state[1] = v->fabric_and_dram_bandwidth_vnom0p8;
+ v->fabric_and_dram_bandwidth_per_state[0] = v->fabric_and_dram_bandwidth_vnom0p8;
+ v->fabric_and_dram_bandwidth = v->fabric_and_dram_bandwidth_vnom0p8;
+ dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v);
+
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns =
+ v->stutter_exit_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns =
+ v->stutter_enter_plus_exit_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns =
+ v->dram_clock_change_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = v->urgent_watermark * 1000;
+
+ v->dcfclk_per_state[1] = v->dcfclkv_nom0p8;
+ v->dcfclk_per_state[0] = v->dcfclkv_nom0p8;
+ v->dcfclk = v->dcfclkv_nom0p8;
+ dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v);
+
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns =
+ v->stutter_exit_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns =
+ v->stutter_enter_plus_exit_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns =
+ v->dram_clock_change_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = v->urgent_watermark * 1000;
+ }
+
+ if (v->voltage_level < 3) {
+ v->fabric_and_dram_bandwidth_per_state[2] = v->fabric_and_dram_bandwidth_vmax0p9;
+ v->fabric_and_dram_bandwidth_per_state[1] = v->fabric_and_dram_bandwidth_vmax0p9;
+ v->fabric_and_dram_bandwidth_per_state[0] = v->fabric_and_dram_bandwidth_vmax0p9;
+ v->fabric_and_dram_bandwidth = v->fabric_and_dram_bandwidth_vmax0p9;
+ v->dcfclk_per_state[2] = v->dcfclkv_max0p9;
+ v->dcfclk_per_state[1] = v->dcfclkv_max0p9;
+ v->dcfclk_per_state[0] = v->dcfclkv_max0p9;
+ v->dcfclk = v->dcfclkv_max0p9;
+ dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v);
+
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns =
+ v->stutter_exit_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns =
+ v->stutter_enter_plus_exit_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns =
+ v->dram_clock_change_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = v->urgent_watermark * 1000;
+ }
+
+ v->fabric_and_dram_bandwidth_per_state[2] = v->fabric_and_dram_bandwidth_vnom0p8;
+ v->fabric_and_dram_bandwidth_per_state[1] = v->fabric_and_dram_bandwidth_vmid0p72;
+ v->fabric_and_dram_bandwidth_per_state[0] = v->fabric_and_dram_bandwidth_vmin0p65;
+ v->fabric_and_dram_bandwidth = v->fabric_and_dram_bandwidth_per_state[v->voltage_level];
+ v->dcfclk_per_state[2] = v->dcfclkv_nom0p8;
+ v->dcfclk_per_state[1] = v->dcfclkv_mid0p72;
+ v->dcfclk_per_state[0] = v->dcfclkv_min0p65;
+ v->dcfclk = v->dcfclk_per_state[v->voltage_level];
+ dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v);
+
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns =
+ v->stutter_exit_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns =
+ v->stutter_enter_plus_exit_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns =
+ v->dram_clock_change_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = v->urgent_watermark * 1000;
+ if (v->voltage_level >= 2) {
+ context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a;
+ context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a;
+ }
+ if (v->voltage_level >= 3)
+ context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
+}
+#endif
+
+static bool dcn_bw_apply_registry_override(struct dc *dc)
+{
+ bool updated = false;
+
+ if ((int)(dc->dcn_soc->sr_exit_time * 1000) != dc->debug.sr_exit_time_ns
+ && dc->debug.sr_exit_time_ns) {
+ updated = true;
+ dc->dcn_soc->sr_exit_time = dc->debug.sr_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(dc->dcn_soc->sr_enter_plus_exit_time * 1000)
+ != dc->debug.sr_enter_plus_exit_time_ns
+ && dc->debug.sr_enter_plus_exit_time_ns) {
+ updated = true;
+ dc->dcn_soc->sr_enter_plus_exit_time =
+ dc->debug.sr_enter_plus_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(dc->dcn_soc->urgent_latency * 1000) != dc->debug.urgent_latency_ns
+ && dc->debug.urgent_latency_ns) {
+ updated = true;
+ dc->dcn_soc->urgent_latency = dc->debug.urgent_latency_ns / 1000.0;
+ }
+
+ if ((int)(dc->dcn_soc->percent_of_ideal_drambw_received_after_urg_latency * 1000)
+ != dc->debug.percent_of_ideal_drambw
+ && dc->debug.percent_of_ideal_drambw) {
+ updated = true;
+ dc->dcn_soc->percent_of_ideal_drambw_received_after_urg_latency =
+ dc->debug.percent_of_ideal_drambw;
+ }
+
+ if ((int)(dc->dcn_soc->dram_clock_change_latency * 1000)
+ != dc->debug.dram_clock_change_latency_ns
+ && dc->debug.dram_clock_change_latency_ns) {
+ updated = true;
+ dc->dcn_soc->dram_clock_change_latency =
+ dc->debug.dram_clock_change_latency_ns / 1000.0;
+ }
+
+ return updated;
+}
+
+static void hack_disable_optional_pipe_split(struct dcn_bw_internal_vars *v)
+{
+ /*
+ * disable optional pipe split by lower dispclk bounding box
+ * at DPM0
+ */
+ v->max_dispclk[0] = v->max_dppclk_vmin0p65;
+}
+
+static void hack_force_pipe_split(struct dcn_bw_internal_vars *v,
+ unsigned int pixel_rate_100hz)
+{
+ float pixel_rate_mhz = pixel_rate_100hz / 10000;
+
+ /*
+ * force enabling pipe split by lower dpp clock for DPM0 to just
+ * below the specify pixel_rate, so bw calc would split pipe.
+ */
+ if (pixel_rate_mhz < v->max_dppclk[0])
+ v->max_dppclk[0] = pixel_rate_mhz;
+}
+
+static void hack_bounding_box(struct dcn_bw_internal_vars *v,
+ struct dc_debug_options *dbg,
+ struct dc_state *context)
+{
+ int i;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ /**
+ * Workaround for avoiding pipe-split in cases where we'd split
+ * planes that are too small, resulting in splits that aren't
+ * valid for the scaler.
+ */
+ if (pipe->plane_state &&
+ (pipe->plane_state->dst_rect.width <= 16 ||
+ pipe->plane_state->dst_rect.height <= 16 ||
+ pipe->plane_state->src_rect.width <= 16 ||
+ pipe->plane_state->src_rect.height <= 16)) {
+ hack_disable_optional_pipe_split(v);
+ return;
+ }
+ }
+
+ if (dbg->pipe_split_policy == MPC_SPLIT_AVOID)
+ hack_disable_optional_pipe_split(v);
+
+ if (dbg->pipe_split_policy == MPC_SPLIT_AVOID_MULT_DISP &&
+ context->stream_count >= 2)
+ hack_disable_optional_pipe_split(v);
+
+ if (context->stream_count == 1 &&
+ dbg->force_single_disp_pipe_split)
+ hack_force_pipe_split(v, context->streams[0]->timing.pix_clk_100hz);
+}
+
+static unsigned int get_highest_allowed_voltage_level(bool is_vmin_only_asic)
+{
+ /* for low power RV2 variants, the highest voltage level we want is 0 */
+ if (is_vmin_only_asic)
+ return 0;
+ else /* we are ok with all levels */
+ return 4;
+}
+
+bool dcn_validate_bandwidth(
+ struct dc *dc,
+ struct dc_state *context,
+ bool fast_validate)
+{
+ /*
+ * we want a breakdown of the various stages of validation, which the
+ * perf_trace macro doesn't support
+ */
+ BW_VAL_TRACE_SETUP();
+
+ const struct resource_pool *pool = dc->res_pool;
+ struct dcn_bw_internal_vars *v = &context->dcn_bw_vars;
+ int i, input_idx, k;
+ int vesa_sync_start, asic_blank_end, asic_blank_start;
+ bool bw_limit_pass;
+ float bw_limit;
+
+ PERFORMANCE_TRACE_START();
+
+ BW_VAL_TRACE_COUNT();
+
+ if (dcn_bw_apply_registry_override(dc))
+ dcn_bw_sync_calcs_and_dml(dc);
+
+ memset(v, 0, sizeof(*v));
+
+ v->sr_exit_time = dc->dcn_soc->sr_exit_time;
+ v->sr_enter_plus_exit_time = dc->dcn_soc->sr_enter_plus_exit_time;
+ v->urgent_latency = dc->dcn_soc->urgent_latency;
+ v->write_back_latency = dc->dcn_soc->write_back_latency;
+ v->percent_of_ideal_drambw_received_after_urg_latency =
+ dc->dcn_soc->percent_of_ideal_drambw_received_after_urg_latency;
+
+ v->dcfclkv_min0p65 = dc->dcn_soc->dcfclkv_min0p65;
+ v->dcfclkv_mid0p72 = dc->dcn_soc->dcfclkv_mid0p72;
+ v->dcfclkv_nom0p8 = dc->dcn_soc->dcfclkv_nom0p8;
+ v->dcfclkv_max0p9 = dc->dcn_soc->dcfclkv_max0p9;
+
+ v->max_dispclk_vmin0p65 = dc->dcn_soc->max_dispclk_vmin0p65;
+ v->max_dispclk_vmid0p72 = dc->dcn_soc->max_dispclk_vmid0p72;
+ v->max_dispclk_vnom0p8 = dc->dcn_soc->max_dispclk_vnom0p8;
+ v->max_dispclk_vmax0p9 = dc->dcn_soc->max_dispclk_vmax0p9;
+
+ v->max_dppclk_vmin0p65 = dc->dcn_soc->max_dppclk_vmin0p65;
+ v->max_dppclk_vmid0p72 = dc->dcn_soc->max_dppclk_vmid0p72;
+ v->max_dppclk_vnom0p8 = dc->dcn_soc->max_dppclk_vnom0p8;
+ v->max_dppclk_vmax0p9 = dc->dcn_soc->max_dppclk_vmax0p9;
+
+ v->socclk = dc->dcn_soc->socclk;
+
+ v->fabric_and_dram_bandwidth_vmin0p65 = dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65;
+ v->fabric_and_dram_bandwidth_vmid0p72 = dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72;
+ v->fabric_and_dram_bandwidth_vnom0p8 = dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8;
+ v->fabric_and_dram_bandwidth_vmax0p9 = dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9;
+
+ v->phyclkv_min0p65 = dc->dcn_soc->phyclkv_min0p65;
+ v->phyclkv_mid0p72 = dc->dcn_soc->phyclkv_mid0p72;
+ v->phyclkv_nom0p8 = dc->dcn_soc->phyclkv_nom0p8;
+ v->phyclkv_max0p9 = dc->dcn_soc->phyclkv_max0p9;
+
+ v->downspreading = dc->dcn_soc->downspreading;
+ v->round_trip_ping_latency_cycles = dc->dcn_soc->round_trip_ping_latency_cycles;
+ v->urgent_out_of_order_return_per_channel = dc->dcn_soc->urgent_out_of_order_return_per_channel;
+ v->number_of_channels = dc->dcn_soc->number_of_channels;
+ v->vmm_page_size = dc->dcn_soc->vmm_page_size;
+ v->dram_clock_change_latency = dc->dcn_soc->dram_clock_change_latency;
+ v->return_bus_width = dc->dcn_soc->return_bus_width;
+
+ v->rob_buffer_size_in_kbyte = dc->dcn_ip->rob_buffer_size_in_kbyte;
+ v->det_buffer_size_in_kbyte = dc->dcn_ip->det_buffer_size_in_kbyte;
+ v->dpp_output_buffer_pixels = dc->dcn_ip->dpp_output_buffer_pixels;
+ v->opp_output_buffer_lines = dc->dcn_ip->opp_output_buffer_lines;
+ v->pixel_chunk_size_in_kbyte = dc->dcn_ip->pixel_chunk_size_in_kbyte;
+ v->pte_enable = dc->dcn_ip->pte_enable;
+ v->pte_chunk_size = dc->dcn_ip->pte_chunk_size;
+ v->meta_chunk_size = dc->dcn_ip->meta_chunk_size;
+ v->writeback_chunk_size = dc->dcn_ip->writeback_chunk_size;
+ v->odm_capability = dc->dcn_ip->odm_capability;
+ v->dsc_capability = dc->dcn_ip->dsc_capability;
+ v->line_buffer_size = dc->dcn_ip->line_buffer_size;
+ v->is_line_buffer_bpp_fixed = dc->dcn_ip->is_line_buffer_bpp_fixed;
+ v->line_buffer_fixed_bpp = dc->dcn_ip->line_buffer_fixed_bpp;
+ v->max_line_buffer_lines = dc->dcn_ip->max_line_buffer_lines;
+ v->writeback_luma_buffer_size = dc->dcn_ip->writeback_luma_buffer_size;
+ v->writeback_chroma_buffer_size = dc->dcn_ip->writeback_chroma_buffer_size;
+ v->max_num_dpp = dc->dcn_ip->max_num_dpp;
+ v->max_num_writeback = dc->dcn_ip->max_num_writeback;
+ v->max_dchub_topscl_throughput = dc->dcn_ip->max_dchub_topscl_throughput;
+ v->max_pscl_tolb_throughput = dc->dcn_ip->max_pscl_tolb_throughput;
+ v->max_lb_tovscl_throughput = dc->dcn_ip->max_lb_tovscl_throughput;
+ v->max_vscl_tohscl_throughput = dc->dcn_ip->max_vscl_tohscl_throughput;
+ v->max_hscl_ratio = dc->dcn_ip->max_hscl_ratio;
+ v->max_vscl_ratio = dc->dcn_ip->max_vscl_ratio;
+ v->max_hscl_taps = dc->dcn_ip->max_hscl_taps;
+ v->max_vscl_taps = dc->dcn_ip->max_vscl_taps;
+ v->under_scan_factor = dc->dcn_ip->under_scan_factor;
+ v->pte_buffer_size_in_requests = dc->dcn_ip->pte_buffer_size_in_requests;
+ v->dispclk_ramping_margin = dc->dcn_ip->dispclk_ramping_margin;
+ v->max_inter_dcn_tile_repeaters = dc->dcn_ip->max_inter_dcn_tile_repeaters;
+ v->can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one =
+ dc->dcn_ip->can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one;
+ v->bug_forcing_luma_and_chroma_request_to_same_size_fixed =
+ dc->dcn_ip->bug_forcing_luma_and_chroma_request_to_same_size_fixed;
+
+ v->voltage[5] = dcn_bw_no_support;
+ v->voltage[4] = dcn_bw_v_max0p9;
+ v->voltage[3] = dcn_bw_v_max0p9;
+ v->voltage[2] = dcn_bw_v_nom0p8;
+ v->voltage[1] = dcn_bw_v_mid0p72;
+ v->voltage[0] = dcn_bw_v_min0p65;
+ v->fabric_and_dram_bandwidth_per_state[5] = v->fabric_and_dram_bandwidth_vmax0p9;
+ v->fabric_and_dram_bandwidth_per_state[4] = v->fabric_and_dram_bandwidth_vmax0p9;
+ v->fabric_and_dram_bandwidth_per_state[3] = v->fabric_and_dram_bandwidth_vmax0p9;
+ v->fabric_and_dram_bandwidth_per_state[2] = v->fabric_and_dram_bandwidth_vnom0p8;
+ v->fabric_and_dram_bandwidth_per_state[1] = v->fabric_and_dram_bandwidth_vmid0p72;
+ v->fabric_and_dram_bandwidth_per_state[0] = v->fabric_and_dram_bandwidth_vmin0p65;
+ v->dcfclk_per_state[5] = v->dcfclkv_max0p9;
+ v->dcfclk_per_state[4] = v->dcfclkv_max0p9;
+ v->dcfclk_per_state[3] = v->dcfclkv_max0p9;
+ v->dcfclk_per_state[2] = v->dcfclkv_nom0p8;
+ v->dcfclk_per_state[1] = v->dcfclkv_mid0p72;
+ v->dcfclk_per_state[0] = v->dcfclkv_min0p65;
+ v->max_dispclk[5] = v->max_dispclk_vmax0p9;
+ v->max_dispclk[4] = v->max_dispclk_vmax0p9;
+ v->max_dispclk[3] = v->max_dispclk_vmax0p9;
+ v->max_dispclk[2] = v->max_dispclk_vnom0p8;
+ v->max_dispclk[1] = v->max_dispclk_vmid0p72;
+ v->max_dispclk[0] = v->max_dispclk_vmin0p65;
+ v->max_dppclk[5] = v->max_dppclk_vmax0p9;
+ v->max_dppclk[4] = v->max_dppclk_vmax0p9;
+ v->max_dppclk[3] = v->max_dppclk_vmax0p9;
+ v->max_dppclk[2] = v->max_dppclk_vnom0p8;
+ v->max_dppclk[1] = v->max_dppclk_vmid0p72;
+ v->max_dppclk[0] = v->max_dppclk_vmin0p65;
+ v->phyclk_per_state[5] = v->phyclkv_max0p9;
+ v->phyclk_per_state[4] = v->phyclkv_max0p9;
+ v->phyclk_per_state[3] = v->phyclkv_max0p9;
+ v->phyclk_per_state[2] = v->phyclkv_nom0p8;
+ v->phyclk_per_state[1] = v->phyclkv_mid0p72;
+ v->phyclk_per_state[0] = v->phyclkv_min0p65;
+ v->synchronized_vblank = dcn_bw_no;
+ v->ta_pscalculation = dcn_bw_override;
+ v->allow_different_hratio_vratio = dcn_bw_yes;
+
+ for (i = 0, input_idx = 0; i < pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+ /* skip all but first of split pipes */
+ if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state)
+ continue;
+
+ v->underscan_output[input_idx] = false; /* taken care of in recout already*/
+ v->interlace_output[input_idx] = false;
+
+ v->htotal[input_idx] = pipe->stream->timing.h_total;
+ v->vtotal[input_idx] = pipe->stream->timing.v_total;
+ v->vactive[input_idx] = pipe->stream->timing.v_addressable +
+ pipe->stream->timing.v_border_top + pipe->stream->timing.v_border_bottom;
+ v->v_sync_plus_back_porch[input_idx] = pipe->stream->timing.v_total
+ - v->vactive[input_idx]
+ - pipe->stream->timing.v_front_porch;
+ v->pixel_clock[input_idx] = pipe->stream->timing.pix_clk_100hz/10000.0;
+ if (pipe->stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
+ v->pixel_clock[input_idx] *= 2;
+ if (!pipe->plane_state) {
+ v->dcc_enable[input_idx] = dcn_bw_yes;
+ v->source_pixel_format[input_idx] = dcn_bw_rgb_sub_32;
+ v->source_surface_mode[input_idx] = dcn_bw_sw_4_kb_s;
+ v->lb_bit_per_pixel[input_idx] = 30;
+ v->viewport_width[input_idx] = pipe->stream->timing.h_addressable;
+ v->viewport_height[input_idx] = pipe->stream->timing.v_addressable;
+ /*
+ * for cases where we have no plane, we want to validate up to 1080p
+ * source size because here we are only interested in if the output
+ * timing is supported or not. if we cannot support native resolution
+ * of the high res display, we still want to support lower res up scale
+ * to native
+ */
+ if (v->viewport_width[input_idx] > 1920)
+ v->viewport_width[input_idx] = 1920;
+ if (v->viewport_height[input_idx] > 1080)
+ v->viewport_height[input_idx] = 1080;
+ v->scaler_rec_out_width[input_idx] = v->viewport_width[input_idx];
+ v->scaler_recout_height[input_idx] = v->viewport_height[input_idx];
+ v->override_hta_ps[input_idx] = 1;
+ v->override_vta_ps[input_idx] = 1;
+ v->override_hta_pschroma[input_idx] = 1;
+ v->override_vta_pschroma[input_idx] = 1;
+ v->source_scan[input_idx] = dcn_bw_hor;
+
+ } else {
+ v->viewport_height[input_idx] = pipe->plane_res.scl_data.viewport.height;
+ v->viewport_width[input_idx] = pipe->plane_res.scl_data.viewport.width;
+ v->scaler_rec_out_width[input_idx] = pipe->plane_res.scl_data.recout.width;
+ v->scaler_recout_height[input_idx] = pipe->plane_res.scl_data.recout.height;
+ if (pipe->bottom_pipe && pipe->bottom_pipe->plane_state == pipe->plane_state) {
+ if (pipe->plane_state->rotation % 2 == 0) {
+ int viewport_end = pipe->plane_res.scl_data.viewport.width
+ + pipe->plane_res.scl_data.viewport.x;
+ int viewport_b_end = pipe->bottom_pipe->plane_res.scl_data.viewport.width
+ + pipe->bottom_pipe->plane_res.scl_data.viewport.x;
+
+ if (viewport_end > viewport_b_end)
+ v->viewport_width[input_idx] = viewport_end
+ - pipe->bottom_pipe->plane_res.scl_data.viewport.x;
+ else
+ v->viewport_width[input_idx] = viewport_b_end
+ - pipe->plane_res.scl_data.viewport.x;
+ } else {
+ int viewport_end = pipe->plane_res.scl_data.viewport.height
+ + pipe->plane_res.scl_data.viewport.y;
+ int viewport_b_end = pipe->bottom_pipe->plane_res.scl_data.viewport.height
+ + pipe->bottom_pipe->plane_res.scl_data.viewport.y;
+
+ if (viewport_end > viewport_b_end)
+ v->viewport_height[input_idx] = viewport_end
+ - pipe->bottom_pipe->plane_res.scl_data.viewport.y;
+ else
+ v->viewport_height[input_idx] = viewport_b_end
+ - pipe->plane_res.scl_data.viewport.y;
+ }
+ v->scaler_rec_out_width[input_idx] = pipe->plane_res.scl_data.recout.width
+ + pipe->bottom_pipe->plane_res.scl_data.recout.width;
+ }
+
+ if (pipe->plane_state->rotation % 2 == 0) {
+ ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dc_fixpt_one.value
+ || v->scaler_rec_out_width[input_idx] == v->viewport_width[input_idx]);
+ ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dc_fixpt_one.value
+ || v->scaler_recout_height[input_idx] == v->viewport_height[input_idx]);
+ } else {
+ ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dc_fixpt_one.value
+ || v->scaler_recout_height[input_idx] == v->viewport_width[input_idx]);
+ ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dc_fixpt_one.value
+ || v->scaler_rec_out_width[input_idx] == v->viewport_height[input_idx]);
+ }
+
+ if (dc->debug.optimized_watermark) {
+ /*
+ * this method requires us to always re-calculate watermark when dcc change
+ * between flip.
+ */
+ v->dcc_enable[input_idx] = pipe->plane_state->dcc.enable ? dcn_bw_yes : dcn_bw_no;
+ } else {
+ /*
+ * allow us to disable dcc on the fly without re-calculating WM
+ *
+ * extra overhead for DCC is quite small. for 1080p WM without
+ * DCC is only 0.417us lower (urgent goes from 6.979us to 6.562us)
+ */
+ unsigned int bpe;
+
+ v->dcc_enable[input_idx] = dc->res_pool->hubbub->funcs->dcc_support_pixel_format(
+ pipe->plane_state->format, &bpe) ? dcn_bw_yes : dcn_bw_no;
+ }
+
+ v->source_pixel_format[input_idx] = tl_pixel_format_to_bw_defs(
+ pipe->plane_state->format);
+ v->source_surface_mode[input_idx] = tl_sw_mode_to_bw_defs(
+ pipe->plane_state->tiling_info.gfx9.swizzle);
+ v->lb_bit_per_pixel[input_idx] = tl_lb_bpp_to_int(pipe->plane_res.scl_data.lb_params.depth);
+ v->override_hta_ps[input_idx] = pipe->plane_res.scl_data.taps.h_taps;
+ v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps;
+ v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c;
+ v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c;
+ /*
+ * Spreadsheet doesn't handle taps_c is one properly,
+ * need to force Chroma to always be scaled to pass
+ * bandwidth validation.
+ */
+ if (v->override_hta_pschroma[input_idx] == 1)
+ v->override_hta_pschroma[input_idx] = 2;
+ if (v->override_vta_pschroma[input_idx] == 1)
+ v->override_vta_pschroma[input_idx] = 2;
+ v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor;
+ }
+ if (v->is_line_buffer_bpp_fixed == dcn_bw_yes)
+ v->lb_bit_per_pixel[input_idx] = v->line_buffer_fixed_bpp;
+ v->dcc_rate[input_idx] = 1; /*TODO: Worst case? does this change?*/
+ v->output_format[input_idx] = pipe->stream->timing.pixel_encoding ==
+ PIXEL_ENCODING_YCBCR420 ? dcn_bw_420 : dcn_bw_444;
+ v->output[input_idx] = pipe->stream->signal ==
+ SIGNAL_TYPE_HDMI_TYPE_A ? dcn_bw_hdmi : dcn_bw_dp;
+ v->output_deep_color[input_idx] = dcn_bw_encoder_8bpc;
+ if (v->output[input_idx] == dcn_bw_hdmi) {
+ switch (pipe->stream->timing.display_color_depth) {
+ case COLOR_DEPTH_101010:
+ v->output_deep_color[input_idx] = dcn_bw_encoder_10bpc;
+ break;
+ case COLOR_DEPTH_121212:
+ v->output_deep_color[input_idx] = dcn_bw_encoder_12bpc;
+ break;
+ case COLOR_DEPTH_161616:
+ v->output_deep_color[input_idx] = dcn_bw_encoder_16bpc;
+ break;
+ default:
+ break;
+ }
+ }
+
+ input_idx++;
+ }
+ v->number_of_active_planes = input_idx;
+
+ scaler_settings_calculation(v);
+
+ hack_bounding_box(v, &dc->debug, context);
+
+ mode_support_and_system_configuration(v);
+
+ /* Unhack dppclk: dont bother with trying to pipe split if we cannot maintain dpm0 */
+ if (v->voltage_level != 0
+ && context->stream_count == 1
+ && dc->debug.force_single_disp_pipe_split) {
+ v->max_dppclk[0] = v->max_dppclk_vmin0p65;
+ mode_support_and_system_configuration(v);
+ }
+
+ if (v->voltage_level == 0 &&
+ (dc->debug.sr_exit_time_dpm0_ns
+ || dc->debug.sr_enter_plus_exit_time_dpm0_ns)) {
+
+ if (dc->debug.sr_enter_plus_exit_time_dpm0_ns)
+ v->sr_enter_plus_exit_time =
+ dc->debug.sr_enter_plus_exit_time_dpm0_ns / 1000.0f;
+ if (dc->debug.sr_exit_time_dpm0_ns)
+ v->sr_exit_time = dc->debug.sr_exit_time_dpm0_ns / 1000.0f;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = v->sr_enter_plus_exit_time;
+ context->bw_ctx.dml.soc.sr_exit_time_us = v->sr_exit_time;
+ mode_support_and_system_configuration(v);
+ }
+
+ display_pipe_configuration(v);
+
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->source_scan[k] == dcn_bw_hor)
+ v->swath_width_y[k] = v->viewport_width[k] / v->dpp_per_plane[k];
+ else
+ v->swath_width_y[k] = v->viewport_height[k] / v->dpp_per_plane[k];
+ }
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ if (v->source_pixel_format[k] == dcn_bw_rgb_sub_64) {
+ v->byte_per_pixel_dety[k] = 8.0;
+ v->byte_per_pixel_detc[k] = 0.0;
+ } else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_32) {
+ v->byte_per_pixel_dety[k] = 4.0;
+ v->byte_per_pixel_detc[k] = 0.0;
+ } else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_16) {
+ v->byte_per_pixel_dety[k] = 2.0;
+ v->byte_per_pixel_detc[k] = 0.0;
+ } else if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_8) {
+ v->byte_per_pixel_dety[k] = 1.0;
+ v->byte_per_pixel_detc[k] = 2.0;
+ } else {
+ v->byte_per_pixel_dety[k] = 4.0f / 3.0f;
+ v->byte_per_pixel_detc[k] = 8.0f / 3.0f;
+ }
+ }
+
+ v->total_data_read_bandwidth = 0.0;
+ for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+ v->read_bandwidth_plane_luma[k] = v->swath_width_y[k] * v->dpp_per_plane[k] *
+ dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) / (v->htotal[k] / v->pixel_clock[k]) * v->v_ratio[k];
+ v->read_bandwidth_plane_chroma[k] = v->swath_width_y[k] / 2.0 * v->dpp_per_plane[k] *
+ dcn_bw_ceil2(v->byte_per_pixel_detc[k], 2.0) / (v->htotal[k] / v->pixel_clock[k]) * v->v_ratio[k] / 2.0;
+ v->total_data_read_bandwidth = v->total_data_read_bandwidth +
+ v->read_bandwidth_plane_luma[k] + v->read_bandwidth_plane_chroma[k];
+ }
+
+ BW_VAL_TRACE_END_VOLTAGE_LEVEL();
+
+ if (v->voltage_level != number_of_states_plus_one && !fast_validate) {
+ float bw_consumed = v->total_bandwidth_consumed_gbyte_per_second;
+
+ if (bw_consumed < v->fabric_and_dram_bandwidth_vmin0p65)
+ bw_consumed = v->fabric_and_dram_bandwidth_vmin0p65;
+ else if (bw_consumed < v->fabric_and_dram_bandwidth_vmid0p72)
+ bw_consumed = v->fabric_and_dram_bandwidth_vmid0p72;
+ else if (bw_consumed < v->fabric_and_dram_bandwidth_vnom0p8)
+ bw_consumed = v->fabric_and_dram_bandwidth_vnom0p8;
+ else
+ bw_consumed = v->fabric_and_dram_bandwidth_vmax0p9;
+
+ if (bw_consumed < v->fabric_and_dram_bandwidth)
+ if (dc->debug.voltage_align_fclk)
+ bw_consumed = v->fabric_and_dram_bandwidth;
+
+ display_pipe_configuration(v);
+ /*calc_wm_sets_and_perf_params(context, v);*/
+ /* Only 1 set is used by dcn since no noticeable
+ * performance improvement was measured and due to hw bug DEGVIDCN10-254
+ */
+ dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v);
+
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns =
+ v->stutter_exit_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns =
+ v->stutter_enter_plus_exit_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns =
+ v->dram_clock_change_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = v->urgent_watermark * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a;
+ context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a;
+ context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
+
+ context->bw_ctx.bw.dcn.clk.fclk_khz = (int)(bw_consumed * 1000000 /
+ (ddr4_dram_factor_single_Channel * v->number_of_channels));
+ if (bw_consumed == v->fabric_and_dram_bandwidth_vmin0p65)
+ context->bw_ctx.bw.dcn.clk.fclk_khz = (int)(bw_consumed * 1000000 / 32);
+
+ context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (int)(v->dcf_clk_deep_sleep * 1000);
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = (int)(v->dcfclk * 1000);
+
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = (int)(v->dispclk * 1000);
+ if (dc->debug.max_disp_clk == true)
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = (int)(dc->dcn_soc->max_dispclk_vmax0p9 * 1000);
+
+ if (context->bw_ctx.bw.dcn.clk.dispclk_khz <
+ dc->debug.min_disp_clk_khz) {
+ context->bw_ctx.bw.dcn.clk.dispclk_khz =
+ dc->debug.min_disp_clk_khz;
+ }
+
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz /
+ v->dispclk_dppclk_ratio;
+ context->bw_ctx.bw.dcn.clk.phyclk_khz = v->phyclk_per_state[v->voltage_level];
+ switch (v->voltage_level) {
+ case 0:
+ context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz =
+ (int)(dc->dcn_soc->max_dppclk_vmin0p65 * 1000);
+ break;
+ case 1:
+ context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz =
+ (int)(dc->dcn_soc->max_dppclk_vmid0p72 * 1000);
+ break;
+ case 2:
+ context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz =
+ (int)(dc->dcn_soc->max_dppclk_vnom0p8 * 1000);
+ break;
+ default:
+ context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz =
+ (int)(dc->dcn_soc->max_dppclk_vmax0p9 * 1000);
+ break;
+ }
+
+ BW_VAL_TRACE_END_WATERMARKS();
+
+ for (i = 0, input_idx = 0; i < pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ /* skip inactive pipe */
+ if (!pipe->stream)
+ continue;
+ /* skip all but first of split pipes */
+ if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state)
+ continue;
+
+ pipe->pipe_dlg_param.vupdate_width = v->v_update_width_pix[input_idx];
+ pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset_pix[input_idx];
+ pipe->pipe_dlg_param.vready_offset = v->v_ready_offset_pix[input_idx];
+ pipe->pipe_dlg_param.vstartup_start = v->v_startup[input_idx];
+
+ pipe->pipe_dlg_param.htotal = pipe->stream->timing.h_total;
+ pipe->pipe_dlg_param.vtotal = pipe->stream->timing.v_total;
+ vesa_sync_start = pipe->stream->timing.v_addressable +
+ pipe->stream->timing.v_border_bottom +
+ pipe->stream->timing.v_front_porch;
+
+ asic_blank_end = (pipe->stream->timing.v_total -
+ vesa_sync_start -
+ pipe->stream->timing.v_border_top)
+ * (pipe->stream->timing.flags.INTERLACE ? 1 : 0);
+
+ asic_blank_start = asic_blank_end +
+ (pipe->stream->timing.v_border_top +
+ pipe->stream->timing.v_addressable +
+ pipe->stream->timing.v_border_bottom)
+ * (pipe->stream->timing.flags.INTERLACE ? 1 : 0);
+
+ pipe->pipe_dlg_param.vblank_start = asic_blank_start;
+ pipe->pipe_dlg_param.vblank_end = asic_blank_end;
+
+ if (pipe->plane_state) {
+ struct pipe_ctx *hsplit_pipe = pipe->bottom_pipe;
+
+ pipe->plane_state->update_flags.bits.full_update = 1;
+
+ if (v->dpp_per_plane[input_idx] == 2 ||
+ ((pipe->stream->view_format ==
+ VIEW_3D_FORMAT_SIDE_BY_SIDE ||
+ pipe->stream->view_format ==
+ VIEW_3D_FORMAT_TOP_AND_BOTTOM) &&
+ (pipe->stream->timing.timing_3d_format ==
+ TIMING_3D_FORMAT_TOP_AND_BOTTOM ||
+ pipe->stream->timing.timing_3d_format ==
+ TIMING_3D_FORMAT_SIDE_BY_SIDE))) {
+ if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) {
+ /* update previously split pipe */
+ hsplit_pipe->pipe_dlg_param.vupdate_width = v->v_update_width_pix[input_idx];
+ hsplit_pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset_pix[input_idx];
+ hsplit_pipe->pipe_dlg_param.vready_offset = v->v_ready_offset_pix[input_idx];
+ hsplit_pipe->pipe_dlg_param.vstartup_start = v->v_startup[input_idx];
+
+ hsplit_pipe->pipe_dlg_param.htotal = pipe->stream->timing.h_total;
+ hsplit_pipe->pipe_dlg_param.vtotal = pipe->stream->timing.v_total;
+ hsplit_pipe->pipe_dlg_param.vblank_start = pipe->pipe_dlg_param.vblank_start;
+ hsplit_pipe->pipe_dlg_param.vblank_end = pipe->pipe_dlg_param.vblank_end;
+ } else {
+ /* pipe not split previously needs split */
+ hsplit_pipe = find_idle_secondary_pipe(&context->res_ctx, pool, pipe);
+ ASSERT(hsplit_pipe);
+ split_stream_across_pipes(&context->res_ctx, pool, pipe, hsplit_pipe);
+ }
+
+ dcn_bw_calc_rq_dlg_ttu(dc, v, hsplit_pipe, input_idx);
+ } else if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) {
+ /* merge previously split pipe */
+ pipe->bottom_pipe = hsplit_pipe->bottom_pipe;
+ if (hsplit_pipe->bottom_pipe)
+ hsplit_pipe->bottom_pipe->top_pipe = pipe;
+ hsplit_pipe->plane_state = NULL;
+ hsplit_pipe->stream = NULL;
+ hsplit_pipe->top_pipe = NULL;
+ hsplit_pipe->bottom_pipe = NULL;
+ /* Clear plane_res and stream_res */
+ memset(&hsplit_pipe->plane_res, 0, sizeof(hsplit_pipe->plane_res));
+ memset(&hsplit_pipe->stream_res, 0, sizeof(hsplit_pipe->stream_res));
+ resource_build_scaling_params(pipe);
+ }
+ /* for now important to do this after pipe split for building e2e params */
+ dcn_bw_calc_rq_dlg_ttu(dc, v, pipe, input_idx);
+ }
+
+ input_idx++;
+ }
+ } else if (v->voltage_level == number_of_states_plus_one) {
+ BW_VAL_TRACE_SKIP(fail);
+ } else if (fast_validate) {
+ BW_VAL_TRACE_SKIP(fast);
+ }
+
+ if (v->voltage_level == 0) {
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us =
+ dc->dcn_soc->sr_enter_plus_exit_time;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->dcn_soc->sr_exit_time;
+ }
+
+ /*
+ * BW limit is set to prevent display from impacting other system functions
+ */
+
+ bw_limit = dc->dcn_soc->percent_disp_bw_limit * v->fabric_and_dram_bandwidth_vmax0p9;
+ bw_limit_pass = (v->total_data_read_bandwidth / 1000.0) < bw_limit;
+
+ PERFORMANCE_TRACE_END();
+ BW_VAL_TRACE_FINISH();
+
+ if (bw_limit_pass && v->voltage_level <= get_highest_allowed_voltage_level(dc->config.is_vmin_only_asic))
+ return true;
+ else
+ return false;
+}
+
+static unsigned int dcn_find_normalized_clock_vdd_Level(
+ const struct dc *dc,
+ enum dm_pp_clock_type clocks_type,
+ int clocks_in_khz)
+{
+ int vdd_level = dcn_bw_v_min0p65;
+
+ if (clocks_in_khz == 0)/*todo some clock not in the considerations*/
+ return vdd_level;
+
+ switch (clocks_type) {
+ case DM_PP_CLOCK_TYPE_DISPLAY_CLK:
+ if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmax0p9*1000) {
+ vdd_level = dcn_bw_v_max0p91;
+ BREAK_TO_DEBUGGER();
+ } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vnom0p8*1000) {
+ vdd_level = dcn_bw_v_max0p9;
+ } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmid0p72*1000) {
+ vdd_level = dcn_bw_v_nom0p8;
+ } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmin0p65*1000) {
+ vdd_level = dcn_bw_v_mid0p72;
+ } else
+ vdd_level = dcn_bw_v_min0p65;
+ break;
+ case DM_PP_CLOCK_TYPE_DISPLAYPHYCLK:
+ if (clocks_in_khz > dc->dcn_soc->phyclkv_max0p9*1000) {
+ vdd_level = dcn_bw_v_max0p91;
+ BREAK_TO_DEBUGGER();
+ } else if (clocks_in_khz > dc->dcn_soc->phyclkv_nom0p8*1000) {
+ vdd_level = dcn_bw_v_max0p9;
+ } else if (clocks_in_khz > dc->dcn_soc->phyclkv_mid0p72*1000) {
+ vdd_level = dcn_bw_v_nom0p8;
+ } else if (clocks_in_khz > dc->dcn_soc->phyclkv_min0p65*1000) {
+ vdd_level = dcn_bw_v_mid0p72;
+ } else
+ vdd_level = dcn_bw_v_min0p65;
+ break;
+
+ case DM_PP_CLOCK_TYPE_DPPCLK:
+ if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmax0p9*1000) {
+ vdd_level = dcn_bw_v_max0p91;
+ BREAK_TO_DEBUGGER();
+ } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vnom0p8*1000) {
+ vdd_level = dcn_bw_v_max0p9;
+ } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmid0p72*1000) {
+ vdd_level = dcn_bw_v_nom0p8;
+ } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmin0p65*1000) {
+ vdd_level = dcn_bw_v_mid0p72;
+ } else
+ vdd_level = dcn_bw_v_min0p65;
+ break;
+
+ case DM_PP_CLOCK_TYPE_MEMORY_CLK:
+ {
+ unsigned factor = (ddr4_dram_factor_single_Channel * dc->dcn_soc->number_of_channels);
+
+ if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9*1000000/factor) {
+ vdd_level = dcn_bw_v_max0p91;
+ BREAK_TO_DEBUGGER();
+ } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8*1000000/factor) {
+ vdd_level = dcn_bw_v_max0p9;
+ } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72*1000000/factor) {
+ vdd_level = dcn_bw_v_nom0p8;
+ } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65*1000000/factor) {
+ vdd_level = dcn_bw_v_mid0p72;
+ } else
+ vdd_level = dcn_bw_v_min0p65;
+ }
+ break;
+
+ case DM_PP_CLOCK_TYPE_DCFCLK:
+ if (clocks_in_khz > dc->dcn_soc->dcfclkv_max0p9*1000) {
+ vdd_level = dcn_bw_v_max0p91;
+ BREAK_TO_DEBUGGER();
+ } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_nom0p8*1000) {
+ vdd_level = dcn_bw_v_max0p9;
+ } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_mid0p72*1000) {
+ vdd_level = dcn_bw_v_nom0p8;
+ } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_min0p65*1000) {
+ vdd_level = dcn_bw_v_mid0p72;
+ } else
+ vdd_level = dcn_bw_v_min0p65;
+ break;
+
+ default:
+ break;
+ }
+ return vdd_level;
+}
+
+unsigned int dcn_find_dcfclk_suits_all(
+ const struct dc *dc,
+ struct dc_clocks *clocks)
+{
+ unsigned vdd_level, vdd_level_temp;
+ unsigned dcf_clk;
+
+ /*find a common supported voltage level*/
+ vdd_level = dcn_find_normalized_clock_vdd_Level(
+ dc, DM_PP_CLOCK_TYPE_DISPLAY_CLK, clocks->dispclk_khz);
+ vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
+ dc, DM_PP_CLOCK_TYPE_DISPLAYPHYCLK, clocks->phyclk_khz);
+
+ vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
+ vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
+ dc, DM_PP_CLOCK_TYPE_DPPCLK, clocks->dppclk_khz);
+ vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
+
+ vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
+ dc, DM_PP_CLOCK_TYPE_MEMORY_CLK, clocks->fclk_khz);
+ vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
+ vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
+ dc, DM_PP_CLOCK_TYPE_DCFCLK, clocks->dcfclk_khz);
+
+ /*find that level conresponding dcfclk*/
+ vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
+ if (vdd_level == dcn_bw_v_max0p91) {
+ BREAK_TO_DEBUGGER();
+ dcf_clk = dc->dcn_soc->dcfclkv_max0p9*1000;
+ } else if (vdd_level == dcn_bw_v_max0p9)
+ dcf_clk = dc->dcn_soc->dcfclkv_max0p9*1000;
+ else if (vdd_level == dcn_bw_v_nom0p8)
+ dcf_clk = dc->dcn_soc->dcfclkv_nom0p8*1000;
+ else if (vdd_level == dcn_bw_v_mid0p72)
+ dcf_clk = dc->dcn_soc->dcfclkv_mid0p72*1000;
+ else
+ dcf_clk = dc->dcn_soc->dcfclkv_min0p65*1000;
+
+ DC_LOG_BANDWIDTH_CALCS("\tdcf_clk for voltage = %d\n", dcf_clk);
+ return dcf_clk;
+}
+
+void dcn_bw_update_from_pplib_fclks(
+ struct dc *dc,
+ struct dm_pp_clock_levels_with_voltage *fclks)
+{
+ unsigned vmin0p65_idx, vmid0p72_idx, vnom0p8_idx, vmax0p9_idx;
+
+ ASSERT(fclks->num_levels);
+
+ vmin0p65_idx = 0;
+ vmid0p72_idx = fclks->num_levels -
+ (fclks->num_levels > 2 ? 3 : (fclks->num_levels > 1 ? 2 : 1));
+ vnom0p8_idx = fclks->num_levels - (fclks->num_levels > 1 ? 2 : 1);
+ vmax0p9_idx = fclks->num_levels - 1;
+
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 =
+ 32 * (fclks->data[vmin0p65_idx].clocks_in_khz / 1000.0) / 1000.0;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 =
+ dc->dcn_soc->number_of_channels *
+ (fclks->data[vmid0p72_idx].clocks_in_khz / 1000.0)
+ * ddr4_dram_factor_single_Channel / 1000.0;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 =
+ dc->dcn_soc->number_of_channels *
+ (fclks->data[vnom0p8_idx].clocks_in_khz / 1000.0)
+ * ddr4_dram_factor_single_Channel / 1000.0;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 =
+ dc->dcn_soc->number_of_channels *
+ (fclks->data[vmax0p9_idx].clocks_in_khz / 1000.0)
+ * ddr4_dram_factor_single_Channel / 1000.0;
+}
+
+void dcn_bw_update_from_pplib_dcfclks(
+ struct dc *dc,
+ struct dm_pp_clock_levels_with_voltage *dcfclks)
+{
+ if (dcfclks->num_levels >= 3) {
+ dc->dcn_soc->dcfclkv_min0p65 = dcfclks->data[0].clocks_in_khz / 1000.0;
+ dc->dcn_soc->dcfclkv_mid0p72 = dcfclks->data[dcfclks->num_levels - 3].clocks_in_khz / 1000.0;
+ dc->dcn_soc->dcfclkv_nom0p8 = dcfclks->data[dcfclks->num_levels - 2].clocks_in_khz / 1000.0;
+ dc->dcn_soc->dcfclkv_max0p9 = dcfclks->data[dcfclks->num_levels - 1].clocks_in_khz / 1000.0;
+ }
+}
+
+void dcn_get_soc_clks(
+ struct dc *dc,
+ int *min_fclk_khz,
+ int *min_dcfclk_khz,
+ int *socclk_khz)
+{
+ *min_fclk_khz = dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 * 1000000 / 32;
+ *min_dcfclk_khz = dc->dcn_soc->dcfclkv_min0p65 * 1000;
+ *socclk_khz = dc->dcn_soc->socclk * 1000;
+}
+
+void dcn_bw_notify_pplib_of_wm_ranges(
+ struct dc *dc,
+ int min_fclk_khz,
+ int min_dcfclk_khz,
+ int socclk_khz)
+{
+ struct pp_smu_funcs_rv *pp = NULL;
+ struct pp_smu_wm_range_sets ranges = {0};
+ const int overdrive = 5000000; /* 5 GHz to cover Overdrive */
+
+ if (dc->res_pool->pp_smu)
+ pp = &dc->res_pool->pp_smu->rv_funcs;
+ if (!pp || !pp->set_wm_ranges)
+ return;
+
+ /* Now notify PPLib/SMU about which Watermarks sets they should select
+ * depending on DPM state they are in. And update BW MGR GFX Engine and
+ * Memory clock member variables for Watermarks calculations for each
+ * Watermark Set. Only one watermark set for dcn1 due to hw bug DEGVIDCN10-254.
+ */
+ /* SOCCLK does not affect anytihng but writeback for DCN so for now we dont
+ * care what the value is, hence min to overdrive level
+ */
+ ranges.num_reader_wm_sets = WM_SET_COUNT;
+ ranges.num_writer_wm_sets = WM_SET_COUNT;
+ ranges.reader_wm_sets[0].wm_inst = WM_A;
+ ranges.reader_wm_sets[0].min_drain_clk_mhz = min_dcfclk_khz / 1000;
+ ranges.reader_wm_sets[0].max_drain_clk_mhz = overdrive / 1000;
+ ranges.reader_wm_sets[0].min_fill_clk_mhz = min_fclk_khz / 1000;
+ ranges.reader_wm_sets[0].max_fill_clk_mhz = overdrive / 1000;
+ ranges.writer_wm_sets[0].wm_inst = WM_A;
+ ranges.writer_wm_sets[0].min_fill_clk_mhz = socclk_khz / 1000;
+ ranges.writer_wm_sets[0].max_fill_clk_mhz = overdrive / 1000;
+ ranges.writer_wm_sets[0].min_drain_clk_mhz = min_fclk_khz / 1000;
+ ranges.writer_wm_sets[0].max_drain_clk_mhz = overdrive / 1000;
+
+ if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) {
+ ranges.reader_wm_sets[0].wm_inst = WM_A;
+ ranges.reader_wm_sets[0].min_drain_clk_mhz = 300;
+ ranges.reader_wm_sets[0].max_drain_clk_mhz = 5000;
+ ranges.reader_wm_sets[0].min_fill_clk_mhz = 800;
+ ranges.reader_wm_sets[0].max_fill_clk_mhz = 5000;
+ ranges.writer_wm_sets[0].wm_inst = WM_A;
+ ranges.writer_wm_sets[0].min_fill_clk_mhz = 200;
+ ranges.writer_wm_sets[0].max_fill_clk_mhz = 5000;
+ ranges.writer_wm_sets[0].min_drain_clk_mhz = 800;
+ ranges.writer_wm_sets[0].max_drain_clk_mhz = 5000;
+ }
+
+ ranges.reader_wm_sets[1] = ranges.writer_wm_sets[0];
+ ranges.reader_wm_sets[1].wm_inst = WM_B;
+
+ ranges.reader_wm_sets[2] = ranges.writer_wm_sets[0];
+ ranges.reader_wm_sets[2].wm_inst = WM_C;
+
+ ranges.reader_wm_sets[3] = ranges.writer_wm_sets[0];
+ ranges.reader_wm_sets[3].wm_inst = WM_D;
+
+ /* Notify PP Lib/SMU which Watermarks to use for which clock ranges */
+ pp->set_wm_ranges(&pp->pp_smu, &ranges);
+}
+
+void dcn_bw_sync_calcs_and_dml(struct dc *dc)
+{
+ DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %f ns\n"
+ "sr_enter_plus_exit_time: %f ns\n"
+ "urgent_latency: %f ns\n"
+ "write_back_latency: %f ns\n"
+ "percent_of_ideal_drambw_received_after_urg_latency: %f %%\n"
+ "max_request_size: %d bytes\n"
+ "dcfclkv_max0p9: %f kHz\n"
+ "dcfclkv_nom0p8: %f kHz\n"
+ "dcfclkv_mid0p72: %f kHz\n"
+ "dcfclkv_min0p65: %f kHz\n"
+ "max_dispclk_vmax0p9: %f kHz\n"
+ "max_dispclk_vnom0p8: %f kHz\n"
+ "max_dispclk_vmid0p72: %f kHz\n"
+ "max_dispclk_vmin0p65: %f kHz\n"
+ "max_dppclk_vmax0p9: %f kHz\n"
+ "max_dppclk_vnom0p8: %f kHz\n"
+ "max_dppclk_vmid0p72: %f kHz\n"
+ "max_dppclk_vmin0p65: %f kHz\n"
+ "socclk: %f kHz\n"
+ "fabric_and_dram_bandwidth_vmax0p9: %f MB/s\n"
+ "fabric_and_dram_bandwidth_vnom0p8: %f MB/s\n"
+ "fabric_and_dram_bandwidth_vmid0p72: %f MB/s\n"
+ "fabric_and_dram_bandwidth_vmin0p65: %f MB/s\n"
+ "phyclkv_max0p9: %f kHz\n"
+ "phyclkv_nom0p8: %f kHz\n"
+ "phyclkv_mid0p72: %f kHz\n"
+ "phyclkv_min0p65: %f kHz\n"
+ "downspreading: %f %%\n"
+ "round_trip_ping_latency_cycles: %d DCFCLK Cycles\n"
+ "urgent_out_of_order_return_per_channel: %d Bytes\n"
+ "number_of_channels: %d\n"
+ "vmm_page_size: %d Bytes\n"
+ "dram_clock_change_latency: %f ns\n"
+ "return_bus_width: %d Bytes\n",
+ dc->dcn_soc->sr_exit_time * 1000,
+ dc->dcn_soc->sr_enter_plus_exit_time * 1000,
+ dc->dcn_soc->urgent_latency * 1000,
+ dc->dcn_soc->write_back_latency * 1000,
+ dc->dcn_soc->percent_of_ideal_drambw_received_after_urg_latency,
+ dc->dcn_soc->max_request_size,
+ dc->dcn_soc->dcfclkv_max0p9 * 1000,
+ dc->dcn_soc->dcfclkv_nom0p8 * 1000,
+ dc->dcn_soc->dcfclkv_mid0p72 * 1000,
+ dc->dcn_soc->dcfclkv_min0p65 * 1000,
+ dc->dcn_soc->max_dispclk_vmax0p9 * 1000,
+ dc->dcn_soc->max_dispclk_vnom0p8 * 1000,
+ dc->dcn_soc->max_dispclk_vmid0p72 * 1000,
+ dc->dcn_soc->max_dispclk_vmin0p65 * 1000,
+ dc->dcn_soc->max_dppclk_vmax0p9 * 1000,
+ dc->dcn_soc->max_dppclk_vnom0p8 * 1000,
+ dc->dcn_soc->max_dppclk_vmid0p72 * 1000,
+ dc->dcn_soc->max_dppclk_vmin0p65 * 1000,
+ dc->dcn_soc->socclk * 1000,
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 * 1000,
+ dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 * 1000,
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 * 1000,
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 * 1000,
+ dc->dcn_soc->phyclkv_max0p9 * 1000,
+ dc->dcn_soc->phyclkv_nom0p8 * 1000,
+ dc->dcn_soc->phyclkv_mid0p72 * 1000,
+ dc->dcn_soc->phyclkv_min0p65 * 1000,
+ dc->dcn_soc->downspreading * 100,
+ dc->dcn_soc->round_trip_ping_latency_cycles,
+ dc->dcn_soc->urgent_out_of_order_return_per_channel,
+ dc->dcn_soc->number_of_channels,
+ dc->dcn_soc->vmm_page_size,
+ dc->dcn_soc->dram_clock_change_latency * 1000,
+ dc->dcn_soc->return_bus_width);
+ DC_LOG_BANDWIDTH_CALCS("rob_buffer_size_in_kbyte: %f\n"
+ "det_buffer_size_in_kbyte: %f\n"
+ "dpp_output_buffer_pixels: %f\n"
+ "opp_output_buffer_lines: %f\n"
+ "pixel_chunk_size_in_kbyte: %f\n"
+ "pte_enable: %d\n"
+ "pte_chunk_size: %d kbytes\n"
+ "meta_chunk_size: %d kbytes\n"
+ "writeback_chunk_size: %d kbytes\n"
+ "odm_capability: %d\n"
+ "dsc_capability: %d\n"
+ "line_buffer_size: %d bits\n"
+ "max_line_buffer_lines: %d\n"
+ "is_line_buffer_bpp_fixed: %d\n"
+ "line_buffer_fixed_bpp: %d\n"
+ "writeback_luma_buffer_size: %d kbytes\n"
+ "writeback_chroma_buffer_size: %d kbytes\n"
+ "max_num_dpp: %d\n"
+ "max_num_writeback: %d\n"
+ "max_dchub_topscl_throughput: %d pixels/dppclk\n"
+ "max_pscl_tolb_throughput: %d pixels/dppclk\n"
+ "max_lb_tovscl_throughput: %d pixels/dppclk\n"
+ "max_vscl_tohscl_throughput: %d pixels/dppclk\n"
+ "max_hscl_ratio: %f\n"
+ "max_vscl_ratio: %f\n"
+ "max_hscl_taps: %d\n"
+ "max_vscl_taps: %d\n"
+ "pte_buffer_size_in_requests: %d\n"
+ "dispclk_ramping_margin: %f %%\n"
+ "under_scan_factor: %f %%\n"
+ "max_inter_dcn_tile_repeaters: %d\n"
+ "can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one: %d\n"
+ "bug_forcing_luma_and_chroma_request_to_same_size_fixed: %d\n"
+ "dcfclk_cstate_latency: %d\n",
+ dc->dcn_ip->rob_buffer_size_in_kbyte,
+ dc->dcn_ip->det_buffer_size_in_kbyte,
+ dc->dcn_ip->dpp_output_buffer_pixels,
+ dc->dcn_ip->opp_output_buffer_lines,
+ dc->dcn_ip->pixel_chunk_size_in_kbyte,
+ dc->dcn_ip->pte_enable,
+ dc->dcn_ip->pte_chunk_size,
+ dc->dcn_ip->meta_chunk_size,
+ dc->dcn_ip->writeback_chunk_size,
+ dc->dcn_ip->odm_capability,
+ dc->dcn_ip->dsc_capability,
+ dc->dcn_ip->line_buffer_size,
+ dc->dcn_ip->max_line_buffer_lines,
+ dc->dcn_ip->is_line_buffer_bpp_fixed,
+ dc->dcn_ip->line_buffer_fixed_bpp,
+ dc->dcn_ip->writeback_luma_buffer_size,
+ dc->dcn_ip->writeback_chroma_buffer_size,
+ dc->dcn_ip->max_num_dpp,
+ dc->dcn_ip->max_num_writeback,
+ dc->dcn_ip->max_dchub_topscl_throughput,
+ dc->dcn_ip->max_pscl_tolb_throughput,
+ dc->dcn_ip->max_lb_tovscl_throughput,
+ dc->dcn_ip->max_vscl_tohscl_throughput,
+ dc->dcn_ip->max_hscl_ratio,
+ dc->dcn_ip->max_vscl_ratio,
+ dc->dcn_ip->max_hscl_taps,
+ dc->dcn_ip->max_vscl_taps,
+ dc->dcn_ip->pte_buffer_size_in_requests,
+ dc->dcn_ip->dispclk_ramping_margin,
+ dc->dcn_ip->under_scan_factor * 100,
+ dc->dcn_ip->max_inter_dcn_tile_repeaters,
+ dc->dcn_ip->can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one,
+ dc->dcn_ip->bug_forcing_luma_and_chroma_request_to_same_size_fixed,
+ dc->dcn_ip->dcfclk_cstate_latency);
+
+ dc->dml.soc.sr_exit_time_us = dc->dcn_soc->sr_exit_time;
+ dc->dml.soc.sr_enter_plus_exit_time_us = dc->dcn_soc->sr_enter_plus_exit_time;
+ dc->dml.soc.urgent_latency_us = dc->dcn_soc->urgent_latency;
+ dc->dml.soc.writeback_latency_us = dc->dcn_soc->write_back_latency;
+ dc->dml.soc.ideal_dram_bw_after_urgent_percent =
+ dc->dcn_soc->percent_of_ideal_drambw_received_after_urg_latency;
+ dc->dml.soc.max_request_size_bytes = dc->dcn_soc->max_request_size;
+ dc->dml.soc.downspread_percent = dc->dcn_soc->downspreading;
+ dc->dml.soc.round_trip_ping_latency_dcfclk_cycles =
+ dc->dcn_soc->round_trip_ping_latency_cycles;
+ dc->dml.soc.urgent_out_of_order_return_per_channel_bytes =
+ dc->dcn_soc->urgent_out_of_order_return_per_channel;
+ dc->dml.soc.num_chans = dc->dcn_soc->number_of_channels;
+ dc->dml.soc.vmm_page_size_bytes = dc->dcn_soc->vmm_page_size;
+ dc->dml.soc.dram_clock_change_latency_us = dc->dcn_soc->dram_clock_change_latency;
+ dc->dml.soc.return_bus_width_bytes = dc->dcn_soc->return_bus_width;
+
+ dc->dml.ip.rob_buffer_size_kbytes = dc->dcn_ip->rob_buffer_size_in_kbyte;
+ dc->dml.ip.det_buffer_size_kbytes = dc->dcn_ip->det_buffer_size_in_kbyte;
+ dc->dml.ip.dpp_output_buffer_pixels = dc->dcn_ip->dpp_output_buffer_pixels;
+ dc->dml.ip.opp_output_buffer_lines = dc->dcn_ip->opp_output_buffer_lines;
+ dc->dml.ip.pixel_chunk_size_kbytes = dc->dcn_ip->pixel_chunk_size_in_kbyte;
+ dc->dml.ip.pte_enable = dc->dcn_ip->pte_enable == dcn_bw_yes;
+ dc->dml.ip.pte_chunk_size_kbytes = dc->dcn_ip->pte_chunk_size;
+ dc->dml.ip.meta_chunk_size_kbytes = dc->dcn_ip->meta_chunk_size;
+ dc->dml.ip.writeback_chunk_size_kbytes = dc->dcn_ip->writeback_chunk_size;
+ dc->dml.ip.line_buffer_size_bits = dc->dcn_ip->line_buffer_size;
+ dc->dml.ip.max_line_buffer_lines = dc->dcn_ip->max_line_buffer_lines;
+ dc->dml.ip.IsLineBufferBppFixed = dc->dcn_ip->is_line_buffer_bpp_fixed == dcn_bw_yes;
+ dc->dml.ip.LineBufferFixedBpp = dc->dcn_ip->line_buffer_fixed_bpp;
+ dc->dml.ip.writeback_luma_buffer_size_kbytes = dc->dcn_ip->writeback_luma_buffer_size;
+ dc->dml.ip.writeback_chroma_buffer_size_kbytes = dc->dcn_ip->writeback_chroma_buffer_size;
+ dc->dml.ip.max_num_dpp = dc->dcn_ip->max_num_dpp;
+ dc->dml.ip.max_num_wb = dc->dcn_ip->max_num_writeback;
+ dc->dml.ip.max_dchub_pscl_bw_pix_per_clk = dc->dcn_ip->max_dchub_topscl_throughput;
+ dc->dml.ip.max_pscl_lb_bw_pix_per_clk = dc->dcn_ip->max_pscl_tolb_throughput;
+ dc->dml.ip.max_lb_vscl_bw_pix_per_clk = dc->dcn_ip->max_lb_tovscl_throughput;
+ dc->dml.ip.max_vscl_hscl_bw_pix_per_clk = dc->dcn_ip->max_vscl_tohscl_throughput;
+ dc->dml.ip.max_hscl_ratio = dc->dcn_ip->max_hscl_ratio;
+ dc->dml.ip.max_vscl_ratio = dc->dcn_ip->max_vscl_ratio;
+ dc->dml.ip.max_hscl_taps = dc->dcn_ip->max_hscl_taps;
+ dc->dml.ip.max_vscl_taps = dc->dcn_ip->max_vscl_taps;
+ /*pte_buffer_size_in_requests missing in dml*/
+ dc->dml.ip.dispclk_ramp_margin_percent = dc->dcn_ip->dispclk_ramping_margin;
+ dc->dml.ip.underscan_factor = dc->dcn_ip->under_scan_factor;
+ dc->dml.ip.max_inter_dcn_tile_repeaters = dc->dcn_ip->max_inter_dcn_tile_repeaters;
+ dc->dml.ip.can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one =
+ dc->dcn_ip->can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one == dcn_bw_yes;
+ dc->dml.ip.bug_forcing_LC_req_same_size_fixed =
+ dc->dcn_ip->bug_forcing_luma_and_chroma_request_to_same_size_fixed == dcn_bw_yes;
+ dc->dml.ip.dcfclk_cstate_latency = dc->dcn_ip->dcfclk_cstate_latency;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
index 2a1983324629..74e86732e301 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
@@ -29,7 +29,7 @@
#define DC__PRESENT 1
#define DC__PRESENT__1 1
#define DC__NUM_DPP 4
-#define DC__VOLTAGE_STATES 9
+#define DC__VOLTAGE_STATES 20
#define DC__NUM_DPP__4 1
#define DC__NUM_DPP__0_PRESENT 1
#define DC__NUM_DPP__1_PRESENT 1
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
new file mode 100644
index 000000000000..99644d896222
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dcn10/dcn10_resource.h"
+
+#include "dcn10_fpu.h"
+
+/**
+ * DOC: DCN10 FPU manipulation Overview
+ *
+ * The DCN architecture relies on FPU operations, which require special
+ * compilation flags and the use of kernel_fpu_begin/end functions; ideally, we
+ * want to avoid spreading FPU access across multiple files. With this idea in
+ * mind, this file aims to centralize DCN10 functions that require FPU access
+ * in a single place. Code in this file follows the following code pattern:
+ *
+ * 1. Functions that use FPU operations should be isolated in static functions.
+ * 2. The FPU functions should have the noinline attribute to ensure anything
+ * that deals with FP register is contained within this call.
+ * 3. All function that needs to be accessed outside this file requires a
+ * public interface that not uses any FPU reference.
+ * 4. Developers **must not** use DC_FP_START/END in this file, but they need
+ * to ensure that the caller invokes it before access any function available
+ * in this file. For this reason, public functions in this file must invoke
+ * dc_assert_fp_enabled();
+ *
+ * Let's expand a little bit more the idea in the code pattern. To fully
+ * isolate FPU operations in a single place, we must avoid situations where
+ * compilers spill FP values to registers due to FP enable in a specific C
+ * file. Note that even if we isolate all FPU functions in a single file and
+ * call its interface from other files, the compiler might enable the use of
+ * FPU before we call DC_FP_START. Nevertheless, it is the programmer's
+ * responsibility to invoke DC_FP_START/END in the correct place. To highlight
+ * situations where developers forgot to use the FP protection before calling
+ * the DC FPU interface functions, we introduce a helper that checks if the
+ * function is invoked under FP protection. If not, it will trigger a kernel
+ * warning.
+ */
+
+struct _vcs_dpi_ip_params_st dcn1_0_ip = {
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = 164,
+ .dpte_buffer_size_in_pte_reqs_luma = 42,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .pte_chunk_size_kbytes = 2,
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 2,
+ .line_buffer_size_bits = 589824,
+ .max_line_buffer_lines = 12,
+ .IsLineBufferBppFixed = 0,
+ .LineBufferFixedBpp = -1,
+ .writeback_luma_buffer_size_kbytes = 12,
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .max_num_dpp = 4,
+ .max_num_wb = 2,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 4,
+ .max_vscl_ratio = 4,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.10,
+ .min_vblank_lines = 14,
+ .dppclk_delay_subtotal = 90,
+ .dispclk_delay_subtotal = 42,
+ .dcfclk_cstate_latency = 10,
+ .max_inter_dcn_tile_repeaters = 8,
+ .can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one = 0,
+ .bug_forcing_LC_req_same_size_fixed = 0,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .urgent_latency_us = 4.0,
+ .writeback_latency_us = 12.0,
+ .ideal_dram_bw_after_urgent_percent = 80.0,
+ .max_request_size_bytes = 256,
+ .downspread_percent = 0.5,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 128,
+ .urgent_out_of_order_return_per_channel_bytes = 256,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .num_chans = 2,
+ .vmm_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 17.0,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+};
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
new file mode 100644
index 000000000000..e74ed4b4ce5b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN10_FPU_H__
+#define __DCN10_FPU_H__
+
+#endif /* __DCN20_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index d590dc917363..45db40c41882 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -25,9 +25,27 @@
*/
#include "resource.h"
+#include "clk_mgr.h"
+#include "dc_link_dp.h"
+#include "dchubbub.h"
+#include "dcn20/dcn20_resource.h"
+#include "dcn21/dcn21_resource.h"
+#include "clk_mgr/dcn21/rn_clk_mgr.h"
#include "dcn20_fpu.h"
+#define DC_LOGGER_INIT(logger)
+
+#ifndef MAX
+#define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
+#endif
+#ifndef MIN
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#endif
+
+/* Constant */
+#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 Training Counter Requirement doc */
+
/**
* DOC: DCN2x FPU manipulation Overview
*
@@ -61,6 +79,803 @@
* warning.
*/
+struct _vcs_dpi_ip_params_st dcn2_0_ip = {
+ .odm_capable = 1,
+ .gpuvm_enable = 0,
+ .hostvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_max_page_table_levels = 4,
+ .hostvm_cached_page_table_levels = 0,
+ .pte_group_size_bytes = 2048,
+ .num_dsc = 6,
+ .rob_buffer_size_kbytes = 168,
+ .det_buffer_size_kbytes = 164,
+ .dpte_buffer_size_in_pte_reqs_luma = 84,
+ .pde_proc_buffer_size_64k_reqs = 48,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_chunk_size_kbytes = 2,
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 2,
+ .line_buffer_size_bits = 789504,
+ .is_line_buffer_bpp_fixed = 0,
+ .line_buffer_fixed_bpp = 0,
+ .dcc_supported = true,
+ .max_line_buffer_lines = 12,
+ .writeback_luma_buffer_size_kbytes = 12,
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .writeback_chroma_line_buffer_width_pixels = 4,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 12,
+ .writeback_max_vscl_taps = 12,
+ .writeback_line_buffer_luma_buffer_size = 0,
+ .writeback_line_buffer_chroma_buffer_size = 14643,
+ .cursor_buffer_size = 8,
+ .cursor_chunk_size = 2,
+ .max_num_otg = 6,
+ .max_num_dpp = 6,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 8,
+ .max_vscl_ratio = 8,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.10,
+ .min_vblank_lines = 32, //
+ .dppclk_delay_subtotal = 77, //
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_cnvc_formatter = 8,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 87, //
+ .dcfclk_cstate_latency = 10, // SRExitTime
+ .max_inter_dcn_tile_repeaters = 8,
+ .xfc_supported = true,
+ .xfc_fill_bw_overhead_percent = 10.0,
+ .xfc_fill_constant_bytes = 0,
+ .number_of_cursors = 1,
+};
+
+struct _vcs_dpi_ip_params_st dcn2_0_nv14_ip = {
+ .odm_capable = 1,
+ .gpuvm_enable = 0,
+ .hostvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_max_page_table_levels = 4,
+ .hostvm_cached_page_table_levels = 0,
+ .num_dsc = 5,
+ .rob_buffer_size_kbytes = 168,
+ .det_buffer_size_kbytes = 164,
+ .dpte_buffer_size_in_pte_reqs_luma = 84,
+ .dpte_buffer_size_in_pte_reqs_chroma = 42,//todo
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .max_page_table_levels = 4,
+ .pte_chunk_size_kbytes = 2,
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 2,
+ .line_buffer_size_bits = 789504,
+ .is_line_buffer_bpp_fixed = 0,
+ .line_buffer_fixed_bpp = 0,
+ .dcc_supported = true,
+ .max_line_buffer_lines = 12,
+ .writeback_luma_buffer_size_kbytes = 12,
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .writeback_chroma_line_buffer_width_pixels = 4,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 12,
+ .writeback_max_vscl_taps = 12,
+ .writeback_line_buffer_luma_buffer_size = 0,
+ .writeback_line_buffer_chroma_buffer_size = 14643,
+ .cursor_buffer_size = 8,
+ .cursor_chunk_size = 2,
+ .max_num_otg = 5,
+ .max_num_dpp = 5,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 8,
+ .max_vscl_ratio = 8,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.10,
+ .min_vblank_lines = 32, //
+ .dppclk_delay_subtotal = 77, //
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_cnvc_formatter = 8,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 87, //
+ .dcfclk_cstate_latency = 10, // SRExitTime
+ .max_inter_dcn_tile_repeaters = 8,
+ .xfc_supported = true,
+ .xfc_fill_bw_overhead_percent = 10.0,
+ .xfc_fill_constant_bytes = 0,
+ .ptoi_supported = 0,
+ .number_of_cursors = 1,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = {
+ /* Defaults that get patched on driver load from firmware. */
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 560.0,
+ .fabricclk_mhz = 560.0,
+ .dispclk_mhz = 513.0,
+ .dppclk_mhz = 513.0,
+ .phyclk_mhz = 540.0,
+ .socclk_mhz = 560.0,
+ .dscclk_mhz = 171.0,
+ .dram_speed_mts = 8960.0,
+ },
+ {
+ .state = 1,
+ .dcfclk_mhz = 694.0,
+ .fabricclk_mhz = 694.0,
+ .dispclk_mhz = 642.0,
+ .dppclk_mhz = 642.0,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 694.0,
+ .dscclk_mhz = 214.0,
+ .dram_speed_mts = 11104.0,
+ },
+ {
+ .state = 2,
+ .dcfclk_mhz = 875.0,
+ .fabricclk_mhz = 875.0,
+ .dispclk_mhz = 734.0,
+ .dppclk_mhz = 734.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 875.0,
+ .dscclk_mhz = 245.0,
+ .dram_speed_mts = 14000.0,
+ },
+ {
+ .state = 3,
+ .dcfclk_mhz = 1000.0,
+ .fabricclk_mhz = 1000.0,
+ .dispclk_mhz = 1100.0,
+ .dppclk_mhz = 1100.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1000.0,
+ .dscclk_mhz = 367.0,
+ .dram_speed_mts = 16000.0,
+ },
+ {
+ .state = 4,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 16000.0,
+ },
+ /*Extra state, no dispclk ramping*/
+ {
+ .state = 5,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 16000.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 8.6,
+ .sr_enter_plus_exit_time_us = 10.9,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 40.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .ideal_dram_bw_after_urgent_percent = 40.0,
+ .max_request_size_bytes = 256,
+ .dram_channel_width_bytes = 2,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 131,
+ .urgent_out_of_order_return_per_channel_bytes = 256,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .num_chans = 16,
+ .vmm_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 404.0,
+ .dummy_pstate_latency_us = 5.0,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3850,
+ .xfc_bus_transport_time_us = 20,
+ .xfc_xbuf_latency_tolerance_us = 4,
+ .use_urgent_burst_bw = 0
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 560.0,
+ .fabricclk_mhz = 560.0,
+ .dispclk_mhz = 513.0,
+ .dppclk_mhz = 513.0,
+ .phyclk_mhz = 540.0,
+ .socclk_mhz = 560.0,
+ .dscclk_mhz = 171.0,
+ .dram_speed_mts = 8960.0,
+ },
+ {
+ .state = 1,
+ .dcfclk_mhz = 694.0,
+ .fabricclk_mhz = 694.0,
+ .dispclk_mhz = 642.0,
+ .dppclk_mhz = 642.0,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 694.0,
+ .dscclk_mhz = 214.0,
+ .dram_speed_mts = 11104.0,
+ },
+ {
+ .state = 2,
+ .dcfclk_mhz = 875.0,
+ .fabricclk_mhz = 875.0,
+ .dispclk_mhz = 734.0,
+ .dppclk_mhz = 734.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 875.0,
+ .dscclk_mhz = 245.0,
+ .dram_speed_mts = 14000.0,
+ },
+ {
+ .state = 3,
+ .dcfclk_mhz = 1000.0,
+ .fabricclk_mhz = 1000.0,
+ .dispclk_mhz = 1100.0,
+ .dppclk_mhz = 1100.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1000.0,
+ .dscclk_mhz = 367.0,
+ .dram_speed_mts = 16000.0,
+ },
+ {
+ .state = 4,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 16000.0,
+ },
+ /*Extra state, no dispclk ramping*/
+ {
+ .state = 5,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 16000.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 11.6,
+ .sr_enter_plus_exit_time_us = 13.9,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 40.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .ideal_dram_bw_after_urgent_percent = 40.0,
+ .max_request_size_bytes = 256,
+ .dram_channel_width_bytes = 2,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 131,
+ .urgent_out_of_order_return_per_channel_bytes = 256,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .num_chans = 8,
+ .vmm_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 404.0,
+ .dummy_pstate_latency_us = 5.0,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3850,
+ .xfc_bus_transport_time_us = 20,
+ .xfc_xbuf_latency_tolerance_us = 4,
+ .use_urgent_burst_bw = 0
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { 0 };
+
+struct _vcs_dpi_ip_params_st dcn2_1_ip = {
+ .odm_capable = 1,
+ .gpuvm_enable = 1,
+ .hostvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_max_page_table_levels = 4,
+ .hostvm_cached_page_table_levels = 2,
+ .num_dsc = 3,
+ .rob_buffer_size_kbytes = 168,
+ .det_buffer_size_kbytes = 164,
+ .dpte_buffer_size_in_pte_reqs_luma = 44,
+ .dpte_buffer_size_in_pte_reqs_chroma = 42,//todo
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .max_page_table_levels = 4,
+ .pte_chunk_size_kbytes = 2,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 2,
+ .line_buffer_size_bits = 789504,
+ .is_line_buffer_bpp_fixed = 0,
+ .line_buffer_fixed_bpp = 0,
+ .dcc_supported = true,
+ .max_line_buffer_lines = 12,
+ .writeback_luma_buffer_size_kbytes = 12,
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .writeback_chroma_line_buffer_width_pixels = 4,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 12,
+ .writeback_max_vscl_taps = 12,
+ .writeback_line_buffer_luma_buffer_size = 0,
+ .writeback_line_buffer_chroma_buffer_size = 14643,
+ .cursor_buffer_size = 8,
+ .cursor_chunk_size = 2,
+ .max_num_otg = 4,
+ .max_num_dpp = 4,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 4,
+ .max_vscl_ratio = 4,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.10,
+ .min_vblank_lines = 32, //
+ .dppclk_delay_subtotal = 77, //
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_cnvc_formatter = 8,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 87, //
+ .dcfclk_cstate_latency = 10, // SRExitTime
+ .max_inter_dcn_tile_repeaters = 8,
+
+ .xfc_supported = false,
+ .xfc_fill_bw_overhead_percent = 10.0,
+ .xfc_fill_constant_bytes = 0,
+ .ptoi_supported = 0,
+ .number_of_cursors = 1,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 400.0,
+ .fabricclk_mhz = 400.0,
+ .dispclk_mhz = 600.0,
+ .dppclk_mhz = 400.00,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 278.0,
+ .dscclk_mhz = 205.67,
+ .dram_speed_mts = 1600.0,
+ },
+ {
+ .state = 1,
+ .dcfclk_mhz = 464.52,
+ .fabricclk_mhz = 800.0,
+ .dispclk_mhz = 654.55,
+ .dppclk_mhz = 626.09,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 278.0,
+ .dscclk_mhz = 205.67,
+ .dram_speed_mts = 1600.0,
+ },
+ {
+ .state = 2,
+ .dcfclk_mhz = 514.29,
+ .fabricclk_mhz = 933.0,
+ .dispclk_mhz = 757.89,
+ .dppclk_mhz = 685.71,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 278.0,
+ .dscclk_mhz = 287.67,
+ .dram_speed_mts = 1866.0,
+ },
+ {
+ .state = 3,
+ .dcfclk_mhz = 576.00,
+ .fabricclk_mhz = 1067.0,
+ .dispclk_mhz = 847.06,
+ .dppclk_mhz = 757.89,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 715.0,
+ .dscclk_mhz = 318.334,
+ .dram_speed_mts = 2134.0,
+ },
+ {
+ .state = 4,
+ .dcfclk_mhz = 626.09,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 900.00,
+ .dppclk_mhz = 847.06,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 953.0,
+ .dscclk_mhz = 489.0,
+ .dram_speed_mts = 2400.0,
+ },
+ {
+ .state = 5,
+ .dcfclk_mhz = 685.71,
+ .fabricclk_mhz = 1333.0,
+ .dispclk_mhz = 1028.57,
+ .dppclk_mhz = 960.00,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 278.0,
+ .dscclk_mhz = 287.67,
+ .dram_speed_mts = 2666.0,
+ },
+ {
+ .state = 6,
+ .dcfclk_mhz = 757.89,
+ .fabricclk_mhz = 1467.0,
+ .dispclk_mhz = 1107.69,
+ .dppclk_mhz = 1028.57,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 715.0,
+ .dscclk_mhz = 318.334,
+ .dram_speed_mts = 3200.0,
+ },
+ {
+ .state = 7,
+ .dcfclk_mhz = 847.06,
+ .fabricclk_mhz = 1600.0,
+ .dispclk_mhz = 1395.0,
+ .dppclk_mhz = 1285.00,
+ .phyclk_mhz = 1325.0,
+ .socclk_mhz = 953.0,
+ .dscclk_mhz = 489.0,
+ .dram_speed_mts = 4266.0,
+ },
+ /*Extra state, no dispclk ramping*/
+ {
+ .state = 8,
+ .dcfclk_mhz = 847.06,
+ .fabricclk_mhz = 1600.0,
+ .dispclk_mhz = 1395.0,
+ .dppclk_mhz = 1285.0,
+ .phyclk_mhz = 1325.0,
+ .socclk_mhz = 953.0,
+ .dscclk_mhz = 489.0,
+ .dram_speed_mts = 4266.0,
+ },
+
+ },
+
+ .sr_exit_time_us = 12.5,
+ .sr_enter_plus_exit_time_us = 17.0,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 75.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 100.0,
+ .writeback_latency_us = 12.0,
+ .max_request_size_bytes = 256,
+ .dram_channel_width_bytes = 4,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 128,
+ .urgent_out_of_order_return_per_channel_bytes = 4096,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .num_chans = 4,
+ .vmm_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 23.84,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3600,
+ .xfc_bus_transport_time_us = 4,
+ .xfc_xbuf_latency_tolerance_us = 4,
+ .use_urgent_burst_bw = 1,
+ .num_states = 8
+};
+
+struct wm_table ddr4_wm_table_gs = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 7.09,
+ .sr_enter_plus_exit_time_us = 8.14,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table lpddr4_wm_table_gs = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 5.32,
+ .sr_enter_plus_exit_time_us = 6.38,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.82,
+ .sr_enter_plus_exit_time_us = 11.196,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.89,
+ .sr_enter_plus_exit_time_us = 11.24,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.748,
+ .sr_enter_plus_exit_time_us = 11.102,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table lpddr4_wm_table_with_disabled_ppt = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 8.32,
+ .sr_enter_plus_exit_time_us = 9.38,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.82,
+ .sr_enter_plus_exit_time_us = 11.196,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.89,
+ .sr_enter_plus_exit_time_us = 11.24,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.748,
+ .sr_enter_plus_exit_time_us = 11.102,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table ddr4_wm_table_rn = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 11.90,
+ .sr_enter_plus_exit_time_us = 12.80,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.18,
+ .sr_enter_plus_exit_time_us = 14.30,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.18,
+ .sr_enter_plus_exit_time_us = 14.30,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.18,
+ .sr_enter_plus_exit_time_us = 14.30,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table ddr4_1R_wm_table_rn = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.90,
+ .sr_enter_plus_exit_time_us = 14.80,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.90,
+ .sr_enter_plus_exit_time_us = 14.80,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.90,
+ .sr_enter_plus_exit_time_us = 14.80,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.90,
+ .sr_enter_plus_exit_time_us = 14.80,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table lpddr4_wm_table_rn = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 7.32,
+ .sr_enter_plus_exit_time_us = 8.38,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.82,
+ .sr_enter_plus_exit_time_us = 11.196,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.89,
+ .sr_enter_plus_exit_time_us = 11.24,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.748,
+ .sr_enter_plus_exit_time_us = 11.102,
+ .valid = true,
+ },
+ }
+};
+
void dcn20_populate_dml_writeback_from_context(struct dc *dc,
struct resource_context *res_ctx,
display_e2e_pipe_params_st *pipes)
@@ -100,3 +915,1480 @@ void dcn20_populate_dml_writeback_from_context(struct dc *dc,
pipe_cnt++;
}
}
+
+void dcn20_fpu_set_wb_arb_params(struct mcif_arb_params *wb_arb_params,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt, int i)
+{
+ int k;
+
+ dc_assert_fp_enabled();
+
+ for (k = 0; k < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); k++) {
+ wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ }
+ wb_arb_params->time_per_pixel = 16.0 * 1000 / (context->res_ctx.pipe_ctx[i].stream->phy_pix_clk / 1000); /* 4 bit fraction, ms */
+}
+
+static bool is_dtbclk_required(struct dc *dc, struct dc_state *context)
+{
+ int i;
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+ if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i]))
+ return true;
+ }
+ return false;
+}
+
+static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struct dc_state *context)
+{
+ int plane_count;
+ int i;
+ unsigned int optimized_min_dst_y_next_start_us;
+
+ plane_count = 0;
+ optimized_min_dst_y_next_start_us = 0;
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (context->res_ctx.pipe_ctx[i].plane_state)
+ plane_count++;
+ }
+
+ /*
+ * Z9 and Z10 allowed cases:
+ * 1. 0 Planes enabled
+ * 2. single eDP, on link 0, 1 plane and stutter period > 5ms
+ * Z10 only cases:
+ * 1. single eDP, on link 0, 1 plane and stutter period >= 5ms
+ * Zstate not allowed cases:
+ * 1. Everything else
+ */
+ if (plane_count == 0)
+ return DCN_ZSTATE_SUPPORT_ALLOW;
+ else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
+ struct dc_link *link = context->streams[0]->sink->link;
+ struct dc_stream_status *stream_status = &context->stream_status[0];
+
+ if (dc_extended_blank_supported(dc)) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (context->res_ctx.pipe_ctx[i].stream == context->streams[0]
+ && context->res_ctx.pipe_ctx[i].stream->adjust.v_total_min == context->res_ctx.pipe_ctx[i].stream->adjust.v_total_max
+ && context->res_ctx.pipe_ctx[i].stream->adjust.v_total_min > context->res_ctx.pipe_ctx[i].stream->timing.v_total) {
+ optimized_min_dst_y_next_start_us =
+ context->res_ctx.pipe_ctx[i].dlg_regs.optimized_min_dst_y_next_start_us;
+ break;
+ }
+ }
+ }
+ /* zstate only supported on PWRSEQ0 and when there's <2 planes*/
+ if (link->link_index != 0 || stream_status->plane_count > 1)
+ return DCN_ZSTATE_SUPPORT_DISALLOW;
+
+ if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || optimized_min_dst_y_next_start_us > 5000)
+ return DCN_ZSTATE_SUPPORT_ALLOW;
+ else if (link->psr_settings.psr_version == DC_PSR_VERSION_1 && !dc->debug.disable_psr)
+ return DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
+ else
+ return DCN_ZSTATE_SUPPORT_DISALLOW;
+ } else
+ return DCN_ZSTATE_SUPPORT_DISALLOW;
+}
+
+void dcn20_calculate_dlg_params(
+ struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ int i, pipe_idx;
+
+ dc_assert_fp_enabled();
+
+ /* Writeback MCIF_WB arbitration parameters */
+ dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt);
+
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16;
+
+ if (dc->debug.min_dram_clk_khz > context->bw_ctx.bw.dcn.clk.dramclk_khz)
+ context->bw_ctx.bw.dcn.clk.dramclk_khz = dc->debug.min_dram_clk_khz;
+
+ context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000;
+ context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000;
+ context->bw_ctx.bw.dcn.clk.p_state_change_support =
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
+ != dm_dram_clock_change_unsupported;
+
+ /* Pstate change might not be supported by hardware, but it might be
+ * possible with firmware driven vertical blank stretching.
+ */
+ context->bw_ctx.bw.dcn.clk.p_state_change_support |= context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching;
+
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+
+ context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
+
+ if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz;
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+ pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
+ context->res_ctx.pipe_ctx[i].unbounded_req = false;
+ } else {
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb = context->bw_ctx.dml.ip.det_buffer_size_kbytes;
+ context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode;
+ }
+ if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz =
+ pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
+ pipe_idx++;
+ }
+ /*save a original dppclock copy*/
+ context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
+ context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
+ context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz * 1000;
+ context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz * 1000;
+
+ context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes
+ - context->bw_ctx.dml.ip.det_buffer_size_kbytes * pipe_idx;
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ bool cstate_en = context->bw_ctx.dml.vba.PrefetchMode[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != 2;
+
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ if (dc->ctx->dce_version == DCN_VERSION_2_01)
+ cstate_en = false;
+
+ context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg(&context->bw_ctx.dml,
+ &context->res_ctx.pipe_ctx[i].dlg_regs,
+ &context->res_ctx.pipe_ctx[i].ttu_regs,
+ pipes,
+ pipe_cnt,
+ pipe_idx,
+ cstate_en,
+ context->bw_ctx.bw.dcn.clk.p_state_change_support,
+ false, false, true);
+
+ context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg(&context->bw_ctx.dml,
+ &context->res_ctx.pipe_ctx[i].rq_regs,
+ &pipes[pipe_idx].pipe);
+ pipe_idx++;
+ }
+ context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context);
+}
+
+static void swizzle_to_dml_params(
+ enum swizzle_mode_values swizzle,
+ unsigned int *sw_mode)
+{
+ switch (swizzle) {
+ case DC_SW_LINEAR:
+ *sw_mode = dm_sw_linear;
+ break;
+ case DC_SW_4KB_S:
+ *sw_mode = dm_sw_4kb_s;
+ break;
+ case DC_SW_4KB_S_X:
+ *sw_mode = dm_sw_4kb_s_x;
+ break;
+ case DC_SW_4KB_D:
+ *sw_mode = dm_sw_4kb_d;
+ break;
+ case DC_SW_4KB_D_X:
+ *sw_mode = dm_sw_4kb_d_x;
+ break;
+ case DC_SW_64KB_S:
+ *sw_mode = dm_sw_64kb_s;
+ break;
+ case DC_SW_64KB_S_X:
+ *sw_mode = dm_sw_64kb_s_x;
+ break;
+ case DC_SW_64KB_S_T:
+ *sw_mode = dm_sw_64kb_s_t;
+ break;
+ case DC_SW_64KB_D:
+ *sw_mode = dm_sw_64kb_d;
+ break;
+ case DC_SW_64KB_D_X:
+ *sw_mode = dm_sw_64kb_d_x;
+ break;
+ case DC_SW_64KB_D_T:
+ *sw_mode = dm_sw_64kb_d_t;
+ break;
+ case DC_SW_64KB_R_X:
+ *sw_mode = dm_sw_64kb_r_x;
+ break;
+ case DC_SW_VAR_S:
+ *sw_mode = dm_sw_var_s;
+ break;
+ case DC_SW_VAR_S_X:
+ *sw_mode = dm_sw_var_s_x;
+ break;
+ case DC_SW_VAR_D:
+ *sw_mode = dm_sw_var_d;
+ break;
+ case DC_SW_VAR_D_X:
+ *sw_mode = dm_sw_var_d_x;
+ break;
+ case DC_SW_VAR_R_X:
+ *sw_mode = dm_sw_var_r_x;
+ break;
+ default:
+ ASSERT(0); /* Not supported */
+ break;
+ }
+}
+
+int dcn20_populate_dml_pipes_from_context(
+ struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate)
+{
+ int pipe_cnt, i;
+ bool synchronized_vblank = true;
+ struct resource_context *res_ctx = &context->res_ctx;
+
+ dc_assert_fp_enabled();
+
+ for (i = 0, pipe_cnt = -1; i < dc->res_pool->pipe_count; i++) {
+ if (!res_ctx->pipe_ctx[i].stream)
+ continue;
+
+ if (pipe_cnt < 0) {
+ pipe_cnt = i;
+ continue;
+ }
+
+ if (res_ctx->pipe_ctx[pipe_cnt].stream == res_ctx->pipe_ctx[i].stream)
+ continue;
+
+ if (dc->debug.disable_timing_sync ||
+ (!resource_are_streams_timing_synchronizable(
+ res_ctx->pipe_ctx[pipe_cnt].stream,
+ res_ctx->pipe_ctx[i].stream) &&
+ !resource_are_vblanks_synchronizable(
+ res_ctx->pipe_ctx[pipe_cnt].stream,
+ res_ctx->pipe_ctx[i].stream))) {
+ synchronized_vblank = false;
+ break;
+ }
+ }
+
+ for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing;
+ unsigned int v_total;
+ unsigned int front_porch;
+ int output_bpc;
+ struct audio_check aud_check = {0};
+
+ if (!res_ctx->pipe_ctx[i].stream)
+ continue;
+
+ v_total = timing->v_total;
+ front_porch = timing->v_front_porch;
+
+ /* todo:
+ pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0;
+ pipes[pipe_cnt].pipe.src.dcc = 0;
+ pipes[pipe_cnt].pipe.src.vm = 0;*/
+
+ pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
+
+ pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC;
+ /* todo: rotation?*/
+ pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h;
+ if (res_ctx->pipe_ctx[i].stream->use_dynamic_meta) {
+ pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = true;
+ /* 1/2 vblank */
+ pipes[pipe_cnt].pipe.src.dynamic_metadata_lines_before_active =
+ (v_total - timing->v_addressable
+ - timing->v_border_top - timing->v_border_bottom) / 2;
+ /* 36 bytes dp, 32 hdmi */
+ pipes[pipe_cnt].pipe.src.dynamic_metadata_xmit_bytes =
+ dc_is_dp_signal(res_ctx->pipe_ctx[i].stream->signal) ? 36 : 32;
+ }
+ pipes[pipe_cnt].pipe.src.dcc = false;
+ pipes[pipe_cnt].pipe.src.dcc_rate = 1;
+ pipes[pipe_cnt].pipe.dest.synchronized_vblank_all_planes = synchronized_vblank;
+ pipes[pipe_cnt].pipe.dest.synchronize_timings = synchronized_vblank;
+ pipes[pipe_cnt].pipe.dest.hblank_start = timing->h_total - timing->h_front_porch;
+ pipes[pipe_cnt].pipe.dest.hblank_end = pipes[pipe_cnt].pipe.dest.hblank_start
+ - timing->h_addressable
+ - timing->h_border_left
+ - timing->h_border_right;
+ pipes[pipe_cnt].pipe.dest.vblank_start = v_total - front_porch;
+ pipes[pipe_cnt].pipe.dest.vblank_end = pipes[pipe_cnt].pipe.dest.vblank_start
+ - timing->v_addressable
+ - timing->v_border_top
+ - timing->v_border_bottom;
+ pipes[pipe_cnt].pipe.dest.htotal = timing->h_total;
+ pipes[pipe_cnt].pipe.dest.vtotal = v_total;
+ pipes[pipe_cnt].pipe.dest.hactive =
+ timing->h_addressable + timing->h_border_left + timing->h_border_right;
+ pipes[pipe_cnt].pipe.dest.vactive =
+ timing->v_addressable + timing->v_border_top + timing->v_border_bottom;
+ pipes[pipe_cnt].pipe.dest.interlaced = timing->flags.INTERLACE;
+ pipes[pipe_cnt].pipe.dest.pixel_rate_mhz = timing->pix_clk_100hz/10000.0;
+ if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
+ pipes[pipe_cnt].pipe.dest.pixel_rate_mhz *= 2;
+ pipes[pipe_cnt].pipe.dest.otg_inst = res_ctx->pipe_ctx[i].stream_res.tg->inst;
+ pipes[pipe_cnt].dout.dp_lanes = 4;
+ if (res_ctx->pipe_ctx[i].stream->link)
+ pipes[pipe_cnt].dout.dp_rate = dm_dp_rate_na;
+ pipes[pipe_cnt].dout.is_virtual = 0;
+ pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min;
+ pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max;
+ switch (get_num_odm_splits(&res_ctx->pipe_ctx[i])) {
+ case 1:
+ pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_2to1;
+ break;
+ case 3:
+ pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_4to1;
+ break;
+ default:
+ pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_disabled;
+ }
+ pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx;
+ if (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state
+ == res_ctx->pipe_ctx[i].plane_state) {
+ struct pipe_ctx *first_pipe = res_ctx->pipe_ctx[i].top_pipe;
+ int split_idx = 0;
+
+ while (first_pipe->top_pipe && first_pipe->top_pipe->plane_state
+ == res_ctx->pipe_ctx[i].plane_state) {
+ first_pipe = first_pipe->top_pipe;
+ split_idx++;
+ }
+ /* Treat 4to1 mpc combine as an mpo of 2 2-to-1 combines */
+ if (split_idx == 0)
+ pipes[pipe_cnt].pipe.src.hsplit_grp = first_pipe->pipe_idx;
+ else if (split_idx == 1)
+ pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx;
+ else if (split_idx == 2)
+ pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].top_pipe->pipe_idx;
+ } else if (res_ctx->pipe_ctx[i].prev_odm_pipe) {
+ struct pipe_ctx *first_pipe = res_ctx->pipe_ctx[i].prev_odm_pipe;
+
+ while (first_pipe->prev_odm_pipe)
+ first_pipe = first_pipe->prev_odm_pipe;
+ pipes[pipe_cnt].pipe.src.hsplit_grp = first_pipe->pipe_idx;
+ }
+
+ switch (res_ctx->pipe_ctx[i].stream->signal) {
+ case SIGNAL_TYPE_DISPLAY_PORT_MST:
+ case SIGNAL_TYPE_DISPLAY_PORT:
+ pipes[pipe_cnt].dout.output_type = dm_dp;
+ break;
+ case SIGNAL_TYPE_EDP:
+ pipes[pipe_cnt].dout.output_type = dm_edp;
+ break;
+ case SIGNAL_TYPE_HDMI_TYPE_A:
+ case SIGNAL_TYPE_DVI_SINGLE_LINK:
+ case SIGNAL_TYPE_DVI_DUAL_LINK:
+ pipes[pipe_cnt].dout.output_type = dm_hdmi;
+ break;
+ default:
+ /* In case there is no signal, set dp with 4 lanes to allow max config */
+ pipes[pipe_cnt].dout.is_virtual = 1;
+ pipes[pipe_cnt].dout.output_type = dm_dp;
+ pipes[pipe_cnt].dout.dp_lanes = 4;
+ pipes[pipe_cnt].dout.dp_rate = dm_dp_rate_hbr2;
+ }
+
+ switch (res_ctx->pipe_ctx[i].stream->timing.display_color_depth) {
+ case COLOR_DEPTH_666:
+ output_bpc = 6;
+ break;
+ case COLOR_DEPTH_888:
+ output_bpc = 8;
+ break;
+ case COLOR_DEPTH_101010:
+ output_bpc = 10;
+ break;
+ case COLOR_DEPTH_121212:
+ output_bpc = 12;
+ break;
+ case COLOR_DEPTH_141414:
+ output_bpc = 14;
+ break;
+ case COLOR_DEPTH_161616:
+ output_bpc = 16;
+ break;
+ case COLOR_DEPTH_999:
+ output_bpc = 9;
+ break;
+ case COLOR_DEPTH_111111:
+ output_bpc = 11;
+ break;
+ default:
+ output_bpc = 8;
+ break;
+ }
+
+ switch (res_ctx->pipe_ctx[i].stream->timing.pixel_encoding) {
+ case PIXEL_ENCODING_RGB:
+ case PIXEL_ENCODING_YCBCR444:
+ pipes[pipe_cnt].dout.output_format = dm_444;
+ pipes[pipe_cnt].dout.output_bpp = output_bpc * 3;
+ break;
+ case PIXEL_ENCODING_YCBCR420:
+ pipes[pipe_cnt].dout.output_format = dm_420;
+ pipes[pipe_cnt].dout.output_bpp = (output_bpc * 3.0) / 2;
+ break;
+ case PIXEL_ENCODING_YCBCR422:
+ if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC &&
+ !res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.ycbcr422_simple)
+ pipes[pipe_cnt].dout.output_format = dm_n422;
+ else
+ pipes[pipe_cnt].dout.output_format = dm_s422;
+ pipes[pipe_cnt].dout.output_bpp = output_bpc * 2;
+ break;
+ default:
+ pipes[pipe_cnt].dout.output_format = dm_444;
+ pipes[pipe_cnt].dout.output_bpp = output_bpc * 3;
+ }
+
+ if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC)
+ pipes[pipe_cnt].dout.output_bpp = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.bits_per_pixel / 16.0;
+
+ /* todo: default max for now, until there is logic reflecting this in dc*/
+ pipes[pipe_cnt].dout.dsc_input_bpc = 12;
+ /*fill up the audio sample rate (unit in kHz)*/
+ get_audio_check(&res_ctx->pipe_ctx[i].stream->audio_info, &aud_check);
+ pipes[pipe_cnt].dout.max_audio_sample_rate = aud_check.max_audiosample_rate / 1000;
+ /*
+ * For graphic plane, cursor number is 1, nv12 is 0
+ * bw calculations due to cursor on/off
+ */
+ if (res_ctx->pipe_ctx[i].plane_state &&
+ (res_ctx->pipe_ctx[i].plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE ||
+ res_ctx->pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM))
+ pipes[pipe_cnt].pipe.src.num_cursors = 0;
+ else
+ pipes[pipe_cnt].pipe.src.num_cursors = dc->dml.ip.number_of_cursors;
+
+ pipes[pipe_cnt].pipe.src.cur0_src_width = 256;
+ pipes[pipe_cnt].pipe.src.cur0_bpp = dm_cur_32bit;
+
+ if (!res_ctx->pipe_ctx[i].plane_state) {
+ pipes[pipe_cnt].pipe.src.is_hsplit = pipes[pipe_cnt].pipe.dest.odm_combine != dm_odm_combine_mode_disabled;
+ pipes[pipe_cnt].pipe.src.source_scan = dm_horz;
+ pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_0;
+ pipes[pipe_cnt].pipe.src.sw_mode = dm_sw_4kb_s;
+ pipes[pipe_cnt].pipe.src.macro_tile_size = dm_64k_tile;
+ pipes[pipe_cnt].pipe.src.viewport_width = timing->h_addressable;
+ if (pipes[pipe_cnt].pipe.src.viewport_width > 1920)
+ pipes[pipe_cnt].pipe.src.viewport_width = 1920;
+ pipes[pipe_cnt].pipe.src.viewport_height = timing->v_addressable;
+ if (pipes[pipe_cnt].pipe.src.viewport_height > 1080)
+ pipes[pipe_cnt].pipe.src.viewport_height = 1080;
+ pipes[pipe_cnt].pipe.src.surface_height_y = pipes[pipe_cnt].pipe.src.viewport_height;
+ pipes[pipe_cnt].pipe.src.surface_width_y = pipes[pipe_cnt].pipe.src.viewport_width;
+ pipes[pipe_cnt].pipe.src.surface_height_c = pipes[pipe_cnt].pipe.src.viewport_height;
+ pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width;
+ pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 255) / 256) * 256;
+ pipes[pipe_cnt].pipe.src.source_format = dm_444_32;
+ pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/
+ pipes[pipe_cnt].pipe.dest.recout_height = pipes[pipe_cnt].pipe.src.viewport_height; /*vp_height/vratio*/
+ pipes[pipe_cnt].pipe.dest.full_recout_width = pipes[pipe_cnt].pipe.dest.recout_width; /*when is_hsplit != 1*/
+ pipes[pipe_cnt].pipe.dest.full_recout_height = pipes[pipe_cnt].pipe.dest.recout_height; /*when is_hsplit != 1*/
+ pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_16;
+ pipes[pipe_cnt].pipe.scale_ratio_depth.hscl_ratio = 1.0;
+ pipes[pipe_cnt].pipe.scale_ratio_depth.vscl_ratio = 1.0;
+ pipes[pipe_cnt].pipe.scale_ratio_depth.scl_enable = 0; /*Lb only or Full scl*/
+ pipes[pipe_cnt].pipe.scale_taps.htaps = 1;
+ pipes[pipe_cnt].pipe.scale_taps.vtaps = 1;
+ pipes[pipe_cnt].pipe.dest.vtotal_min = v_total;
+ pipes[pipe_cnt].pipe.dest.vtotal_max = v_total;
+
+ if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_2to1) {
+ pipes[pipe_cnt].pipe.src.viewport_width /= 2;
+ pipes[pipe_cnt].pipe.dest.recout_width /= 2;
+ } else if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_4to1) {
+ pipes[pipe_cnt].pipe.src.viewport_width /= 4;
+ pipes[pipe_cnt].pipe.dest.recout_width /= 4;
+ }
+ } else {
+ struct dc_plane_state *pln = res_ctx->pipe_ctx[i].plane_state;
+ struct scaler_data *scl = &res_ctx->pipe_ctx[i].plane_res.scl_data;
+
+ pipes[pipe_cnt].pipe.src.immediate_flip = pln->flip_immediate;
+ pipes[pipe_cnt].pipe.src.is_hsplit = (res_ctx->pipe_ctx[i].bottom_pipe && res_ctx->pipe_ctx[i].bottom_pipe->plane_state == pln)
+ || (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state == pln)
+ || pipes[pipe_cnt].pipe.dest.odm_combine != dm_odm_combine_mode_disabled;
+
+ /* stereo is not split */
+ if (pln->stereo_format == PLANE_STEREO_FORMAT_SIDE_BY_SIDE ||
+ pln->stereo_format == PLANE_STEREO_FORMAT_TOP_AND_BOTTOM) {
+ pipes[pipe_cnt].pipe.src.is_hsplit = false;
+ pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx;
+ }
+
+ pipes[pipe_cnt].pipe.src.source_scan = pln->rotation == ROTATION_ANGLE_90
+ || pln->rotation == ROTATION_ANGLE_270 ? dm_vert : dm_horz;
+ switch (pln->rotation) {
+ case ROTATION_ANGLE_0:
+ pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_0;
+ break;
+ case ROTATION_ANGLE_90:
+ pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_90;
+ break;
+ case ROTATION_ANGLE_180:
+ pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_180;
+ break;
+ case ROTATION_ANGLE_270:
+ pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_270;
+ break;
+ default:
+ break;
+ }
+ pipes[pipe_cnt].pipe.src.viewport_y_y = scl->viewport.y;
+ pipes[pipe_cnt].pipe.src.viewport_y_c = scl->viewport_c.y;
+ pipes[pipe_cnt].pipe.src.viewport_x_y = scl->viewport.x;
+ pipes[pipe_cnt].pipe.src.viewport_x_c = scl->viewport_c.x;
+ pipes[pipe_cnt].pipe.src.viewport_width = scl->viewport.width;
+ pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c.width;
+ pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport.height;
+ pipes[pipe_cnt].pipe.src.viewport_height_c = scl->viewport_c.height;
+ pipes[pipe_cnt].pipe.src.viewport_width_max = pln->src_rect.width;
+ pipes[pipe_cnt].pipe.src.viewport_height_max = pln->src_rect.height;
+ pipes[pipe_cnt].pipe.src.surface_width_y = pln->plane_size.surface_size.width;
+ pipes[pipe_cnt].pipe.src.surface_height_y = pln->plane_size.surface_size.height;
+ pipes[pipe_cnt].pipe.src.surface_width_c = pln->plane_size.chroma_size.width;
+ pipes[pipe_cnt].pipe.src.surface_height_c = pln->plane_size.chroma_size.height;
+ if (pln->format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA
+ || pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) {
+ pipes[pipe_cnt].pipe.src.data_pitch = pln->plane_size.surface_pitch;
+ pipes[pipe_cnt].pipe.src.data_pitch_c = pln->plane_size.chroma_pitch;
+ pipes[pipe_cnt].pipe.src.meta_pitch = pln->dcc.meta_pitch;
+ pipes[pipe_cnt].pipe.src.meta_pitch_c = pln->dcc.meta_pitch_c;
+ } else {
+ pipes[pipe_cnt].pipe.src.data_pitch = pln->plane_size.surface_pitch;
+ pipes[pipe_cnt].pipe.src.meta_pitch = pln->dcc.meta_pitch;
+ }
+ pipes[pipe_cnt].pipe.src.dcc = pln->dcc.enable;
+ pipes[pipe_cnt].pipe.dest.recout_width = scl->recout.width;
+ pipes[pipe_cnt].pipe.dest.recout_height = scl->recout.height;
+ pipes[pipe_cnt].pipe.dest.full_recout_height = scl->recout.height;
+ pipes[pipe_cnt].pipe.dest.full_recout_width = scl->recout.width;
+ if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_2to1)
+ pipes[pipe_cnt].pipe.dest.full_recout_width *= 2;
+ else if (pipes[pipe_cnt].pipe.dest.odm_combine == dm_odm_combine_mode_4to1)
+ pipes[pipe_cnt].pipe.dest.full_recout_width *= 4;
+ else {
+ struct pipe_ctx *split_pipe = res_ctx->pipe_ctx[i].bottom_pipe;
+
+ while (split_pipe && split_pipe->plane_state == pln) {
+ pipes[pipe_cnt].pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width;
+ split_pipe = split_pipe->bottom_pipe;
+ }
+ split_pipe = res_ctx->pipe_ctx[i].top_pipe;
+ while (split_pipe && split_pipe->plane_state == pln) {
+ pipes[pipe_cnt].pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width;
+ split_pipe = split_pipe->top_pipe;
+ }
+ }
+
+ pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_16;
+ pipes[pipe_cnt].pipe.scale_ratio_depth.hscl_ratio = (double) scl->ratios.horz.value / (1ULL<<32);
+ pipes[pipe_cnt].pipe.scale_ratio_depth.hscl_ratio_c = (double) scl->ratios.horz_c.value / (1ULL<<32);
+ pipes[pipe_cnt].pipe.scale_ratio_depth.vscl_ratio = (double) scl->ratios.vert.value / (1ULL<<32);
+ pipes[pipe_cnt].pipe.scale_ratio_depth.vscl_ratio_c = (double) scl->ratios.vert_c.value / (1ULL<<32);
+ pipes[pipe_cnt].pipe.scale_ratio_depth.scl_enable =
+ scl->ratios.vert.value != dc_fixpt_one.value
+ || scl->ratios.horz.value != dc_fixpt_one.value
+ || scl->ratios.vert_c.value != dc_fixpt_one.value
+ || scl->ratios.horz_c.value != dc_fixpt_one.value /*Lb only or Full scl*/
+ || dc->debug.always_scale; /*support always scale*/
+ pipes[pipe_cnt].pipe.scale_taps.htaps = scl->taps.h_taps;
+ pipes[pipe_cnt].pipe.scale_taps.htaps_c = scl->taps.h_taps_c;
+ pipes[pipe_cnt].pipe.scale_taps.vtaps = scl->taps.v_taps;
+ pipes[pipe_cnt].pipe.scale_taps.vtaps_c = scl->taps.v_taps_c;
+
+ pipes[pipe_cnt].pipe.src.macro_tile_size =
+ swizzle_mode_to_macro_tile_size(pln->tiling_info.gfx9.swizzle);
+ swizzle_to_dml_params(pln->tiling_info.gfx9.swizzle,
+ &pipes[pipe_cnt].pipe.src.sw_mode);
+
+ switch (pln->format) {
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+ pipes[pipe_cnt].pipe.src.source_format = dm_420_8;
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+ pipes[pipe_cnt].pipe.src.source_format = dm_420_10;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
+ pipes[pipe_cnt].pipe.src.source_format = dm_444_64;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
+ case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+ pipes[pipe_cnt].pipe.src.source_format = dm_444_16;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS:
+ pipes[pipe_cnt].pipe.src.source_format = dm_444_8;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
+ pipes[pipe_cnt].pipe.src.source_format = dm_rgbe_alpha;
+ break;
+ default:
+ pipes[pipe_cnt].pipe.src.source_format = dm_444_32;
+ break;
+ }
+ }
+
+ pipe_cnt++;
+ }
+
+ /* populate writeback information */
+ dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, pipes);
+
+ return pipe_cnt;
+}
+
+void dcn20_calculate_wm(
+ struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *out_pipe_cnt,
+ int *pipe_split_from,
+ int vlevel,
+ bool fast_validate)
+{
+ int pipe_cnt, i, pipe_idx;
+
+ dc_assert_fp_enabled();
+
+ for (i = 0, pipe_idx = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
+ pipes[pipe_cnt].clks_cfg.dispclk_mhz = context->bw_ctx.dml.vba.RequiredDISPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+
+ if (pipe_split_from[i] < 0) {
+ pipes[pipe_cnt].clks_cfg.dppclk_mhz =
+ context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx];
+ if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_idx] == pipe_idx)
+ pipes[pipe_cnt].pipe.dest.odm_combine =
+ context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx];
+ else
+ pipes[pipe_cnt].pipe.dest.odm_combine = 0;
+ pipe_idx++;
+ } else {
+ pipes[pipe_cnt].clks_cfg.dppclk_mhz =
+ context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_split_from[i]];
+ if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_split_from[i]] == pipe_split_from[i])
+ pipes[pipe_cnt].pipe.dest.odm_combine =
+ context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_split_from[i]];
+ else
+ pipes[pipe_cnt].pipe.dest.odm_combine = 0;
+ }
+
+ if (dc->config.forced_clocks) {
+ pipes[pipe_cnt].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
+ pipes[pipe_cnt].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
+ }
+ if (dc->debug.min_disp_clk_khz > pipes[pipe_cnt].clks_cfg.dispclk_mhz * 1000)
+ pipes[pipe_cnt].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
+ if (dc->debug.min_dpp_clk_khz > pipes[pipe_cnt].clks_cfg.dppclk_mhz * 1000)
+ pipes[pipe_cnt].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
+
+ pipe_cnt++;
+ }
+
+ if (pipe_cnt != pipe_idx) {
+ if (dc->res_pool->funcs->populate_dml_pipes)
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc,
+ context, pipes, fast_validate);
+ else
+ pipe_cnt = dcn20_populate_dml_pipes_from_context(dc,
+ context, pipes, fast_validate);
+ }
+
+ *out_pipe_cnt = pipe_cnt;
+
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].dcfclk_mhz;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+
+ /* only pipe 0 is read for voltage and dcf/soc clocks */
+ if (vlevel < 1) {
+ pipes[0].clks_cfg.voltage = 1;
+ pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[1].dcfclk_mhz;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[1].socclk_mhz;
+ }
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ if (vlevel < 2) {
+ pipes[0].clks_cfg.voltage = 2;
+ pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].socclk_mhz;
+ }
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ if (vlevel < 3) {
+ pipes[0].clks_cfg.voltage = 3;
+ pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[2].socclk_mhz;
+ }
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].dcfclk_mhz;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+}
+
+void dcn20_update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb,
+ struct pp_smu_nv_clock_table *max_clocks, unsigned int *uclk_states, unsigned int num_states)
+{
+ int num_calculated_states = 0;
+ int min_dcfclk = 0;
+ int i;
+
+ dc_assert_fp_enabled();
+
+ if (num_states == 0)
+ return;
+
+ memset(bb->clock_limits, 0, sizeof(bb->clock_limits));
+
+ if (dc->bb_overrides.min_dcfclk_mhz > 0) {
+ min_dcfclk = dc->bb_overrides.min_dcfclk_mhz;
+ } else {
+ if (ASICREV_IS_NAVI12_P(dc->ctx->asic_id.hw_internal_rev))
+ min_dcfclk = 310;
+ else
+ // Accounting for SOC/DCF relationship, we can go as high as
+ // 506Mhz in Vmin.
+ min_dcfclk = 506;
+ }
+
+ for (i = 0; i < num_states; i++) {
+ int min_fclk_required_by_uclk;
+ bb->clock_limits[i].state = i;
+ bb->clock_limits[i].dram_speed_mts = uclk_states[i] * 16 / 1000;
+
+ // FCLK:UCLK ratio is 1.08
+ min_fclk_required_by_uclk = div_u64(((unsigned long long)uclk_states[i]) * 1080,
+ 1000000);
+
+ bb->clock_limits[i].fabricclk_mhz = (min_fclk_required_by_uclk < min_dcfclk) ?
+ min_dcfclk : min_fclk_required_by_uclk;
+
+ bb->clock_limits[i].socclk_mhz = (bb->clock_limits[i].fabricclk_mhz > max_clocks->socClockInKhz / 1000) ?
+ max_clocks->socClockInKhz / 1000 : bb->clock_limits[i].fabricclk_mhz;
+
+ bb->clock_limits[i].dcfclk_mhz = (bb->clock_limits[i].fabricclk_mhz > max_clocks->dcfClockInKhz / 1000) ?
+ max_clocks->dcfClockInKhz / 1000 : bb->clock_limits[i].fabricclk_mhz;
+
+ bb->clock_limits[i].dispclk_mhz = max_clocks->displayClockInKhz / 1000;
+ bb->clock_limits[i].dppclk_mhz = max_clocks->displayClockInKhz / 1000;
+ bb->clock_limits[i].dscclk_mhz = max_clocks->displayClockInKhz / (1000 * 3);
+
+ bb->clock_limits[i].phyclk_mhz = max_clocks->phyClockInKhz / 1000;
+
+ num_calculated_states++;
+ }
+
+ bb->clock_limits[num_calculated_states - 1].socclk_mhz = max_clocks->socClockInKhz / 1000;
+ bb->clock_limits[num_calculated_states - 1].fabricclk_mhz = max_clocks->socClockInKhz / 1000;
+ bb->clock_limits[num_calculated_states - 1].dcfclk_mhz = max_clocks->dcfClockInKhz / 1000;
+
+ bb->num_states = num_calculated_states;
+
+ // Duplicate the last state, DML always an extra state identical to max state to work
+ memcpy(&bb->clock_limits[num_calculated_states], &bb->clock_limits[num_calculated_states - 1], sizeof(struct _vcs_dpi_voltage_scaling_st));
+ bb->clock_limits[num_calculated_states].state = bb->num_states;
+}
+
+void dcn20_cap_soc_clocks(
+ struct _vcs_dpi_soc_bounding_box_st *bb,
+ struct pp_smu_nv_clock_table max_clocks)
+{
+ int i;
+
+ dc_assert_fp_enabled();
+
+ // First pass - cap all clocks higher than the reported max
+ for (i = 0; i < bb->num_states; i++) {
+ if ((bb->clock_limits[i].dcfclk_mhz > (max_clocks.dcfClockInKhz / 1000))
+ && max_clocks.dcfClockInKhz != 0)
+ bb->clock_limits[i].dcfclk_mhz = (max_clocks.dcfClockInKhz / 1000);
+
+ if ((bb->clock_limits[i].dram_speed_mts > (max_clocks.uClockInKhz / 1000) * 16)
+ && max_clocks.uClockInKhz != 0)
+ bb->clock_limits[i].dram_speed_mts = (max_clocks.uClockInKhz / 1000) * 16;
+
+ if ((bb->clock_limits[i].fabricclk_mhz > (max_clocks.fabricClockInKhz / 1000))
+ && max_clocks.fabricClockInKhz != 0)
+ bb->clock_limits[i].fabricclk_mhz = (max_clocks.fabricClockInKhz / 1000);
+
+ if ((bb->clock_limits[i].dispclk_mhz > (max_clocks.displayClockInKhz / 1000))
+ && max_clocks.displayClockInKhz != 0)
+ bb->clock_limits[i].dispclk_mhz = (max_clocks.displayClockInKhz / 1000);
+
+ if ((bb->clock_limits[i].dppclk_mhz > (max_clocks.dppClockInKhz / 1000))
+ && max_clocks.dppClockInKhz != 0)
+ bb->clock_limits[i].dppclk_mhz = (max_clocks.dppClockInKhz / 1000);
+
+ if ((bb->clock_limits[i].phyclk_mhz > (max_clocks.phyClockInKhz / 1000))
+ && max_clocks.phyClockInKhz != 0)
+ bb->clock_limits[i].phyclk_mhz = (max_clocks.phyClockInKhz / 1000);
+
+ if ((bb->clock_limits[i].socclk_mhz > (max_clocks.socClockInKhz / 1000))
+ && max_clocks.socClockInKhz != 0)
+ bb->clock_limits[i].socclk_mhz = (max_clocks.socClockInKhz / 1000);
+
+ if ((bb->clock_limits[i].dscclk_mhz > (max_clocks.dscClockInKhz / 1000))
+ && max_clocks.dscClockInKhz != 0)
+ bb->clock_limits[i].dscclk_mhz = (max_clocks.dscClockInKhz / 1000);
+ }
+
+ // Second pass - remove all duplicate clock states
+ for (i = bb->num_states - 1; i > 1; i--) {
+ bool duplicate = true;
+
+ if (bb->clock_limits[i-1].dcfclk_mhz != bb->clock_limits[i].dcfclk_mhz)
+ duplicate = false;
+ if (bb->clock_limits[i-1].dispclk_mhz != bb->clock_limits[i].dispclk_mhz)
+ duplicate = false;
+ if (bb->clock_limits[i-1].dppclk_mhz != bb->clock_limits[i].dppclk_mhz)
+ duplicate = false;
+ if (bb->clock_limits[i-1].dram_speed_mts != bb->clock_limits[i].dram_speed_mts)
+ duplicate = false;
+ if (bb->clock_limits[i-1].dscclk_mhz != bb->clock_limits[i].dscclk_mhz)
+ duplicate = false;
+ if (bb->clock_limits[i-1].fabricclk_mhz != bb->clock_limits[i].fabricclk_mhz)
+ duplicate = false;
+ if (bb->clock_limits[i-1].phyclk_mhz != bb->clock_limits[i].phyclk_mhz)
+ duplicate = false;
+ if (bb->clock_limits[i-1].socclk_mhz != bb->clock_limits[i].socclk_mhz)
+ duplicate = false;
+
+ if (duplicate)
+ bb->num_states--;
+ }
+}
+
+void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb)
+{
+ dc_assert_fp_enabled();
+
+ if ((int)(bb->sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
+ && dc->bb_overrides.sr_exit_time_ns) {
+ bb->sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(bb->sr_enter_plus_exit_time_us * 1000)
+ != dc->bb_overrides.sr_enter_plus_exit_time_ns
+ && dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+ bb->sr_enter_plus_exit_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(bb->urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns
+ && dc->bb_overrides.urgent_latency_ns) {
+ bb->urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ }
+
+ if ((int)(bb->dram_clock_change_latency_us * 1000)
+ != dc->bb_overrides.dram_clock_change_latency_ns
+ && dc->bb_overrides.dram_clock_change_latency_ns) {
+ bb->dram_clock_change_latency_us =
+ dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+ }
+
+ if ((int)(bb->dummy_pstate_latency_us * 1000)
+ != dc->bb_overrides.dummy_clock_change_latency_ns
+ && dc->bb_overrides.dummy_clock_change_latency_ns) {
+ bb->dummy_pstate_latency_us =
+ dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0;
+ }
+}
+
+static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *context,
+ bool fast_validate)
+{
+ bool out = false;
+
+ BW_VAL_TRACE_SETUP();
+
+ int vlevel = 0;
+ int pipe_split_from[MAX_PIPES];
+ int pipe_cnt = 0;
+ display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ BW_VAL_TRACE_COUNT();
+
+ out = dcn20_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, fast_validate);
+
+ if (pipe_cnt == 0)
+ goto validate_out;
+
+ if (!out)
+ goto validate_fail;
+
+ BW_VAL_TRACE_END_VOLTAGE_LEVEL();
+
+ if (fast_validate) {
+ BW_VAL_TRACE_SKIP(fast);
+ goto validate_out;
+ }
+
+ dcn20_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
+ dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+
+ BW_VAL_TRACE_END_WATERMARKS();
+
+ goto validate_out;
+
+validate_fail:
+ DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n",
+ dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states]));
+
+ BW_VAL_TRACE_SKIP(fail);
+ out = false;
+
+validate_out:
+ kfree(pipes);
+
+ BW_VAL_TRACE_FINISH();
+
+ return out;
+}
+
+bool dcn20_validate_bandwidth_fp(struct dc *dc,
+ struct dc_state *context,
+ bool fast_validate)
+{
+ bool voltage_supported = false;
+ bool full_pstate_supported = false;
+ bool dummy_pstate_supported = false;
+ double p_state_latency_us;
+
+ dc_assert_fp_enabled();
+
+ p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us;
+ context->bw_ctx.dml.soc.disable_dram_clock_change_vactive_support =
+ dc->debug.disable_dram_clock_change_vactive_support;
+ context->bw_ctx.dml.soc.allow_dram_clock_one_display_vactive =
+ dc->debug.enable_dram_clock_change_one_display_vactive;
+
+ /*Unsafe due to current pipe merge and split logic*/
+ ASSERT(context != dc->current_state);
+
+ if (fast_validate) {
+ return dcn20_validate_bandwidth_internal(dc, context, true);
+ }
+
+ // Best case, we support full UCLK switch latency
+ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false);
+ full_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support;
+
+ if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 ||
+ (voltage_supported && full_pstate_supported)) {
+ context->bw_ctx.bw.dcn.clk.p_state_change_support = full_pstate_supported;
+ goto restore_dml_state;
+ }
+
+ // Fallback: Try to only support G6 temperature read latency
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us;
+
+ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false);
+ dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support;
+
+ if (voltage_supported && (dummy_pstate_supported || !(context->stream_count))) {
+ context->bw_ctx.bw.dcn.clk.p_state_change_support = false;
+ goto restore_dml_state;
+ }
+
+ // ERROR: fallback is supposed to always work.
+ ASSERT(false);
+
+restore_dml_state:
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us;
+ return voltage_supported;
+}
+
+void dcn20_fpu_set_wm_ranges(int i,
+ struct pp_smu_wm_range_sets *ranges,
+ struct _vcs_dpi_soc_bounding_box_st *loaded_bb)
+{
+ dc_assert_fp_enabled();
+
+ ranges->reader_wm_sets[i].min_fill_clk_mhz = (i > 0) ? (loaded_bb->clock_limits[i - 1].dram_speed_mts / 16) + 1 : 0;
+ ranges->reader_wm_sets[i].max_fill_clk_mhz = loaded_bb->clock_limits[i].dram_speed_mts / 16;
+}
+
+void dcn20_fpu_adjust_dppclk(struct vba_vars_st *v,
+ int vlevel,
+ int max_mpc_comb,
+ int pipe_idx,
+ bool is_validating_bw)
+{
+ dc_assert_fp_enabled();
+
+ if (is_validating_bw)
+ v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] *= 2;
+ else
+ v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] /= 2;
+}
+
+int dcn21_populate_dml_pipes_from_context(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate)
+{
+ uint32_t pipe_cnt;
+ int i;
+
+ dc_assert_fp_enabled();
+
+ pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+
+ for (i = 0; i < pipe_cnt; i++) {
+
+ pipes[i].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
+ pipes[i].pipe.src.gpuvm = 1;
+ }
+
+ return pipe_cnt;
+}
+
+static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb)
+{
+ int i;
+
+ if (dc->bb_overrides.sr_exit_time_ns) {
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ dc->clk_mgr->bw_params->wm_table.entries[i].sr_exit_time_us =
+ dc->bb_overrides.sr_exit_time_ns / 1000.0;
+ }
+ }
+
+ if (dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ dc->clk_mgr->bw_params->wm_table.entries[i].sr_enter_plus_exit_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+ }
+ }
+
+ if (dc->bb_overrides.urgent_latency_ns) {
+ bb->urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ }
+
+ if (dc->bb_overrides.dram_clock_change_latency_ns) {
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ dc->clk_mgr->bw_params->wm_table.entries[i].pstate_latency_us =
+ dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+ }
+ }
+}
+
+static void calculate_wm_set_for_vlevel(int vlevel,
+ struct wm_range_table_entry *table_entry,
+ struct dcn_watermarks *wm_set,
+ struct display_mode_lib *dml,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{
+ double dram_clock_change_latency_cached = dml->soc.dram_clock_change_latency_us;
+
+ ASSERT(vlevel < dml->soc.num_states);
+ /* only pipe 0 is read for voltage and dcf/soc clocks */
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dml->soc.clock_limits[vlevel].dcfclk_mhz;
+ pipes[0].clks_cfg.socclk_mhz = dml->soc.clock_limits[vlevel].socclk_mhz;
+
+ dml->soc.dram_clock_change_latency_us = table_entry->pstate_latency_us;
+ dml->soc.sr_exit_time_us = table_entry->sr_exit_time_us;
+ dml->soc.sr_enter_plus_exit_time_us = table_entry->sr_enter_plus_exit_time_us;
+
+ wm_set->urgent_ns = get_wm_urgent(dml, pipes, pipe_cnt) * 1000;
+ wm_set->cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(dml, pipes, pipe_cnt) * 1000;
+ wm_set->cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(dml, pipes, pipe_cnt) * 1000;
+ wm_set->cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
+ wm_set->pte_meta_urgent_ns = get_wm_memory_trip(dml, pipes, pipe_cnt) * 1000;
+ wm_set->frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(dml, pipes, pipe_cnt) * 1000;
+ wm_set->frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(dml, pipes, pipe_cnt) * 1000;
+ wm_set->urgent_latency_ns = get_urgent_latency(dml, pipes, pipe_cnt) * 1000;
+ dml->soc.dram_clock_change_latency_us = dram_clock_change_latency_cached;
+}
+
+static void dcn21_calculate_wm(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *out_pipe_cnt,
+ int *pipe_split_from,
+ int vlevel_req,
+ bool fast_validate)
+{
+ int pipe_cnt, i, pipe_idx;
+ int vlevel, vlevel_max;
+ struct wm_range_table_entry *table_entry;
+ struct clk_bw_params *bw_params = dc->clk_mgr->bw_params;
+
+ ASSERT(bw_params);
+
+ patch_bounding_box(dc, &context->bw_ctx.dml.soc);
+
+ for (i = 0, pipe_idx = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
+ pipes[pipe_cnt].clks_cfg.dispclk_mhz = context->bw_ctx.dml.vba.RequiredDISPCLK[vlevel_req][context->bw_ctx.dml.vba.maxMpcComb];
+
+ if (pipe_split_from[i] < 0) {
+ pipes[pipe_cnt].clks_cfg.dppclk_mhz =
+ context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel_req][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx];
+ if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_idx] == pipe_idx)
+ pipes[pipe_cnt].pipe.dest.odm_combine =
+ context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel_req][pipe_idx];
+ else
+ pipes[pipe_cnt].pipe.dest.odm_combine = 0;
+ pipe_idx++;
+ } else {
+ pipes[pipe_cnt].clks_cfg.dppclk_mhz =
+ context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel_req][context->bw_ctx.dml.vba.maxMpcComb][pipe_split_from[i]];
+ if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_split_from[i]] == pipe_split_from[i])
+ pipes[pipe_cnt].pipe.dest.odm_combine =
+ context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel_req][pipe_split_from[i]];
+ else
+ pipes[pipe_cnt].pipe.dest.odm_combine = 0;
+ }
+ pipe_cnt++;
+ }
+
+ if (pipe_cnt != pipe_idx) {
+ if (dc->res_pool->funcs->populate_dml_pipes)
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc,
+ context, pipes, fast_validate);
+ else
+ pipe_cnt = dcn21_populate_dml_pipes_from_context(dc,
+ context, pipes, fast_validate);
+ }
+
+ *out_pipe_cnt = pipe_cnt;
+
+ vlevel_max = bw_params->clk_table.num_entries - 1;
+
+
+ /* WM Set D */
+ table_entry = &bw_params->wm_table.entries[WM_D];
+ if (table_entry->wm_type == WM_TYPE_RETRAINING)
+ vlevel = 0;
+ else
+ vlevel = vlevel_max;
+ calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.d,
+ &context->bw_ctx.dml, pipes, pipe_cnt);
+ /* WM Set C */
+ table_entry = &bw_params->wm_table.entries[WM_C];
+ vlevel = MIN(MAX(vlevel_req, 3), vlevel_max);
+ calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c,
+ &context->bw_ctx.dml, pipes, pipe_cnt);
+ /* WM Set B */
+ table_entry = &bw_params->wm_table.entries[WM_B];
+ vlevel = MIN(MAX(vlevel_req, 2), vlevel_max);
+ calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b,
+ &context->bw_ctx.dml, pipes, pipe_cnt);
+
+ /* WM Set A */
+ table_entry = &bw_params->wm_table.entries[WM_A];
+ vlevel = MIN(vlevel_req, vlevel_max);
+ calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.a,
+ &context->bw_ctx.dml, pipes, pipe_cnt);
+}
+
+bool dcn21_validate_bandwidth_fp(struct dc *dc,
+ struct dc_state *context,
+ bool fast_validate)
+{
+ bool out = false;
+
+ BW_VAL_TRACE_SETUP();
+
+ int vlevel = 0;
+ int pipe_split_from[MAX_PIPES];
+ int pipe_cnt = 0;
+ display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ BW_VAL_TRACE_COUNT();
+
+ dc_assert_fp_enabled();
+
+ /*Unsafe due to current pipe merge and split logic*/
+ ASSERT(context != dc->current_state);
+
+ out = dcn21_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, fast_validate);
+
+ if (pipe_cnt == 0)
+ goto validate_out;
+
+ if (!out)
+ goto validate_fail;
+
+ BW_VAL_TRACE_END_VOLTAGE_LEVEL();
+
+ if (fast_validate) {
+ BW_VAL_TRACE_SKIP(fast);
+ goto validate_out;
+ }
+
+ dcn21_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
+ dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+
+ BW_VAL_TRACE_END_WATERMARKS();
+
+ goto validate_out;
+
+validate_fail:
+ DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n",
+ dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states]));
+
+ BW_VAL_TRACE_SKIP(fail);
+ out = false;
+
+validate_out:
+ kfree(pipes);
+
+ BW_VAL_TRACE_FINISH();
+
+ return out;
+}
+
+static struct _vcs_dpi_voltage_scaling_st construct_low_pstate_lvl(struct clk_limit_table *clk_table, unsigned int high_voltage_lvl)
+{
+ struct _vcs_dpi_voltage_scaling_st low_pstate_lvl;
+ int i;
+
+ low_pstate_lvl.state = 1;
+ low_pstate_lvl.dcfclk_mhz = clk_table->entries[0].dcfclk_mhz;
+ low_pstate_lvl.fabricclk_mhz = clk_table->entries[0].fclk_mhz;
+ low_pstate_lvl.socclk_mhz = clk_table->entries[0].socclk_mhz;
+ low_pstate_lvl.dram_speed_mts = clk_table->entries[0].memclk_mhz * 2;
+
+ low_pstate_lvl.dispclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dispclk_mhz;
+ low_pstate_lvl.dppclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dppclk_mhz;
+ low_pstate_lvl.dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[high_voltage_lvl].dram_bw_per_chan_gbps;
+ low_pstate_lvl.dscclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dscclk_mhz;
+ low_pstate_lvl.dtbclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].dtbclk_mhz;
+ low_pstate_lvl.phyclk_d18_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_d18_mhz;
+ low_pstate_lvl.phyclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_mhz;
+
+ for (i = clk_table->num_entries; i > 1; i--)
+ clk_table->entries[i] = clk_table->entries[i-1];
+ clk_table->entries[1] = clk_table->entries[0];
+ clk_table->num_entries++;
+
+ return low_pstate_lvl;
+}
+
+void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits;
+ struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool);
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ unsigned int i, closest_clk_lvl = 0, k = 0;
+ int j;
+
+ dc_assert_fp_enabled();
+
+ dcn2_1_ip.max_num_otg = pool->base.res_cap->num_timing_generator;
+ dcn2_1_ip.max_num_dpp = pool->base.pipe_count;
+ dcn2_1_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(clk_table->num_entries);
+ /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */
+ memcpy(s, dcn2_1_soc.clock_limits, sizeof(dcn2_1_soc.clock_limits));
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn2_1_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn2_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+
+ /* clk_table[1] is reserved for min DF PState. skip here to fill in later. */
+ if (i == 1)
+ k++;
+
+ s[k].state = k;
+ s[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ s[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ s[k].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ s[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
+
+ s[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+ s[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+ s[k].dram_bw_per_chan_gbps =
+ dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ s[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ s[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ s[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ s[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+
+ k++;
+ }
+
+ memcpy(dcn2_1_soc.clock_limits, s, sizeof(dcn2_1_soc.clock_limits));
+
+ if (clk_table->num_entries) {
+ dcn2_1_soc.num_states = clk_table->num_entries + 1;
+ /* fill in min DF PState */
+ dcn2_1_soc.clock_limits[1] = construct_low_pstate_lvl(clk_table, closest_clk_lvl);
+ /* duplicate last level */
+ dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] = dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1];
+ dcn2_1_soc.clock_limits[dcn2_1_soc.num_states].state = dcn2_1_soc.num_states;
+ }
+
+ dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21);
+}
+
+void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params)
+{
+ dc_assert_fp_enabled();
+
+ bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY;
+ bw_params->wm_table.entries[WM_D].wm_inst = WM_D;
+ bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING;
+ bw_params->wm_table.entries[WM_D].valid = true;
+}
+
+void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc,
+ struct resource_context *res_ctx,
+ display_e2e_pipe_params_st *pipes)
+{
+ int pipe_cnt, i, j;
+ double max_calc_writeback_dispclk;
+ double writeback_dispclk;
+ struct writeback_st dout_wb;
+
+ dc_assert_fp_enabled();
+
+ for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream;
+
+ if (!stream)
+ continue;
+ max_calc_writeback_dispclk = 0;
+
+ /* Set writeback information */
+ pipes[pipe_cnt].dout.wb_enable = 0;
+ pipes[pipe_cnt].dout.num_active_wb = 0;
+ for (j = 0; j < stream->num_wb_info; j++) {
+ struct dc_writeback_info *wb_info = &stream->writeback_info[j];
+
+ if (wb_info->wb_enabled && wb_info->writeback_source_plane &&
+ (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) {
+ pipes[pipe_cnt].dout.wb_enable = 1;
+ pipes[pipe_cnt].dout.num_active_wb++;
+ dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ?
+ wb_info->dwb_params.cnv_params.crop_height :
+ wb_info->dwb_params.cnv_params.src_height;
+ dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ?
+ wb_info->dwb_params.cnv_params.crop_width :
+ wb_info->dwb_params.cnv_params.src_width;
+ dout_wb.wb_dst_width = wb_info->dwb_params.dest_width;
+ dout_wb.wb_dst_height = wb_info->dwb_params.dest_height;
+ dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps;
+ dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;
+ dout_wb.wb_htaps_chroma = wb_info->dwb_params.scaler_taps.h_taps_c;
+ dout_wb.wb_vtaps_chroma = wb_info->dwb_params.scaler_taps.v_taps_c;
+ dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_width /
+ (double)wb_info->dwb_params.dest_width :
+ (double)wb_info->dwb_params.cnv_params.src_width /
+ (double)wb_info->dwb_params.dest_width;
+ dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_height /
+ (double)wb_info->dwb_params.dest_height :
+ (double)wb_info->dwb_params.cnv_params.src_height /
+ (double)wb_info->dwb_params.dest_height;
+ if (wb_info->dwb_params.out_format == dwb_scaler_mode_yuv420) {
+ if (wb_info->dwb_params.output_depth == DWB_OUTPUT_PIXEL_DEPTH_8BPC)
+ dout_wb.wb_pixel_format = dm_420_8;
+ else
+ dout_wb.wb_pixel_format = dm_420_10;
+ } else
+ dout_wb.wb_pixel_format = dm_444_32;
+
+ /* Workaround for cases where multiple writebacks are connected to same plane
+ * In which case, need to compute worst case and set the associated writeback parameters
+ * This workaround is necessary due to DML computation assuming only 1 set of writeback
+ * parameters per pipe */
+ writeback_dispclk = CalculateWriteBackDISPCLK(
+ dout_wb.wb_pixel_format,
+ pipes[pipe_cnt].pipe.dest.pixel_rate_mhz,
+ dout_wb.wb_hratio,
+ dout_wb.wb_vratio,
+ dout_wb.wb_htaps_luma,
+ dout_wb.wb_vtaps_luma,
+ dout_wb.wb_htaps_chroma,
+ dout_wb.wb_vtaps_chroma,
+ dout_wb.wb_dst_width,
+ pipes[pipe_cnt].pipe.dest.htotal,
+ 2);
+
+ if (writeback_dispclk > max_calc_writeback_dispclk) {
+ max_calc_writeback_dispclk = writeback_dispclk;
+ pipes[pipe_cnt].dout.wb = dout_wb;
+ }
+ }
+ }
+
+ pipe_cnt++;
+ }
+
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
index 36f26126d574..c51badf7b68a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
@@ -23,6 +23,7 @@
* Authors: AMD
*
*/
+#include "core_types.h"
#ifndef __DCN20_FPU_H__
#define __DCN20_FPU_H__
@@ -31,4 +32,60 @@ void dcn20_populate_dml_writeback_from_context(struct dc *dc,
struct resource_context *res_ctx,
display_e2e_pipe_params_st *pipes);
+void dcn20_fpu_set_wb_arb_params(struct mcif_arb_params *wb_arb_params,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt, int i);
+void dcn20_calculate_dlg_params(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+int dcn20_populate_dml_pipes_from_context(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate);
+void dcn20_calculate_wm(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *out_pipe_cnt,
+ int *pipe_split_from,
+ int vlevel,
+ bool fast_validate);
+void dcn20_cap_soc_clocks(struct _vcs_dpi_soc_bounding_box_st *bb,
+ struct pp_smu_nv_clock_table max_clocks);
+void dcn20_update_bounding_box(struct dc *dc,
+ struct _vcs_dpi_soc_bounding_box_st *bb,
+ struct pp_smu_nv_clock_table *max_clocks,
+ unsigned int *uclk_states,
+ unsigned int num_states);
+void dcn20_patch_bounding_box(struct dc *dc,
+ struct _vcs_dpi_soc_bounding_box_st *bb);
+bool dcn20_validate_bandwidth_fp(struct dc *dc,
+ struct dc_state *context,
+ bool fast_validate);
+void dcn20_fpu_set_wm_ranges(int i,
+ struct pp_smu_wm_range_sets *ranges,
+ struct _vcs_dpi_soc_bounding_box_st *loaded_bb);
+void dcn20_fpu_adjust_dppclk(struct vba_vars_st *v,
+ int vlevel,
+ int max_mpc_comb,
+ int pipe_idx,
+ bool is_validating_bw);
+
+int dcn21_populate_dml_pipes_from_context(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate);
+bool dcn21_validate_bandwidth_fp(struct dc *dc,
+ struct dc_state *context,
+ bool fast_validate);
+void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+
+void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params);
+
+void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc,
+ struct resource_context *res_ctx,
+ display_e2e_pipe_params_st *pipes);
+
#endif /* __DCN20_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
index 63bbdf8b8678..edd098c7eb92 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
@@ -4478,17 +4478,17 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
locals->EffectiveLBLatencyHidingSourceLinesLuma),
locals->SwathHeightYPerState[i][j][k]);
- locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(
- locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *
- locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],
- locals->EffectiveLBLatencyHidingSourceLinesChroma),
- locals->SwathHeightCPerState[i][j][k]);
if (locals->BytePerPixelInDETC[k] == 0) {
locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]);
} else {
+ locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(
+ locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *
+ locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],
+ locals->EffectiveLBLatencyHidingSourceLinesChroma),
+ locals->SwathHeightCPerState[i][j][k]);
locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min(
locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
index 246071c72f6b..548cdef8a8ad 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
@@ -1576,8 +1576,6 @@ void dml20_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
index 015e7f2c0b16..0fc9f3e3ffae 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
@@ -1577,8 +1577,6 @@ void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
index 8a7485e21d53..1d84ae50311d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
@@ -806,10 +806,12 @@ static bool CalculatePrefetchSchedule(
if (myPipe->SourceScan == dm_horz) {
*swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY;
- *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;
+ if (myPipe->BlockWidth256BytesC > 0)
+ *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;
} else {
*swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY;
- *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;
+ if (myPipe->BlockWidth256BytesC > 0)
+ *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;
}
prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto;
@@ -2634,7 +2636,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
&mode_lib->vba.SrcActiveDrainRate,
&mode_lib->vba.TInitXFill,
&mode_lib->vba.TslvChk);
- locals->XFCRemoteSurfaceFlipLatency[k] =
+ locals->XFCRemoteSurfaceFlipLatency[k] =
dml_floor(
mode_lib->vba.XFCRemoteSurfaceFlipDelay
/ (mode_lib->vba.HTotal[k]
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
index 8bc27de4c104..618f4b682ab1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
@@ -1688,8 +1688,6 @@ void dml21_rq_dlg_get_dlg_reg(
mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
new file mode 100644
index 000000000000..e1e92daba668
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
@@ -0,0 +1,743 @@
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "resource.h"
+#include "clk_mgr.h"
+#include "reg_helper.h"
+#include "dcn_calc_math.h"
+#include "dcn20/dcn20_resource.h"
+#include "dcn30/dcn30_resource.h"
+
+#include "clk_mgr/dcn30/dcn30_smu11_driver_if.h"
+#include "display_mode_vba_30.h"
+#include "dcn30_fpu.h"
+
+#define REG(reg)\
+ optc1->tg_regs->reg
+
+#define CTX \
+ optc1->base.ctx
+
+#undef FN
+#define FN(reg_name, field_name) \
+ optc1->tg_shift->field_name, optc1->tg_mask->field_name
+
+
+struct _vcs_dpi_ip_params_st dcn3_0_ip = {
+ .use_min_dcfclk = 0,
+ .clamp_min_dcfclk = 0,
+ .odm_capable = 1,
+ .gpuvm_enable = 0,
+ .hostvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_max_page_table_levels = 4,
+ .hostvm_cached_page_table_levels = 0,
+ .pte_group_size_bytes = 2048,
+ .num_dsc = 6,
+ .rob_buffer_size_kbytes = 184,
+ .det_buffer_size_kbytes = 184,
+ .dpte_buffer_size_in_pte_reqs_luma = 84,
+ .pde_proc_buffer_size_64k_reqs = 48,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .max_page_table_levels = 2,
+ .pte_chunk_size_kbytes = 2, // ?
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 8,
+ .line_buffer_size_bits = 789504,
+ .is_line_buffer_bpp_fixed = 0, // ?
+ .line_buffer_fixed_bpp = 0, // ?
+ .dcc_supported = true,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .writeback_line_buffer_buffer_size = 0,
+ .max_line_buffer_lines = 12,
+ .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .writeback_chroma_line_buffer_width_pixels = 4,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .writeback_line_buffer_luma_buffer_size = 0,
+ .writeback_line_buffer_chroma_buffer_size = 14643,
+ .cursor_buffer_size = 8,
+ .cursor_chunk_size = 2,
+ .max_num_otg = 6,
+ .max_num_dpp = 6,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.11,
+ .min_vblank_lines = 32,
+ .dppclk_delay_subtotal = 46,
+ .dynamic_metadata_vm_enabled = true,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dcfclk_cstate_latency = 5.2, // SRExitTime
+ .max_inter_dcn_tile_repeaters = 8,
+ .max_num_hdmi_frl_outputs = 1,
+ .odm_combine_4to1_supported = true,
+
+ .xfc_supported = false,
+ .xfc_fill_bw_overhead_percent = 10.0,
+ .xfc_fill_constant_bytes = 0,
+ .gfx7_compat_tiling_supported = 0,
+ .number_of_cursors = 1,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 562.0,
+ .dppclk_mhz = 300.0,
+ .phyclk_mhz = 300.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 405.6,
+ },
+ },
+
+ .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
+ .num_states = 1,
+ .sr_exit_time_us = 15.5,
+ .sr_enter_plus_exit_time_us = 20,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .max_request_size_bytes = 256,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 191,
+ .urgent_out_of_order_return_per_channel_bytes = 4096,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 404,
+ .dummy_pstate_latency_us = 5,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3650,
+ .xfc_bus_transport_time_us = 20, // ?
+ .xfc_xbuf_latency_tolerance_us = 4, // ?
+ .use_urgent_burst_bw = 1, // ?
+ .do_urgent_latency_adjustment = true,
+ .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+};
+
+
+void optc3_fpu_set_vrr_m_const(struct timing_generator *optc,
+ double vtotal_avg)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ double vtotal_min, vtotal_max;
+ double ratio, modulo, phase;
+ uint32_t vblank_start;
+ uint32_t v_total_mask_value = 0;
+
+ dc_assert_fp_enabled();
+
+ /* Compute VTOTAL_MIN and VTOTAL_MAX, so that
+ * VOTAL_MAX - VTOTAL_MIN = 1
+ */
+ v_total_mask_value = 16;
+ vtotal_min = dcn_bw_floor(vtotal_avg);
+ vtotal_max = dcn_bw_ceil(vtotal_avg);
+
+ /* Check that bottom VBLANK is at least 2 lines tall when running with
+ * VTOTAL_MIN. Note that VTOTAL registers are defined as 'total number
+ * of lines in a frame - 1'.
+ */
+ REG_GET(OTG_V_BLANK_START_END, OTG_V_BLANK_START,
+ &vblank_start);
+ ASSERT(vtotal_min >= vblank_start + 1);
+
+ /* Special case where the average frame rate can be achieved
+ * without using the DTO
+ */
+ if (vtotal_min == vtotal_max) {
+ REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min);
+
+ optc->funcs->set_vtotal_min_max(optc, 0, 0);
+ REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0);
+ REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0);
+ REG_UPDATE_3(OTG_V_TOTAL_CONTROL,
+ OTG_V_TOTAL_MIN_SEL, 0,
+ OTG_V_TOTAL_MAX_SEL, 0,
+ OTG_SET_V_TOTAL_MIN_MASK_EN, 0);
+ return;
+ }
+
+ ratio = vtotal_max - vtotal_avg;
+ modulo = 65536.0 * 65536.0 - 1.0; /* 2^32 - 1 */
+ phase = ratio * modulo;
+
+ /* Special cases where the DTO phase gets rounded to 0 or
+ * to DTO modulo
+ */
+ if (phase <= 0 || phase >= modulo) {
+ REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL,
+ phase <= 0 ?
+ (uint32_t)vtotal_max : (uint32_t)vtotal_min);
+ REG_SET(OTG_V_TOTAL_MIN, 0, OTG_V_TOTAL_MIN, 0);
+ REG_SET(OTG_V_TOTAL_MAX, 0, OTG_V_TOTAL_MAX, 0);
+ REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0);
+ REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0);
+ REG_UPDATE_3(OTG_V_TOTAL_CONTROL,
+ OTG_V_TOTAL_MIN_SEL, 0,
+ OTG_V_TOTAL_MAX_SEL, 0,
+ OTG_SET_V_TOTAL_MIN_MASK_EN, 0);
+ return;
+ }
+ REG_UPDATE_6(OTG_V_TOTAL_CONTROL,
+ OTG_V_TOTAL_MIN_SEL, 1,
+ OTG_V_TOTAL_MAX_SEL, 1,
+ OTG_SET_V_TOTAL_MIN_MASK_EN, 1,
+ OTG_SET_V_TOTAL_MIN_MASK, v_total_mask_value,
+ OTG_VTOTAL_MID_REPLACING_MIN_EN, 0,
+ OTG_VTOTAL_MID_REPLACING_MAX_EN, 0);
+ REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min);
+ optc->funcs->set_vtotal_min_max(optc, vtotal_min, vtotal_max);
+ REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, (uint32_t)phase);
+ REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, (uint32_t)modulo);
+}
+
+void dcn30_fpu_populate_dml_writeback_from_context(
+ struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
+{
+ int pipe_cnt, i, j;
+ double max_calc_writeback_dispclk;
+ double writeback_dispclk;
+ struct writeback_st dout_wb;
+
+ dc_assert_fp_enabled();
+
+ for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream;
+
+ if (!stream)
+ continue;
+ max_calc_writeback_dispclk = 0;
+
+ /* Set writeback information */
+ pipes[pipe_cnt].dout.wb_enable = 0;
+ pipes[pipe_cnt].dout.num_active_wb = 0;
+ for (j = 0; j < stream->num_wb_info; j++) {
+ struct dc_writeback_info *wb_info = &stream->writeback_info[j];
+
+ if (wb_info->wb_enabled && wb_info->writeback_source_plane &&
+ (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) {
+ pipes[pipe_cnt].dout.wb_enable = 1;
+ pipes[pipe_cnt].dout.num_active_wb++;
+ dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ?
+ wb_info->dwb_params.cnv_params.crop_height :
+ wb_info->dwb_params.cnv_params.src_height;
+ dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ?
+ wb_info->dwb_params.cnv_params.crop_width :
+ wb_info->dwb_params.cnv_params.src_width;
+ dout_wb.wb_dst_width = wb_info->dwb_params.dest_width;
+ dout_wb.wb_dst_height = wb_info->dwb_params.dest_height;
+
+ /* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */
+ if (dc->dml.ip.writeback_max_hscl_taps > 1) {
+ dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps;
+ dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;
+ } else {
+ dout_wb.wb_htaps_luma = 1;
+ dout_wb.wb_vtaps_luma = 1;
+ }
+ dout_wb.wb_htaps_chroma = 0;
+ dout_wb.wb_vtaps_chroma = 0;
+ dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_width /
+ (double)wb_info->dwb_params.dest_width :
+ (double)wb_info->dwb_params.cnv_params.src_width /
+ (double)wb_info->dwb_params.dest_width;
+ dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_height /
+ (double)wb_info->dwb_params.dest_height :
+ (double)wb_info->dwb_params.cnv_params.src_height /
+ (double)wb_info->dwb_params.dest_height;
+ if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB ||
+ wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA)
+ dout_wb.wb_pixel_format = dm_444_64;
+ else
+ dout_wb.wb_pixel_format = dm_444_32;
+
+ /* Workaround for cases where multiple writebacks are connected to same plane
+ * In which case, need to compute worst case and set the associated writeback parameters
+ * This workaround is necessary due to DML computation assuming only 1 set of writeback
+ * parameters per pipe
+ */
+ writeback_dispclk = dml30_CalculateWriteBackDISPCLK(
+ dout_wb.wb_pixel_format,
+ pipes[pipe_cnt].pipe.dest.pixel_rate_mhz,
+ dout_wb.wb_hratio,
+ dout_wb.wb_vratio,
+ dout_wb.wb_htaps_luma,
+ dout_wb.wb_vtaps_luma,
+ dout_wb.wb_src_width,
+ dout_wb.wb_dst_width,
+ pipes[pipe_cnt].pipe.dest.htotal,
+ dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size);
+
+ if (writeback_dispclk > max_calc_writeback_dispclk) {
+ max_calc_writeback_dispclk = writeback_dispclk;
+ pipes[pipe_cnt].dout.wb = dout_wb;
+ }
+ }
+ }
+
+ pipe_cnt++;
+ }
+}
+
+void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params,
+ struct display_mode_lib *dml,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int cur_pipe)
+{
+ int i;
+
+ dc_assert_fp_enabled();
+
+ for (i = 0; i < ARRAY_SIZE(wb_arb_params->cli_watermark); i++) {
+ wb_arb_params->cli_watermark[i] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000;
+ wb_arb_params->pstate_watermark[i] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
+ }
+
+ wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[cur_pipe] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */
+}
+
+void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
+{
+
+ dc_assert_fp_enabled();
+
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
+ }
+}
+
+void dcn30_fpu_calculate_wm_and_dlg(
+ struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+ int i, pipe_idx;
+ double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];
+ bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
+
+ dc_assert_fp_enabled();
+
+ if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
+ dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
+
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+
+ /* Set B:
+ * DCFCLK: 1GHz or min required above 1GHz
+ * FCLK/UCLK: Max
+ */
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
+ if (vlevel == 0) {
+ pipes[0].clks_cfg.voltage = 1;
+ pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz;
+ }
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
+ }
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+
+ /* Set D:
+ * DCFCLK: Min Required
+ * FCLK(proportional to UCLK): 1GHz or Max
+ * MALL stutter, sr_enter_exit = 4, sr_exit = 2us
+ */
+ /*
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
+ }
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ */
+
+ /* Set C:
+ * DCFCLK: Min Required
+ * FCLK(proportional to UCLK): 1GHz or Max
+ * pstate latency overridden to 5us
+ */
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
+ unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+ unsigned int min_dram_speed_mts_margin = 160;
+
+ if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported)
+ min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16;
+
+ /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */
+ for (i = 3; i > 0; i--)
+ if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts)
+ break;
+
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us;
+
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
+ }
+
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ if (!pstate_en) {
+ /* The only difference between A and C is p-state latency, if p-state is not supported we want to
+ * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark
+ */
+ context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0;
+ } else {
+ /* Set A:
+ * DCFCLK: Min Required
+ * FCLK(proportional to UCLK): 1GHz or Max
+ *
+ * Set A calculated last so that following calculations are based on Set A
+ */
+ dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ }
+
+ context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
+
+ /* Make set D = set A until set D is enabled */
+ context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+ if (dc->config.forced_clocks) {
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
+ }
+ if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
+ if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
+
+ pipe_idx++;
+ }
+
+ DC_FP_START();
+ dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+ DC_FP_END();
+
+ if (!pstate_en)
+ /* Restore full p-state latency */
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching)
+ dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context);
+}
+
+void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc)
+{
+ dc_assert_fp_enabled();
+
+ if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+ dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+}
+
+void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk)
+{
+ dc_assert_fp_enabled();
+
+ if (!dcn30_bb_max_clk->max_dcfclk_mhz)
+ dcn30_bb_max_clk->max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz;
+ if (!dcn30_bb_max_clk->max_dispclk_mhz)
+ dcn30_bb_max_clk->max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz;
+ if (!dcn30_bb_max_clk->max_dppclk_mhz)
+ dcn30_bb_max_clk->max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz;
+ if (!dcn30_bb_max_clk->max_phyclk_mhz)
+ dcn30_bb_max_clk->max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz;
+}
+
+void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+ unsigned int *optimal_dcfclk,
+ unsigned int *optimal_fclk)
+{
+ double bw_from_dram, bw_from_dram1, bw_from_dram2;
+
+ dc_assert_fp_enabled();
+
+ bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans *
+ dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100);
+ bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans *
+ dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100);
+
+ bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
+
+ if (optimal_fclk)
+ *optimal_fclk = bw_from_dram /
+ (dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
+
+ if (optimal_dcfclk)
+ *optimal_dcfclk = bw_from_dram /
+ (dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100));
+}
+
+void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
+ struct clk_bw_params *bw_params,
+ struct dc_bounding_box_max_clk *dcn30_bb_max_clk,
+ unsigned int *dcfclk_mhz,
+ unsigned int *dram_speed_mts)
+{
+ unsigned int i;
+
+ dc_assert_fp_enabled();
+
+ dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+ for (i = 0; i < dcn3_0_soc.num_states; i++) {
+ dcn3_0_soc.clock_limits[i].state = i;
+ dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+ dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+ dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+ /* Fill all states with max values of all other clocks */
+ dcn3_0_soc.clock_limits[i].dispclk_mhz = dcn30_bb_max_clk->max_dispclk_mhz;
+ dcn3_0_soc.clock_limits[i].dppclk_mhz = dcn30_bb_max_clk->max_dppclk_mhz;
+ dcn3_0_soc.clock_limits[i].phyclk_mhz = dcn30_bb_max_clk->max_phyclk_mhz;
+ dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz;
+ /* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */
+ /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
+ dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz;
+ dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz;
+ dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz;
+ }
+ /* re-init DML with updated bb */
+ dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
+ if (dc->current_state)
+ dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
+
+}
+
+/**
+ * Finds dummy_latency_index when MCLK switching using firmware based
+ * vblank stretch is enabled. This function will iterate through the
+ * table of dummy pstate latencies until the lowest value that allows
+ * dm_allow_self_refresh_and_mclk_switch to happen is found
+ */
+int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ const int max_latency_table_entries = 4;
+ int dummy_latency_index = 0;
+
+ dc_assert_fp_enabled();
+
+ while (dummy_latency_index < max_latency_table_entries) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+ dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
+
+ if (context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank ==
+ dm_allow_self_refresh_and_mclk_switch)
+ break;
+
+ dummy_latency_index++;
+ }
+
+ if (dummy_latency_index == max_latency_table_entries) {
+ ASSERT(dummy_latency_index != max_latency_table_entries);
+ /* If the execution gets here, it means dummy p_states are
+ * not possible. This should never happen and would mean
+ * something is severely wrong.
+ * Here we reset dummy_latency_index to 3, because it is
+ * better to have underflows than system crashes.
+ */
+ dummy_latency_index = 3;
+ }
+
+ return dummy_latency_index;
+}
+
+void dcn3_fpu_build_wm_range_table(struct clk_mgr *base)
+{
+ /* defaults */
+ double pstate_latency_us = base->ctx->dc->dml.soc.dram_clock_change_latency_us;
+ double sr_exit_time_us = base->ctx->dc->dml.soc.sr_exit_time_us;
+ double sr_enter_plus_exit_time_us = base->ctx->dc->dml.soc.sr_enter_plus_exit_time_us;
+ uint16_t min_uclk_mhz = base->bw_params->clk_table.entries[0].memclk_mhz;
+
+ dc_assert_fp_enabled();
+
+ /* Set A - Normal - default values*/
+ base->bw_params->wm_table.nv_entries[WM_A].valid = true;
+ base->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us;
+ base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us;
+ base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
+
+ /* Set B - Performance - higher minimum clocks */
+// base->bw_params->wm_table.nv_entries[WM_B].valid = true;
+// base->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us;
+// base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us;
+// base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF;
+
+ /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */
+ base->bw_params->wm_table.nv_entries[WM_C].valid = true;
+ base->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0;
+ base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
+ base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
+ base->bw_params->dummy_pstate_table[0].dram_speed_mts = 1600;
+ base->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
+ base->bw_params->dummy_pstate_table[1].dram_speed_mts = 8000;
+ base->bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
+ base->bw_params->dummy_pstate_table[2].dram_speed_mts = 10000;
+ base->bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8;
+ base->bw_params->dummy_pstate_table[3].dram_speed_mts = 16000;
+ base->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5;
+
+ /* Set D - MALL - SR enter and exit times adjusted for MALL */
+ base->bw_params->wm_table.nv_entries[WM_D].valid = true;
+ base->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us;
+ base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2;
+ base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
+}
+
+void patch_dcn30_soc_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *dcn3_0_ip)
+{
+ dc_assert_fp_enabled();
+
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info) {
+ struct bp_soc_bb_info bb_info = {0};
+
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
+ if (bb_info.dram_clock_change_latency_100ns > 0)
+ dcn3_0_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10;
+
+ if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+ dcn3_0_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+ if (bb_info.dram_sr_exit_latency_100ns > 0)
+ dcn3_0_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10;
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
new file mode 100644
index 000000000000..cab864095ce7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN30_FPU_H__
+#define __DCN30_FPU_H__
+
+#include "core_types.h"
+#include "dcn20/dcn20_optc.h"
+
+void optc3_fpu_set_vrr_m_const(struct timing_generator *optc,
+ double vtotal_avg);
+
+void dcn30_fpu_populate_dml_writeback_from_context(
+ struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes);
+
+void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params,
+ struct display_mode_lib *dml,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int cur_pipe);
+
+void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
+
+void dcn30_fpu_calculate_wm_and_dlg(
+ struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+
+void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc);
+
+void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk);
+
+void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+ unsigned int *optimal_dcfclk,
+ unsigned int *optimal_fclk);
+
+void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
+ struct clk_bw_params *bw_params,
+ struct dc_bounding_box_max_clk *dcn30_bb_max_clk,
+ unsigned int *dcfclk_mhz,
+ unsigned int *dram_speed_mts);
+
+int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+
+void dcn3_fpu_build_wm_range_table(struct clk_mgr *base);
+
+void patch_dcn30_soc_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *dcn3_0_ip);
+
+#endif /* __DCN30_FPU_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index f47d82da115c..479e2c1a1301 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -396,64 +396,10 @@ static void CalculateUrgentBurstFactor(
static void UseMinimumDCFCLK(
struct display_mode_lib *mode_lib,
- int MaxInterDCNTileRepeaters,
+ struct vba_vars_st *v,
int MaxPrefetchMode,
- double FinalDRAMClockChangeLatency,
- double SREnterPlusExitTime,
- int ReturnBusWidth,
- int RoundTripPingLatencyCycles,
- int ReorderingBytes,
- int PixelChunkSizeInKByte,
- int MetaChunkSize,
- bool GPUVMEnable,
- int GPUVMMaxPageTableLevels,
- bool HostVMEnable,
- int NumberOfActivePlanes,
- double HostVMMinPageSize,
- int HostVMMaxNonCachedPageTableLevels,
- bool DynamicMetadataVMEnabled,
- enum immediate_flip_requirement ImmediateFlipRequirement,
- bool ProgressiveToInterlaceUnitInOPP,
- double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
- double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
- double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
- double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
- int VTotal[],
- int VActive[],
- int DynamicMetadataTransmittedBytes[],
- int DynamicMetadataLinesBeforeActiveRequired[],
- bool Interlace[],
- double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
- double RequiredDISPCLK[][2],
- double UrgLatency[],
- unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
- double ProjectedDCFCLKDeepSleep[][2],
- double MaximumVStartup[][2][DC__NUM_DPP__MAX],
- double TotalVActivePixelBandwidth[][2],
- double TotalVActiveCursorBandwidth[][2],
- double TotalMetaRowBandwidth[][2],
- double TotalDPTERowBandwidth[][2],
- unsigned int TotalNumberOfActiveDPP[][2],
- unsigned int TotalNumberOfDCCActiveDPP[][2],
- int dpte_group_bytes[],
- double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
- double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
- int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int BytePerPixelY[],
- int BytePerPixelC[],
- int HTotal[],
- double PixelClock[],
- double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
- double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
- double MetaRowBytes[][2][DC__NUM_DPP__MAX],
- bool DynamicMetadataEnable[],
- double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
- double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
- double ReadBandwidthLuma[],
- double ReadBandwidthChroma[],
- double DCFCLKPerState[],
- double DCFCLKState[][2]);
+ int ReorderingBytes);
+
static void CalculatePixelDeliveryTimes(
unsigned int NumberOfActivePlanes,
double VRatio[],
@@ -712,18 +658,6 @@ static double CalculateUrgentLatency(
double UrgentLatencyAdjustmentFabricClockReference,
double FabricClockSingle);
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC);
-
void dml30_recalculate(struct display_mode_lib *mode_lib)
{
ModeSupportAndSystemConfiguration(mode_lib);
@@ -2095,7 +2029,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
DTRACE(" return_bus_bw = %f", v->ReturnBW);
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
@@ -3049,40 +2983,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
{
//Maximum Bandwidth Used
- double TotalWRBandwidth = 0;
- double MaxPerPlaneVActiveWRBandwidth = 0;
- double WRBandwidth = 0;
- double MaxUsedBW = 0;
- for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- if (v->WritebackEnable[k] == true
- && v->WritebackPixelFormat[k] == dm_444_32) {
- WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
- / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
- } else if (v->WritebackEnable[k] == true) {
- WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
- / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
- }
- TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
- MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
- }
-
v->TotalDataReadBandwidth = 0;
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
+ v->ReadBandwidthPlaneLuma[k]
+ v->ReadBandwidthPlaneChroma[k];
}
-
- {
- double MaxPerPlaneVActiveRDBandwidth = 0;
- for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- MaxPerPlaneVActiveRDBandwidth = dml_max(MaxPerPlaneVActiveRDBandwidth,
- v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
-
- }
- }
-
- MaxUsedBW = MaxTotalRDBandwidth + TotalWRBandwidth;
}
// VStartup Margin
@@ -3165,7 +3071,7 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
mode_lib->vba.SourcePixelFormat[k],
mode_lib->vba.SurfaceTiling[k],
&BytePerPixY[k],
@@ -3218,7 +3124,7 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
&dummysinglestring);
}
-static bool CalculateBytePerPixelAnd256BBlockSizes(
+void dml30_CalculateBytePerPixelAnd256BBlockSizes(
enum source_format_class SourcePixelFormat,
enum dm_swizzle_mode SurfaceTiling,
unsigned int *BytePerPixelY,
@@ -3305,7 +3211,6 @@ static bool CalculateBytePerPixelAnd256BBlockSizes(
*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
}
- return true;
}
static double CalculateTWait(
@@ -3709,7 +3614,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/*Bandwidth Support Check*/
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
@@ -4733,66 +4638,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
if (v->UseMinimumRequiredDCFCLK == true) {
- UseMinimumDCFCLK(
- mode_lib,
- v->MaxInterDCNTileRepeaters,
- MaxPrefetchMode,
- v->FinalDRAMClockChangeLatency,
- v->SREnterPlusExitTime,
- v->ReturnBusWidth,
- v->RoundTripPingLatencyCycles,
- ReorderingBytes,
- v->PixelChunkSizeInKByte,
- v->MetaChunkSize,
- v->GPUVMEnable,
- v->GPUVMMaxPageTableLevels,
- v->HostVMEnable,
- v->NumberOfActivePlanes,
- v->HostVMMinPageSize,
- v->HostVMMaxNonCachedPageTableLevels,
- v->DynamicMetadataVMEnabled,
- v->ImmediateFlipRequirement[0],
- v->ProgressiveToInterlaceUnitInOPP,
- v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
- v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
- v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
- v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
- v->VTotal,
- v->VActive,
- v->DynamicMetadataTransmittedBytes,
- v->DynamicMetadataLinesBeforeActiveRequired,
- v->Interlace,
- v->RequiredDPPCLK,
- v->RequiredDISPCLK,
- v->UrgLatency,
- v->NoOfDPP,
- v->ProjectedDCFCLKDeepSleep,
- v->MaximumVStartup,
- v->TotalVActivePixelBandwidth,
- v->TotalVActiveCursorBandwidth,
- v->TotalMetaRowBandwidth,
- v->TotalDPTERowBandwidth,
- v->TotalNumberOfActiveDPP,
- v->TotalNumberOfDCCActiveDPP,
- v->dpte_group_bytes,
- v->PrefetchLinesY,
- v->PrefetchLinesC,
- v->swath_width_luma_ub_all_states,
- v->swath_width_chroma_ub_all_states,
- v->BytePerPixelY,
- v->BytePerPixelC,
- v->HTotal,
- v->PixelClock,
- v->PDEAndMetaPTEBytesPerFrame,
- v->DPTEBytesPerRow,
- v->MetaRowBytes,
- v->DynamicMetadataEnable,
- v->VActivePixelBandwidth,
- v->VActiveCursorBandwidth,
- v->ReadBandwidthLuma,
- v->ReadBandwidthChroma,
- v->DCFCLKPerState,
- v->DCFCLKState);
+ UseMinimumDCFCLK(mode_lib, v, MaxPrefetchMode, ReorderingBytes);
if (v->ClampMinDCFCLK) {
/* Clamp calculated values to actual minimum */
@@ -6476,10 +6322,6 @@ static void CalculateSwathWidth(
for (k = 0; k < NumberOfActivePlanes; ++k) {
enum odm_combine_mode MainPlaneODMCombine = 0;
- surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
- surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
- surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
- surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
if (SourceScan[k] != dm_vert) {
SwathWidthSingleDPPY[k] = ViewportWidth[k];
@@ -6519,8 +6361,6 @@ static void CalculateSwathWidth(
surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
- surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
- surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
if (SourceScan[k] != dm_vert) {
MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
@@ -6528,6 +6368,7 @@ static void CalculateSwathWidth(
swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
if (BytePerPixC[k] > 0) {
+ surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
} else {
@@ -6539,6 +6380,7 @@ static void CalculateSwathWidth(
swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
if (BytePerPixC[k] > 0) {
+ surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
} else {
@@ -6651,77 +6493,21 @@ static double CalculateUrgentLatency(
return ret;
}
-
-static void UseMinimumDCFCLK(
+static noinline_for_stack void UseMinimumDCFCLK(
struct display_mode_lib *mode_lib,
- int MaxInterDCNTileRepeaters,
+ struct vba_vars_st *v,
int MaxPrefetchMode,
- double FinalDRAMClockChangeLatency,
- double SREnterPlusExitTime,
- int ReturnBusWidth,
- int RoundTripPingLatencyCycles,
- int ReorderingBytes,
- int PixelChunkSizeInKByte,
- int MetaChunkSize,
- bool GPUVMEnable,
- int GPUVMMaxPageTableLevels,
- bool HostVMEnable,
- int NumberOfActivePlanes,
- double HostVMMinPageSize,
- int HostVMMaxNonCachedPageTableLevels,
- bool DynamicMetadataVMEnabled,
- enum immediate_flip_requirement ImmediateFlipRequirement,
- bool ProgressiveToInterlaceUnitInOPP,
- double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
- double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
- double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
- double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
- int VTotal[],
- int VActive[],
- int DynamicMetadataTransmittedBytes[],
- int DynamicMetadataLinesBeforeActiveRequired[],
- bool Interlace[],
- double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
- double RequiredDISPCLK[][2],
- double UrgLatency[],
- unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
- double ProjectedDCFCLKDeepSleep[][2],
- double MaximumVStartup[][2][DC__NUM_DPP__MAX],
- double TotalVActivePixelBandwidth[][2],
- double TotalVActiveCursorBandwidth[][2],
- double TotalMetaRowBandwidth[][2],
- double TotalDPTERowBandwidth[][2],
- unsigned int TotalNumberOfActiveDPP[][2],
- unsigned int TotalNumberOfDCCActiveDPP[][2],
- int dpte_group_bytes[],
- double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
- double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
- int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int BytePerPixelY[],
- int BytePerPixelC[],
- int HTotal[],
- double PixelClock[],
- double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
- double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
- double MetaRowBytes[][2][DC__NUM_DPP__MAX],
- bool DynamicMetadataEnable[],
- double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
- double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
- double ReadBandwidthLuma[],
- double ReadBandwidthChroma[],
- double DCFCLKPerState[],
- double DCFCLKState[][2])
+ int ReorderingBytes)
{
double NormalEfficiency = 0;
double PTEEfficiency = 0;
double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2] = { { 0 } };
unsigned int i, j, k;
- NormalEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
- : PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
- PTEEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
- / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
+ NormalEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
+ : v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
+ PTEEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
+ / v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
for (i = 0; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX] = { 0 };
@@ -6739,58 +6525,58 @@ static void UseMinimumDCFCLK(
double MinimumTvmPlus2Tr0 = 0;
TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
- + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
+ + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
}
- for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
- NoOfDPPState[k] = NoOfDPP[i][j][k];
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
+ NoOfDPPState[k] = v->NoOfDPP[i][j][k];
}
- MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
- NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
- DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
- TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
- DCFCLKRequiredForAverageBandwidth = dml_max3(ProjectedDCFCLKDeepSleep[i][j],
- (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth / (MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
- (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / ReturnBusWidth);
-
- ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, TotalNumberOfDCCActiveDPP[i][j],
- MetaChunkSize, GPUVMEnable, HostVMEnable, NumberOfActivePlanes, NoOfDPPState, dpte_group_bytes,
- PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
- HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
- ExtraLatencyCycles = RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
+ NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
+ DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
+ DCFCLKRequiredForAverageBandwidth = dml_max3(v->ProjectedDCFCLKDeepSleep[i][j],
+ (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth / (v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
+ (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / v->ReturnBusWidth);
+
+ ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, v->TotalNumberOfActiveDPP[i][j], v->PixelChunkSizeInKByte, v->TotalNumberOfDCCActiveDPP[i][j],
+ v->MetaChunkSize, v->GPUVMEnable, v->HostVMEnable, v->NumberOfActivePlanes, NoOfDPPState, v->dpte_group_bytes,
+ v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
+ v->HostVMMinPageSize, v->HostVMMaxNonCachedPageTableLevels);
+ ExtraLatencyCycles = v->RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
double DCFCLKCyclesRequiredInPrefetch = { 0 };
double ExpectedPrefetchBWAcceleration = { 0 };
double PrefetchTime = { 0 };
- PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
- + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
- DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
- / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * DPTEBytesPerRow[i][j][k] / PTEEfficiency
- / NormalEfficiency / ReturnBusWidth + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
- PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
- ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k]) / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
- DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
- UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
- PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait - UrgLatency[i] * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels
- : GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
+ PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
+ + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
+ DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
+ / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * v->DPTEBytesPerRow[i][j][k] / PTEEfficiency
+ / NormalEfficiency / v->ReturnBusWidth + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
+ PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
+ ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
+ DynamicMetadataVMExtraLatency[k] = (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
+ v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
+ PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - v->UrgLatency[i] * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels
+ : v->GPUVMMaxPageTableLevels - 2) * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
if (PrefetchTime > 0) {
double ExpectedVRatioPrefetch = { 0 };
ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
* dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
- if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
+ if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
- + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / ReturnBusWidth;
+ + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / v->ReturnBusWidth;
}
} else {
- DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
}
- if (DynamicMetadataEnable[k] == true) {
+ if (v->DynamicMetadataEnable[k] == true) {
double TsetupPipe = { 0 };
double TdmbfPipe = { 0 };
double TdmsksPipe = { 0 };
@@ -6798,49 +6584,49 @@ static void UseMinimumDCFCLK(
double AllowedTimeForUrgentExtraLatency = { 0 };
CalculateDynamicMetadataParameters(
- MaxInterDCNTileRepeaters,
- RequiredDPPCLK[i][j][k],
- RequiredDISPCLK[i][j],
- ProjectedDCFCLKDeepSleep[i][j],
- PixelClock[k],
- HTotal[k],
- VTotal[k] - VActive[k],
- DynamicMetadataTransmittedBytes[k],
- DynamicMetadataLinesBeforeActiveRequired[k],
- Interlace[k],
- ProgressiveToInterlaceUnitInOPP,
+ v->MaxInterDCNTileRepeaters,
+ v->RequiredDPPCLK[i][j][k],
+ v->RequiredDISPCLK[i][j],
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ v->PixelClock[k],
+ v->HTotal[k],
+ v->VTotal[k] - v->VActive[k],
+ v->DynamicMetadataTransmittedBytes[k],
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
&TsetupPipe,
&TdmbfPipe,
&TdmecPipe,
&TdmsksPipe);
- AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TsetupPipe
+ AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TsetupPipe
- TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
if (AllowedTimeForUrgentExtraLatency > 0) {
DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(DCFCLKRequiredForPeakBandwidthPerPlane[k],
ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
} else {
- DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
}
}
}
DCFCLKRequiredForPeakBandwidth = 0;
- for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
}
- MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? (HostVMEnable == true ?
- (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ MinimumTvmPlus2Tr0 = v->UrgLatency[i] * (v->GPUVMEnable == true ? (v->HostVMEnable == true ?
+ (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 0);
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
double MaximumTvmPlus2Tr0PlusTsw = { 0 };
- MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
+ MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
- DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
+ DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
} else {
DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 2 * ExtraLatencyCycles
/ (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
(2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
}
}
- DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
+ v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
* dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h
index 4e249eaabfdb..daaf0883b84d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h
@@ -39,5 +39,16 @@ double dml30_CalculateWriteBackDISPCLK(
long WritebackDestinationWidth,
unsigned int HTotal,
unsigned int WritebackLineBufferSize);
+void dml30_CalculateBytePerPixelAnd256BBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC);
#endif /* __DML30_DISPLAY_MODE_VBA_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
index aef854270054..8179be1f34bb 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
@@ -29,6 +29,7 @@
#include "../display_mode_vba.h"
#include "../dml_inline_defs.h"
#include "display_rq_dlg_calc_30.h"
+#include "display_mode_vba_30.h"
static bool is_dual_plane(enum source_format_class source_format)
{
@@ -275,96 +276,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
full_swath_bytes_packed_c);
}
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC)
-{
- if (SourcePixelFormat == dm_444_64) {
- *BytePerPixelDETY = 8;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 8;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 4;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_16) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 2;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 1;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 1;
- *BytePerPixelY = 4;
- *BytePerPixelC = 1;
- } else if (SourcePixelFormat == dm_420_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 2;
- *BytePerPixelY = 1;
- *BytePerPixelC = 2;
- } else if (SourcePixelFormat == dm_420_12) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 4;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- } else {
- *BytePerPixelDETY = 4.0 / 3;
- *BytePerPixelDETC = 8.0 / 3;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- }
-
- if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
- || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
- || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
- || SourcePixelFormat == dm_rgbe)) {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- } else if (SourcePixelFormat == dm_444_64) {
- *BlockHeight256BytesY = 4;
- } else if (SourcePixelFormat == dm_444_8) {
- *BlockHeight256BytesY = 16;
- } else {
- *BlockHeight256BytesY = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockHeight256BytesC = 0;
- *BlockWidth256BytesC = 0;
- } else {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- *BlockHeight256BytesC = 1;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 16;
- } else if (SourcePixelFormat == dm_420_8) {
- *BlockHeight256BytesY = 16;
- *BlockHeight256BytesC = 8;
- } else {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
- }
- return true;
-}
-
static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
display_data_rq_dlg_params_st *rq_dlg_param,
display_data_rq_misc_params_st *rq_misc_param,
@@ -450,7 +361,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
double byte_per_pixel_det_y = 0;
double byte_per_pixel_det_c = 0;
- CalculateBytePerPixelAnd256BBlockSizes((enum source_format_class)(source_format),
+ dml30_CalculateBytePerPixelAnd256BBlockSizes((enum source_format_class)(source_format),
(enum dm_swizzle_mode)(tiling),
&bytes_per_element_y,
&bytes_per_element_c,
@@ -1858,8 +1769,6 @@ void dml30_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
index 94c32832a0e7..422f17aefd4a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
@@ -26,6 +26,7 @@
#include "clk_mgr.h"
#include "dcn20/dcn20_resource.h"
#include "dcn301/dcn301_resource.h"
+#include "clk_mgr/dcn301/vg_clk_mgr.h"
#include "dml/dcn20/dcn20_fpu.h"
#include "dcn301_fpu.h"
@@ -214,6 +215,80 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_01_soc = {
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
};
+struct wm_table ddr4_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 6.09,
+ .sr_enter_plus_exit_time_us = 7.14,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table lpddr5_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ }
+};
+
static void calculate_wm_set_for_vlevel(int vlevel,
struct wm_range_table_entry *table_entry,
struct dcn_watermarks *wm_set,
@@ -247,14 +322,16 @@ static void calculate_wm_set_for_vlevel(int vlevel,
void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
+ struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits;
struct dcn301_resource_pool *pool = TO_DCN301_RES_POOL(dc->res_pool);
struct clk_limit_table *clk_table = &bw_params->clk_table;
- struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
unsigned int i, closest_clk_lvl;
int j;
dc_assert_fp_enabled();
+ memcpy(s, dcn3_01_soc.clock_limits, sizeof(dcn3_01_soc.clock_limits));
+
/* Default clock levels are used for diags, which may lead to overclocking. */
if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
dcn3_01_ip.max_num_otg = pool->base.res_cap->num_timing_generator;
@@ -271,35 +348,42 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
}
}
- clock_limits[i].state = i;
- clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
- clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
-
- clock_limits[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
- clock_limits[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
- clock_limits[i].dram_bw_per_chan_gbps = dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- clock_limits[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- clock_limits[i].dtbclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- clock_limits[i].phyclk_d18_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- clock_limits[i].phyclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ s[i].state = i;
+ s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ s[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
+
+ s[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+ s[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+ s[i].dram_bw_per_chan_gbps =
+ dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ s[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ s[i].dtbclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ s[i].phyclk_d18_mhz =
+ dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ s[i].phyclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
}
- for (i = 0; i < clk_table->num_entries; i++)
- dcn3_01_soc.clock_limits[i] = clock_limits[i];
-
if (clk_table->num_entries) {
dcn3_01_soc.num_states = clk_table->num_entries;
/* duplicate last level */
- dcn3_01_soc.clock_limits[dcn3_01_soc.num_states] = dcn3_01_soc.clock_limits[dcn3_01_soc.num_states - 1];
- dcn3_01_soc.clock_limits[dcn3_01_soc.num_states].state = dcn3_01_soc.num_states;
+ s[dcn3_01_soc.num_states] =
+ dcn3_01_soc.clock_limits[dcn3_01_soc.num_states - 1];
+ s[dcn3_01_soc.num_states].state = dcn3_01_soc.num_states;
}
}
+ memcpy(dcn3_01_soc.clock_limits, s, sizeof(dcn3_01_soc.clock_limits));
+
dcn3_01_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ if ((int)(dcn3_01_soc.dram_clock_change_latency_us * 1000)
+ != dc->debug.dram_clock_change_latency_ns
+ && dc->debug.dram_clock_change_latency_ns) {
+ dcn3_01_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000.0;
+ }
dml_init_instance(&dc->dml, &dcn3_01_soc, &dcn3_01_ip, DML_PROJECT_DCN30);
}
@@ -327,7 +411,7 @@ void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info)
dcn3_01_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10;
}
-void dcn301_calculate_wm_and_dlg(struct dc *dc,
+void dcn301_calculate_wm_and_dlg_fp(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
index fc7065d17842..774b0fdfc80b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
@@ -34,7 +34,7 @@ void dcn301_fpu_set_wm_ranges(int i,
void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info);
-void dcn301_calculate_wm_and_dlg(struct dc *dc,
+void dcn301_calculate_wm_and_dlg_fp(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c
new file mode 100644
index 000000000000..e2bcd205aa93
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c
@@ -0,0 +1,357 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "resource.h"
+#include "clk_mgr.h"
+#include "dcn20/dcn20_resource.h"
+#include "dcn302/dcn302_resource.h"
+
+#include "dml/dcn20/dcn20_fpu.h"
+#include "dcn302_fpu.h"
+
+struct _vcs_dpi_ip_params_st dcn3_02_ip = {
+ .use_min_dcfclk = 0,
+ .clamp_min_dcfclk = 0,
+ .odm_capable = 1,
+ .gpuvm_enable = 1,
+ .hostvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_max_page_table_levels = 4,
+ .hostvm_cached_page_table_levels = 0,
+ .pte_group_size_bytes = 2048,
+ .num_dsc = 5,
+ .rob_buffer_size_kbytes = 184,
+ .det_buffer_size_kbytes = 184,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .pde_proc_buffer_size_64k_reqs = 48,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .max_page_table_levels = 2,
+ .pte_chunk_size_kbytes = 2, // ?
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 8,
+ .line_buffer_size_bits = 789504,
+ .is_line_buffer_bpp_fixed = 0, // ?
+ .line_buffer_fixed_bpp = 0, // ?
+ .dcc_supported = true,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .writeback_line_buffer_buffer_size = 0,
+ .max_line_buffer_lines = 12,
+ .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .writeback_chroma_line_buffer_width_pixels = 4,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .writeback_line_buffer_luma_buffer_size = 0,
+ .writeback_line_buffer_chroma_buffer_size = 14643,
+ .cursor_buffer_size = 8,
+ .cursor_chunk_size = 2,
+ .max_num_otg = 5,
+ .max_num_dpp = 5,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.11,
+ .min_vblank_lines = 32,
+ .dppclk_delay_subtotal = 46,
+ .dynamic_metadata_vm_enabled = true,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dcfclk_cstate_latency = 5.2, // SRExitTime
+ .max_inter_dcn_tile_repeaters = 8,
+ .max_num_hdmi_frl_outputs = 1,
+ .odm_combine_4to1_supported = true,
+
+ .xfc_supported = false,
+ .xfc_fill_bw_overhead_percent = 10.0,
+ .xfc_fill_constant_bytes = 0,
+ .gfx7_compat_tiling_supported = 0,
+ .number_of_cursors = 1,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_02_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 562.0,
+ .dppclk_mhz = 300.0,
+ .phyclk_mhz = 300.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 405.6,
+ },
+ },
+
+ .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
+ .num_states = 1,
+ .sr_exit_time_us = 26.5,
+ .sr_enter_plus_exit_time_us = 31,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .max_request_size_bytes = 256,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 156,
+ .urgent_out_of_order_return_per_channel_bytes = 4096,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 404,
+ .dummy_pstate_latency_us = 5,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3650,
+ .xfc_bus_transport_time_us = 20, // ?
+ .xfc_xbuf_latency_tolerance_us = 4, // ?
+ .use_urgent_burst_bw = 1, // ?
+ .do_urgent_latency_adjustment = true,
+ .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+};
+
+static void dcn302_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+ unsigned int *optimal_dcfclk,
+ unsigned int *optimal_fclk)
+{
+
+ double bw_from_dram, bw_from_dram1, bw_from_dram2;
+
+ bw_from_dram1 = uclk_mts * dcn3_02_soc.num_chans *
+ dcn3_02_soc.dram_channel_width_bytes *
+ (dcn3_02_soc.max_avg_dram_bw_use_normal_percent / 100);
+ bw_from_dram2 = uclk_mts * dcn3_02_soc.num_chans *
+ dcn3_02_soc.dram_channel_width_bytes *
+ (dcn3_02_soc.max_avg_sdp_bw_use_normal_percent / 100);
+
+ bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
+
+ if (optimal_fclk)
+ *optimal_fclk = bw_from_dram /
+ (dcn3_02_soc.fabric_datapath_to_dcn_data_return_bytes *
+ (dcn3_02_soc.max_avg_sdp_bw_use_normal_percent / 100));
+
+ if (optimal_dcfclk)
+ *optimal_dcfclk = bw_from_dram /
+ (dcn3_02_soc.return_bus_width_bytes *
+ (dcn3_02_soc.max_avg_sdp_bw_use_normal_percent / 100));
+}
+
+void dcn302_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ unsigned int i, j;
+ unsigned int num_states = 0;
+
+ unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
+ unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
+
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {694, 875, 1000, 1200};
+ unsigned int num_dcfclk_sta_targets = 4;
+ unsigned int num_uclk_states;
+
+ dc_assert_fp_enabled();
+
+ if (dc->ctx->dc_bios->vram_info.num_chans)
+ dcn3_02_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
+
+ if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+ dcn3_02_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+
+ dcn3_02_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+ if (bw_params->clk_table.entries[0].memclk_mhz) {
+ int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ }
+ if (!max_dcfclk_mhz)
+ max_dcfclk_mhz = dcn3_02_soc.clock_limits[0].dcfclk_mhz;
+ if (!max_dispclk_mhz)
+ max_dispclk_mhz = dcn3_02_soc.clock_limits[0].dispclk_mhz;
+ if (!max_dppclk_mhz)
+ max_dppclk_mhz = dcn3_02_soc.clock_limits[0].dppclk_mhz;
+ if (!max_phyclk_mhz)
+ max_phyclk_mhz = dcn3_02_soc.clock_limits[0].phyclk_mhz;
+
+ if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ /* If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array */
+ dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
+ num_dcfclk_sta_targets++;
+ } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ /* If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates */
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
+ dcfclk_sta_targets[i] = max_dcfclk_mhz;
+ break;
+ }
+ }
+ /* Update size of array since we "removed" duplicates */
+ num_dcfclk_sta_targets = i + 1;
+ }
+
+ num_uclk_states = bw_params->clk_table.num_entries;
+
+ /* Calculate optimal dcfclk for each uclk */
+ for (i = 0; i < num_uclk_states; i++) {
+ dcn302_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
+ &optimal_dcfclk_for_uclk[i], NULL);
+ if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz)
+ optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
+ }
+
+ /* Calculate optimal uclk for each dcfclk sta target */
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ for (j = 0; j < num_uclk_states; j++) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+ optimal_uclk_for_dcfclk_sta_targets[i] =
+ bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ }
+ }
+ }
+
+ i = 0;
+ j = 0;
+ /* create the final dcfclk and uclk table */
+ while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ } else {
+ if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ } else {
+ j = num_uclk_states;
+ }
+ }
+ }
+
+ while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ }
+
+ while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
+ optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ }
+
+ dcn3_02_soc.num_states = num_states;
+ for (i = 0; i < dcn3_02_soc.num_states; i++) {
+ dcn3_02_soc.clock_limits[i].state = i;
+ dcn3_02_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+ dcn3_02_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+ dcn3_02_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+ /* Fill all states with max values of all other clocks */
+ dcn3_02_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
+ dcn3_02_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
+ dcn3_02_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
+ /* Populate from bw_params for DTBCLK, SOCCLK */
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0)
+ dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[i-1].dtbclk_mhz;
+ else
+ dcn3_02_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+ dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[i-1].socclk_mhz;
+ else
+ dcn3_02_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
+ /* These clocks cannot come from bw_params, always fill from dcn3_02_soc[1] */
+ /* FCLK, PHYCLK_D18, DSCCLK */
+ dcn3_02_soc.clock_limits[i].phyclk_d18_mhz = dcn3_02_soc.clock_limits[0].phyclk_d18_mhz;
+ dcn3_02_soc.clock_limits[i].dscclk_mhz = dcn3_02_soc.clock_limits[0].dscclk_mhz;
+ }
+ /* re-init DML with updated bb */
+ dml_init_instance(&dc->dml, &dcn3_02_soc, &dcn3_02_ip, DML_PROJECT_DCN30);
+ if (dc->current_state)
+ dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_02_soc, &dcn3_02_ip, DML_PROJECT_DCN30);
+ }
+}
+
+void dcn302_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info)
+{
+
+ dc_assert_fp_enabled();
+
+ if (bb_info.dram_clock_change_latency_100ns > 0)
+ dcn3_02_soc.dram_clock_change_latency_us =
+ bb_info.dram_clock_change_latency_100ns * 10;
+
+ if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+ dcn3_02_soc.sr_enter_plus_exit_time_us =
+ bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+ if (bb_info.dram_sr_exit_latency_100ns > 0)
+ dcn3_02_soc.sr_exit_time_us =
+ bb_info.dram_sr_exit_latency_100ns * 10;
+}
+
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.h
new file mode 100644
index 000000000000..548305d96cee
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2019-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN302_FPU_H__
+#define __DCN302_FPU_H__
+
+void dcn302_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info);
+void dcn302_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+
+#endif /* __DCN302_FPU_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c
new file mode 100644
index 000000000000..3eb3a021ab7d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright 2019-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "resource.h"
+#include "clk_mgr.h"
+#include "dcn20/dcn20_resource.h"
+#include "dcn303/dcn303_resource.h"
+
+#include "dml/dcn20/dcn20_fpu.h"
+#include "dcn303_fpu.h"
+
+struct _vcs_dpi_ip_params_st dcn3_03_ip = {
+ .use_min_dcfclk = 0,
+ .clamp_min_dcfclk = 0,
+ .odm_capable = 1,
+ .gpuvm_enable = 1,
+ .hostvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_max_page_table_levels = 4,
+ .hostvm_cached_page_table_levels = 0,
+ .pte_group_size_bytes = 2048,
+ .num_dsc = 2,
+ .rob_buffer_size_kbytes = 184,
+ .det_buffer_size_kbytes = 184,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .pde_proc_buffer_size_64k_reqs = 48,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .max_page_table_levels = 2,
+ .pte_chunk_size_kbytes = 2, // ?
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 8,
+ .line_buffer_size_bits = 789504,
+ .is_line_buffer_bpp_fixed = 0, // ?
+ .line_buffer_fixed_bpp = 0, // ?
+ .dcc_supported = true,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .writeback_line_buffer_buffer_size = 0,
+ .max_line_buffer_lines = 12,
+ .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .writeback_chroma_line_buffer_width_pixels = 4,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .writeback_line_buffer_luma_buffer_size = 0,
+ .writeback_line_buffer_chroma_buffer_size = 14643,
+ .cursor_buffer_size = 8,
+ .cursor_chunk_size = 2,
+ .max_num_otg = 2,
+ .max_num_dpp = 2,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.11,
+ .min_vblank_lines = 32,
+ .dppclk_delay_subtotal = 46,
+ .dynamic_metadata_vm_enabled = true,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dcfclk_cstate_latency = 5.2, // SRExitTime
+ .max_inter_dcn_tile_repeaters = 8,
+ .max_num_hdmi_frl_outputs = 1,
+ .odm_combine_4to1_supported = false,
+
+ .xfc_supported = false,
+ .xfc_fill_bw_overhead_percent = 10.0,
+ .xfc_fill_constant_bytes = 0,
+ .gfx7_compat_tiling_supported = 0,
+ .number_of_cursors = 1,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_03_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 562.0,
+ .dppclk_mhz = 300.0,
+ .phyclk_mhz = 300.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 405.6,
+ },
+ },
+
+ .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
+ .num_states = 1,
+ .sr_exit_time_us = 35.5,
+ .sr_enter_plus_exit_time_us = 40,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .max_request_size_bytes = 256,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 156,
+ .urgent_out_of_order_return_per_channel_bytes = 4096,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 404,
+ .dummy_pstate_latency_us = 5,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3650,
+ .xfc_bus_transport_time_us = 20, // ?
+ .xfc_xbuf_latency_tolerance_us = 4, // ?
+ .use_urgent_burst_bw = 1, // ?
+ .do_urgent_latency_adjustment = true,
+ .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+};
+
+static void dcn303_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+ unsigned int *optimal_dcfclk,
+ unsigned int *optimal_fclk)
+{
+ double bw_from_dram, bw_from_dram1, bw_from_dram2;
+
+ bw_from_dram1 = uclk_mts * dcn3_03_soc.num_chans *
+ dcn3_03_soc.dram_channel_width_bytes * (dcn3_03_soc.max_avg_dram_bw_use_normal_percent / 100);
+ bw_from_dram2 = uclk_mts * dcn3_03_soc.num_chans *
+ dcn3_03_soc.dram_channel_width_bytes * (dcn3_03_soc.max_avg_sdp_bw_use_normal_percent / 100);
+
+ bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
+
+ if (optimal_fclk)
+ *optimal_fclk = bw_from_dram /
+ (dcn3_03_soc.fabric_datapath_to_dcn_data_return_bytes *
+ (dcn3_03_soc.max_avg_sdp_bw_use_normal_percent / 100));
+
+ if (optimal_dcfclk)
+ *optimal_dcfclk = bw_from_dram /
+ (dcn3_03_soc.return_bus_width_bytes * (dcn3_03_soc.max_avg_sdp_bw_use_normal_percent / 100));
+}
+
+
+void dcn303_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ unsigned int i, j;
+ unsigned int num_states = 0;
+
+ unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
+ unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
+
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {694, 875, 1000, 1200};
+ unsigned int num_dcfclk_sta_targets = 4;
+ unsigned int num_uclk_states;
+
+ dc_assert_fp_enabled();
+
+ if (dc->ctx->dc_bios->vram_info.num_chans)
+ dcn3_03_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
+
+ if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+ dcn3_03_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+
+ dcn3_03_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+ if (bw_params->clk_table.entries[0].memclk_mhz) {
+ int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ }
+ if (!max_dcfclk_mhz)
+ max_dcfclk_mhz = dcn3_03_soc.clock_limits[0].dcfclk_mhz;
+ if (!max_dispclk_mhz)
+ max_dispclk_mhz = dcn3_03_soc.clock_limits[0].dispclk_mhz;
+ if (!max_dppclk_mhz)
+ max_dppclk_mhz = dcn3_03_soc.clock_limits[0].dppclk_mhz;
+ if (!max_phyclk_mhz)
+ max_phyclk_mhz = dcn3_03_soc.clock_limits[0].phyclk_mhz;
+
+ if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
+ num_dcfclk_sta_targets++;
+ } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
+ dcfclk_sta_targets[i] = max_dcfclk_mhz;
+ break;
+ }
+ }
+ /* Update size of array since we "removed" duplicates */
+ num_dcfclk_sta_targets = i + 1;
+ }
+
+ num_uclk_states = bw_params->clk_table.num_entries;
+
+ /* Calculate optimal dcfclk for each uclk */
+ for (i = 0; i < num_uclk_states; i++) {
+ dcn303_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
+ &optimal_dcfclk_for_uclk[i], NULL);
+ if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz)
+ optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
+ }
+
+ /* Calculate optimal uclk for each dcfclk sta target */
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ for (j = 0; j < num_uclk_states; j++) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+ optimal_uclk_for_dcfclk_sta_targets[i] =
+ bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ }
+ }
+ }
+
+ i = 0;
+ j = 0;
+ /* create the final dcfclk and uclk table */
+ while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ } else {
+ if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] =
+ bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ } else {
+ j = num_uclk_states;
+ }
+ }
+ }
+
+ while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ }
+
+ while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
+ optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ }
+
+ dcn3_03_soc.num_states = num_states;
+ for (i = 0; i < dcn3_03_soc.num_states; i++) {
+ dcn3_03_soc.clock_limits[i].state = i;
+ dcn3_03_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+ dcn3_03_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+ dcn3_03_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+ /* Fill all states with max values of all other clocks */
+ dcn3_03_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
+ dcn3_03_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
+ dcn3_03_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
+ /* Populate from bw_params for DTBCLK, SOCCLK */
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0)
+ dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[i-1].dtbclk_mhz;
+ else
+ dcn3_03_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+ dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[i-1].socclk_mhz;
+ else
+ dcn3_03_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
+ /* These clocks cannot come from bw_params, always fill from dcn3_03_soc[1] */
+ /* FCLK, PHYCLK_D18, DSCCLK */
+ dcn3_03_soc.clock_limits[i].phyclk_d18_mhz = dcn3_03_soc.clock_limits[0].phyclk_d18_mhz;
+ dcn3_03_soc.clock_limits[i].dscclk_mhz = dcn3_03_soc.clock_limits[0].dscclk_mhz;
+ }
+
+ if (dcn3_03_soc.num_chans <= 4) {
+ for (i = 0; i < dcn3_03_soc.num_states; i++) {
+ if (dcn3_03_soc.clock_limits[i].dram_speed_mts > 1700)
+ break;
+
+ if (dcn3_03_soc.clock_limits[i].dram_speed_mts >= 1500) {
+ dcn3_03_soc.clock_limits[i].dcfclk_mhz = 100;
+ dcn3_03_soc.clock_limits[i].fabricclk_mhz = 100;
+ }
+ }
+ }
+
+ /* re-init DML with updated bb */
+ dml_init_instance(&dc->dml, &dcn3_03_soc, &dcn3_03_ip, DML_PROJECT_DCN30);
+ if (dc->current_state)
+ dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_03_soc, &dcn3_03_ip, DML_PROJECT_DCN30);
+ }
+}
+
+void dcn303_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info)
+{
+ dc_assert_fp_enabled();
+
+ if (bb_info.dram_clock_change_latency_100ns > 0)
+ dcn3_03_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10;
+
+ if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+ dcn3_03_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+ if (bb_info.dram_sr_exit_latency_100ns > 0)
+ dcn3_03_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.h
new file mode 100644
index 000000000000..92ec833fa528
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2019-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN303_FPU_H__
+#define __DCN303_FPU_H__
+
+void dcn303_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+void dcn303_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info);
+
+#endif /* __DCN303_FPU_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
new file mode 100644
index 000000000000..7dd0845d1bd9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -0,0 +1,799 @@
+/*
+ * Copyright 2019-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "resource.h"
+#include "clk_mgr.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn315/dcn315_resource.h"
+#include "dcn316/dcn316_resource.h"
+
+#include "dml/dcn20/dcn20_fpu.h"
+#include "dcn31_fpu.h"
+
+/**
+ * DOC: DCN31x FPU manipulation Overview
+ *
+ * The DCN architecture relies on FPU operations, which require special
+ * compilation flags and the use of kernel_fpu_begin/end functions; ideally, we
+ * want to avoid spreading FPU access across multiple files. With this idea in
+ * mind, this file aims to centralize all DCN3.1.x functions that require FPU
+ * access in a single place. Code in this file follows the following code
+ * pattern:
+ *
+ * 1. Functions that use FPU operations should be isolated in static functions.
+ * 2. The FPU functions should have the noinline attribute to ensure anything
+ * that deals with FP register is contained within this call.
+ * 3. All function that needs to be accessed outside this file requires a
+ * public interface that not uses any FPU reference.
+ * 4. Developers **must not** use DC_FP_START/END in this file, but they need
+ * to ensure that the caller invokes it before access any function available
+ * in this file. For this reason, public functions in this file must invoke
+ * dc_assert_fp_enabled();
+ */
+
+struct _vcs_dpi_ip_params_st dcn3_1_ip = {
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE,
+ .config_return_buffer_size_in_kbytes = 1792,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 3,
+ .maximum_dsc_bits_per_component = 10,
+ .dsc422_native_support = false,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 48,
+ .line_buffer_size_bits = 789504,
+ .max_line_buffer_lines = 12,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 46,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+};
+
+static struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 186.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 1,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 2,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 3,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 371.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 4,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 625.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .sr_exit_z8_time_us = 442.0,
+ .sr_enter_plus_exit_z8_time_us = 560.0,
+ .writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
+ .round_trip_ping_latency_dcfclk_cycles = 106,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = false,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+};
+
+struct _vcs_dpi_ip_params_st dcn3_15_ip = {
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE,
+ .min_comp_buffer_size_kbytes = 64,
+ .config_return_buffer_size_in_kbytes = 1024,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 3,
+ .maximum_dsc_bits_per_component = 10,
+ .dsc422_native_support = false,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 49,
+ .line_buffer_size_bits = 789504,
+ .max_line_buffer_lines = 12,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 9,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 46,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+};
+
+static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .sr_exit_z8_time_us = 50.0,
+ .sr_enter_plus_exit_z8_time_us = 50.0,
+ .writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
+ .round_trip_ping_latency_dcfclk_cycles = 106,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.38,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = false,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+ .num_chans = 4,
+ .dummy_pstate_latency_us = 10.0
+};
+
+struct _vcs_dpi_ip_params_st dcn3_16_ip = {
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE,
+ .min_comp_buffer_size_kbytes = 64,
+ .config_return_buffer_size_in_kbytes = 1024,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 3,
+ .maximum_dsc_bits_per_component = 10,
+ .dsc422_native_support = false,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 48,
+ .line_buffer_size_bits = 789504,
+ .max_line_buffer_lines = 12,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 46,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+};
+
+static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 556.0,
+ .dppclk_mhz = 556.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 445.0,
+ .dscclk_mhz = 186.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 1,
+ .dispclk_mhz = 625.0,
+ .dppclk_mhz = 625.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 2,
+ .dispclk_mhz = 625.0,
+ .dppclk_mhz = 625.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 3,
+ .dispclk_mhz = 1112.0,
+ .dppclk_mhz = 1112.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 371.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 4,
+ .dispclk_mhz = 1250.0,
+ .dppclk_mhz = 1250.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 625.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .sr_exit_z8_time_us = 442.0,
+ .sr_enter_plus_exit_z8_time_us = 560.0,
+ .writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
+ .round_trip_ping_latency_dcfclk_cycles = 106,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = false,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+};
+
+void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{
+ dc_assert_fp_enabled();
+
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
+
+void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
+{
+ dc_assert_fp_enabled();
+
+ if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us;
+ }
+}
+
+void dcn315_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
+{
+ dc_assert_fp_enabled();
+
+ if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) {
+ /* For 315 pstate change is only supported if possible in vactive */
+ if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[context->bw_ctx.dml.vba.VoltageLevel][context->bw_ctx.dml.vba.maxMpcComb] != dm_dram_clock_change_vactive)
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us;
+ else
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us =
+ dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us =
+ dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us;
+ }
+}
+
+void dcn31_calculate_wm_and_dlg_fp(
+ struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ int i, pipe_idx, active_dpp_count = 0;
+ double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+
+ dc_assert_fp_enabled();
+
+ if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
+ dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
+
+ /* We don't recalculate clocks for 0 pipe configs, which can block
+ * S0i3 as high clocks will block low power states
+ * Override any clocks that can block S0i3 to min here
+ */
+ if (pipe_cnt == 0) {
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = dcfclk; // always should be vlevel 0
+ return;
+ }
+
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+
+ /* Set A:
+ * All clocks min required
+ *
+ * Set A calculated last so that following calculations are based on Set A
+ */
+ dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a;
+ context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a;
+ context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ if (context->res_ctx.pipe_ctx[i].plane_state)
+ active_dpp_count++;
+
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+ if (dc->config.forced_clocks || dc->debug.max_disp_clk) {
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
+ }
+ if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
+ if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
+
+ pipe_idx++;
+ }
+
+ dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+ /* For 31x apu pstate change is only supported if possible in vactive or if there are no active dpps */
+ context->bw_ctx.bw.dcn.clk.p_state_change_support =
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_vactive || !active_dpp_count;
+}
+
+void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits;
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ unsigned int i, closest_clk_lvl;
+ int j;
+
+ dc_assert_fp_enabled();
+
+ memcpy(s, dcn3_1_soc.clock_limits, sizeof(dcn3_1_soc.clock_limits));
+
+ // Default clock levels are used for diags, which may lead to overclocking.
+ if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+
+ dcn3_1_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+ dcn3_1_ip.max_num_dpp = dc->res_pool->pipe_count;
+ dcn3_1_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn3_1_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn3_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+
+ s[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ s[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz *
+ 2 * clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ s[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ s[i].dram_bw_per_chan_gbps =
+ dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ s[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ s[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ s[i].phyclk_d18_mhz =
+ dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ s[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+ if (clk_table->num_entries) {
+ dcn3_1_soc.num_states = clk_table->num_entries;
+ }
+ }
+
+ memcpy(dcn3_1_soc.clock_limits, s, sizeof(dcn3_1_soc.clock_limits));
+
+ dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+ if ((int)(dcn3_1_soc.dram_clock_change_latency_us * 1000)
+ != dc->debug.dram_clock_change_latency_ns
+ && dc->debug.dram_clock_change_latency_ns) {
+ dcn3_1_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000;
+ }
+
+ if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
+ dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31);
+ else
+ dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31_FPGA);
+}
+
+void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ int i, max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+
+ dc_assert_fp_enabled();
+
+ dcn3_15_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+ dcn3_15_ip.max_num_dpp = dc->res_pool->pipe_count;
+
+ if (bw_params->num_channels > 0)
+ dcn3_15_soc.num_chans = bw_params->num_channels;
+ if (bw_params->dram_channel_width_bytes > 0)
+ dcn3_15_soc.dram_channel_width_bytes = bw_params->dram_channel_width_bytes;
+
+ ASSERT(clk_table->num_entries);
+
+ /* Setup soc to always use max dispclk/dppclk to avoid odm-to-lower-voltage */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ dcn3_15_soc.clock_limits[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ dcn3_15_soc.clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ dcn3_15_soc.clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ dcn3_15_soc.clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ dcn3_15_soc.clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
+
+ /* These aren't actually read from smu, but rather set in clk_mgr defaults */
+ dcn3_15_soc.clock_limits[i].dtbclk_mhz = clk_table->entries[i].dtbclk_mhz;
+ dcn3_15_soc.clock_limits[i].phyclk_d18_mhz = clk_table->entries[i].phyclk_d18_mhz;
+ dcn3_15_soc.clock_limits[i].phyclk_mhz = clk_table->entries[i].phyclk_mhz;
+
+ /* Clocks independent of voltage level. */
+ dcn3_15_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
+ dcn3_15_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
+ dcn3_15_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3.0;
+ }
+ dcn3_15_soc.num_states = clk_table->num_entries;
+
+
+ /* Set vco to max_dispclk * 2 to make sure the highest dispclk is always available for dml calcs,
+ * no impact outside of dml validation
+ */
+ dcn3_15_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+
+ if ((int)(dcn3_15_soc.dram_clock_change_latency_us * 1000)
+ != dc->debug.dram_clock_change_latency_ns
+ && dc->debug.dram_clock_change_latency_ns) {
+ dcn3_15_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000;
+ }
+
+ if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
+ dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN315);
+ else
+ dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31_FPGA);
+}
+
+void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits;
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ unsigned int i, closest_clk_lvl;
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+ int j;
+
+ dc_assert_fp_enabled();
+
+ memcpy(s, dcn3_16_soc.clock_limits, sizeof(dcn3_16_soc.clock_limits));
+
+ // Default clock levels are used for diags, which may lead to overclocking.
+ if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
+
+ dcn3_16_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+ dcn3_16_ip.max_num_dpp = dc->res_pool->pipe_count;
+ dcn3_16_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn3_16_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <=
+ clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+ // Ported from DCN315
+ if (clk_table->num_entries == 1) {
+ /*smu gives one DPM level, let's take the highest one*/
+ closest_clk_lvl = dcn3_16_soc.num_states - 1;
+ }
+
+ s[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ if (clk_table->num_entries == 1 &&
+ s[i].dcfclk_mhz <
+ dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+ /*SMU fix not released yet*/
+ s[i].dcfclk_mhz =
+ dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+ }
+ s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ s[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz *
+ 2 * clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ s[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ s[i].dram_bw_per_chan_gbps =
+ dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ s[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ s[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ s[i].phyclk_d18_mhz =
+ dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ s[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+ if (clk_table->num_entries) {
+ dcn3_16_soc.num_states = clk_table->num_entries;
+ }
+ }
+
+ memcpy(dcn3_16_soc.clock_limits, s, sizeof(dcn3_16_soc.clock_limits));
+
+ if (max_dispclk_mhz) {
+ dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ }
+ if ((int)(dcn3_16_soc.dram_clock_change_latency_us * 1000)
+ != dc->debug.dram_clock_change_latency_ns
+ && dc->debug.dram_clock_change_latency_ns) {
+ dcn3_16_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000;
+ }
+
+ if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
+ dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31);
+ else
+ dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31_FPGA);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
new file mode 100644
index 000000000000..fd58b2561ec9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2019-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN31_FPU_H__
+#define __DCN31_FPU_H__
+
+#define DCN3_1_DEFAULT_DET_SIZE 384
+#define DCN3_15_DEFAULT_DET_SIZE 192
+#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
+#define DCN3_16_DEFAULT_DET_SIZE 192
+
+void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
+void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
+void dcn315_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
+
+void dcn31_calculate_wm_and_dlg_fp(
+ struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+
+void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+
+#endif /* __DCN31_FPU_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index 6feb23432f8d..b612edb14417 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -26,6 +26,7 @@
#include "dc.h"
#include "dc_link.h"
#include "../display_mode_lib.h"
+#include "../dcn30/display_mode_vba_30.h"
#include "display_mode_vba_31.h"
#include "../dml_inline_defs.h"
@@ -42,6 +43,8 @@
#define BPP_BLENDED_PIPE 0xffffffff
#define DCN31_MAX_DSC_IMAGE_WIDTH 5184
#define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
+#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
+#define DCN3_15_MAX_DET_SIZE 384
// For DML-C changes that hasn't been propagated to VBA yet
//#define __DML_VBA_ALLOW_DELTA__
@@ -64,6 +67,8 @@ typedef struct {
double DCFCLKDeepSleep;
unsigned int DPPPerPlane;
bool ScalerEnabled;
+ double VRatio;
+ double VRatioChroma;
enum scan_direction_class SourceScan;
unsigned int BlockWidth256BytesY;
unsigned int BlockHeight256BytesY;
@@ -84,17 +89,6 @@ typedef struct {
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC);
static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
static unsigned int dscceComputeDelay(
@@ -259,33 +253,13 @@ static void CalculateRowBandwidth(
static void CalculateFlipSchedule(
struct display_mode_lib *mode_lib,
+ unsigned int k,
double HostVMInefficiencyFactor,
double UrgentExtraLatency,
double UrgentLatency,
- unsigned int GPUVMMaxPageTableLevels,
- bool HostVMEnable,
- unsigned int HostVMMaxNonCachedPageTableLevels,
- bool GPUVMEnable,
- double HostVMMinPageSize,
double PDEAndMetaPTEBytesPerFrame,
double MetaRowBytes,
- double DPTEBytesPerRow,
- double BandwidthAvailableForImmediateFlip,
- unsigned int TotImmediateFlipBytes,
- enum source_format_class SourcePixelFormat,
- double LineTime,
- double VRatio,
- double VRatioChroma,
- double Tno_bw,
- bool DCCEnable,
- unsigned int dpte_row_height,
- unsigned int meta_row_height,
- unsigned int dpte_row_height_chroma,
- unsigned int meta_row_height_chroma,
- double *DestinationLinesToRequestVMInImmediateFlip,
- double *DestinationLinesToRequestRowInImmediateFlip,
- double *final_flip_bw,
- bool *ImmediateFlipSupportedForPipe);
+ double DPTEBytesPerRow);
static double CalculateWriteBackDelay(
enum source_format_class WritebackPixelFormat,
double WritebackHRatio,
@@ -319,64 +293,28 @@ static void CalculateVupdateAndDynamicMetadataParameters(
static void CalculateWatermarksAndDRAMSpeedChangeSupport(
struct display_mode_lib *mode_lib,
unsigned int PrefetchMode,
- unsigned int NumberOfActivePlanes,
- unsigned int MaxLineBufferLines,
- unsigned int LineBufferSize,
- unsigned int WritebackInterfaceBufferSize,
double DCFCLK,
double ReturnBW,
- bool SynchronizedVBlank,
- unsigned int dpte_group_bytes[],
- unsigned int MetaChunkSize,
double UrgentLatency,
double ExtraLatency,
- double WritebackLatency,
- double WritebackChunkSize,
double SOCCLK,
- double DRAMClockChangeLatency,
- double SRExitTime,
- double SREnterPlusExitTime,
- double SRExitZ8Time,
- double SREnterPlusExitZ8Time,
double DCFCLKDeepSleep,
unsigned int DETBufferSizeY[],
unsigned int DETBufferSizeC[],
unsigned int SwathHeightY[],
unsigned int SwathHeightC[],
- unsigned int LBBitPerPixel[],
double SwathWidthY[],
double SwathWidthC[],
- double HRatio[],
- double HRatioChroma[],
- unsigned int vtaps[],
- unsigned int VTAPsChroma[],
- double VRatio[],
- double VRatioChroma[],
- unsigned int HTotal[],
- double PixelClock[],
- unsigned int BlendingAndTiming[],
unsigned int DPPPerPlane[],
double BytePerPixelDETY[],
double BytePerPixelDETC[],
- double DSTXAfterScaler[],
- double DSTYAfterScaler[],
- bool WritebackEnable[],
- enum source_format_class WritebackPixelFormat[],
- double WritebackDestinationWidth[],
- double WritebackDestinationHeight[],
- double WritebackSourceHeight[],
bool UnboundedRequestEnabled,
int unsigned CompressedBufferSizeInkByte,
enum clock_change_support *DRAMClockChangeSupport,
- double *UrgentWatermark,
- double *WritebackUrgentWatermark,
- double *DRAMClockChangeWatermark,
- double *WritebackDRAMClockChangeWatermark,
double *StutterExitWatermark,
double *StutterEnterPlusExitWatermark,
double *Z8StutterExitWatermark,
- double *Z8StutterEnterPlusExitWatermark,
- double *MinActiveDRAMClockChangeLatencySupported);
+ double *Z8StutterEnterPlusExitWatermark);
static void CalculateDCFCLKDeepSleep(
struct display_mode_lib *mode_lib,
@@ -942,6 +880,7 @@ static bool CalculatePrefetchSchedule(
double dst_y_prefetch_equ;
double Tsw_oto;
double prefetch_bw_oto;
+ double prefetch_bw_pr;
double Tvm_oto;
double Tr0_oto;
double Tvm_oto_lines;
@@ -971,6 +910,7 @@ static bool CalculatePrefetchSchedule(
double min_Lsw;
double Tsw_est1 = 0;
double Tsw_est3 = 0;
+ double max_Tsw = 0;
if (GPUVMEnable == true && HostVMEnable == true) {
HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
@@ -1111,11 +1051,14 @@ static bool CalculatePrefetchSchedule(
bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
else
bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
-
+ /*rev 99*/
+ prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
+ max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
+ prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
- min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
+ min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
Tsw_oto = Lsw_oto * LineTime;
@@ -1389,7 +1332,7 @@ static bool CalculatePrefetchSchedule(
dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
#endif
- if ((SwathHeightC > 4)) {
+ if ((SwathHeightC > 4) || VInitPreFillC > 3) {
if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
*VRatioPrefetchC = dml_max(
*VRatioPrefetchC,
@@ -2213,7 +2156,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
DTRACE(" return_bus_bw = %f", v->ReturnBW);
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
@@ -2663,6 +2606,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
myPipe.DPPPerPlane = v->DPPPerPlane[k];
myPipe.ScalerEnabled = v->ScalerEnabled[k];
+ myPipe.VRatio = v->VRatio[k];
+ myPipe.VRatioChroma = v->VRatioChroma[k];
myPipe.SourceScan = v->SourceScan[k];
myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
@@ -2905,33 +2850,13 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
CalculateFlipSchedule(
mode_lib,
+ k,
HostVMInefficiencyFactor,
v->UrgentExtraLatency,
v->UrgentLatency,
- v->GPUVMMaxPageTableLevels,
- v->HostVMEnable,
- v->HostVMMaxNonCachedPageTableLevels,
- v->GPUVMEnable,
- v->HostVMMinPageSize,
v->PDEAndMetaPTEBytesFrame[k],
v->MetaRowByte[k],
- v->PixelPTEBytesPerRow[k],
- v->BandwidthAvailableForImmediateFlip,
- v->TotImmediateFlipBytes,
- v->SourcePixelFormat[k],
- v->HTotal[k] / v->PixelClock[k],
- v->VRatio[k],
- v->VRatioChroma[k],
- v->Tno_bw[k],
- v->DCCEnable[k],
- v->dpte_row_height[k],
- v->meta_row_height[k],
- v->dpte_row_height_chroma[k],
- v->meta_row_height_chroma[k],
- &v->DestinationLinesToRequestVMInImmediateFlip[k],
- &v->DestinationLinesToRequestRowInImmediateFlip[k],
- &v->final_flip_bw[k],
- &v->ImmediateFlipSupportedForPipe[k]);
+ v->PixelPTEBytesPerRow[k]);
}
v->total_dcn_read_bw_with_flip = 0.0;
@@ -2987,7 +2912,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->ImmediateFlipSupported)) ? true : false;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
- dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
+ dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required);
dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
@@ -3018,64 +2943,28 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
CalculateWatermarksAndDRAMSpeedChangeSupport(
mode_lib,
PrefetchMode,
- v->NumberOfActivePlanes,
- v->MaxLineBufferLines,
- v->LineBufferSize,
- v->WritebackInterfaceBufferSize,
v->DCFCLK,
v->ReturnBW,
- v->SynchronizedVBlank,
- v->dpte_group_bytes,
- v->MetaChunkSize,
v->UrgentLatency,
v->UrgentExtraLatency,
- v->WritebackLatency,
- v->WritebackChunkSize,
v->SOCCLK,
- v->DRAMClockChangeLatency,
- v->SRExitTime,
- v->SREnterPlusExitTime,
- v->SRExitZ8Time,
- v->SREnterPlusExitZ8Time,
v->DCFCLKDeepSleep,
v->DETBufferSizeY,
v->DETBufferSizeC,
v->SwathHeightY,
v->SwathHeightC,
- v->LBBitPerPixel,
v->SwathWidthY,
v->SwathWidthC,
- v->HRatio,
- v->HRatioChroma,
- v->vtaps,
- v->VTAPsChroma,
- v->VRatio,
- v->VRatioChroma,
- v->HTotal,
- v->PixelClock,
- v->BlendingAndTiming,
v->DPPPerPlane,
v->BytePerPixelDETY,
v->BytePerPixelDETC,
- v->DSTXAfterScaler,
- v->DSTYAfterScaler,
- v->WritebackEnable,
- v->WritebackPixelFormat,
- v->WritebackDestinationWidth,
- v->WritebackDestinationHeight,
- v->WritebackSourceHeight,
v->UnboundedRequestEnabled,
v->CompressedBufferSizeInkByte,
&DRAMClockChangeSupport,
- &v->UrgentWatermark,
- &v->WritebackUrgentWatermark,
- &v->DRAMClockChangeWatermark,
- &v->WritebackDRAMClockChangeWatermark,
&v->StutterExitWatermark,
&v->StutterEnterPlusExitWatermark,
&v->Z8StutterExitWatermark,
- &v->Z8StutterEnterPlusExitWatermark,
- &v->MinActiveDRAMClockChangeLatencySupported);
+ &v->Z8StutterEnterPlusExitWatermark);
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
if (v->WritebackEnable[k] == true) {
@@ -3406,7 +3295,7 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&BytePerPixY[k],
@@ -3460,94 +3349,6 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
&dummysinglestring);
}
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC)
-{
- if (SourcePixelFormat == dm_444_64) {
- *BytePerPixelDETY = 8;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 8;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 4;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_16) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 2;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 1;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 1;
- *BytePerPixelY = 4;
- *BytePerPixelC = 1;
- } else if (SourcePixelFormat == dm_420_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 2;
- *BytePerPixelY = 1;
- *BytePerPixelC = 2;
- } else if (SourcePixelFormat == dm_420_12) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 4;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- } else {
- *BytePerPixelDETY = 4.0 / 3;
- *BytePerPixelDETC = 8.0 / 3;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- }
-
- if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
- || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- } else if (SourcePixelFormat == dm_444_64) {
- *BlockHeight256BytesY = 4;
- } else if (SourcePixelFormat == dm_444_8) {
- *BlockHeight256BytesY = 16;
- } else {
- *BlockHeight256BytesY = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockHeight256BytesC = 0;
- *BlockWidth256BytesC = 0;
- } else {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- *BlockHeight256BytesC = 1;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 16;
- } else if (SourcePixelFormat == dm_420_8) {
- *BlockHeight256BytesY = 16;
- *BlockHeight256BytesC = 8;
- } else {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
- }
- return true;
-}
-
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
if (PrefetchMode == 0) {
@@ -3687,61 +3488,43 @@ static void CalculateRowBandwidth(
static void CalculateFlipSchedule(
struct display_mode_lib *mode_lib,
+ unsigned int k,
double HostVMInefficiencyFactor,
double UrgentExtraLatency,
double UrgentLatency,
- unsigned int GPUVMMaxPageTableLevels,
- bool HostVMEnable,
- unsigned int HostVMMaxNonCachedPageTableLevels,
- bool GPUVMEnable,
- double HostVMMinPageSize,
double PDEAndMetaPTEBytesPerFrame,
double MetaRowBytes,
- double DPTEBytesPerRow,
- double BandwidthAvailableForImmediateFlip,
- unsigned int TotImmediateFlipBytes,
- enum source_format_class SourcePixelFormat,
- double LineTime,
- double VRatio,
- double VRatioChroma,
- double Tno_bw,
- bool DCCEnable,
- unsigned int dpte_row_height,
- unsigned int meta_row_height,
- unsigned int dpte_row_height_chroma,
- unsigned int meta_row_height_chroma,
- double *DestinationLinesToRequestVMInImmediateFlip,
- double *DestinationLinesToRequestRowInImmediateFlip,
- double *final_flip_bw,
- bool *ImmediateFlipSupportedForPipe)
+ double DPTEBytesPerRow)
{
+ struct vba_vars_st *v = &mode_lib->vba;
double min_row_time = 0.0;
unsigned int HostVMDynamicLevelsTrips;
double TimeForFetchingMetaPTEImmediateFlip;
double TimeForFetchingRowInVBlankImmediateFlip;
double ImmediateFlipBW;
+ double LineTime = v->HTotal[k] / v->PixelClock[k];
- if (GPUVMEnable == true && HostVMEnable == true) {
- HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ if (v->GPUVMEnable == true && v->HostVMEnable == true) {
+ HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
} else {
HostVMDynamicLevelsTrips = 0;
}
- if (GPUVMEnable == true || DCCEnable == true) {
- ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
+ if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
+ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
}
- if (GPUVMEnable == true) {
+ if (v->GPUVMEnable == true) {
TimeForFetchingMetaPTEImmediateFlip = dml_max3(
- Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
- UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
+ v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
+ UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
LineTime / 4.0);
} else {
TimeForFetchingMetaPTEImmediateFlip = 0;
}
- *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
- if ((GPUVMEnable == true || DCCEnable == true)) {
+ v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
+ if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
UrgentLatency * (HostVMDynamicLevelsTrips + 1),
@@ -3750,54 +3533,54 @@ static void CalculateFlipSchedule(
TimeForFetchingRowInVBlankImmediateFlip = 0;
}
- *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
+ v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
- if (GPUVMEnable == true) {
- *final_flip_bw = dml_max(
- PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
- (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
- } else if ((GPUVMEnable == true || DCCEnable == true)) {
- *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
+ if (v->GPUVMEnable == true) {
+ v->final_flip_bw[k] = dml_max(
+ PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
+ } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
+ v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
} else {
- *final_flip_bw = 0;
+ v->final_flip_bw[k] = 0;
}
- if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
- if (GPUVMEnable == true && DCCEnable != true) {
- min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
- } else if (GPUVMEnable != true && DCCEnable == true) {
- min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
+ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
+ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
+ min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
} else {
min_row_time = dml_min4(
- dpte_row_height * LineTime / VRatio,
- meta_row_height * LineTime / VRatio,
- dpte_row_height_chroma * LineTime / VRatioChroma,
- meta_row_height_chroma * LineTime / VRatioChroma);
+ v->dpte_row_height[k] * LineTime / v->VRatio[k],
+ v->meta_row_height[k] * LineTime / v->VRatio[k],
+ v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
+ v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
}
} else {
- if (GPUVMEnable == true && DCCEnable != true) {
- min_row_time = dpte_row_height * LineTime / VRatio;
- } else if (GPUVMEnable != true && DCCEnable == true) {
- min_row_time = meta_row_height * LineTime / VRatio;
+ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
+ min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
+ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
+ min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
} else {
- min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
+ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
}
}
- if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
+ if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
- *ImmediateFlipSupportedForPipe = false;
+ v->ImmediateFlipSupportedForPipe[k] = false;
} else {
- *ImmediateFlipSupportedForPipe = true;
+ v->ImmediateFlipSupportedForPipe[k] = true;
}
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
- dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
+ dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
+ dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
- dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
+ dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
#endif
}
@@ -3911,6 +3694,9 @@ static noinline void CalculatePrefetchSchedulePerPlane(
myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
myPipe.ScalerEnabled = v->ScalerEnabled[k];
+ myPipe.VRatio = mode_lib->vba.VRatio[k];
+ myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
+
myPipe.SourceScan = v->SourceScan[k];
myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
@@ -3991,6 +3777,17 @@ static noinline void CalculatePrefetchSchedulePerPlane(
&v->VReadyOffsetPix[k]);
}
+static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int *DETBufferSizeInKByte)
+{
+ int i, total_pipes = 0;
+ for (i = 0; i < NumberOfActivePlanes; i++)
+ total_pipes += NoOfDPPThisState[i];
+ *DETBufferSizeInKByte = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64;
+ if (*DETBufferSizeInKByte > DCN3_15_MAX_DET_SIZE)
+ *DETBufferSizeInKByte = DCN3_15_MAX_DET_SIZE;
+}
+
+
void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
{
struct vba_vars_st *v = &mode_lib->vba;
@@ -4054,7 +3851,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/*Bandwidth Support Check*/
for (k = 0; k < v->NumberOfActivePlanes; k++) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
@@ -4298,6 +4095,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
if (v->ODMCombinePolicy == dm_odm_combine_policy_none
|| !(v->Output[k] == dm_dp ||
+ v->Output[k] == dm_dp2p0 ||
v->Output[k] == dm_edp)) {
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
@@ -4748,6 +4546,8 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
}
+ if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315)
+ PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, &v->DETBufferSizeInKByte[0]);
CalculateSwathAndDETConfiguration(
false,
v->NumberOfActivePlanes,
@@ -4987,6 +4787,17 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
&v->meta_row_bandwidth[i][j][k],
&v->dpte_row_bandwidth[i][j][k]);
}
+ /*DCCMetaBufferSizeSupport(i, j) = True
+ For k = 0 To NumberOfActivePlanes - 1
+ If MetaRowBytes(i, j, k) > 24064 Then
+ DCCMetaBufferSizeSupport(i, j) = False
+ End If
+ Next k*/
+ v->DCCMetaBufferSizeSupport[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->MetaRowBytes[i][j][k] > 24064)
+ v->DCCMetaBufferSizeSupport[i][j] = false;
+ }
v->UrgLatency[i] = CalculateUrgentLatency(
v->UrgentLatencyPixelDataOnly,
v->UrgentLatencyPixelMixedWithVMData,
@@ -5374,33 +5185,13 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
for (k = 0; k < v->NumberOfActivePlanes; k++) {
CalculateFlipSchedule(
mode_lib,
+ k,
HostVMInefficiencyFactor,
v->ExtraLatency,
v->UrgLatency[i],
- v->GPUVMMaxPageTableLevels,
- v->HostVMEnable,
- v->HostVMMaxNonCachedPageTableLevels,
- v->GPUVMEnable,
- v->HostVMMinPageSize,
v->PDEAndMetaPTEBytesPerFrame[i][j][k],
v->MetaRowBytes[i][j][k],
- v->DPTEBytesPerRow[i][j][k],
- v->BandwidthAvailableForImmediateFlip,
- v->TotImmediateFlipBytes,
- v->SourcePixelFormat[k],
- v->HTotal[k] / v->PixelClock[k],
- v->VRatio[k],
- v->VRatioChroma[k],
- v->Tno_bw[k],
- v->DCCEnable[k],
- v->dpte_row_height[k],
- v->meta_row_height[k],
- v->dpte_row_height_chroma[k],
- v->meta_row_height_chroma[k],
- &v->DestinationLinesToRequestVMInImmediateFlip[k],
- &v->DestinationLinesToRequestRowInImmediateFlip[k],
- &v->final_flip_bw[k],
- &v->ImmediateFlipSupportedForPipe[k]);
+ v->DPTEBytesPerRow[i][j][k]);
}
v->total_dcn_read_bw_with_flip = 0.0;
for (k = 0; k < v->NumberOfActivePlanes; k++) {
@@ -5458,64 +5249,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
CalculateWatermarksAndDRAMSpeedChangeSupport(
mode_lib,
v->PrefetchModePerState[i][j],
- v->NumberOfActivePlanes,
- v->MaxLineBufferLines,
- v->LineBufferSize,
- v->WritebackInterfaceBufferSize,
v->DCFCLKState[i][j],
v->ReturnBWPerState[i][j],
- v->SynchronizedVBlank,
- v->dpte_group_bytes,
- v->MetaChunkSize,
v->UrgLatency[i],
v->ExtraLatency,
- v->WritebackLatency,
- v->WritebackChunkSize,
v->SOCCLKPerState[i],
- v->DRAMClockChangeLatency,
- v->SRExitTime,
- v->SREnterPlusExitTime,
- v->SRExitZ8Time,
- v->SREnterPlusExitZ8Time,
v->ProjectedDCFCLKDeepSleep[i][j],
v->DETBufferSizeYThisState,
v->DETBufferSizeCThisState,
v->SwathHeightYThisState,
v->SwathHeightCThisState,
- v->LBBitPerPixel,
v->SwathWidthYThisState,
v->SwathWidthCThisState,
- v->HRatio,
- v->HRatioChroma,
- v->vtaps,
- v->VTAPsChroma,
- v->VRatio,
- v->VRatioChroma,
- v->HTotal,
- v->PixelClock,
- v->BlendingAndTiming,
v->NoOfDPPThisState,
v->BytePerPixelInDETY,
v->BytePerPixelInDETC,
- v->DSTXAfterScaler,
- v->DSTYAfterScaler,
- v->WritebackEnable,
- v->WritebackPixelFormat,
- v->WritebackDestinationWidth,
- v->WritebackDestinationHeight,
- v->WritebackSourceHeight,
UnboundedRequestEnabledThisState,
CompressedBufferSizeInkByteThisState,
&v->DRAMClockChangeSupport[i][j],
- &v->UrgentWatermark,
- &v->WritebackUrgentWatermark,
- &v->DRAMClockChangeWatermark,
- &v->WritebackDRAMClockChangeWatermark,
- &dummy,
&dummy,
&dummy,
&dummy,
- &v->MinActiveDRAMClockChangeLatencySupported);
+ &dummy);
}
}
@@ -5640,64 +5395,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
static void CalculateWatermarksAndDRAMSpeedChangeSupport(
struct display_mode_lib *mode_lib,
unsigned int PrefetchMode,
- unsigned int NumberOfActivePlanes,
- unsigned int MaxLineBufferLines,
- unsigned int LineBufferSize,
- unsigned int WritebackInterfaceBufferSize,
double DCFCLK,
double ReturnBW,
- bool SynchronizedVBlank,
- unsigned int dpte_group_bytes[],
- unsigned int MetaChunkSize,
double UrgentLatency,
double ExtraLatency,
- double WritebackLatency,
- double WritebackChunkSize,
double SOCCLK,
- double DRAMClockChangeLatency,
- double SRExitTime,
- double SREnterPlusExitTime,
- double SRExitZ8Time,
- double SREnterPlusExitZ8Time,
double DCFCLKDeepSleep,
unsigned int DETBufferSizeY[],
unsigned int DETBufferSizeC[],
unsigned int SwathHeightY[],
unsigned int SwathHeightC[],
- unsigned int LBBitPerPixel[],
double SwathWidthY[],
double SwathWidthC[],
- double HRatio[],
- double HRatioChroma[],
- unsigned int vtaps[],
- unsigned int VTAPsChroma[],
- double VRatio[],
- double VRatioChroma[],
- unsigned int HTotal[],
- double PixelClock[],
- unsigned int BlendingAndTiming[],
unsigned int DPPPerPlane[],
double BytePerPixelDETY[],
double BytePerPixelDETC[],
- double DSTXAfterScaler[],
- double DSTYAfterScaler[],
- bool WritebackEnable[],
- enum source_format_class WritebackPixelFormat[],
- double WritebackDestinationWidth[],
- double WritebackDestinationHeight[],
- double WritebackSourceHeight[],
bool UnboundedRequestEnabled,
int unsigned CompressedBufferSizeInkByte,
enum clock_change_support *DRAMClockChangeSupport,
- double *UrgentWatermark,
- double *WritebackUrgentWatermark,
- double *DRAMClockChangeWatermark,
- double *WritebackDRAMClockChangeWatermark,
double *StutterExitWatermark,
double *StutterEnterPlusExitWatermark,
double *Z8StutterExitWatermark,
- double *Z8StutterEnterPlusExitWatermark,
- double *MinActiveDRAMClockChangeLatencySupported)
+ double *Z8StutterEnterPlusExitWatermark)
{
struct vba_vars_st *v = &mode_lib->vba;
double EffectiveLBLatencyHidingY;
@@ -5717,103 +5436,103 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
double TotalPixelBW = 0.0;
int k, j;
- *UrgentWatermark = UrgentLatency + ExtraLatency;
+ v->UrgentWatermark = UrgentLatency + ExtraLatency;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
- dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark);
+ dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
#endif
- *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
+ v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency);
- dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark);
+ dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
+ dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
#endif
v->TotalActiveWriteback = 0;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
- if (WritebackEnable[k] == true) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k] == true) {
v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
}
}
if (v->TotalActiveWriteback <= 1) {
- *WritebackUrgentWatermark = WritebackLatency;
+ v->WritebackUrgentWatermark = v->WritebackLatency;
} else {
- *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
}
if (v->TotalActiveWriteback <= 1) {
- *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
+ v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
} else {
- *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
}
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
TotalPixelBW = TotalPixelBW
- + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k])
- / (HTotal[k] / PixelClock[k]);
+ + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
+ / (v->HTotal[k] / v->PixelClock[k]);
}
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
double EffectiveDETBufferSizeY = DETBufferSizeY[k];
v->LBLatencyHidingSourceLinesY = dml_min(
- (double) MaxLineBufferLines,
- dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
+ (double) v->MaxLineBufferLines,
+ dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
v->LBLatencyHidingSourceLinesC = dml_min(
- (double) MaxLineBufferLines,
- dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
+ (double) v->MaxLineBufferLines,
+ dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
- EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
+ EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
- EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
+ EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
if (UnboundedRequestEnabled) {
EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
- + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
+ + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
}
LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
- FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
+ FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
if (BytePerPixelDETC[k] > 0) {
LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
- FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
+ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
} else {
LinesInDETC = 0;
FullDETBufferingTimeC = 999999;
}
ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
- - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
+ - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
- if (NumberOfActivePlanes > 1) {
+ if (v->NumberOfActivePlanes > 1) {
ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
- - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
+ - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
}
if (BytePerPixelDETC[k] > 0) {
ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
- - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
+ - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
- if (NumberOfActivePlanes > 1) {
+ if (v->NumberOfActivePlanes > 1) {
ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
- - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
+ - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
}
v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
} else {
v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
}
- if (WritebackEnable[k] == true) {
- WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024
- / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
- if (WritebackPixelFormat[k] == dm_444_64) {
+ if (v->WritebackEnable[k] == true) {
+ WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
+ / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
+ if (v->WritebackPixelFormat[k] == dm_444_64) {
WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
}
WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
@@ -5823,14 +5542,14 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
v->MinActiveDRAMClockChangeMargin = 999999;
PlaneWithMinActiveDRAMClockChangeMargin = 0;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
- if (BlendingAndTiming[k] == k) {
+ if (v->BlendingAndTiming[k] == k) {
PlaneWithMinActiveDRAMClockChangeMargin = k;
} else {
- for (j = 0; j < NumberOfActivePlanes; ++j) {
- if (BlendingAndTiming[k] == j) {
+ for (j = 0; j < v->NumberOfActivePlanes; ++j) {
+ if (v->BlendingAndTiming[k] == j) {
PlaneWithMinActiveDRAMClockChangeMargin = j;
}
}
@@ -5838,11 +5557,11 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
}
}
- *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
+ v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
- if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
&& v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
}
@@ -5850,25 +5569,25 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
v->TotalNumberOfActiveOTG = 0;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
- if (BlendingAndTiming[k] == k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k) {
v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
}
}
if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
- } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
+ } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
|| SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
} else {
*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
}
- *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
- *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
- *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
- *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+ *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
+ *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
+ *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+ *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
@@ -7007,8 +6726,6 @@ static void CalculateSwathWidth(
{
int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
- int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
- int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
@@ -7019,6 +6736,8 @@ static void CalculateSwathWidth(
MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
if (BytePerPixC[k] > 0) {
+ int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+
swath_width_chroma_ub[k] = dml_min(
surface_width_ub_c,
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
@@ -7030,6 +6749,8 @@ static void CalculateSwathWidth(
MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
if (BytePerPixC[k] > 0) {
+ int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+
swath_width_chroma_ub[k] = dml_min(
surface_height_ub_c,
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
index e0fecf127bd5..35d10b4d018b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
@@ -27,94 +27,7 @@
#include "../display_mode_vba.h"
#include "../dml_inline_defs.h"
#include "display_rq_dlg_calc_31.h"
-
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC)
-{
- if (SourcePixelFormat == dm_444_64) {
- *BytePerPixelDETY = 8;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 8;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 4;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_16) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 2;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 1;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 1;
- *BytePerPixelY = 4;
- *BytePerPixelC = 1;
- } else if (SourcePixelFormat == dm_420_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 2;
- *BytePerPixelY = 1;
- *BytePerPixelC = 2;
- } else if (SourcePixelFormat == dm_420_12) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 4;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- } else {
- *BytePerPixelDETY = 4.0 / 3;
- *BytePerPixelDETC = 8.0 / 3;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- }
-
- if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
- || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- } else if (SourcePixelFormat == dm_444_64) {
- *BlockHeight256BytesY = 4;
- } else if (SourcePixelFormat == dm_444_8) {
- *BlockHeight256BytesY = 16;
- } else {
- *BlockHeight256BytesY = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockHeight256BytesC = 0;
- *BlockWidth256BytesC = 0;
- } else {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- *BlockHeight256BytesC = 1;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 16;
- } else if (SourcePixelFormat == dm_420_8) {
- *BlockHeight256BytesY = 16;
- *BlockHeight256BytesC = 8;
- } else {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
- }
- return true;
-}
+#include "../dcn30/display_mode_vba_30.h"
static bool is_dual_plane(enum source_format_class source_format)
{
@@ -467,7 +380,7 @@ static void get_meta_and_pte_attr(
double byte_per_pixel_det_y;
double byte_per_pixel_det_c;
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
(enum source_format_class) (source_format),
(enum dm_swizzle_mode) (tiling),
&bytes_per_element_y,
@@ -953,7 +866,6 @@ static void dml_rq_dlg_get_dlg_params(
{
const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest;
- const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout;
const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg;
const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth;
const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps;
@@ -1003,9 +915,6 @@ static void dml_rq_dlg_get_dlg_params(
unsigned int vupdate_width;
unsigned int vready_offset;
- unsigned int dppclk_delay_subtotal;
- unsigned int dispclk_delay_subtotal;
-
unsigned int vstartup_start;
unsigned int dst_x_after_scaler;
unsigned int dst_y_after_scaler;
@@ -1078,8 +987,9 @@ static void dml_rq_dlg_get_dlg_params(
min_ttu_vblank = get_min_ttu_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start;
-
disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2));
+ disp_dlg_regs->optimized_min_dst_y_next_start_us = 0;
+ disp_dlg_regs->optimized_min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start;
ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18));
dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank);
@@ -1123,21 +1033,6 @@ static void dml_rq_dlg_get_dlg_params(
vupdate_width = dst->vupdate_width;
vready_offset = dst->vready_offset;
- dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal;
- dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal;
-
- if (scl_enable)
- dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl;
- else
- dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only;
-
- dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor;
-
- if (dout->dsc_enable) {
- double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // FROM VBA
- dispclk_delay_subtotal += dsc_delay;
- }
-
vstartup_start = dst->vstartup_start;
if (interlaced) {
if (vstartup_start / 2.0 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end / 2.0)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
new file mode 100644
index 000000000000..34b6c763a455
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "clk_mgr.h"
+#include "resource.h"
+#include "dcn31/dcn31_hubbub.h"
+#include "dcn314_fpu.h"
+#include "dml/dcn20/dcn20_fpu.h"
+#include "dml/display_mode_vba.h"
+
+struct _vcs_dpi_ip_params_st dcn3_14_ip = {
+ .VBlankNomDefaultUS = 668,
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE,
+ .config_return_buffer_size_in_kbytes = 1792,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 10,
+ .dsc422_native_support = false,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 48,
+ .line_buffer_size_bits = 789504,
+ .max_line_buffer_lines = 12,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 46,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = {
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 186.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 1,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 2,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 3,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 371.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 4,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 600.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 16.5,
+ .sr_enter_plus_exit_time_us = 18.5,
+ .sr_exit_z8_time_us = 442.0,
+ .sr_enter_plus_exit_z8_time_us = 560.0,
+ .writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
+ .round_trip_ping_latency_dcfclk_cycles = 106,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = false,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+};
+
+
+void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ struct _vcs_dpi_voltage_scaling_st *clock_limits =
+ dcn3_14_soc.clock_limits;
+ unsigned int i, closest_clk_lvl;
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+ int j;
+
+ dc_assert_fp_enabled();
+
+ // Default clock levels are used for diags, which may lead to overclocking.
+ if (!IS_DIAG_DC(dc->ctx->dce_environment) && dc->config.use_default_clock_table == false) {
+
+ dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+ dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count;
+
+ if (bw_params->dram_channel_width_bytes > 0)
+ dcn3_14_soc.dram_channel_width_bytes = bw_params->dram_channel_width_bytes;
+
+ if (bw_params->num_channels > 0)
+ dcn3_14_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(dcn3_14_soc.num_chans);
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+ if (clk_table->num_entries == 1) {
+ /*smu gives one DPM level, let's take the highest one*/
+ closest_clk_lvl = dcn3_14_soc.num_states - 1;
+ }
+
+ clock_limits[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ if (clk_table->num_entries == 1 &&
+ clock_limits[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+ /*SMU fix not released yet*/
+ clock_limits[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+ }
+ clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+
+ if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio)
+ clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ clock_limits[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ clock_limits[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ clock_limits[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ clock_limits[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ clock_limits[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+ for (i = 0; i < clk_table->num_entries; i++)
+ dcn3_14_soc.clock_limits[i] = clock_limits[i];
+ if (clk_table->num_entries) {
+ dcn3_14_soc.num_states = clk_table->num_entries;
+ }
+ }
+
+ if (max_dispclk_mhz) {
+ dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ }
+
+ if ((int)(dcn3_14_soc.dram_clock_change_latency_us * 1000)
+ != dc->debug.dram_clock_change_latency_ns
+ && dc->debug.dram_clock_change_latency_ns) {
+ dcn3_14_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000;
+ }
+ if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
+ dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN314);
+ else
+ dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA);
+}
+
+static bool is_dual_plane(enum surface_pixel_format format)
+{
+ return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
+}
+
+int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate)
+{
+ int i, pipe_cnt;
+ struct resource_context *res_ctx = &context->res_ctx;
+ struct pipe_ctx *pipe;
+ bool upscaled = false;
+
+ dc_assert_fp_enabled();
+
+ dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+
+ for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ struct dc_crtc_timing *timing;
+
+ if (!res_ctx->pipe_ctx[i].stream)
+ continue;
+ pipe = &res_ctx->pipe_ctx[i];
+ timing = &pipe->stream->timing;
+
+ if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min
+ && pipe->stream->adjust.v_total_min > timing->v_total)
+ pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
+
+ if (pipe->plane_state &&
+ (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height ||
+ pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width))
+ upscaled = true;
+
+ /*
+ * Immediate flip can be set dynamically after enabling the plane.
+ * We need to require support for immediate flip or underflow can be
+ * intermittently experienced depending on peak b/w requirements.
+ */
+ pipes[pipe_cnt].pipe.src.immediate_flip = true;
+
+ pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+ pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
+ pipes[pipe_cnt].pipe.src.gpuvm = true;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+ pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
+ pipes[pipe_cnt].pipe.dest.vblank_nom =
+ dcn3_14_ip.VBlankNomDefaultUS / (timing->h_total / (timing->pix_clk_100hz / 10000.0));
+ pipes[pipe_cnt].pipe.src.dcc_rate = 3;
+ pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+
+ if (pipes[pipe_cnt].dout.dsc_enable) {
+ switch (timing->display_color_depth) {
+ case COLOR_DEPTH_888:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 8;
+ break;
+ case COLOR_DEPTH_101010:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 10;
+ break;
+ case COLOR_DEPTH_121212:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 12;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+ }
+
+ pipe_cnt++;
+ }
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE;
+
+ dc->config.enable_4to1MPC = false;
+ if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
+ if (is_dual_plane(pipe->plane_state->format)
+ && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) {
+ dc->config.enable_4to1MPC = true;
+ } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) {
+ /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+ pipes[0].pipe.src.unbounded_req_mode = true;
+ }
+ } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count
+ && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64;
+ } else if (context->stream_count >= 3 && upscaled) {
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine &&
+ pipe->stream->apply_seamless_boot_optimization) {
+
+ if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) {
+ context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1;
+ break;
+ }
+ }
+ }
+
+ return pipe_cnt;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
new file mode 100644
index 000000000000..d32c5bb99f4c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN314_FPU_H__
+#define __DCN314_FPU_H__
+
+#define DCN3_14_DEFAULT_DET_SIZE 384
+#define DCN3_14_MAX_DET_SIZE 384
+#define DCN3_14_MIN_COMPBUF_SIZE_KB 128
+#define DCN3_14_CRB_SEGMENT_SIZE_KB 64
+
+void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
+int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
new file mode 100644
index 000000000000..0d12fd079cd6
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -0,0 +1,7199 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#define UNIT_TEST 0
+#if !UNIT_TEST
+#include "dc.h"
+#include "dc_link.h"
+#endif
+#include "../display_mode_lib.h"
+#include "display_mode_vba_314.h"
+#include "../dml_inline_defs.h"
+
+/*
+ * NOTE:
+ * This file is gcc-parsable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
+#define BPP_INVALID 0
+#define BPP_BLENDED_PIPE 0xffffffff
+#define DCN314_MAX_DSC_IMAGE_WIDTH 5184
+#define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096
+
+// For DML-C changes that hasn't been propagated to VBA yet
+//#define __DML_VBA_ALLOW_DELTA__
+
+// Move these to ip parameters/constant
+
+// At which vstartup the DML start to try if the mode can be supported
+#define __DML_VBA_MIN_VSTARTUP__ 9
+
+// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
+#define __DML_ARB_TO_RET_DELAY__ (7 + 95)
+
+// fudge factor for min dcfclk calclation
+#define __DML_MIN_DCFCLK_FACTOR__ 1.15
+
+typedef struct {
+ double DPPCLK;
+ double DISPCLK;
+ double PixelClock;
+ double DCFCLKDeepSleep;
+ unsigned int DPPPerPlane;
+ bool ScalerEnabled;
+ double VRatio;
+ double VRatioChroma;
+ enum scan_direction_class SourceScan;
+ unsigned int BlockWidth256BytesY;
+ unsigned int BlockHeight256BytesY;
+ unsigned int BlockWidth256BytesC;
+ unsigned int BlockHeight256BytesC;
+ unsigned int InterlaceEnable;
+ unsigned int NumberOfCursors;
+ unsigned int VBlank;
+ unsigned int HTotal;
+ unsigned int DCCEnable;
+ bool ODMCombineIsEnabled;
+ enum source_format_class SourcePixelFormat;
+ int BytePerPixelY;
+ int BytePerPixelC;
+ bool ProgressiveToInterlaceUnitInOPP;
+} Pipe;
+
+#define BPP_INVALID 0
+#define BPP_BLENDED_PIPE 0xffffffff
+
+static bool CalculateBytePerPixelAnd256BBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC);
+static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
+static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
+static unsigned int dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum output_format_class pixelFormat,
+ enum output_encoder_class Output);
+static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
+static bool CalculatePrefetchSchedule(
+ struct display_mode_lib *mode_lib,
+ double HostVMInefficiencyFactor,
+ Pipe *myPipe,
+ unsigned int DSCDelay,
+ double DPPCLKDelaySubtotalPlusCNVCFormater,
+ double DPPCLKDelaySCL,
+ double DPPCLKDelaySCLLBOnly,
+ double DPPCLKDelayCNVCCursor,
+ double DISPCLKDelaySubtotal,
+ unsigned int DPP_RECOUT_WIDTH,
+ enum output_format_class OutputFormat,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int VStartup,
+ unsigned int MaxVStartup,
+ unsigned int GPUVMPageTableLevels,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ double HostVMMinPageSize,
+ bool DynamicMetadataEnable,
+ bool DynamicMetadataVMEnabled,
+ int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int DynamicMetadataTransmittedBytes,
+ double UrgentLatency,
+ double UrgentExtraLatency,
+ double TCalc,
+ unsigned int PDEAndMetaPTEBytesFrame,
+ unsigned int MetaRowByte,
+ unsigned int PixelPTEBytesPerRow,
+ double PrefetchSourceLinesY,
+ unsigned int SwathWidthY,
+ double VInitPreFillY,
+ unsigned int MaxNumSwathY,
+ double PrefetchSourceLinesC,
+ unsigned int SwathWidthC,
+ double VInitPreFillC,
+ unsigned int MaxNumSwathC,
+ int swath_width_luma_ub,
+ int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double TWait,
+ double *DSTXAfterScaler,
+ double *DSTYAfterScaler,
+ double *DestinationLinesForPrefetch,
+ double *PrefetchBandwidth,
+ double *DestinationLinesToRequestVMInVBlank,
+ double *DestinationLinesToRequestRowInVBlank,
+ double *VRatioPrefetchY,
+ double *VRatioPrefetchC,
+ double *RequiredPrefetchPixDataBWLuma,
+ double *RequiredPrefetchPixDataBWChroma,
+ bool *NotEnoughTimeForDynamicMetadata,
+ double *Tno_bw,
+ double *prefetch_vmrow_bw,
+ double *Tdmdl_vm,
+ double *Tdmdl,
+ double *TSetup,
+ int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix);
+static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
+static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
+static void CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ double DETBufferSize,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dm_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum scan_direction_class ScanOrientation,
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma);
+static double CalculatePrefetchSourceLines(
+ struct display_mode_lib *mode_lib,
+ double VRatio,
+ double vtaps,
+ bool Interlace,
+ bool ProgressiveToInterlaceUnitInOPP,
+ unsigned int SwathHeight,
+ unsigned int ViewportYStart,
+ double *VInitPreFill,
+ unsigned int *MaxNumSwath);
+static unsigned int CalculateVMAndRowBytes(
+ struct display_mode_lib *mode_lib,
+ bool DCCEnable,
+ unsigned int BlockHeight256Bytes,
+ unsigned int BlockWidth256Bytes,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceTiling,
+ unsigned int BytePerPixel,
+ enum scan_direction_class ScanDirection,
+ unsigned int SwathWidth,
+ unsigned int ViewportHeight,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMinPageSize,
+ unsigned int HostVMMinPageSize,
+ unsigned int PTEBufferSizeInRequests,
+ unsigned int Pitch,
+ unsigned int DCCMetaPitch,
+ unsigned int *MacroTileWidth,
+ unsigned int *MetaRowByte,
+ unsigned int *PixelPTEBytesPerRow,
+ bool *PTEBufferSizeNotExceeded,
+ int *dpte_row_width_ub,
+ unsigned int *dpte_row_height,
+ unsigned int *MetaRequestWidth,
+ unsigned int *MetaRequestHeight,
+ unsigned int *meta_row_width,
+ unsigned int *meta_row_height,
+ int *vm_group_bytes,
+ unsigned int *dpte_group_bytes,
+ unsigned int *PixelPTEReqWidth,
+ unsigned int *PixelPTEReqHeight,
+ unsigned int *PTERequestSize,
+ int *DPDE0BytesFrame,
+ int *MetaPTEBytesFrame);
+static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
+static void CalculateRowBandwidth(
+ bool GPUVMEnable,
+ enum source_format_class SourcePixelFormat,
+ double VRatio,
+ double VRatioChroma,
+ bool DCCEnable,
+ double LineTime,
+ unsigned int MetaRowByteLuma,
+ unsigned int MetaRowByteChroma,
+ unsigned int meta_row_height_luma,
+ unsigned int meta_row_height_chroma,
+ unsigned int PixelPTEBytesPerRowLuma,
+ unsigned int PixelPTEBytesPerRowChroma,
+ unsigned int dpte_row_height_luma,
+ unsigned int dpte_row_height_chroma,
+ double *meta_row_bw,
+ double *dpte_row_bw);
+
+static void CalculateFlipSchedule(
+ struct display_mode_lib *mode_lib,
+ unsigned int k,
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+ double DPTEBytesPerRow);
+static double CalculateWriteBackDelay(
+ enum source_format_class WritebackPixelFormat,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackVTaps,
+ int WritebackDestinationWidth,
+ int WritebackDestinationHeight,
+ int WritebackSourceHeight,
+ unsigned int HTotal);
+
+static void CalculateVupdateAndDynamicMetadataParameters(
+ int MaxInterDCNTileRepeaters,
+ double DPPCLK,
+ double DISPCLK,
+ double DCFClkDeepSleep,
+ double PixelClock,
+ int HTotal,
+ int VBlank,
+ int DynamicMetadataTransmittedBytes,
+ int DynamicMetadataLinesBeforeActiveRequired,
+ int InterlaceEnable,
+ bool ProgressiveToInterlaceUnitInOPP,
+ double *TSetup,
+ double *Tdmbf,
+ double *Tdmec,
+ double *Tdmsks,
+ int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix);
+
+static void CalculateWatermarksAndDRAMSpeedChangeSupport(
+ struct display_mode_lib *mode_lib,
+ unsigned int PrefetchMode,
+ double DCFCLK,
+ double ReturnBW,
+ double UrgentLatency,
+ double ExtraLatency,
+ double SOCCLK,
+ double DCFCLKDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerPlane[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+ bool UnboundedRequestEnabled,
+ unsigned int CompressedBufferSizeInkByte,
+ enum clock_change_support *DRAMClockChangeSupport,
+ double *StutterExitWatermark,
+ double *StutterEnterPlusExitWatermark,
+ double *Z8StutterExitWatermark,
+ double *Z8StutterEnterPlusExitWatermark);
+
+static void CalculateDCFCLKDeepSleep(
+ struct display_mode_lib *mode_lib,
+ unsigned int NumberOfActivePlanes,
+ int BytePerPixelY[],
+ int BytePerPixelC[],
+ double VRatio[],
+ double VRatioChroma[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerPlane[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double DPPCLK[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ int ReturnBusWidth,
+ double *DCFCLKDeepSleep);
+
+static void CalculateUrgentBurstFactor(
+ int swath_width_luma_ub,
+ int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double LineTime,
+ double UrgentLatency,
+ double CursorBufferSize,
+ unsigned int CursorWidth,
+ unsigned int CursorBPP,
+ double VRatio,
+ double VRatioC,
+ double BytePerPixelInDETY,
+ double BytePerPixelInDETC,
+ double DETBufferSizeY,
+ double DETBufferSizeC,
+ double *UrgentBurstFactorCursor,
+ double *UrgentBurstFactorLuma,
+ double *UrgentBurstFactorChroma,
+ bool *NotEnoughUrgentLatencyHiding);
+
+static void UseMinimumDCFCLK(
+ struct display_mode_lib *mode_lib,
+ int MaxPrefetchMode,
+ int ReorderingBytes);
+
+static void CalculatePixelDeliveryTimes(
+ unsigned int NumberOfActivePlanes,
+ double VRatio[],
+ double VRatioChroma[],
+ double VRatioPrefetchY[],
+ double VRatioPrefetchC[],
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ unsigned int DPPPerPlane[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double DPPCLK[],
+ int BytePerPixelC[],
+ enum scan_direction_class SourceScan[],
+ unsigned int NumberOfCursors[],
+ unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
+ unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int BlockHeight256BytesC[],
+ double DisplayPipeLineDeliveryTimeLuma[],
+ double DisplayPipeLineDeliveryTimeChroma[],
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeLuma[],
+ double DisplayPipeRequestDeliveryTimeChroma[],
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ double CursorRequestDeliveryTime[],
+ double CursorRequestDeliveryTimePrefetch[]);
+
+static void CalculateMetaAndPTETimes(
+ int NumberOfActivePlanes,
+ bool GPUVMEnable,
+ int MetaChunkSize,
+ int MinMetaChunkSizeBytes,
+ int HTotal[],
+ double VRatio[],
+ double VRatioChroma[],
+ double DestinationLinesToRequestRowInVBlank[],
+ double DestinationLinesToRequestRowInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ int BytePerPixelY[],
+ int BytePerPixelC[],
+ enum scan_direction_class SourceScan[],
+ int dpte_row_height[],
+ int dpte_row_height_chroma[],
+ int meta_row_width[],
+ int meta_row_width_chroma[],
+ int meta_row_height[],
+ int meta_row_height_chroma[],
+ int meta_req_width[],
+ int meta_req_width_chroma[],
+ int meta_req_height[],
+ int meta_req_height_chroma[],
+ int dpte_group_bytes[],
+ int PTERequestSizeY[],
+ int PTERequestSizeC[],
+ int PixelPTEReqWidthY[],
+ int PixelPTEReqHeightY[],
+ int PixelPTEReqWidthC[],
+ int PixelPTEReqHeightC[],
+ int dpte_row_width_luma_ub[],
+ int dpte_row_width_chroma_ub[],
+ double DST_Y_PER_PTE_ROW_NOM_L[],
+ double DST_Y_PER_PTE_ROW_NOM_C[],
+ double DST_Y_PER_META_ROW_NOM_L[],
+ double DST_Y_PER_META_ROW_NOM_C[],
+ double TimePerMetaChunkNominal[],
+ double TimePerChromaMetaChunkNominal[],
+ double TimePerMetaChunkVBlank[],
+ double TimePerChromaMetaChunkVBlank[],
+ double TimePerMetaChunkFlip[],
+ double TimePerChromaMetaChunkFlip[],
+ double time_per_pte_group_nom_luma[],
+ double time_per_pte_group_vblank_luma[],
+ double time_per_pte_group_flip_luma[],
+ double time_per_pte_group_nom_chroma[],
+ double time_per_pte_group_vblank_chroma[],
+ double time_per_pte_group_flip_chroma[]);
+
+static void CalculateVMGroupAndRequestTimes(
+ unsigned int NumberOfActivePlanes,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int HTotal[],
+ int BytePerPixelC[],
+ double DestinationLinesToRequestVMInVBlank[],
+ double DestinationLinesToRequestVMInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ int dpte_row_width_luma_ub[],
+ int dpte_row_width_chroma_ub[],
+ int vm_group_bytes[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ int meta_pte_bytes_per_frame_ub_l[],
+ int meta_pte_bytes_per_frame_ub_c[],
+ double TimePerVMGroupVBlank[],
+ double TimePerVMGroupFlip[],
+ double TimePerVMRequestVBlank[],
+ double TimePerVMRequestFlip[]);
+
+static void CalculateStutterEfficiency(
+ struct display_mode_lib *mode_lib,
+ int CompressedBufferSizeInkByte,
+ bool UnboundedRequestEnabled,
+ int ConfigReturnBufferSizeInKByte,
+ int MetaFIFOSizeInKEntries,
+ int ZeroSizeBufferEntries,
+ int NumberOfActivePlanes,
+ int ROBBufferSizeInKByte,
+ double TotalDataReadBandwidth,
+ double DCFCLK,
+ double ReturnBW,
+ double COMPBUF_RESERVED_SPACE_64B,
+ double COMPBUF_RESERVED_SPACE_ZS,
+ double SRExitTime,
+ double SRExitZ8Time,
+ bool SynchronizedVBlank,
+ double Z8StutterEnterPlusExitWatermark,
+ double StutterEnterPlusExitWatermark,
+ bool ProgressiveToInterlaceUnitInOPP,
+ bool Interlace[],
+ double MinTTUVBlank[],
+ int DPPPerPlane[],
+ unsigned int DETBufferSizeY[],
+ int BytePerPixelY[],
+ double BytePerPixelDETY[],
+ double SwathWidthY[],
+ int SwathHeightY[],
+ int SwathHeightC[],
+ double NetDCCRateLuma[],
+ double NetDCCRateChroma[],
+ double DCCFractionOfZeroSizeRequestsLuma[],
+ double DCCFractionOfZeroSizeRequestsChroma[],
+ int HTotal[],
+ int VTotal[],
+ double PixelClock[],
+ double VRatio[],
+ enum scan_direction_class SourceScan[],
+ int BlockHeight256BytesY[],
+ int BlockWidth256BytesY[],
+ int BlockHeight256BytesC[],
+ int BlockWidth256BytesC[],
+ int DCCYMaxUncompressedBlock[],
+ int DCCCMaxUncompressedBlock[],
+ int VActive[],
+ bool DCCEnable[],
+ bool WritebackEnable[],
+ double ReadBandwidthPlaneLuma[],
+ double ReadBandwidthPlaneChroma[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+ double *StutterEfficiencyNotIncludingVBlank,
+ double *StutterEfficiency,
+ int *NumberOfStutterBurstsPerFrame,
+ double *Z8StutterEfficiencyNotIncludingVBlank,
+ double *Z8StutterEfficiency,
+ int *Z8NumberOfStutterBurstsPerFrame,
+ double *StutterPeriod);
+
+static void CalculateSwathAndDETConfiguration(
+ bool ForceSingleDPP,
+ int NumberOfActivePlanes,
+ unsigned int DETBufferSizeInKByte,
+ double MaximumSwathWidthLuma[],
+ double MaximumSwathWidthChroma[],
+ enum scan_direction_class SourceScan[],
+ enum source_format_class SourcePixelFormat[],
+ enum dm_swizzle_mode SurfaceTiling[],
+ int ViewportWidth[],
+ int ViewportHeight[],
+ int SurfaceWidthY[],
+ int SurfaceWidthC[],
+ int SurfaceHeightY[],
+ int SurfaceHeightC[],
+ int Read256BytesBlockHeightY[],
+ int Read256BytesBlockHeightC[],
+ int Read256BytesBlockWidthY[],
+ int Read256BytesBlockWidthC[],
+ enum odm_combine_mode ODMCombineEnabled[],
+ int BlendingAndTiming[],
+ int BytePerPixY[],
+ int BytePerPixC[],
+ double BytePerPixDETY[],
+ double BytePerPixDETC[],
+ int HActive[],
+ double HRatio[],
+ double HRatioChroma[],
+ int DPPPerPlane[],
+ int swath_width_luma_ub[],
+ int swath_width_chroma_ub[],
+ double SwathWidth[],
+ double SwathWidthChroma[],
+ int SwathHeightY[],
+ int SwathHeightC[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ bool ViewportSizeSupportPerPlane[],
+ bool *ViewportSizeSupport);
+static void CalculateSwathWidth(
+ bool ForceSingleDPP,
+ int NumberOfActivePlanes,
+ enum source_format_class SourcePixelFormat[],
+ enum scan_direction_class SourceScan[],
+ int ViewportWidth[],
+ int ViewportHeight[],
+ int SurfaceWidthY[],
+ int SurfaceWidthC[],
+ int SurfaceHeightY[],
+ int SurfaceHeightC[],
+ enum odm_combine_mode ODMCombineEnabled[],
+ int BytePerPixY[],
+ int BytePerPixC[],
+ int Read256BytesBlockHeightY[],
+ int Read256BytesBlockHeightC[],
+ int Read256BytesBlockWidthY[],
+ int Read256BytesBlockWidthC[],
+ int BlendingAndTiming[],
+ int HActive[],
+ double HRatio[],
+ int DPPPerPlane[],
+ double SwathWidthSingleDPPY[],
+ double SwathWidthSingleDPPC[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ int MaximumSwathHeightY[],
+ int MaximumSwathHeightC[],
+ int swath_width_luma_ub[],
+ int swath_width_chroma_ub[]);
+
+static double CalculateExtraLatency(
+ int RoundTripPingLatencyCycles,
+ int ReorderingBytes,
+ double DCFCLK,
+ int TotalNumberOfActiveDPP,
+ int PixelChunkSizeInKByte,
+ int TotalNumberOfDCCActiveDPP,
+ int MetaChunkSize,
+ double ReturnBW,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ int NumberOfActivePlanes,
+ int NumberOfDPP[],
+ int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ int HostVMMaxNonCachedPageTableLevels);
+
+static double CalculateExtraLatencyBytes(
+ int ReorderingBytes,
+ int TotalNumberOfActiveDPP,
+ int PixelChunkSizeInKByte,
+ int TotalNumberOfDCCActiveDPP,
+ int MetaChunkSize,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ int NumberOfActivePlanes,
+ int NumberOfDPP[],
+ int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ int HostVMMaxNonCachedPageTableLevels);
+
+static double CalculateUrgentLatency(
+ double UrgentLatencyPixelDataOnly,
+ double UrgentLatencyPixelMixedWithVMData,
+ double UrgentLatencyVMDataOnly,
+ bool DoUrgentLatencyAdjustment,
+ double UrgentLatencyAdjustmentFabricClockComponent,
+ double UrgentLatencyAdjustmentFabricClockReference,
+ double FabricClockSingle);
+
+static void CalculateUnboundedRequestAndCompressedBufferSize(
+ unsigned int DETBufferSizeInKByte,
+ int ConfigReturnBufferSizeInKByte,
+ enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ int TotalActiveDPP,
+ bool NoChromaPlanes,
+ int MaxNumDPP,
+ int CompressedBufferSegmentSizeInkByteFinal,
+ enum output_encoder_class *Output,
+ bool *UnboundedRequestEnabled,
+ int *CompressedBufferSizeInkByte);
+
+static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
+static unsigned int CalculateMaxVStartup(
+ unsigned int VTotal,
+ unsigned int VActive,
+ unsigned int VBlankNom,
+ unsigned int HTotal,
+ double PixelClock,
+ bool ProgressiveTointerlaceUnitinOPP,
+ bool Interlace,
+ unsigned int VBlankNomDefaultUS,
+ double WritebackDelayTime);
+
+void dml314_recalculate(struct display_mode_lib *mode_lib)
+{
+ ModeSupportAndSystemConfiguration(mode_lib);
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
+ DisplayPipeConfiguration(mode_lib);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
+#endif
+ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
+}
+
+static unsigned int dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum output_format_class pixelFormat,
+ enum output_encoder_class Output)
+{
+ // valid bpc = source bits per component in the set of {8, 10, 12}
+ // valid bpp = increments of 1/16 of a bit
+ // min = 6/7/8 in N420/N422/444, respectively
+ // max = such that compression is 1:1
+ //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
+ //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
+ //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
+
+ // fixed value
+ unsigned int rcModelSize = 8192;
+
+ // N422/N420 operate at 2 pixels per clock
+ unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
+
+ if (pixelFormat == dm_420)
+ pixelsPerClock = 2;
+ else if (pixelFormat == dm_444)
+ pixelsPerClock = 1;
+ else if (pixelFormat == dm_n422)
+ pixelsPerClock = 2;
+ // #all other modes operate at 1 pixel per clock
+ else
+ pixelsPerClock = 1;
+
+ //initial transmit delay as per PPS
+ initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
+
+ //compute ssm delay
+ if (bpc == 8)
+ D = 81;
+ else if (bpc == 10)
+ D = 89;
+ else
+ D = 113;
+
+ //divide by pixel per cycle to compute slice width as seen by DSC
+ w = sliceWidth / pixelsPerClock;
+
+ //422 mode has an additional cycle of delay
+ if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
+ s = 0;
+ else
+ s = 1;
+
+ //main calculation for the dscce
+ ix = initalXmitDelay + 45;
+ wx = (w + 2) / 3;
+ P = 3 * wx - w;
+ l0 = ix / w;
+ a = ix + P * l0;
+ ax = (a + 2) / 3 + D + 6 + 1;
+ L = (ax + wx - 1) / wx;
+ if ((ix % w) == 0 && P != 0)
+ lstall = 1;
+ else
+ lstall = 0;
+ Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
+
+ //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
+ pixels = Delay * 3 * pixelsPerClock;
+ return pixels;
+}
+
+static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
+{
+ unsigned int Delay = 0;
+
+ if (pixelFormat == dm_420) {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 0;
+ // dscc - input deserializer
+ Delay = Delay + 3;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 2;
+ // dscc - input cdc fifo
+ Delay = Delay + 12;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 13;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 7;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 3;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // sft
+ Delay = Delay + 1;
+ } else if (pixelFormat == dm_n422) {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 1;
+ // dscc - input deserializer
+ Delay = Delay + 5;
+ // dscc - input cdc fifo
+ Delay = Delay + 25;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 10;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // sft
+ Delay = Delay + 1;
+ } else {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 0;
+ // dscc - input deserializer
+ Delay = Delay + 3;
+ // dscc - input cdc fifo
+ Delay = Delay + 12;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 7;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // sft
+ Delay = Delay + 1;
+ }
+
+ return Delay;
+}
+
+static bool CalculatePrefetchSchedule(
+ struct display_mode_lib *mode_lib,
+ double HostVMInefficiencyFactor,
+ Pipe *myPipe,
+ unsigned int DSCDelay,
+ double DPPCLKDelaySubtotalPlusCNVCFormater,
+ double DPPCLKDelaySCL,
+ double DPPCLKDelaySCLLBOnly,
+ double DPPCLKDelayCNVCCursor,
+ double DISPCLKDelaySubtotal,
+ unsigned int DPP_RECOUT_WIDTH,
+ enum output_format_class OutputFormat,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int VStartup,
+ unsigned int MaxVStartup,
+ unsigned int GPUVMPageTableLevels,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ double HostVMMinPageSize,
+ bool DynamicMetadataEnable,
+ bool DynamicMetadataVMEnabled,
+ int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int DynamicMetadataTransmittedBytes,
+ double UrgentLatency,
+ double UrgentExtraLatency,
+ double TCalc,
+ unsigned int PDEAndMetaPTEBytesFrame,
+ unsigned int MetaRowByte,
+ unsigned int PixelPTEBytesPerRow,
+ double PrefetchSourceLinesY,
+ unsigned int SwathWidthY,
+ double VInitPreFillY,
+ unsigned int MaxNumSwathY,
+ double PrefetchSourceLinesC,
+ unsigned int SwathWidthC,
+ double VInitPreFillC,
+ unsigned int MaxNumSwathC,
+ int swath_width_luma_ub,
+ int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double TWait,
+ double *DSTXAfterScaler,
+ double *DSTYAfterScaler,
+ double *DestinationLinesForPrefetch,
+ double *PrefetchBandwidth,
+ double *DestinationLinesToRequestVMInVBlank,
+ double *DestinationLinesToRequestRowInVBlank,
+ double *VRatioPrefetchY,
+ double *VRatioPrefetchC,
+ double *RequiredPrefetchPixDataBWLuma,
+ double *RequiredPrefetchPixDataBWChroma,
+ bool *NotEnoughTimeForDynamicMetadata,
+ double *Tno_bw,
+ double *prefetch_vmrow_bw,
+ double *Tdmdl_vm,
+ double *Tdmdl,
+ double *TSetup,
+ int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix)
+{
+ bool MyError = false;
+ unsigned int DPPCycles, DISPCLKCycles;
+ double DSTTotalPixelsAfterScaler;
+ double LineTime;
+ double dst_y_prefetch_equ;
+ double Tsw_oto;
+ double prefetch_bw_oto;
+ double prefetch_bw_pr;
+ double Tvm_oto;
+ double Tr0_oto;
+ double Tvm_oto_lines;
+ double Tr0_oto_lines;
+ double dst_y_prefetch_oto;
+ double TimeForFetchingMetaPTE = 0;
+ double TimeForFetchingRowInVBlank = 0;
+ double LinesToRequestPrefetchPixelData = 0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double trip_to_mem;
+ double Tvm_trips;
+ double Tr0_trips;
+ double Tvm_trips_rounded;
+ double Tr0_trips_rounded;
+ double Lsw_oto;
+ double Tpre_rounded;
+ double prefetch_bw_equ;
+ double Tvm_equ;
+ double Tr0_equ;
+ double Tdmbf;
+ double Tdmec;
+ double Tdmsks;
+ double prefetch_sw_bytes;
+ double bytes_pp;
+ double dep_bytes;
+ int max_vratio_pre = 4;
+ double min_Lsw;
+ double Tsw_est1 = 0;
+ double Tsw_est3 = 0;
+ double max_Tsw = 0;
+
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ } else {
+ HostVMDynamicLevelsTrips = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
+#endif
+ CalculateVupdateAndDynamicMetadataParameters(
+ MaxInterDCNTileRepeaters,
+ myPipe->DPPCLK,
+ myPipe->DISPCLK,
+ myPipe->DCFCLKDeepSleep,
+ myPipe->PixelClock,
+ myPipe->HTotal,
+ myPipe->VBlank,
+ DynamicMetadataTransmittedBytes,
+ DynamicMetadataLinesBeforeActiveRequired,
+ myPipe->InterlaceEnable,
+ myPipe->ProgressiveToInterlaceUnitInOPP,
+ TSetup,
+ &Tdmbf,
+ &Tdmec,
+ &Tdmsks,
+ VUpdateOffsetPix,
+ VUpdateWidthPix,
+ VReadyOffsetPix);
+
+ LineTime = myPipe->HTotal / myPipe->PixelClock;
+ trip_to_mem = UrgentLatency;
+ Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
+#else
+ if (DynamicMetadataVMEnabled == true) {
+#endif
+ *Tdmdl = TWait + Tvm_trips + trip_to_mem;
+ } else {
+ *Tdmdl = TWait + UrgentExtraLatency;
+ }
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ if (DynamicMetadataEnable == false) {
+ *Tdmdl = 0.0;
+ }
+#endif
+
+ if (DynamicMetadataEnable == true) {
+ if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
+ *NotEnoughTimeForDynamicMetadata = true;
+ dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
+ dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
+ } else {
+ *NotEnoughTimeForDynamicMetadata = false;
+ }
+ } else {
+ *NotEnoughTimeForDynamicMetadata = false;
+ }
+
+ *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
+
+ if (myPipe->ScalerEnabled)
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
+ else
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
+
+ DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
+
+ DISPCLKCycles = DISPCLKDelaySubtotal;
+
+ if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
+ return true;
+
+ *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
+ dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
+ dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
+ dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
+ dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
+ dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
+ dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
+ dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
+#endif
+
+ *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
+
+ if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
+ *DSTYAfterScaler = 1;
+ else
+ *DSTYAfterScaler = 0;
+
+ DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
+ *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
+ *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
+#endif
+
+ MyError = false;
+
+ Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
+ Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ if (!myPipe->DCCEnable) {
+ Tr0_trips = 0.0;
+ Tr0_trips_rounded = 0.0;
+ }
+#endif
+
+ if (!GPUVMEnable) {
+ Tvm_trips = 0.0;
+ Tvm_trips_rounded = 0.0;
+ }
+
+ if (GPUVMEnable) {
+ if (GPUVMPageTableLevels >= 3) {
+ *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
+ } else {
+ *Tno_bw = 0;
+ }
+ } else if (!myPipe->DCCEnable) {
+ *Tno_bw = LineTime;
+ } else {
+ *Tno_bw = LineTime / 4;
+ }
+
+ if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
+ else
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
+ /*rev 99*/
+ prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
+ max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
+ prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
+ prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
+ prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
+
+ min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
+ Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
+ Tsw_oto = Lsw_oto * LineTime;
+
+ prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML: HTotal: %d\n", myPipe->HTotal);
+ dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
+ dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
+ dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
+ dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
+ dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
+#endif
+
+ if (GPUVMEnable == true)
+ Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
+ else
+ Tvm_oto = LineTime / 4.0;
+
+ if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
+ LineTime - Tvm_oto,
+ LineTime / 4);
+ } else {
+ Tr0_oto = (LineTime - Tvm_oto) / 2.0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
+ dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+ dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
+ dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
+#endif
+
+ Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
+ Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
+ dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
+ dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
+ dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
+ Tpre_rounded = dst_y_prefetch_equ * LineTime;
+
+ dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
+
+ if (prefetch_sw_bytes < dep_bytes)
+ prefetch_sw_bytes = 2 * dep_bytes;
+
+ dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
+ dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
+ dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
+ dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
+ dml_print("DML: LineTime: %f\n", LineTime);
+ dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
+
+ dml_print("DML: LineTime: %f\n", LineTime);
+ dml_print("DML: VStartup: %d\n", VStartup);
+ dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
+ dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
+ dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
+ dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
+ dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
+ dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
+ dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
+ dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm);
+ dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl);
+ dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler);
+ dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler);
+
+ *PrefetchBandwidth = 0;
+ *DestinationLinesToRequestVMInVBlank = 0;
+ *DestinationLinesToRequestRowInVBlank = 0;
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ if (dst_y_prefetch_equ > 1) {
+ double PrefetchBandwidth1;
+ double PrefetchBandwidth2;
+ double PrefetchBandwidth3;
+ double PrefetchBandwidth4;
+
+ if (Tpre_rounded - *Tno_bw > 0) {
+ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+ + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
+ Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
+ } else {
+ PrefetchBandwidth1 = 0;
+ }
+
+ if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
+ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
+ }
+
+ if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
+ PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
+ else
+ PrefetchBandwidth2 = 0;
+
+ if (Tpre_rounded - Tvm_trips_rounded > 0) {
+ PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+ + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
+ Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
+ } else {
+ PrefetchBandwidth3 = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
+ dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
+ dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
+#endif
+ if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
+ PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
+ }
+
+ if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
+ PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
+ else
+ PrefetchBandwidth4 = 0;
+
+ {
+ bool Case1OK;
+ bool Case2OK;
+ bool Case3OK;
+
+ if (PrefetchBandwidth1 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
+ Case1OK = true;
+ } else {
+ Case1OK = false;
+ }
+ } else {
+ Case1OK = false;
+ }
+
+ if (PrefetchBandwidth2 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
+ Case2OK = true;
+ } else {
+ Case2OK = false;
+ }
+ } else {
+ Case2OK = false;
+ }
+
+ if (PrefetchBandwidth3 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
+ Case3OK = true;
+ } else {
+ Case3OK = false;
+ }
+ } else {
+ Case3OK = false;
+ }
+
+ if (Case1OK) {
+ prefetch_bw_equ = PrefetchBandwidth1;
+ } else if (Case2OK) {
+ prefetch_bw_equ = PrefetchBandwidth2;
+ } else if (Case3OK) {
+ prefetch_bw_equ = PrefetchBandwidth3;
+ } else {
+ prefetch_bw_equ = PrefetchBandwidth4;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
+ dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
+ dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
+ dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
+#endif
+
+ if (prefetch_bw_equ > 0) {
+ if (GPUVMEnable == true) {
+ Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
+ } else {
+ Tvm_equ = LineTime / 4;
+ }
+
+ if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ Tr0_equ = dml_max4(
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
+ Tr0_trips,
+ (LineTime - Tvm_equ) / 2,
+ LineTime / 4);
+ } else {
+ Tr0_equ = (LineTime - Tvm_equ) / 2;
+ }
+ } else {
+ Tvm_equ = 0;
+ Tr0_equ = 0;
+ dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
+ }
+ }
+
+ if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
+ *DestinationLinesForPrefetch = dst_y_prefetch_oto;
+ TimeForFetchingMetaPTE = Tvm_oto;
+ TimeForFetchingRowInVBlank = Tr0_oto;
+ *PrefetchBandwidth = prefetch_bw_oto;
+ } else {
+ *DestinationLinesForPrefetch = dst_y_prefetch_equ;
+ TimeForFetchingMetaPTE = Tvm_equ;
+ TimeForFetchingRowInVBlank = Tr0_equ;
+ *PrefetchBandwidth = prefetch_bw_equ;
+ }
+
+ *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
+
+ *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
+ // See note above dated 5/30/2018
+ // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
+ - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
+#else
+ LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
+#endif
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
+#endif
+
+ if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
+
+ *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+ dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
+ dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
+#endif
+ if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
+ *VRatioPrefetchY = dml_max(
+ (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
+ (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
+ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+ } else {
+ MyError = true;
+ dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+ *VRatioPrefetchY = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
+#endif
+ }
+
+ *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+ dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
+ dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
+#endif
+ if ((SwathHeightC > 4) || VInitPreFillC > 3) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
+ *VRatioPrefetchC = dml_max(
+ *VRatioPrefetchC,
+ (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
+ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+ } else {
+ MyError = true;
+ dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+ *VRatioPrefetchC = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
+ dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
+#endif
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
+ dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+#endif
+
+ *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
+#endif
+
+ *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
+ / LineTime;
+ } else {
+ MyError = true;
+ dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+ dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ *RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ dml_print(
+ "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
+ (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
+ dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
+ dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
+ dml_print(
+ "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
+ (double) LinesToRequestPrefetchPixelData * LineTime);
+ dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
+ dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
+ dml_print(
+ "DML: Tslack(pre): %fus - time left over in schedule\n",
+ VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
+ - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
+ dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
+
+ } else {
+ MyError = true;
+ dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+ }
+
+ {
+ double prefetch_vm_bw;
+ double prefetch_row_bw;
+
+ if (PDEAndMetaPTEBytesFrame == 0) {
+ prefetch_vm_bw = 0;
+ } else if (*DestinationLinesToRequestVMInVBlank > 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+#endif
+ prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
+#endif
+ } else {
+ prefetch_vm_bw = 0;
+ MyError = true;
+ dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+ }
+
+ if (MetaRowByte + PixelPTEBytesPerRow == 0) {
+ prefetch_row_bw = 0;
+ } else if (*DestinationLinesToRequestRowInVBlank > 0) {
+ prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
+#endif
+ } else {
+ prefetch_row_bw = 0;
+ MyError = true;
+ dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+ }
+
+ *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
+ }
+
+ if (MyError) {
+ *PrefetchBandwidth = 0;
+ TimeForFetchingMetaPTE = 0;
+ TimeForFetchingRowInVBlank = 0;
+ *DestinationLinesToRequestVMInVBlank = 0;
+ *DestinationLinesToRequestRowInVBlank = 0;
+ *DestinationLinesForPrefetch = 0;
+ LinesToRequestPrefetchPixelData = 0;
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ *RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ return MyError;
+}
+
+static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
+{
+ return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1);
+}
+
+static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
+{
+ return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1);
+}
+
+static void CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ double DETBufferSize,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dm_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum scan_direction_class ScanOrientation,
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma)
+{
+ int yuv420;
+ int horz_div_l;
+ int horz_div_c;
+ int vert_div_l;
+ int vert_div_c;
+
+ int swath_buf_size;
+ double detile_buf_vp_horz_limit;
+ double detile_buf_vp_vert_limit;
+
+ int MAS_vp_horz_limit;
+ int MAS_vp_vert_limit;
+ int max_vp_horz_width;
+ int max_vp_vert_height;
+ int eff_surf_width_l;
+ int eff_surf_width_c;
+ int eff_surf_height_l;
+ int eff_surf_height_c;
+
+ int full_swath_bytes_horz_wc_l;
+ int full_swath_bytes_horz_wc_c;
+ int full_swath_bytes_vert_wc_l;
+ int full_swath_bytes_vert_wc_c;
+ int req128_horz_wc_l;
+ int req128_horz_wc_c;
+ int req128_vert_wc_l;
+ int req128_vert_wc_c;
+ int segment_order_horz_contiguous_luma;
+ int segment_order_horz_contiguous_chroma;
+ int segment_order_vert_contiguous_luma;
+ int segment_order_vert_contiguous_chroma;
+
+ typedef enum {
+ REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
+ } RequestType;
+ RequestType RequestLuma;
+ RequestType RequestChroma;
+
+ yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
+ horz_div_l = 1;
+ horz_div_c = 1;
+ vert_div_l = 1;
+ vert_div_c = 1;
+
+ if (BytePerPixelY == 1)
+ vert_div_l = 0;
+ if (BytePerPixelC == 1)
+ vert_div_c = 0;
+ if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
+ horz_div_l = 0;
+ if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
+ horz_div_c = 0;
+
+ if (BytePerPixelC == 0) {
+ swath_buf_size = DETBufferSize / 2 - 2 * 256;
+ detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
+ detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
+ } else {
+ swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
+ detile_buf_vp_horz_limit = (double) swath_buf_size
+ / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
+ + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
+ detile_buf_vp_vert_limit = (double) swath_buf_size
+ / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
+ }
+
+ if (SourcePixelFormat == dm_420_10) {
+ detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
+ detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
+ }
+
+ detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
+ detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
+
+ MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
+ MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
+ max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
+ max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
+ eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
+ eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
+ eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
+ eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
+
+ full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
+ full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
+ if (BytePerPixelC > 0) {
+ full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
+ full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
+ } else {
+ full_swath_bytes_horz_wc_c = 0;
+ full_swath_bytes_vert_wc_c = 0;
+ }
+
+ if (SourcePixelFormat == dm_420_10) {
+ full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
+ full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
+ full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
+ full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
+ }
+
+ if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 0;
+ } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 1;
+ } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 0;
+ } else {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 1;
+ }
+
+ if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 0;
+ } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 1;
+ } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 0;
+ } else {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 1;
+ }
+
+ if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
+ segment_order_horz_contiguous_luma = 0;
+ } else {
+ segment_order_horz_contiguous_luma = 1;
+ }
+ if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
+ || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
+ segment_order_vert_contiguous_luma = 0;
+ } else {
+ segment_order_vert_contiguous_luma = 1;
+ }
+ if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
+ segment_order_horz_contiguous_chroma = 0;
+ } else {
+ segment_order_horz_contiguous_chroma = 1;
+ }
+ if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
+ || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
+ segment_order_vert_contiguous_chroma = 0;
+ } else {
+ segment_order_vert_contiguous_chroma = 1;
+ }
+
+ if (DCCProgrammingAssumesScanDirectionUnknown == true) {
+ if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
+ RequestLuma = REQ_256Bytes;
+ } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
+ RequestLuma = REQ_128BytesNonContiguous;
+ } else {
+ RequestLuma = REQ_128BytesContiguous;
+ }
+ if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
+ RequestChroma = REQ_256Bytes;
+ } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
+ RequestChroma = REQ_128BytesNonContiguous;
+ } else {
+ RequestChroma = REQ_128BytesContiguous;
+ }
+ } else if (ScanOrientation != dm_vert) {
+ if (req128_horz_wc_l == 0) {
+ RequestLuma = REQ_256Bytes;
+ } else if (segment_order_horz_contiguous_luma == 0) {
+ RequestLuma = REQ_128BytesNonContiguous;
+ } else {
+ RequestLuma = REQ_128BytesContiguous;
+ }
+ if (req128_horz_wc_c == 0) {
+ RequestChroma = REQ_256Bytes;
+ } else if (segment_order_horz_contiguous_chroma == 0) {
+ RequestChroma = REQ_128BytesNonContiguous;
+ } else {
+ RequestChroma = REQ_128BytesContiguous;
+ }
+ } else {
+ if (req128_vert_wc_l == 0) {
+ RequestLuma = REQ_256Bytes;
+ } else if (segment_order_vert_contiguous_luma == 0) {
+ RequestLuma = REQ_128BytesNonContiguous;
+ } else {
+ RequestLuma = REQ_128BytesContiguous;
+ }
+ if (req128_vert_wc_c == 0) {
+ RequestChroma = REQ_256Bytes;
+ } else if (segment_order_vert_contiguous_chroma == 0) {
+ RequestChroma = REQ_128BytesNonContiguous;
+ } else {
+ RequestChroma = REQ_128BytesContiguous;
+ }
+ }
+
+ if (RequestLuma == REQ_256Bytes) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 256;
+ *IndependentBlockLuma = 0;
+ } else if (RequestLuma == REQ_128BytesContiguous) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 128;
+ *IndependentBlockLuma = 128;
+ } else {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 64;
+ *IndependentBlockLuma = 64;
+ }
+
+ if (RequestChroma == REQ_256Bytes) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 256;
+ *IndependentBlockChroma = 0;
+ } else if (RequestChroma == REQ_128BytesContiguous) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 128;
+ *IndependentBlockChroma = 128;
+ } else {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 64;
+ *IndependentBlockChroma = 64;
+ }
+
+ if (DCCEnabled != true || BytePerPixelC == 0) {
+ *MaxUncompressedBlockChroma = 0;
+ *MaxCompressedBlockChroma = 0;
+ *IndependentBlockChroma = 0;
+ }
+
+ if (DCCEnabled != true) {
+ *MaxUncompressedBlockLuma = 0;
+ *MaxCompressedBlockLuma = 0;
+ *IndependentBlockLuma = 0;
+ }
+}
+
+static double CalculatePrefetchSourceLines(
+ struct display_mode_lib *mode_lib,
+ double VRatio,
+ double vtaps,
+ bool Interlace,
+ bool ProgressiveToInterlaceUnitInOPP,
+ unsigned int SwathHeight,
+ unsigned int ViewportYStart,
+ double *VInitPreFill,
+ unsigned int *MaxNumSwath)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ unsigned int MaxPartialSwath;
+
+ if (ProgressiveToInterlaceUnitInOPP)
+ *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
+ else
+ *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
+
+ if (!v->IgnoreViewportPositioning) {
+
+ *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
+
+ if (*VInitPreFill > 1.0)
+ MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
+ else
+ MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
+ MaxPartialSwath = dml_max(1U, MaxPartialSwath);
+
+ } else {
+
+ if (ViewportYStart != 0)
+ dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
+
+ *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
+
+ if (*VInitPreFill > 1.0)
+ MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
+ else
+ MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
+ dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
+ dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
+ dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
+ dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
+ dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
+ dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
+ dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
+ dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
+#endif
+ return *MaxNumSwath * SwathHeight + MaxPartialSwath;
+}
+
+static unsigned int CalculateVMAndRowBytes(
+ struct display_mode_lib *mode_lib,
+ bool DCCEnable,
+ unsigned int BlockHeight256Bytes,
+ unsigned int BlockWidth256Bytes,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceTiling,
+ unsigned int BytePerPixel,
+ enum scan_direction_class ScanDirection,
+ unsigned int SwathWidth,
+ unsigned int ViewportHeight,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMinPageSize,
+ unsigned int HostVMMinPageSize,
+ unsigned int PTEBufferSizeInRequests,
+ unsigned int Pitch,
+ unsigned int DCCMetaPitch,
+ unsigned int *MacroTileWidth,
+ unsigned int *MetaRowByte,
+ unsigned int *PixelPTEBytesPerRow,
+ bool *PTEBufferSizeNotExceeded,
+ int *dpte_row_width_ub,
+ unsigned int *dpte_row_height,
+ unsigned int *MetaRequestWidth,
+ unsigned int *MetaRequestHeight,
+ unsigned int *meta_row_width,
+ unsigned int *meta_row_height,
+ int *vm_group_bytes,
+ unsigned int *dpte_group_bytes,
+ unsigned int *PixelPTEReqWidth,
+ unsigned int *PixelPTEReqHeight,
+ unsigned int *PTERequestSize,
+ int *DPDE0BytesFrame,
+ int *MetaPTEBytesFrame)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ unsigned int MPDEBytesFrame;
+ unsigned int DCCMetaSurfaceBytes;
+ unsigned int MacroTileSizeBytes;
+ unsigned int MacroTileHeight;
+ unsigned int ExtraDPDEBytesFrame;
+ unsigned int PDEAndMetaPTEBytesFrame;
+ unsigned int PixelPTEReqHeightPTEs = 0;
+ unsigned int HostVMDynamicLevels = 0;
+ double FractionOfPTEReturnDrop;
+
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ if (HostVMMinPageSize < 2048) {
+ HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
+ } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+ } else {
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+ }
+ }
+
+ *MetaRequestHeight = 8 * BlockHeight256Bytes;
+ *MetaRequestWidth = 8 * BlockWidth256Bytes;
+ if (ScanDirection != dm_vert) {
+ *meta_row_height = *MetaRequestHeight;
+ *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
+ *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
+ } else {
+ *meta_row_height = *MetaRequestWidth;
+ *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
+ *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
+ }
+ DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
+ if (GPUVMEnable == true) {
+ *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
+ MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
+ } else {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ }
+
+ if (DCCEnable != true) {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ *MetaRowByte = 0;
+ }
+
+ if (SurfaceTiling == dm_sw_linear) {
+ MacroTileSizeBytes = 256;
+ MacroTileHeight = BlockHeight256Bytes;
+ } else {
+ MacroTileSizeBytes = 65536;
+ MacroTileHeight = 16 * BlockHeight256Bytes;
+ }
+ *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
+
+ if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
+ if (ScanDirection != dm_vert) {
+ *DPDE0BytesFrame = 64
+ * (dml_ceil(
+ ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
+ / (8 * 2097152),
+ 1) + 1);
+ } else {
+ *DPDE0BytesFrame = 64
+ * (dml_ceil(
+ ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
+ / (8 * 2097152),
+ 1) + 1);
+ }
+ ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
+ } else {
+ *DPDE0BytesFrame = 0;
+ ExtraDPDEBytesFrame = 0;
+ }
+
+ PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
+ dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
+ dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
+ dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+#endif
+
+ if (HostVMEnable == true) {
+ PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+#endif
+
+ if (SurfaceTiling == dm_sw_linear) {
+ PixelPTEReqHeightPTEs = 1;
+ *PixelPTEReqHeight = 1;
+ *PixelPTEReqWidth = 32768.0 / BytePerPixel;
+ *PTERequestSize = 64;
+ FractionOfPTEReturnDrop = 0;
+ } else if (MacroTileSizeBytes == 4096) {
+ PixelPTEReqHeightPTEs = 1;
+ *PixelPTEReqHeight = MacroTileHeight;
+ *PixelPTEReqWidth = 8 * *MacroTileWidth;
+ *PTERequestSize = 64;
+ if (ScanDirection != dm_vert)
+ FractionOfPTEReturnDrop = 0;
+ else
+ FractionOfPTEReturnDrop = 7 / 8;
+ } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
+ PixelPTEReqHeightPTEs = 16;
+ *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
+ *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
+ *PTERequestSize = 128;
+ FractionOfPTEReturnDrop = 0;
+ } else {
+ PixelPTEReqHeightPTEs = 1;
+ *PixelPTEReqHeight = MacroTileHeight;
+ *PixelPTEReqWidth = 8 * *MacroTileWidth;
+ *PTERequestSize = 64;
+ FractionOfPTEReturnDrop = 0;
+ }
+
+ if (SurfaceTiling == dm_sw_linear) {
+ *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+ *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+ } else if (ScanDirection != dm_vert) {
+ *dpte_row_height = *PixelPTEReqHeight;
+ *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+ } else {
+ *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
+ *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
+ }
+
+ if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
+ *PTEBufferSizeNotExceeded = true;
+ } else {
+ *PTEBufferSizeNotExceeded = false;
+ }
+
+ if (GPUVMEnable != true) {
+ *PixelPTEBytesPerRow = 0;
+ *PTEBufferSizeNotExceeded = true;
+ }
+
+ dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
+
+ if (HostVMEnable == true) {
+ *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
+ }
+
+ if (HostVMEnable == true) {
+ *vm_group_bytes = 512;
+ *dpte_group_bytes = 512;
+ } else if (GPUVMEnable == true) {
+ *vm_group_bytes = 2048;
+ if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
+ *dpte_group_bytes = 512;
+ } else {
+ *dpte_group_bytes = 2048;
+ }
+ } else {
+ *vm_group_bytes = 0;
+ *dpte_group_bytes = 0;
+ }
+ return PDEAndMetaPTEBytesFrame;
+}
+
+static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ unsigned int j, k;
+ double HostVMInefficiencyFactor = 1.0;
+ bool NoChromaPlanes = true;
+ int ReorderBytes;
+ double VMDataOnlyReturnBW;
+ double MaxTotalRDBandwidth = 0;
+ int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
+
+ v->WritebackDISPCLK = 0.0;
+ v->DISPCLKWithRamping = 0;
+ v->DISPCLKWithoutRamping = 0;
+ v->GlobalDPPCLK = 0.0;
+ /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
+ {
+ double IdealFabricAndSDPPortBandwidthPerState = dml_min(
+ v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
+ v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
+ double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
+
+ if (v->HostVMEnable != true) {
+ v->ReturnBW = dml_min(
+ IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
+ } else {
+ v->ReturnBW = dml_min(
+ IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
+ }
+ }
+ /* End DAL custom code */
+
+ // DISPCLK and DPPCLK Calculation
+ //
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k]) {
+ v->WritebackDISPCLK = dml_max(
+ v->WritebackDISPCLK,
+ dml314_CalculateWriteBackDISPCLK(
+ v->WritebackPixelFormat[k],
+ v->PixelClock[k],
+ v->WritebackHRatio[k],
+ v->WritebackVRatio[k],
+ v->WritebackHTaps[k],
+ v->WritebackVTaps[k],
+ v->WritebackSourceWidth[k],
+ v->WritebackDestinationWidth[k],
+ v->HTotal[k],
+ v->WritebackLineBufferSize));
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->HRatio[k] > 1) {
+ v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
+ v->MaxDCHUBToPSCLThroughput,
+ v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
+ } else {
+ v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
+ }
+
+ v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
+ * dml_max(
+ v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
+ dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
+
+ if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
+ v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
+ }
+
+ if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
+ && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
+ v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
+ v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
+ } else {
+ if (v->HRatioChroma[k] > 1) {
+ v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
+ v->MaxDCHUBToPSCLThroughput,
+ v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
+ } else {
+ v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
+ }
+ v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
+ * dml_max3(
+ v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
+ v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
+ 1.0);
+
+ if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
+ v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
+ }
+
+ v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] != k)
+ continue;
+ if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
+ v->DISPCLKWithRamping = dml_max(
+ v->DISPCLKWithRamping,
+ v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
+ * (1 + v->DISPCLKRampingMargin / 100));
+ v->DISPCLKWithoutRamping = dml_max(
+ v->DISPCLKWithoutRamping,
+ v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
+ } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
+ v->DISPCLKWithRamping = dml_max(
+ v->DISPCLKWithRamping,
+ v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
+ * (1 + v->DISPCLKRampingMargin / 100));
+ v->DISPCLKWithoutRamping = dml_max(
+ v->DISPCLKWithoutRamping,
+ v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
+ } else {
+ v->DISPCLKWithRamping = dml_max(
+ v->DISPCLKWithRamping,
+ v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
+ v->DISPCLKWithoutRamping = dml_max(
+ v->DISPCLKWithoutRamping,
+ v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
+ }
+ }
+
+ v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
+ v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
+
+ ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
+ v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
+ v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
+ v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
+ v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
+ v->DISPCLKDPPCLKVCOSpeed);
+ if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
+ v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
+ } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
+ v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
+ } else {
+ v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
+ }
+ v->DISPCLK = v->DISPCLK_calculated;
+ DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
+ }
+ v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
+ DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->DPPCLK[k] = v->DPPCLK_calculated[k];
+ }
+
+ // Urgent and B P-State/DRAM Clock Change Watermark
+ DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
+ DTRACE(" return_bus_bw = %f", v->ReturnBW);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ CalculateBytePerPixelAnd256BBlockSizes(
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ &v->BytePerPixelY[k],
+ &v->BytePerPixelC[k],
+ &v->BytePerPixelDETY[k],
+ &v->BytePerPixelDETC[k],
+ &v->BlockHeight256BytesY[k],
+ &v->BlockHeight256BytesC[k],
+ &v->BlockWidth256BytesY[k],
+ &v->BlockWidth256BytesC[k]);
+ }
+
+ CalculateSwathWidth(
+ false,
+ v->NumberOfActivePlanes,
+ v->SourcePixelFormat,
+ v->SourceScan,
+ v->ViewportWidth,
+ v->ViewportHeight,
+ v->SurfaceWidthY,
+ v->SurfaceWidthC,
+ v->SurfaceHeightY,
+ v->SurfaceHeightC,
+ v->ODMCombineEnabled,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BlockHeight256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesY,
+ v->BlockWidth256BytesC,
+ v->BlendingAndTiming,
+ v->HActive,
+ v->HRatio,
+ v->DPPPerPlane,
+ v->SwathWidthSingleDPPY,
+ v->SwathWidthSingleDPPC,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ v->dummyinteger3,
+ v->dummyinteger4,
+ v->swath_width_luma_ub,
+ v->swath_width_chroma_ub);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
+ * v->VRatio[k];
+ v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
+ * v->VRatioChroma[k];
+ DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
+ }
+
+ // DCFCLK Deep Sleep
+ CalculateDCFCLKDeepSleep(
+ mode_lib,
+ v->NumberOfActivePlanes,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->VRatio,
+ v->VRatioChroma,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ v->DPPPerPlane,
+ v->HRatio,
+ v->HRatioChroma,
+ v->PixelClock,
+ v->PSCL_THROUGHPUT_LUMA,
+ v->PSCL_THROUGHPUT_CHROMA,
+ v->DPPCLK,
+ v->ReadBandwidthPlaneLuma,
+ v->ReadBandwidthPlaneChroma,
+ v->ReturnBusWidth,
+ &v->DCFCLKDeepSleep);
+
+ // DSCCLK
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
+ v->DSCCLK_calculated[k] = 0.0;
+ } else {
+ if (v->OutputFormat[k] == dm_420)
+ v->DSCFormatFactor = 2;
+ else if (v->OutputFormat[k] == dm_444)
+ v->DSCFormatFactor = 1;
+ else if (v->OutputFormat[k] == dm_n422)
+ v->DSCFormatFactor = 2;
+ else
+ v->DSCFormatFactor = 1;
+ if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
+ v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
+ / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
+ v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
+ / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ else
+ v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
+ / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ }
+ }
+
+ // DSC Delay
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ double BPP = v->OutputBpp[k];
+
+ if (v->DSCEnabled[k] && BPP != 0) {
+ if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
+ v->DSCDelay[k] = dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ BPP,
+ dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
+ v->NumberOfDSCSlices[k],
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
+ } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
+ v->DSCDelay[k] = 2
+ * (dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ BPP,
+ dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
+ v->NumberOfDSCSlices[k] / 2.0,
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
+ } else {
+ v->DSCDelay[k] = 4
+ * (dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ BPP,
+ dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
+ v->NumberOfDSCSlices[k] / 4.0,
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
+ }
+ v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
+ } else {
+ v->DSCDelay[k] = 0;
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k)
+ for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
+ if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
+ v->DSCDelay[k] = v->DSCDelay[j];
+
+ // Prefetch
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ unsigned int PDEAndMetaPTEBytesFrameY;
+ unsigned int PixelPTEBytesPerRowY;
+ unsigned int MetaRowByteY;
+ unsigned int MetaRowByteC;
+ unsigned int PDEAndMetaPTEBytesFrameC;
+ unsigned int PixelPTEBytesPerRowC;
+ bool PTEBufferSizeNotExceededY;
+ bool PTEBufferSizeNotExceededC;
+
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
+ || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+ if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
+ v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
+ v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
+ } else {
+ v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
+ v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
+ }
+
+ PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
+ mode_lib,
+ v->DCCEnable[k],
+ v->BlockHeight256BytesC[k],
+ v->BlockWidth256BytesC[k],
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelC[k],
+ v->SourceScan[k],
+ v->SwathWidthC[k],
+ v->ViewportHeightChroma[k],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMMinPageSize,
+ v->HostVMMinPageSize,
+ v->PTEBufferSizeInRequestsForChroma,
+ v->PitchC[k],
+ v->DCCMetaPitchC[k],
+ &v->MacroTileWidthC[k],
+ &MetaRowByteC,
+ &PixelPTEBytesPerRowC,
+ &PTEBufferSizeNotExceededC,
+ &v->dpte_row_width_chroma_ub[k],
+ &v->dpte_row_height_chroma[k],
+ &v->meta_req_width_chroma[k],
+ &v->meta_req_height_chroma[k],
+ &v->meta_row_width_chroma[k],
+ &v->meta_row_height_chroma[k],
+ &v->dummyinteger1,
+ &v->dummyinteger2,
+ &v->PixelPTEReqWidthC[k],
+ &v->PixelPTEReqHeightC[k],
+ &v->PTERequestSizeC[k],
+ &v->dpde0_bytes_per_frame_ub_c[k],
+ &v->meta_pte_bytes_per_frame_ub_c[k]);
+
+ v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
+ mode_lib,
+ v->VRatioChroma[k],
+ v->VTAPsChroma[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->SwathHeightC[k],
+ v->ViewportYStartC[k],
+ &v->VInitPreFillC[k],
+ &v->MaxNumSwathC[k]);
+ } else {
+ v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
+ v->PTEBufferSizeInRequestsForChroma = 0;
+ PixelPTEBytesPerRowC = 0;
+ PDEAndMetaPTEBytesFrameC = 0;
+ MetaRowByteC = 0;
+ v->MaxNumSwathC[k] = 0;
+ v->PrefetchSourceLinesC[k] = 0;
+ }
+
+ PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
+ mode_lib,
+ v->DCCEnable[k],
+ v->BlockHeight256BytesY[k],
+ v->BlockWidth256BytesY[k],
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelY[k],
+ v->SourceScan[k],
+ v->SwathWidthY[k],
+ v->ViewportHeight[k],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMMinPageSize,
+ v->HostVMMinPageSize,
+ v->PTEBufferSizeInRequestsForLuma,
+ v->PitchY[k],
+ v->DCCMetaPitchY[k],
+ &v->MacroTileWidthY[k],
+ &MetaRowByteY,
+ &PixelPTEBytesPerRowY,
+ &PTEBufferSizeNotExceededY,
+ &v->dpte_row_width_luma_ub[k],
+ &v->dpte_row_height[k],
+ &v->meta_req_width[k],
+ &v->meta_req_height[k],
+ &v->meta_row_width[k],
+ &v->meta_row_height[k],
+ &v->vm_group_bytes[k],
+ &v->dpte_group_bytes[k],
+ &v->PixelPTEReqWidthY[k],
+ &v->PixelPTEReqHeightY[k],
+ &v->PTERequestSizeY[k],
+ &v->dpde0_bytes_per_frame_ub_l[k],
+ &v->meta_pte_bytes_per_frame_ub_l[k]);
+
+ v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
+ mode_lib,
+ v->VRatio[k],
+ v->vtaps[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->SwathHeightY[k],
+ v->ViewportYStartY[k],
+ &v->VInitPreFillY[k],
+ &v->MaxNumSwathY[k]);
+ v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
+ v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
+ v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
+
+ CalculateRowBandwidth(
+ v->GPUVMEnable,
+ v->SourcePixelFormat[k],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->DCCEnable[k],
+ v->HTotal[k] / v->PixelClock[k],
+ MetaRowByteY,
+ MetaRowByteC,
+ v->meta_row_height[k],
+ v->meta_row_height_chroma[k],
+ PixelPTEBytesPerRowY,
+ PixelPTEBytesPerRowC,
+ v->dpte_row_height[k],
+ v->dpte_row_height_chroma[k],
+ &v->meta_row_bw[k],
+ &v->dpte_row_bw[k]);
+ }
+
+ v->TotalDCCActiveDPP = 0;
+ v->TotalActiveDPP = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
+ if (v->DCCEnable[k])
+ v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
+ || v->SourcePixelFormat[k] == dm_rgbe_alpha)
+ NoChromaPlanes = false;
+ }
+
+ ReorderBytes = v->NumberOfChannels
+ * dml_max3(
+ v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
+ v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
+ v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
+
+ VMDataOnlyReturnBW = dml_min(
+ dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
+ * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
+ * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
+ dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
+ dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
+ dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
+ dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
+ dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
+ dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
+ dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
+ dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
+ dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
+#endif
+
+ if (v->GPUVMEnable && v->HostVMEnable)
+ HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
+
+ v->UrgentExtraLatency = CalculateExtraLatency(
+ v->RoundTripPingLatencyCycles,
+ ReorderBytes,
+ v->DCFCLK,
+ v->TotalActiveDPP,
+ v->PixelChunkSizeInKByte,
+ v->TotalDCCActiveDPP,
+ v->MetaChunkSize,
+ v->ReturnBW,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->NumberOfActivePlanes,
+ v->DPPPerPlane,
+ v->dpte_group_bytes,
+ HostVMInefficiencyFactor,
+ v->HostVMMinPageSize,
+ v->HostVMMaxNonCachedPageTableLevels);
+
+ v->TCalc = 24.0 / v->DCFCLKDeepSleep;
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k) {
+ if (v->WritebackEnable[k] == true) {
+ v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
+ + CalculateWriteBackDelay(
+ v->WritebackPixelFormat[k],
+ v->WritebackHRatio[k],
+ v->WritebackVRatio[k],
+ v->WritebackVTaps[k],
+ v->WritebackDestinationWidth[k],
+ v->WritebackDestinationHeight[k],
+ v->WritebackSourceHeight[k],
+ v->HTotal[k]) / v->DISPCLK;
+ } else
+ v->WritebackDelay[v->VoltageLevel][k] = 0;
+ for (j = 0; j < v->NumberOfActivePlanes; ++j) {
+ if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
+ v->WritebackDelay[v->VoltageLevel][k] = dml_max(
+ v->WritebackDelay[v->VoltageLevel][k],
+ v->WritebackLatency
+ + CalculateWriteBackDelay(
+ v->WritebackPixelFormat[j],
+ v->WritebackHRatio[j],
+ v->WritebackVRatio[j],
+ v->WritebackVTaps[j],
+ v->WritebackDestinationWidth[j],
+ v->WritebackDestinationHeight[j],
+ v->WritebackSourceHeight[j],
+ v->HTotal[k]) / v->DISPCLK);
+ }
+ }
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k)
+ for (j = 0; j < v->NumberOfActivePlanes; ++j)
+ if (v->BlendingAndTiming[k] == j)
+ v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->MaxVStartupLines[k] =
+ CalculateMaxVStartup(
+ v->VTotal[k],
+ v->VActive[k],
+ v->VBlankNom[k],
+ v->HTotal[k],
+ v->PixelClock[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->Interlace[k],
+ v->ip.VBlankNomDefaultUS,
+ v->WritebackDelay[v->VoltageLevel][k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
+ dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
+#endif
+ }
+
+ v->MaximumMaxVStartupLines = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k)
+ v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
+
+ // VBA_DELTA
+ // We don't really care to iterate between the various prefetch modes
+ //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
+
+ v->UrgentLatency = CalculateUrgentLatency(
+ v->UrgentLatencyPixelDataOnly,
+ v->UrgentLatencyPixelMixedWithVMData,
+ v->UrgentLatencyVMDataOnly,
+ v->DoUrgentLatencyAdjustment,
+ v->UrgentLatencyAdjustmentFabricClockComponent,
+ v->UrgentLatencyAdjustmentFabricClockReference,
+ v->FabricClock);
+
+ v->FractionOfUrgentBandwidth = 0.0;
+ v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
+
+ v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
+
+ do {
+ double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
+ bool DestinationLineTimesForPrefetchLessThan2 = false;
+ bool VRatioPrefetchMoreThan4 = false;
+ double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
+
+ MaxTotalRDBandwidth = 0;
+
+ dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ Pipe myPipe;
+
+ myPipe.DPPCLK = v->DPPCLK[k];
+ myPipe.DISPCLK = v->DISPCLK;
+ myPipe.PixelClock = v->PixelClock[k];
+ myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
+ myPipe.DPPPerPlane = v->DPPPerPlane[k];
+ myPipe.ScalerEnabled = v->ScalerEnabled[k];
+ myPipe.VRatio = v->VRatio[k];
+ myPipe.VRatioChroma = v->VRatioChroma[k];
+ myPipe.SourceScan = v->SourceScan[k];
+ myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
+ myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
+ myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
+ myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
+ myPipe.InterlaceEnable = v->Interlace[k];
+ myPipe.NumberOfCursors = v->NumberOfCursors[k];
+ myPipe.VBlank = v->VTotal[k] - v->VActive[k];
+ myPipe.HTotal = v->HTotal[k];
+ myPipe.DCCEnable = v->DCCEnable[k];
+ myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
+ || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
+ myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
+ myPipe.BytePerPixelY = v->BytePerPixelY[k];
+ myPipe.BytePerPixelC = v->BytePerPixelC[k];
+ myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
+ v->ErrorResult[k] = CalculatePrefetchSchedule(
+ mode_lib,
+ HostVMInefficiencyFactor,
+ &myPipe,
+ v->DSCDelay[k],
+ v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
+ v->DPPCLKDelaySCL,
+ v->DPPCLKDelaySCLLBOnly,
+ v->DPPCLKDelayCNVCCursor,
+ v->DISPCLKDelaySubtotal,
+ (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
+ v->OutputFormat[k],
+ v->MaxInterDCNTileRepeaters,
+ dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
+ v->MaxVStartupLines[k],
+ v->GPUVMMaxPageTableLevels,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->HostVMMinPageSize,
+ v->DynamicMetadataEnable[k],
+ v->DynamicMetadataVMEnabled,
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ v->DynamicMetadataTransmittedBytes[k],
+ v->UrgentLatency,
+ v->UrgentExtraLatency,
+ v->TCalc,
+ v->PDEAndMetaPTEBytesFrame[k],
+ v->MetaRowByte[k],
+ v->PixelPTEBytesPerRow[k],
+ v->PrefetchSourceLinesY[k],
+ v->SwathWidthY[k],
+ v->VInitPreFillY[k],
+ v->MaxNumSwathY[k],
+ v->PrefetchSourceLinesC[k],
+ v->SwathWidthC[k],
+ v->VInitPreFillC[k],
+ v->MaxNumSwathC[k],
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ v->SwathHeightY[k],
+ v->SwathHeightC[k],
+ TWait,
+ &v->DSTXAfterScaler[k],
+ &v->DSTYAfterScaler[k],
+ &v->DestinationLinesForPrefetch[k],
+ &v->PrefetchBandwidth[k],
+ &v->DestinationLinesToRequestVMInVBlank[k],
+ &v->DestinationLinesToRequestRowInVBlank[k],
+ &v->VRatioPrefetchY[k],
+ &v->VRatioPrefetchC[k],
+ &v->RequiredPrefetchPixDataBWLuma[k],
+ &v->RequiredPrefetchPixDataBWChroma[k],
+ &v->NotEnoughTimeForDynamicMetadata[k],
+ &v->Tno_bw[k],
+ &v->prefetch_vmrow_bw[k],
+ &v->Tdmdl_vm[k],
+ &v->Tdmdl[k],
+ &v->TSetup[k],
+ &v->VUpdateOffsetPix[k],
+ &v->VUpdateWidthPix[k],
+ &v->VReadyOffsetPix[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
+#endif
+ v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
+ }
+
+ v->NoEnoughUrgentLatencyHiding = false;
+ v->NoEnoughUrgentLatencyHidingPre = false;
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
+ v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
+
+ CalculateUrgentBurstFactor(
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ v->SwathHeightY[k],
+ v->SwathHeightC[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->UrgentLatency,
+ v->CursorBufferSize,
+ v->CursorWidth[k][0],
+ v->CursorBPP[k][0],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ v->DETBufferSizeY[k],
+ v->DETBufferSizeC[k],
+ &v->UrgBurstFactorCursor[k],
+ &v->UrgBurstFactorLuma[k],
+ &v->UrgBurstFactorChroma[k],
+ &v->NoUrgentLatencyHiding[k]);
+
+ CalculateUrgentBurstFactor(
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ v->SwathHeightY[k],
+ v->SwathHeightC[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->UrgentLatency,
+ v->CursorBufferSize,
+ v->CursorWidth[k][0],
+ v->CursorBPP[k][0],
+ v->VRatioPrefetchY[k],
+ v->VRatioPrefetchC[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ v->DETBufferSizeY[k],
+ v->DETBufferSizeC[k],
+ &v->UrgBurstFactorCursorPre[k],
+ &v->UrgBurstFactorLumaPre[k],
+ &v->UrgBurstFactorChromaPre[k],
+ &v->NoUrgentLatencyHidingPre[k]);
+
+ MaxTotalRDBandwidth = MaxTotalRDBandwidth
+ + dml_max3(
+ v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
+ v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
+ + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
+ + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
+ + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
+ v->DPPPerPlane[k]
+ * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
+
+ MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
+ + dml_max3(
+ v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
+ v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
+ + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
+ v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
+ + v->cursor_bw_pre[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
+
+ dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
+ dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
+
+ dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
+ dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
+ dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
+ dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
+ dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
+ dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
+ dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
+ dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
+#endif
+
+ if (v->DestinationLinesForPrefetch[k] < 2)
+ DestinationLineTimesForPrefetchLessThan2 = true;
+
+ if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
+ VRatioPrefetchMoreThan4 = true;
+
+ if (v->NoUrgentLatencyHiding[k] == true)
+ v->NoEnoughUrgentLatencyHiding = true;
+
+ if (v->NoUrgentLatencyHidingPre[k] == true)
+ v->NoEnoughUrgentLatencyHidingPre = true;
+ }
+
+ v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
+ dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW);
+ dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth);
+#endif
+
+ if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
+ && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
+ v->PrefetchModeSupported = true;
+ else {
+ v->PrefetchModeSupported = false;
+ dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
+ dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
+ dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
+ dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
+ }
+
+ // PREVIOUS_ERROR
+ // This error result check was done after the PrefetchModeSupported. So we will
+ // still try to calculate flip schedule even prefetch mode not supported
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
+ v->PrefetchModeSupported = false;
+ dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
+ }
+ }
+
+ if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
+ v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
+ - dml_max(
+ v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
+ + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
+ + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
+ v->DPPPerPlane[k]
+ * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
+ }
+
+ v->TotImmediateFlipBytes = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
+ + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ CalculateFlipSchedule(
+ mode_lib,
+ k,
+ HostVMInefficiencyFactor,
+ v->UrgentExtraLatency,
+ v->UrgentLatency,
+ v->PDEAndMetaPTEBytesFrame[k],
+ v->MetaRowByte[k],
+ v->PixelPTEBytesPerRow[k]);
+ }
+
+ v->total_dcn_read_bw_with_flip = 0.0;
+ v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
+ + dml_max3(
+ v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
+ v->DPPPerPlane[k] * v->final_flip_bw[k]
+ + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
+ + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
+ + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
+ v->DPPPerPlane[k]
+ * (v->final_flip_bw[k]
+ + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
+ v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
+ + dml_max3(
+ v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
+ v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
+ + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
+ v->DPPPerPlane[k]
+ * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
+ + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
+ }
+ v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
+
+ v->ImmediateFlipSupported = true;
+ if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
+#endif
+ v->ImmediateFlipSupported = false;
+ v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->ImmediateFlipSupportedForPipe[k] == false) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
+#endif
+ v->ImmediateFlipSupported = false;
+ }
+ }
+ } else {
+ v->ImmediateFlipSupported = false;
+ }
+
+ v->PrefetchAndImmediateFlipSupported =
+ (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
+ && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
+ v->ImmediateFlipSupported)) ? true : false;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
+ dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
+ dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
+ dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
+ dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
+ dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
+#endif
+ dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
+
+ v->VStartupLines = v->VStartupLines + 1;
+ } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
+ ASSERT(v->PrefetchAndImmediateFlipSupported);
+
+ // Unbounded Request Enabled
+ CalculateUnboundedRequestAndCompressedBufferSize(
+ v->DETBufferSizeInKByte[0],
+ v->ConfigReturnBufferSizeInKByte,
+ v->UseUnboundedRequesting,
+ v->TotalActiveDPP,
+ NoChromaPlanes,
+ v->MaxNumDPP,
+ v->CompressedBufferSegmentSizeInkByte,
+ v->Output,
+ &v->UnboundedRequestEnabled,
+ &v->CompressedBufferSizeInkByte);
+
+ //Watermarks and NB P-State/DRAM Clock Change Support
+ {
+ enum clock_change_support DRAMClockChangeSupport; // dummy
+
+ CalculateWatermarksAndDRAMSpeedChangeSupport(
+ mode_lib,
+ PrefetchMode,
+ v->DCFCLK,
+ v->ReturnBW,
+ v->UrgentLatency,
+ v->UrgentExtraLatency,
+ v->SOCCLK,
+ v->DCFCLKDeepSleep,
+ v->DETBufferSizeY,
+ v->DETBufferSizeC,
+ v->SwathHeightY,
+ v->SwathHeightC,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ v->DPPPerPlane,
+ v->BytePerPixelDETY,
+ v->BytePerPixelDETC,
+ v->UnboundedRequestEnabled,
+ v->CompressedBufferSizeInkByte,
+ &DRAMClockChangeSupport,
+ &v->StutterExitWatermark,
+ &v->StutterEnterPlusExitWatermark,
+ &v->Z8StutterExitWatermark,
+ &v->Z8StutterEnterPlusExitWatermark);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k] == true) {
+ v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
+ 0,
+ v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
+ } else {
+ v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
+ }
+ }
+ }
+
+ //Display Pipeline Delivery Time in Prefetch, Groups
+ CalculatePixelDeliveryTimes(
+ v->NumberOfActivePlanes,
+ v->VRatio,
+ v->VRatioChroma,
+ v->VRatioPrefetchY,
+ v->VRatioPrefetchC,
+ v->swath_width_luma_ub,
+ v->swath_width_chroma_ub,
+ v->DPPPerPlane,
+ v->HRatio,
+ v->HRatioChroma,
+ v->PixelClock,
+ v->PSCL_THROUGHPUT_LUMA,
+ v->PSCL_THROUGHPUT_CHROMA,
+ v->DPPCLK,
+ v->BytePerPixelC,
+ v->SourceScan,
+ v->NumberOfCursors,
+ v->CursorWidth,
+ v->CursorBPP,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesC,
+ v->BlockHeight256BytesC,
+ v->DisplayPipeLineDeliveryTimeLuma,
+ v->DisplayPipeLineDeliveryTimeChroma,
+ v->DisplayPipeLineDeliveryTimeLumaPrefetch,
+ v->DisplayPipeLineDeliveryTimeChromaPrefetch,
+ v->DisplayPipeRequestDeliveryTimeLuma,
+ v->DisplayPipeRequestDeliveryTimeChroma,
+ v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
+ v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
+ v->CursorRequestDeliveryTime,
+ v->CursorRequestDeliveryTimePrefetch);
+
+ CalculateMetaAndPTETimes(
+ v->NumberOfActivePlanes,
+ v->GPUVMEnable,
+ v->MetaChunkSize,
+ v->MinMetaChunkSizeBytes,
+ v->HTotal,
+ v->VRatio,
+ v->VRatioChroma,
+ v->DestinationLinesToRequestRowInVBlank,
+ v->DestinationLinesToRequestRowInImmediateFlip,
+ v->DCCEnable,
+ v->PixelClock,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->SourceScan,
+ v->dpte_row_height,
+ v->dpte_row_height_chroma,
+ v->meta_row_width,
+ v->meta_row_width_chroma,
+ v->meta_row_height,
+ v->meta_row_height_chroma,
+ v->meta_req_width,
+ v->meta_req_width_chroma,
+ v->meta_req_height,
+ v->meta_req_height_chroma,
+ v->dpte_group_bytes,
+ v->PTERequestSizeY,
+ v->PTERequestSizeC,
+ v->PixelPTEReqWidthY,
+ v->PixelPTEReqHeightY,
+ v->PixelPTEReqWidthC,
+ v->PixelPTEReqHeightC,
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+ v->DST_Y_PER_PTE_ROW_NOM_L,
+ v->DST_Y_PER_PTE_ROW_NOM_C,
+ v->DST_Y_PER_META_ROW_NOM_L,
+ v->DST_Y_PER_META_ROW_NOM_C,
+ v->TimePerMetaChunkNominal,
+ v->TimePerChromaMetaChunkNominal,
+ v->TimePerMetaChunkVBlank,
+ v->TimePerChromaMetaChunkVBlank,
+ v->TimePerMetaChunkFlip,
+ v->TimePerChromaMetaChunkFlip,
+ v->time_per_pte_group_nom_luma,
+ v->time_per_pte_group_vblank_luma,
+ v->time_per_pte_group_flip_luma,
+ v->time_per_pte_group_nom_chroma,
+ v->time_per_pte_group_vblank_chroma,
+ v->time_per_pte_group_flip_chroma);
+
+ CalculateVMGroupAndRequestTimes(
+ v->NumberOfActivePlanes,
+ v->GPUVMEnable,
+ v->GPUVMMaxPageTableLevels,
+ v->HTotal,
+ v->BytePerPixelC,
+ v->DestinationLinesToRequestVMInVBlank,
+ v->DestinationLinesToRequestVMInImmediateFlip,
+ v->DCCEnable,
+ v->PixelClock,
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+ v->vm_group_bytes,
+ v->dpde0_bytes_per_frame_ub_l,
+ v->dpde0_bytes_per_frame_ub_c,
+ v->meta_pte_bytes_per_frame_ub_l,
+ v->meta_pte_bytes_per_frame_ub_c,
+ v->TimePerVMGroupVBlank,
+ v->TimePerVMGroupFlip,
+ v->TimePerVMRequestVBlank,
+ v->TimePerVMRequestFlip);
+
+ // Min TTUVBlank
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (PrefetchMode == 0) {
+ v->AllowDRAMClockChangeDuringVBlank[k] = true;
+ v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
+ v->MinTTUVBlank[k] = dml_max(
+ v->DRAMClockChangeWatermark,
+ dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
+ } else if (PrefetchMode == 1) {
+ v->AllowDRAMClockChangeDuringVBlank[k] = false;
+ v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
+ v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
+ } else {
+ v->AllowDRAMClockChangeDuringVBlank[k] = false;
+ v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
+ v->MinTTUVBlank[k] = v->UrgentWatermark;
+ }
+ if (!v->DynamicMetadataEnable[k])
+ v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
+ }
+
+ // DCC Configuration
+ v->ActiveDPPs = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
+ v->SourcePixelFormat[k],
+ v->SurfaceWidthY[k],
+ v->SurfaceWidthC[k],
+ v->SurfaceHeightY[k],
+ v->SurfaceHeightC[k],
+ v->DETBufferSizeInKByte[0] * 1024,
+ v->BlockHeight256BytesY[k],
+ v->BlockHeight256BytesC[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelY[k],
+ v->BytePerPixelC[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ v->SourceScan[k],
+ &v->DCCYMaxUncompressedBlock[k],
+ &v->DCCCMaxUncompressedBlock[k],
+ &v->DCCYMaxCompressedBlock[k],
+ &v->DCCCMaxCompressedBlock[k],
+ &v->DCCYIndependentBlock[k],
+ &v->DCCCIndependentBlock[k]);
+ }
+
+ // VStartup Adjustment
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ bool isInterlaceTiming;
+ double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
+#endif
+
+ v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
+ dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
+#endif
+
+ v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
+ if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
+ v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
+ }
+
+ isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
+ v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
+ if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) {
+ v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0);
+ } else {
+ v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k];
+ }
+ v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / (double)v->HTotal[k] / v->PixelClock[k], 1.0) / 4.0;
+ if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
+ <= (isInterlaceTiming ?
+ dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
+ (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
+ v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
+ } else {
+ v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
+ dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
+ dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
+ dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
+ dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
+ dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
+ dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
+ dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
+ dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
+#endif
+ }
+
+ {
+ //Maximum Bandwidth Used
+ double TotalWRBandwidth = 0;
+ double MaxPerPlaneVActiveWRBandwidth = 0;
+ double WRBandwidth = 0;
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
+ WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
+ } else if (v->WritebackEnable[k] == true) {
+ WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
+ }
+ TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
+ MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
+ }
+
+ v->TotalDataReadBandwidth = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
+ }
+ }
+ // Stutter Efficiency
+ CalculateStutterEfficiency(
+ mode_lib,
+ v->CompressedBufferSizeInkByte,
+ v->UnboundedRequestEnabled,
+ v->ConfigReturnBufferSizeInKByte,
+ v->MetaFIFOSizeInKEntries,
+ v->ZeroSizeBufferEntries,
+ v->NumberOfActivePlanes,
+ v->ROBBufferSizeInKByte,
+ v->TotalDataReadBandwidth,
+ v->DCFCLK,
+ v->ReturnBW,
+ v->COMPBUF_RESERVED_SPACE_64B,
+ v->COMPBUF_RESERVED_SPACE_ZS,
+ v->SRExitTime,
+ v->SRExitZ8Time,
+ v->SynchronizedVBlank,
+ v->StutterEnterPlusExitWatermark,
+ v->Z8StutterEnterPlusExitWatermark,
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->Interlace,
+ v->MinTTUVBlank,
+ v->DPPPerPlane,
+ v->DETBufferSizeY,
+ v->BytePerPixelY,
+ v->BytePerPixelDETY,
+ v->SwathWidthY,
+ v->SwathHeightY,
+ v->SwathHeightC,
+ v->DCCRateLuma,
+ v->DCCRateChroma,
+ v->DCCFractionOfZeroSizeRequestsLuma,
+ v->DCCFractionOfZeroSizeRequestsChroma,
+ v->HTotal,
+ v->VTotal,
+ v->PixelClock,
+ v->VRatio,
+ v->SourceScan,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesC,
+ v->DCCYMaxUncompressedBlock,
+ v->DCCCMaxUncompressedBlock,
+ v->VActive,
+ v->DCCEnable,
+ v->WritebackEnable,
+ v->ReadBandwidthPlaneLuma,
+ v->ReadBandwidthPlaneChroma,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ &v->StutterEfficiencyNotIncludingVBlank,
+ &v->StutterEfficiency,
+ &v->NumberOfStutterBurstsPerFrame,
+ &v->Z8StutterEfficiencyNotIncludingVBlank,
+ &v->Z8StutterEfficiency,
+ &v->Z8NumberOfStutterBurstsPerFrame,
+ &v->StutterPeriod);
+}
+
+static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ // Display Pipe Configuration
+ double BytePerPixDETY[DC__NUM_DPP__MAX];
+ double BytePerPixDETC[DC__NUM_DPP__MAX];
+ int BytePerPixY[DC__NUM_DPP__MAX];
+ int BytePerPixC[DC__NUM_DPP__MAX];
+ int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
+ int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
+ int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
+ int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
+ double dummy1[DC__NUM_DPP__MAX];
+ double dummy2[DC__NUM_DPP__MAX];
+ double dummy3[DC__NUM_DPP__MAX];
+ double dummy4[DC__NUM_DPP__MAX];
+ int dummy5[DC__NUM_DPP__MAX];
+ int dummy6[DC__NUM_DPP__MAX];
+ bool dummy7[DC__NUM_DPP__MAX];
+ bool dummysinglestring;
+
+ unsigned int k;
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+
+ CalculateBytePerPixelAnd256BBlockSizes(
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ &BytePerPixY[k],
+ &BytePerPixC[k],
+ &BytePerPixDETY[k],
+ &BytePerPixDETC[k],
+ &Read256BytesBlockHeightY[k],
+ &Read256BytesBlockHeightC[k],
+ &Read256BytesBlockWidthY[k],
+ &Read256BytesBlockWidthC[k]);
+ }
+
+ CalculateSwathAndDETConfiguration(
+ false,
+ v->NumberOfActivePlanes,
+ v->DETBufferSizeInKByte[0],
+ dummy1,
+ dummy2,
+ v->SourceScan,
+ v->SourcePixelFormat,
+ v->SurfaceTiling,
+ v->ViewportWidth,
+ v->ViewportHeight,
+ v->SurfaceWidthY,
+ v->SurfaceWidthC,
+ v->SurfaceHeightY,
+ v->SurfaceHeightC,
+ Read256BytesBlockHeightY,
+ Read256BytesBlockHeightC,
+ Read256BytesBlockWidthY,
+ Read256BytesBlockWidthC,
+ v->ODMCombineEnabled,
+ v->BlendingAndTiming,
+ BytePerPixY,
+ BytePerPixC,
+ BytePerPixDETY,
+ BytePerPixDETC,
+ v->HActive,
+ v->HRatio,
+ v->HRatioChroma,
+ v->DPPPerPlane,
+ dummy5,
+ dummy6,
+ dummy3,
+ dummy4,
+ v->SwathHeightY,
+ v->SwathHeightC,
+ v->DETBufferSizeY,
+ v->DETBufferSizeC,
+ dummy7,
+ &dummysinglestring);
+}
+
+static bool CalculateBytePerPixelAnd256BBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC)
+{
+ if (SourcePixelFormat == dm_444_64) {
+ *BytePerPixelDETY = 8;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 8;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_16) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 1;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 1;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 2;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 2;
+ } else if (SourcePixelFormat == dm_420_12) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 4;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ } else {
+ *BytePerPixelDETY = 4.0 / 3;
+ *BytePerPixelDETC = 8.0 / 3;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ }
+
+ if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
+ || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
+ if (SurfaceTiling == dm_sw_linear) {
+ *BlockHeight256BytesY = 1;
+ } else if (SourcePixelFormat == dm_444_64) {
+ *BlockHeight256BytesY = 4;
+ } else if (SourcePixelFormat == dm_444_8) {
+ *BlockHeight256BytesY = 16;
+ } else {
+ *BlockHeight256BytesY = 8;
+ }
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockHeight256BytesC = 0;
+ *BlockWidth256BytesC = 0;
+ } else {
+ if (SurfaceTiling == dm_sw_linear) {
+ *BlockHeight256BytesY = 1;
+ *BlockHeight256BytesC = 1;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 16;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BlockHeight256BytesY = 16;
+ *BlockHeight256BytesC = 8;
+ } else {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 8;
+ }
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
+ }
+ return true;
+}
+
+static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
+{
+ if (PrefetchMode == 0) {
+ return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
+ } else if (PrefetchMode == 1) {
+ return dml_max(SREnterPlusExitTime, UrgentLatency);
+ } else {
+ return UrgentLatency;
+ }
+}
+
+double dml314_CalculateWriteBackDISPCLK(
+ enum source_format_class WritebackPixelFormat,
+ double PixelClock,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackHTaps,
+ unsigned int WritebackVTaps,
+ long WritebackSourceWidth,
+ long WritebackDestinationWidth,
+ unsigned int HTotal,
+ unsigned int WritebackLineBufferSize)
+{
+ double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
+
+ DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
+ DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
+ DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
+ return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
+}
+
+static double CalculateWriteBackDelay(
+ enum source_format_class WritebackPixelFormat,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackVTaps,
+ int WritebackDestinationWidth,
+ int WritebackDestinationHeight,
+ int WritebackSourceHeight,
+ unsigned int HTotal)
+{
+ double CalculateWriteBackDelay;
+ double Line_length;
+ double Output_lines_last_notclamped;
+ double WritebackVInit;
+
+ WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
+ Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
+ Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
+ if (Output_lines_last_notclamped < 0) {
+ CalculateWriteBackDelay = 0;
+ } else {
+ CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
+ }
+ return CalculateWriteBackDelay;
+}
+
+static void CalculateVupdateAndDynamicMetadataParameters(
+ int MaxInterDCNTileRepeaters,
+ double DPPCLK,
+ double DISPCLK,
+ double DCFClkDeepSleep,
+ double PixelClock,
+ int HTotal,
+ int VBlank,
+ int DynamicMetadataTransmittedBytes,
+ int DynamicMetadataLinesBeforeActiveRequired,
+ int InterlaceEnable,
+ bool ProgressiveToInterlaceUnitInOPP,
+ double *TSetup,
+ double *Tdmbf,
+ double *Tdmec,
+ double *Tdmsks,
+ int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix)
+{
+ double TotalRepeaterDelayTime;
+
+ TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
+ *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
+ *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
+ *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
+ *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
+ *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
+ *Tdmec = HTotal / PixelClock;
+ if (DynamicMetadataLinesBeforeActiveRequired == 0) {
+ *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
+ } else {
+ *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
+ }
+ if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
+ *Tdmsks = *Tdmsks / 2;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix);
+ dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix);
+ dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
+#endif
+}
+
+static void CalculateRowBandwidth(
+ bool GPUVMEnable,
+ enum source_format_class SourcePixelFormat,
+ double VRatio,
+ double VRatioChroma,
+ bool DCCEnable,
+ double LineTime,
+ unsigned int MetaRowByteLuma,
+ unsigned int MetaRowByteChroma,
+ unsigned int meta_row_height_luma,
+ unsigned int meta_row_height_chroma,
+ unsigned int PixelPTEBytesPerRowLuma,
+ unsigned int PixelPTEBytesPerRowChroma,
+ unsigned int dpte_row_height_luma,
+ unsigned int dpte_row_height_chroma,
+ double *meta_row_bw,
+ double *dpte_row_bw)
+{
+ if (DCCEnable != true) {
+ *meta_row_bw = 0;
+ } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
+ *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
+ } else {
+ *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
+ }
+
+ if (GPUVMEnable != true) {
+ *dpte_row_bw = 0;
+ } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
+ *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
+ + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
+ } else {
+ *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
+ }
+}
+
+static void CalculateFlipSchedule(
+ struct display_mode_lib *mode_lib,
+ unsigned int k,
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+ double DPTEBytesPerRow)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ double min_row_time = 0.0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double TimeForFetchingMetaPTEImmediateFlip;
+ double TimeForFetchingRowInVBlankImmediateFlip;
+ double ImmediateFlipBW;
+ double LineTime = v->HTotal[k] / v->PixelClock[k];
+
+ if (v->GPUVMEnable == true && v->HostVMEnable == true) {
+ HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
+ } else {
+ HostVMDynamicLevelsTrips = 0;
+ }
+
+ if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
+ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
+ }
+
+ if (v->GPUVMEnable == true) {
+ TimeForFetchingMetaPTEImmediateFlip = dml_max3(
+ v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
+ UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
+ LineTime / 4.0);
+ } else {
+ TimeForFetchingMetaPTEImmediateFlip = 0;
+ }
+
+ v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
+ if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
+ TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
+ UrgentLatency * (HostVMDynamicLevelsTrips + 1),
+ LineTime / 4);
+ } else {
+ TimeForFetchingRowInVBlankImmediateFlip = 0;
+ }
+
+ v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
+
+ if (v->GPUVMEnable == true) {
+ v->final_flip_bw[k] = dml_max(
+ PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
+ } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
+ v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
+ } else {
+ v->final_flip_bw[k] = 0;
+ }
+
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
+ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
+ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
+ min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
+ } else {
+ min_row_time = dml_min4(
+ v->dpte_row_height[k] * LineTime / v->VRatio[k],
+ v->meta_row_height[k] * LineTime / v->VRatio[k],
+ v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
+ v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
+ }
+ } else {
+ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
+ min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
+ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
+ min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
+ } else {
+ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
+ }
+ }
+
+ if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
+ || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
+ v->ImmediateFlipSupportedForPipe[k] = false;
+ } else {
+ v->ImmediateFlipSupportedForPipe[k] = true;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
+ dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
+ dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
+ dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
+ dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
+ dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
+#endif
+
+}
+
+static double TruncToValidBPP(
+ double LinkBitRate,
+ int Lanes,
+ int HTotal,
+ int HActive,
+ double PixelClock,
+ double DesiredBPP,
+ bool DSCEnable,
+ enum output_encoder_class Output,
+ enum output_format_class Format,
+ unsigned int DSCInputBitPerComponent,
+ int DSCSlices,
+ int AudioRate,
+ int AudioLayout,
+ enum odm_combine_mode ODMCombine)
+{
+ double MaxLinkBPP;
+ int MinDSCBPP;
+ double MaxDSCBPP;
+ int NonDSCBPP0;
+ int NonDSCBPP1;
+ int NonDSCBPP2;
+
+ if (Format == dm_420) {
+ NonDSCBPP0 = 12;
+ NonDSCBPP1 = 15;
+ NonDSCBPP2 = 18;
+ MinDSCBPP = 6;
+ MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
+ } else if (Format == dm_444) {
+ NonDSCBPP0 = 24;
+ NonDSCBPP1 = 30;
+ NonDSCBPP2 = 36;
+ MinDSCBPP = 8;
+ MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
+ } else {
+
+ NonDSCBPP0 = 16;
+ NonDSCBPP1 = 20;
+ NonDSCBPP2 = 24;
+
+ if (Format == dm_n422) {
+ MinDSCBPP = 7;
+ MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
+ } else {
+ MinDSCBPP = 8;
+ MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
+ }
+ }
+
+ if (DSCEnable && Output == dm_dp) {
+ MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
+ } else {
+ MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
+ }
+
+ if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
+ MaxLinkBPP = 16;
+ } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
+ MaxLinkBPP = 32;
+ }
+
+ if (DesiredBPP == 0) {
+ if (DSCEnable) {
+ if (MaxLinkBPP < MinDSCBPP) {
+ return BPP_INVALID;
+ } else if (MaxLinkBPP >= MaxDSCBPP) {
+ return MaxDSCBPP;
+ } else {
+ return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
+ }
+ } else {
+ if (MaxLinkBPP >= NonDSCBPP2) {
+ return NonDSCBPP2;
+ } else if (MaxLinkBPP >= NonDSCBPP1) {
+ return NonDSCBPP1;
+ } else if (MaxLinkBPP >= NonDSCBPP0) {
+ return 16.0;
+ } else {
+ return BPP_INVALID;
+ }
+ }
+ } else {
+ if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
+ || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
+ return BPP_INVALID;
+ } else {
+ return DesiredBPP;
+ }
+ }
+ return BPP_INVALID;
+}
+
+static noinline void CalculatePrefetchSchedulePerPlane(
+ struct display_mode_lib *mode_lib,
+ double HostVMInefficiencyFactor,
+ int i,
+ unsigned int j,
+ unsigned int k)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ Pipe myPipe;
+
+ myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
+ myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
+ myPipe.PixelClock = v->PixelClock[k];
+ myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
+ myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
+ myPipe.ScalerEnabled = v->ScalerEnabled[k];
+ myPipe.VRatio = mode_lib->vba.VRatio[k];
+ myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
+
+ myPipe.SourceScan = v->SourceScan[k];
+ myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
+ myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
+ myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
+ myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
+ myPipe.InterlaceEnable = v->Interlace[k];
+ myPipe.NumberOfCursors = v->NumberOfCursors[k];
+ myPipe.VBlank = v->VTotal[k] - v->VActive[k];
+ myPipe.HTotal = v->HTotal[k];
+ myPipe.DCCEnable = v->DCCEnable[k];
+ myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
+ || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
+ myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
+ myPipe.BytePerPixelY = v->BytePerPixelY[k];
+ myPipe.BytePerPixelC = v->BytePerPixelC[k];
+ myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
+ v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
+ mode_lib,
+ HostVMInefficiencyFactor,
+ &myPipe,
+ v->DSCDelayPerState[i][k],
+ v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
+ v->DPPCLKDelaySCL,
+ v->DPPCLKDelaySCLLBOnly,
+ v->DPPCLKDelayCNVCCursor,
+ v->DISPCLKDelaySubtotal,
+ v->SwathWidthYThisState[k] / v->HRatio[k],
+ v->OutputFormat[k],
+ v->MaxInterDCNTileRepeaters,
+ dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
+ v->MaximumVStartup[i][j][k],
+ v->GPUVMMaxPageTableLevels,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->HostVMMinPageSize,
+ v->DynamicMetadataEnable[k],
+ v->DynamicMetadataVMEnabled,
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ v->DynamicMetadataTransmittedBytes[k],
+ v->UrgLatency[i],
+ v->ExtraLatency,
+ v->TimeCalc,
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k],
+ v->MetaRowBytes[i][j][k],
+ v->DPTEBytesPerRow[i][j][k],
+ v->PrefetchLinesY[i][j][k],
+ v->SwathWidthYThisState[k],
+ v->PrefillY[k],
+ v->MaxNumSwY[k],
+ v->PrefetchLinesC[i][j][k],
+ v->SwathWidthCThisState[k],
+ v->PrefillC[k],
+ v->MaxNumSwC[k],
+ v->swath_width_luma_ub_this_state[k],
+ v->swath_width_chroma_ub_this_state[k],
+ v->SwathHeightYThisState[k],
+ v->SwathHeightCThisState[k],
+ v->TWait,
+ &v->DSTXAfterScaler[k],
+ &v->DSTYAfterScaler[k],
+ &v->LineTimesForPrefetch[k],
+ &v->PrefetchBW[k],
+ &v->LinesForMetaPTE[k],
+ &v->LinesForMetaAndDPTERow[k],
+ &v->VRatioPreY[i][j][k],
+ &v->VRatioPreC[i][j][k],
+ &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
+ &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
+ &v->NoTimeForDynamicMetadata[i][j][k],
+ &v->Tno_bw[k],
+ &v->prefetch_vmrow_bw[k],
+ &v->dummy7[k],
+ &v->dummy8[k],
+ &v->dummy13[k],
+ &v->VUpdateOffsetPix[k],
+ &v->VUpdateWidthPix[k],
+ &v->VReadyOffsetPix[k]);
+}
+
+void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+
+ int i, j;
+ unsigned int k, m;
+ int ReorderingBytes;
+ int MinPrefetchMode = 0, MaxPrefetchMode = 2;
+ bool NoChroma = true;
+ bool EnoughWritebackUnits = true;
+ bool P2IWith420 = false;
+ bool DSCOnlyIfNecessaryWithBPP = false;
+ bool DSC422NativeNotSupported = false;
+ double MaxTotalVActiveRDBandwidth;
+ bool ViewportExceedsSurface = false;
+ bool FMTBufferExceeded = false;
+
+ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
+
+ CalculateMinAndMaxPrefetchMode(
+ mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
+ &MinPrefetchMode, &MaxPrefetchMode);
+
+ /*Scale Ratio, taps Support Check*/
+
+ v->ScaleRatioAndTapsSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->ScalerEnabled[k] == false
+ && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
+ && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
+ && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
+ && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
+ || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
+ v->ScaleRatioAndTapsSupport = false;
+ } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
+ || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
+ || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
+ || v->VRatio[k] > v->vtaps[k]
+ || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
+ && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
+ && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
+ && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
+ || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
+ || v->HRatioChroma[k] > v->MaxHSCLRatio
+ || v->VRatioChroma[k] > v->MaxVSCLRatio
+ || v->HRatioChroma[k] > v->HTAPsChroma[k]
+ || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
+ v->ScaleRatioAndTapsSupport = false;
+ }
+ }
+ /*Source Format, Pixel Format and Scan Support Check*/
+
+ v->SourceFormatPixelAndScanSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) {
+ v->SourceFormatPixelAndScanSupport = false;
+ }
+ }
+ /*Bandwidth Support Check*/
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ CalculateBytePerPixelAnd256BBlockSizes(
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ &v->BytePerPixelY[k],
+ &v->BytePerPixelC[k],
+ &v->BytePerPixelInDETY[k],
+ &v->BytePerPixelInDETC[k],
+ &v->Read256BlockHeightY[k],
+ &v->Read256BlockHeightC[k],
+ &v->Read256BlockWidthY[k],
+ &v->Read256BlockWidthC[k]);
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->SourceScan[k] != dm_vert) {
+ v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
+ v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
+ } else {
+ v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
+ v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
+ v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
+ v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
+ } else if (v->WritebackEnable[k] == true) {
+ v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
+ } else {
+ v->WriteBandwidth[k] = 0.0;
+ }
+ }
+
+ /*Writeback Latency support check*/
+
+ v->WritebackLatencySupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
+ v->WritebackLatencySupport = false;
+ }
+ }
+
+ /*Writeback Mode Support Check*/
+
+ v->TotalNumberOfActiveWriteback = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true) {
+ v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
+ }
+ }
+
+ if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
+ EnoughWritebackUnits = false;
+ }
+
+ /*Writeback Scale Ratio and Taps Support Check*/
+
+ v->WritebackScaleRatioAndTapsSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true) {
+ if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
+ || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
+ || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
+ || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
+ || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
+ || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
+ || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
+ v->WritebackScaleRatioAndTapsSupport = false;
+ }
+ if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
+ v->WritebackScaleRatioAndTapsSupport = false;
+ }
+ }
+ }
+ /*Maximum DISPCLK/DPPCLK Support check*/
+
+ v->WritebackRequiredDISPCLK = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true) {
+ v->WritebackRequiredDISPCLK = dml_max(
+ v->WritebackRequiredDISPCLK,
+ dml314_CalculateWriteBackDISPCLK(
+ v->WritebackPixelFormat[k],
+ v->PixelClock[k],
+ v->WritebackHRatio[k],
+ v->WritebackVRatio[k],
+ v->WritebackHTaps[k],
+ v->WritebackVTaps[k],
+ v->WritebackSourceWidth[k],
+ v->WritebackDestinationWidth[k],
+ v->HTotal[k],
+ v->WritebackLineBufferSize));
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->HRatio[k] > 1.0) {
+ v->PSCL_FACTOR[k] = dml_min(
+ v->MaxDCHUBToPSCLThroughput,
+ v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
+ } else {
+ v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
+ }
+ if (v->BytePerPixelC[k] == 0.0) {
+ v->PSCL_FACTOR_CHROMA[k] = 0.0;
+ v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
+ * dml_max3(
+ v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
+ v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
+ 1.0);
+ if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
+ v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
+ }
+ } else {
+ if (v->HRatioChroma[k] > 1.0) {
+ v->PSCL_FACTOR_CHROMA[k] = dml_min(
+ v->MaxDCHUBToPSCLThroughput,
+ v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
+ } else {
+ v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
+ }
+ v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
+ * dml_max5(
+ v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
+ v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
+ v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
+ v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
+ 1.0);
+ if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
+ && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
+ v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
+ }
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ int MaximumSwathWidthSupportLuma;
+ int MaximumSwathWidthSupportChroma;
+
+ if (v->SurfaceTiling[k] == dm_sw_linear) {
+ MaximumSwathWidthSupportLuma = 8192.0;
+ } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
+ MaximumSwathWidthSupportLuma = 2880.0;
+ } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+ MaximumSwathWidthSupportLuma = 3840.0;
+ } else {
+ MaximumSwathWidthSupportLuma = 5760.0;
+ }
+
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
+ MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
+ } else {
+ MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
+ }
+ v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
+ / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
+ if (v->BytePerPixelC[k] == 0.0) {
+ v->MaximumSwathWidthInLineBufferChroma = 0;
+ } else {
+ v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
+ / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
+ }
+ v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
+ v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
+ }
+
+ CalculateSwathAndDETConfiguration(
+ true,
+ v->NumberOfActivePlanes,
+ v->DETBufferSizeInKByte[0],
+ v->MaximumSwathWidthLuma,
+ v->MaximumSwathWidthChroma,
+ v->SourceScan,
+ v->SourcePixelFormat,
+ v->SurfaceTiling,
+ v->ViewportWidth,
+ v->ViewportHeight,
+ v->SurfaceWidthY,
+ v->SurfaceWidthC,
+ v->SurfaceHeightY,
+ v->SurfaceHeightC,
+ v->Read256BlockHeightY,
+ v->Read256BlockHeightC,
+ v->Read256BlockWidthY,
+ v->Read256BlockWidthC,
+ v->odm_combine_dummy,
+ v->BlendingAndTiming,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BytePerPixelInDETY,
+ v->BytePerPixelInDETC,
+ v->HActive,
+ v->HRatio,
+ v->HRatioChroma,
+ v->NoOfDPPThisState,
+ v->swath_width_luma_ub_this_state,
+ v->swath_width_chroma_ub_this_state,
+ v->SwathWidthYThisState,
+ v->SwathWidthCThisState,
+ v->SwathHeightYThisState,
+ v->SwathHeightCThisState,
+ v->DETBufferSizeYThisState,
+ v->DETBufferSizeCThisState,
+ v->SingleDPPViewportSizeSupportPerPlane,
+ &v->ViewportSizeSupport[0][0]);
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
+ v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
+ v->RequiredDISPCLK[i][j] = 0.0;
+ v->DISPCLK_DPPCLK_Support[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ * (1.0 + v->DISPCLKRampingMargin / 100.0);
+ if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
+ && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
+ && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
+ v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
+ * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+ v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ * (1 + v->DISPCLKRampingMargin / 100.0);
+ if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
+ && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
+ && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
+ v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
+ * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+ v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ * (1 + v->DISPCLKRampingMargin / 100.0);
+ if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
+ && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
+ && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
+ v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
+ * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+
+ if (v->ODMCombinePolicy == dm_odm_combine_policy_none
+ || !(v->Output[k] == dm_dp ||
+ v->Output[k] == dm_dp2p0 ||
+ v->Output[k] == dm_edp)) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
+
+ if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
+ FMTBufferExceeded = true;
+ } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
+ } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
+ || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
+ } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
+ } else {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
+ }
+ if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH
+ && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
+ if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
+ } else {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
+ }
+ }
+ if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH
+ && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
+ if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
+
+ if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
+ FMTBufferExceeded = true;
+ } else {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
+ }
+ }
+ if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
+ v->MPCCombine[i][j][k] = false;
+ v->NoOfDPP[i][j][k] = 4;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
+ } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ v->MPCCombine[i][j][k] = false;
+ v->NoOfDPP[i][j][k] = 2;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
+ } else if ((v->WhenToDoMPCCombine == dm_mpc_never
+ || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
+ v->MPCCombine[i][j][k] = false;
+ v->NoOfDPP[i][j][k] = 1;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ } else {
+ v->MPCCombine[i][j][k] = true;
+ v->NoOfDPP[i][j][k] = 2;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
+ }
+ v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
+ if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ > v->MaxDppclkRoundedDownToDFSGranularity)
+ || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
+ v->DISPCLK_DPPCLK_Support[i][j] = false;
+ }
+ }
+ v->TotalNumberOfActiveDPP[i][j] = 0;
+ v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
+ if (v->NoOfDPP[i][j][k] == 1)
+ v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
+ || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
+ NoChroma = false;
+ }
+
+ // UPTO
+ if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
+ && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
+ while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
+ double BWOfNonSplitPlaneOfMaximumBandwidth;
+ unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
+
+ BWOfNonSplitPlaneOfMaximumBandwidth = 0;
+ NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
+ && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
+ BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
+ NumberOfNonSplitPlaneOfMaximumBandwidth = k;
+ }
+ }
+ v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
+ v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
+ v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
+ v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
+ * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
+ v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
+ v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
+ }
+ }
+ if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
+ v->RequiredDISPCLK[i][j] = 0.0;
+ v->DISPCLK_DPPCLK_Support[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
+ v->MPCCombine[i][j][k] = true;
+ v->NoOfDPP[i][j][k] = 2;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
+ * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
+ } else {
+ v->MPCCombine[i][j][k] = false;
+ v->NoOfDPP[i][j][k] = 1;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
+ * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+ if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
+ && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
+ v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ * (1.0 + v->DISPCLKRampingMargin / 100.0);
+ } else {
+ v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+ v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
+ if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ > v->MaxDppclkRoundedDownToDFSGranularity)
+ || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
+ v->DISPCLK_DPPCLK_Support[i][j] = false;
+ }
+ }
+ v->TotalNumberOfActiveDPP[i][j] = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
+ }
+ }
+ v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
+ if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
+ v->DISPCLK_DPPCLK_Support[i][j] = false;
+ }
+ }
+ }
+
+ /*Total Available Pipes Support Check*/
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
+ v->TotalAvailablePipesSupport[i][j] = true;
+ } else {
+ v->TotalAvailablePipesSupport[i][j] = false;
+ }
+ }
+ }
+ /*Display IO and DSC Support Check*/
+
+ v->NonsupportedDSCInputBPC = false;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
+ || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
+ v->NonsupportedDSCInputBPC = true;
+ }
+ }
+
+ /*Number Of DSC Slices*/
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k) {
+ if (v->PixelClockBackEnd[k] > 3200) {
+ v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
+ } else if (v->PixelClockBackEnd[k] > 1360) {
+ v->NumberOfDSCSlices[k] = 8;
+ } else if (v->PixelClockBackEnd[k] > 680) {
+ v->NumberOfDSCSlices[k] = 4;
+ } else if (v->PixelClockBackEnd[k] > 340) {
+ v->NumberOfDSCSlices[k] = 2;
+ } else {
+ v->NumberOfDSCSlices[k] = 1;
+ }
+ } else {
+ v->NumberOfDSCSlices[k] = 0;
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->RequiresDSC[i][k] = false;
+ v->RequiresFEC[i][k] = false;
+ if (v->BlendingAndTiming[k] == k) {
+ if (v->Output[k] == dm_hdmi) {
+ v->RequiresDSC[i][k] = false;
+ v->RequiresFEC[i][k] = false;
+ v->OutputBppPerState[i][k] = TruncToValidBPP(
+ dml_min(600.0, v->PHYCLKPerState[i]) * 10,
+ 3,
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ false,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
+ if (v->DSCEnable[k] == true) {
+ v->RequiresDSC[i][k] = true;
+ v->LinkDSCEnable = true;
+ if (v->Output[k] == dm_dp) {
+ v->RequiresFEC[i][k] = true;
+ } else {
+ v->RequiresFEC[i][k] = false;
+ }
+ } else {
+ v->RequiresDSC[i][k] = false;
+ v->LinkDSCEnable = false;
+ v->RequiresFEC[i][k] = false;
+ }
+
+ v->Outbpp = BPP_INVALID;
+ if (v->PHYCLKPerState[i] >= 270.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 2700,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
+ }
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 5400,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
+ }
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 8100,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
+ }
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 10000,
+ 4,
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
+ }
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
+ v->Outbpp = TruncToValidBPP(
+ 12000,
+ 4,
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
+ }
+ }
+ } else {
+ v->OutputBppPerState[i][k] = 0;
+ }
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ v->LinkCapacitySupport[i] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->BlendingAndTiming[k] == k
+ && (v->Output[k] == dm_dp ||
+ v->Output[k] == dm_edp ||
+ v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
+ v->LinkCapacitySupport[i] = false;
+ }
+ }
+ }
+
+ // UPTO 2172
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k
+ && (v->Output[k] == dm_dp ||
+ v->Output[k] == dm_edp ||
+ v->Output[k] == dm_hdmi)) {
+ if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
+ P2IWith420 = true;
+ }
+ if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
+ && !v->DSC422NativeSupport) {
+ DSC422NativeNotSupported = true;
+ }
+ }
+ }
+
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ v->ODMCombine4To1SupportCheckOK[i] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
+ && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
+ || v->Output[k] == dm_hdmi)) {
+ v->ODMCombine4To1SupportCheckOK[i] = false;
+ }
+ }
+ }
+
+ /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ v->NotEnoughDSCUnits[i] = false;
+ v->TotalDSCUnitsRequired = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->RequiresDSC[i][k] == true) {
+ if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
+ v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
+ } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
+ } else {
+ v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
+ }
+ }
+ }
+ if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
+ v->NotEnoughDSCUnits[i] = true;
+ }
+ }
+ /*DSC Delay per state*/
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->OutputBppPerState[i][k] == BPP_INVALID) {
+ v->BPP = 0.0;
+ } else {
+ v->BPP = v->OutputBppPerState[i][k];
+ }
+ if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
+ if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
+ v->DSCDelayPerState[i][k] = dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ v->BPP,
+ dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
+ v->NumberOfDSCSlices[k],
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
+ } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ v->DSCDelayPerState[i][k] = 2.0
+ * (dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ v->BPP,
+ dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
+ v->NumberOfDSCSlices[k] / 2,
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
+ } else {
+ v->DSCDelayPerState[i][k] = 4.0
+ * (dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ v->BPP,
+ dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
+ v->NumberOfDSCSlices[k] / 4,
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
+ }
+ v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
+ } else {
+ v->DSCDelayPerState[i][k] = 0.0;
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ for (m = 0; m < v->NumberOfActivePlanes; m++) {
+ if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
+ v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
+ }
+ }
+ }
+ }
+
+ //Calculate Swath, DET Configuration, DCFCLKDeepSleep
+ //
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
+ v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
+ v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
+ }
+
+ CalculateSwathAndDETConfiguration(
+ false,
+ v->NumberOfActivePlanes,
+ v->DETBufferSizeInKByte[0],
+ v->MaximumSwathWidthLuma,
+ v->MaximumSwathWidthChroma,
+ v->SourceScan,
+ v->SourcePixelFormat,
+ v->SurfaceTiling,
+ v->ViewportWidth,
+ v->ViewportHeight,
+ v->SurfaceWidthY,
+ v->SurfaceWidthC,
+ v->SurfaceHeightY,
+ v->SurfaceHeightC,
+ v->Read256BlockHeightY,
+ v->Read256BlockHeightC,
+ v->Read256BlockWidthY,
+ v->Read256BlockWidthC,
+ v->ODMCombineEnableThisState,
+ v->BlendingAndTiming,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BytePerPixelInDETY,
+ v->BytePerPixelInDETC,
+ v->HActive,
+ v->HRatio,
+ v->HRatioChroma,
+ v->NoOfDPPThisState,
+ v->swath_width_luma_ub_this_state,
+ v->swath_width_chroma_ub_this_state,
+ v->SwathWidthYThisState,
+ v->SwathWidthCThisState,
+ v->SwathHeightYThisState,
+ v->SwathHeightCThisState,
+ v->DETBufferSizeYThisState,
+ v->DETBufferSizeCThisState,
+ v->dummystring,
+ &v->ViewportSizeSupport[i][j]);
+
+ CalculateDCFCLKDeepSleep(
+ mode_lib,
+ v->NumberOfActivePlanes,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->VRatio,
+ v->VRatioChroma,
+ v->SwathWidthYThisState,
+ v->SwathWidthCThisState,
+ v->NoOfDPPThisState,
+ v->HRatio,
+ v->HRatioChroma,
+ v->PixelClock,
+ v->PSCL_FACTOR,
+ v->PSCL_FACTOR_CHROMA,
+ v->RequiredDPPCLKThisState,
+ v->ReadBandwidthLuma,
+ v->ReadBandwidthChroma,
+ v->ReturnBusWidth,
+ &v->ProjectedDCFCLKDeepSleep[i][j]);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
+ v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
+ v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
+ v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
+ v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
+ v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
+ v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
+ v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
+ }
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
+ }
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
+ v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
+ v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
+ v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
+ v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
+ v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
+ v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
+ v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
+ }
+
+ v->TotalNumberOfDCCActiveDPP[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->DCCEnable[k] == true) {
+ v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
+ || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+
+ if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
+ && v->SourceScan[k] != dm_vert) {
+ v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
+ / 2;
+ v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
+ } else {
+ v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
+ v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
+ }
+
+ v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
+ mode_lib,
+ v->DCCEnable[k],
+ v->Read256BlockHeightC[k],
+ v->Read256BlockWidthC[k],
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelC[k],
+ v->SourceScan[k],
+ v->SwathWidthCThisState[k],
+ v->ViewportHeightChroma[k],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMMinPageSize,
+ v->HostVMMinPageSize,
+ v->PTEBufferSizeInRequestsForChroma,
+ v->PitchC[k],
+ 0.0,
+ &v->MacroTileWidthC[k],
+ &v->MetaRowBytesC,
+ &v->DPTEBytesPerRowC,
+ &v->PTEBufferSizeNotExceededC[i][j][k],
+ &v->dummyinteger7,
+ &v->dpte_row_height_chroma[k],
+ &v->dummyinteger28,
+ &v->dummyinteger26,
+ &v->dummyinteger23,
+ &v->meta_row_height_chroma[k],
+ &v->dummyinteger8,
+ &v->dummyinteger9,
+ &v->dummyinteger19,
+ &v->dummyinteger20,
+ &v->dummyinteger17,
+ &v->dummyinteger10,
+ &v->dummyinteger11);
+
+ v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
+ mode_lib,
+ v->VRatioChroma[k],
+ v->VTAPsChroma[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->SwathHeightCThisState[k],
+ v->ViewportYStartC[k],
+ &v->PrefillC[k],
+ &v->MaxNumSwC[k]);
+ } else {
+ v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
+ v->PTEBufferSizeInRequestsForChroma = 0;
+ v->PDEAndMetaPTEBytesPerFrameC = 0.0;
+ v->MetaRowBytesC = 0.0;
+ v->DPTEBytesPerRowC = 0.0;
+ v->PrefetchLinesC[i][j][k] = 0.0;
+ v->PTEBufferSizeNotExceededC[i][j][k] = true;
+ }
+ v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
+ mode_lib,
+ v->DCCEnable[k],
+ v->Read256BlockHeightY[k],
+ v->Read256BlockWidthY[k],
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelY[k],
+ v->SourceScan[k],
+ v->SwathWidthYThisState[k],
+ v->ViewportHeight[k],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMMinPageSize,
+ v->HostVMMinPageSize,
+ v->PTEBufferSizeInRequestsForLuma,
+ v->PitchY[k],
+ v->DCCMetaPitchY[k],
+ &v->MacroTileWidthY[k],
+ &v->MetaRowBytesY,
+ &v->DPTEBytesPerRowY,
+ &v->PTEBufferSizeNotExceededY[i][j][k],
+ &v->dummyinteger7,
+ &v->dpte_row_height[k],
+ &v->dummyinteger29,
+ &v->dummyinteger27,
+ &v->dummyinteger24,
+ &v->meta_row_height[k],
+ &v->dummyinteger25,
+ &v->dpte_group_bytes[k],
+ &v->dummyinteger21,
+ &v->dummyinteger22,
+ &v->dummyinteger18,
+ &v->dummyinteger5,
+ &v->dummyinteger6);
+ v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
+ mode_lib,
+ v->VRatio[k],
+ v->vtaps[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->SwathHeightYThisState[k],
+ v->ViewportYStartY[k],
+ &v->PrefillY[k],
+ &v->MaxNumSwY[k]);
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
+ v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
+ v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
+
+ CalculateRowBandwidth(
+ v->GPUVMEnable,
+ v->SourcePixelFormat[k],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->DCCEnable[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->MetaRowBytesY,
+ v->MetaRowBytesC,
+ v->meta_row_height[k],
+ v->meta_row_height_chroma[k],
+ v->DPTEBytesPerRowY,
+ v->DPTEBytesPerRowC,
+ v->dpte_row_height[k],
+ v->dpte_row_height_chroma[k],
+ &v->meta_row_bandwidth[i][j][k],
+ &v->dpte_row_bandwidth[i][j][k]);
+ }
+ /*
+ * DCCMetaBufferSizeSupport(i, j) = True
+ * For k = 0 To NumberOfActivePlanes - 1
+ * If MetaRowBytes(i, j, k) > 24064 Then
+ * DCCMetaBufferSizeSupport(i, j) = False
+ * End If
+ * Next k
+ */
+ v->DCCMetaBufferSizeSupport[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->MetaRowBytes[i][j][k] > 24064)
+ v->DCCMetaBufferSizeSupport[i][j] = false;
+ }
+ v->UrgLatency[i] = CalculateUrgentLatency(
+ v->UrgentLatencyPixelDataOnly,
+ v->UrgentLatencyPixelMixedWithVMData,
+ v->UrgentLatencyVMDataOnly,
+ v->DoUrgentLatencyAdjustment,
+ v->UrgentLatencyAdjustmentFabricClockComponent,
+ v->UrgentLatencyAdjustmentFabricClockReference,
+ v->FabricClockPerState[i]);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ CalculateUrgentBurstFactor(
+ v->swath_width_luma_ub_this_state[k],
+ v->swath_width_chroma_ub_this_state[k],
+ v->SwathHeightYThisState[k],
+ v->SwathHeightCThisState[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->UrgLatency[i],
+ v->CursorBufferSize,
+ v->CursorWidth[k][0],
+ v->CursorBPP[k][0],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->BytePerPixelInDETY[k],
+ v->BytePerPixelInDETC[k],
+ v->DETBufferSizeYThisState[k],
+ v->DETBufferSizeCThisState[k],
+ &v->UrgentBurstFactorCursor[k],
+ &v->UrgentBurstFactorLuma[k],
+ &v->UrgentBurstFactorChroma[k],
+ &NotUrgentLatencyHiding[k]);
+ }
+
+ v->NotEnoughUrgentLatencyHidingA[i][j] = false;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (NotUrgentLatencyHiding[k]) {
+ v->NotEnoughUrgentLatencyHidingA[i][j] = true;
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
+ + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
+ v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
+ }
+
+ v->TotalVActivePixelBandwidth[i][j] = 0;
+ v->TotalVActiveCursorBandwidth[i][j] = 0;
+ v->TotalMetaRowBandwidth[i][j] = 0;
+ v->TotalDPTERowBandwidth[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
+ v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
+ v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
+ v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
+ }
+ }
+ }
+
+ //Calculate Return BW
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->BlendingAndTiming[k] == k) {
+ if (v->WritebackEnable[k] == true) {
+ v->WritebackDelayTime[k] = v->WritebackLatency
+ + CalculateWriteBackDelay(
+ v->WritebackPixelFormat[k],
+ v->WritebackHRatio[k],
+ v->WritebackVRatio[k],
+ v->WritebackVTaps[k],
+ v->WritebackDestinationWidth[k],
+ v->WritebackDestinationHeight[k],
+ v->WritebackSourceHeight[k],
+ v->HTotal[k]) / v->RequiredDISPCLK[i][j];
+ } else {
+ v->WritebackDelayTime[k] = 0.0;
+ }
+ for (m = 0; m < v->NumberOfActivePlanes; m++) {
+ if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
+ v->WritebackDelayTime[k] = dml_max(
+ v->WritebackDelayTime[k],
+ v->WritebackLatency
+ + CalculateWriteBackDelay(
+ v->WritebackPixelFormat[m],
+ v->WritebackHRatio[m],
+ v->WritebackVRatio[m],
+ v->WritebackVTaps[m],
+ v->WritebackDestinationWidth[m],
+ v->WritebackDestinationHeight[m],
+ v->WritebackSourceHeight[m],
+ v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
+ }
+ }
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ for (m = 0; m < v->NumberOfActivePlanes; m++) {
+ if (v->BlendingAndTiming[k] == m) {
+ v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
+ }
+ }
+ }
+ v->MaxMaxVStartup[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->MaximumVStartup[i][j][k] =
+ CalculateMaxVStartup(
+ v->VTotal[k],
+ v->VActive[k],
+ v->VBlankNom[k],
+ v->HTotal[k],
+ v->PixelClock[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->Interlace[k],
+ v->ip.VBlankNomDefaultUS,
+ v->WritebackDelayTime[k]);
+ v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
+ }
+ }
+ }
+
+ ReorderingBytes = v->NumberOfChannels
+ * dml_max3(
+ v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
+ v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
+ v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
+ }
+ }
+
+ if (v->UseMinimumRequiredDCFCLK == true)
+ UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ double IdealFabricAndSDPPortBandwidthPerState = dml_min(
+ v->ReturnBusWidth * v->DCFCLKState[i][j],
+ v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
+ double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
+ double PixelDataOnlyReturnBWPerState = dml_min(
+ IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
+ double PixelMixedWithVMDataReturnBWPerState = dml_min(
+ IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
+
+ if (v->HostVMEnable != true) {
+ v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
+ } else {
+ v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
+ }
+ }
+ }
+
+ //Re-ordering Buffer Support Check
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
+ > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
+ v->ROBSupport[i][j] = true;
+ } else {
+ v->ROBSupport[i][j] = false;
+ }
+ }
+ }
+
+ //Vertical Active BW support check
+
+ MaxTotalVActiveRDBandwidth = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
+ }
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
+ dml_min(
+ v->ReturnBusWidth * v->DCFCLKState[i][j],
+ v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
+ * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
+ v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
+ * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
+
+ if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
+ v->TotalVerticalActiveBandwidthSupport[i][j] = true;
+ } else {
+ v->TotalVerticalActiveBandwidthSupport[i][j] = false;
+ }
+ }
+ }
+
+ v->UrgentLatency = CalculateUrgentLatency(
+ v->UrgentLatencyPixelDataOnly,
+ v->UrgentLatencyPixelMixedWithVMData,
+ v->UrgentLatencyVMDataOnly,
+ v->DoUrgentLatencyAdjustment,
+ v->UrgentLatencyAdjustmentFabricClockComponent,
+ v->UrgentLatencyAdjustmentFabricClockReference,
+ v->FabricClock);
+ //Prefetch Check
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ double VMDataOnlyReturnBWPerState;
+ double HostVMInefficiencyFactor = 1;
+ int NextPrefetchModeState = MinPrefetchMode;
+ bool UnboundedRequestEnabledThisState = false;
+ int CompressedBufferSizeInkByteThisState = 0;
+ double dummy;
+
+ v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
+
+ v->BandwidthWithoutPrefetchSupported[i][j] = true;
+ if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
+ + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
+ v->BandwidthWithoutPrefetchSupported[i][j] = false;
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
+ v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
+ v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
+ v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
+ v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
+ v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
+ v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
+ v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
+ v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
+ }
+
+ VMDataOnlyReturnBWPerState = dml_min(
+ dml_min(
+ v->ReturnBusWidth * v->DCFCLKState[i][j],
+ v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
+ * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
+ * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
+ if (v->GPUVMEnable && v->HostVMEnable)
+ HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
+
+ v->ExtraLatency = CalculateExtraLatency(
+ v->RoundTripPingLatencyCycles,
+ ReorderingBytes,
+ v->DCFCLKState[i][j],
+ v->TotalNumberOfActiveDPP[i][j],
+ v->PixelChunkSizeInKByte,
+ v->TotalNumberOfDCCActiveDPP[i][j],
+ v->MetaChunkSize,
+ v->ReturnBWPerState[i][j],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->NumberOfActivePlanes,
+ v->NoOfDPPThisState,
+ v->dpte_group_bytes,
+ HostVMInefficiencyFactor,
+ v->HostVMMinPageSize,
+ v->HostVMMaxNonCachedPageTableLevels);
+
+ v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
+ do {
+ v->PrefetchModePerState[i][j] = NextPrefetchModeState;
+ v->MaxVStartup = v->NextMaxVStartup;
+
+ v->TWait = CalculateTWait(
+ v->PrefetchModePerState[i][j],
+ v->DRAMClockChangeLatency,
+ v->UrgLatency[i],
+ v->SREnterPlusExitTime);
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ CalculatePrefetchSchedulePerPlane(mode_lib,
+ HostVMInefficiencyFactor,
+ i, j, k);
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ CalculateUrgentBurstFactor(
+ v->swath_width_luma_ub_this_state[k],
+ v->swath_width_chroma_ub_this_state[k],
+ v->SwathHeightYThisState[k],
+ v->SwathHeightCThisState[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->UrgentLatency,
+ v->CursorBufferSize,
+ v->CursorWidth[k][0],
+ v->CursorBPP[k][0],
+ v->VRatioPreY[i][j][k],
+ v->VRatioPreC[i][j][k],
+ v->BytePerPixelInDETY[k],
+ v->BytePerPixelInDETC[k],
+ v->DETBufferSizeYThisState[k],
+ v->DETBufferSizeCThisState[k],
+ &v->UrgentBurstFactorCursorPre[k],
+ &v->UrgentBurstFactorLumaPre[k],
+ &v->UrgentBurstFactorChroma[k],
+ &v->NotUrgentLatencyHidingPre[k]);
+ }
+
+ v->MaximumReadBandwidthWithPrefetch = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
+
+ v->MaximumReadBandwidthWithPrefetch =
+ v->MaximumReadBandwidthWithPrefetch
+ + dml_max3(
+ v->VActivePixelBandwidth[i][j][k]
+ + v->VActiveCursorBandwidth[i][j][k]
+ + v->NoOfDPP[i][j][k]
+ * (v->meta_row_bandwidth[i][j][k]
+ + v->dpte_row_bandwidth[i][j][k]),
+ v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
+ v->NoOfDPP[i][j][k]
+ * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
+ * v->UrgentBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
+ * v->UrgentBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
+ }
+
+ v->NotEnoughUrgentLatencyHidingPre = false;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->NotUrgentLatencyHidingPre[k] == true) {
+ v->NotEnoughUrgentLatencyHidingPre = true;
+ }
+ }
+
+ v->PrefetchSupported[i][j] = true;
+ if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
+ || v->NotEnoughUrgentLatencyHidingPre == 1) {
+ v->PrefetchSupported[i][j] = false;
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
+ || v->NoTimeForPrefetch[i][j][k] == true) {
+ v->PrefetchSupported[i][j] = false;
+ }
+ }
+
+ v->DynamicMetadataSupported[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
+ v->DynamicMetadataSupported[i][j] = false;
+ }
+ }
+
+ v->VRatioInPrefetchSupported[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
+ v->VRatioInPrefetchSupported[i][j] = false;
+ }
+ }
+ v->AnyLinesForVMOrRowTooLarge = false;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
+ v->AnyLinesForVMOrRowTooLarge = true;
+ }
+ }
+
+ v->NextPrefetchMode = v->NextPrefetchMode + 1;
+
+ if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
+ v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
+ - dml_max(
+ v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
+ v->NoOfDPP[i][j][k]
+ * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
+ * v->UrgentBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
+ * v->UrgentBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
+ }
+ v->TotImmediateFlipBytes = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
+ + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
+ + v->DPTEBytesPerRow[i][j][k];
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ CalculateFlipSchedule(
+ mode_lib,
+ k,
+ HostVMInefficiencyFactor,
+ v->ExtraLatency,
+ v->UrgLatency[i],
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k],
+ v->MetaRowBytes[i][j][k],
+ v->DPTEBytesPerRow[i][j][k]);
+ }
+ v->total_dcn_read_bw_with_flip = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
+ + dml_max3(
+ v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
+ v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
+ + v->VActiveCursorBandwidth[i][j][k],
+ v->NoOfDPP[i][j][k]
+ * (v->final_flip_bw[k]
+ + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
+ * v->UrgentBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
+ * v->UrgentBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
+ }
+ v->ImmediateFlipSupportedForState[i][j] = true;
+ if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
+ v->ImmediateFlipSupportedForState[i][j] = false;
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->ImmediateFlipSupportedForPipe[k] == false) {
+ v->ImmediateFlipSupportedForState[i][j] = false;
+ }
+ }
+ } else {
+ v->ImmediateFlipSupportedForState[i][j] = false;
+ }
+
+ if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
+ v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
+ NextPrefetchModeState = NextPrefetchModeState + 1;
+ } else {
+ v->NextMaxVStartup = v->NextMaxVStartup - 1;
+ }
+ v->NextPrefetchMode = v->NextPrefetchMode + 1;
+ } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
+ && ((v->HostVMEnable == false &&
+ v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
+ || v->ImmediateFlipSupportedForState[i][j] == true))
+ || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
+
+ CalculateUnboundedRequestAndCompressedBufferSize(
+ v->DETBufferSizeInKByte[0],
+ v->ConfigReturnBufferSizeInKByte,
+ v->UseUnboundedRequesting,
+ v->TotalNumberOfActiveDPP[i][j],
+ NoChroma,
+ v->MaxNumDPP,
+ v->CompressedBufferSegmentSizeInkByte,
+ v->Output,
+ &UnboundedRequestEnabledThisState,
+ &CompressedBufferSizeInkByteThisState);
+
+ CalculateWatermarksAndDRAMSpeedChangeSupport(
+ mode_lib,
+ v->PrefetchModePerState[i][j],
+ v->DCFCLKState[i][j],
+ v->ReturnBWPerState[i][j],
+ v->UrgLatency[i],
+ v->ExtraLatency,
+ v->SOCCLKPerState[i],
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ v->DETBufferSizeYThisState,
+ v->DETBufferSizeCThisState,
+ v->SwathHeightYThisState,
+ v->SwathHeightCThisState,
+ v->SwathWidthYThisState,
+ v->SwathWidthCThisState,
+ v->NoOfDPPThisState,
+ v->BytePerPixelInDETY,
+ v->BytePerPixelInDETC,
+ UnboundedRequestEnabledThisState,
+ CompressedBufferSizeInkByteThisState,
+ &v->DRAMClockChangeSupport[i][j],
+ &dummy,
+ &dummy,
+ &dummy,
+ &dummy);
+ }
+ }
+
+ /*PTE Buffer Size Check*/
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ v->PTEBufferSizeNotExceeded[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
+ v->PTEBufferSizeNotExceeded[i][j] = false;
+ }
+ }
+ }
+ }
+
+ /*Cursor Support Check*/
+ v->CursorSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->CursorWidth[k][0] > 0.0) {
+ if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
+ v->CursorSupport = false;
+ }
+ }
+ }
+
+ /*Valid Pitch Check*/
+ v->PitchSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
+ if (v->DCCEnable[k] == true) {
+ v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
+ } else {
+ v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
+ }
+ if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
+ && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
+ && v->SourcePixelFormat[k] != dm_mono_8) {
+ v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
+ if (v->DCCEnable[k] == true) {
+ v->AlignedDCCMetaPitchC[k] = dml_ceil(
+ dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
+ 64.0 * v->Read256BlockWidthC[k]);
+ } else {
+ v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
+ }
+ } else {
+ v->AlignedCPitch[k] = v->PitchC[k];
+ v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
+ }
+ if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
+ || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
+ v->PitchSupport = false;
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
+ ViewportExceedsSurface = true;
+ if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
+ && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
+ && v->SourcePixelFormat[k] != dm_rgbe) {
+ if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
+ || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
+ ViewportExceedsSurface = true;
+ }
+ }
+ }
+ }
+
+ /*Mode Support, Voltage State and SOC Configuration*/
+ for (i = v->soc.num_states - 1; i >= 0; i--) {
+ for (j = 0; j < 2; j++) {
+ if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
+ && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
+ && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
+ && v->DTBCLKRequiredMoreThanSupported[i] == false
+ && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
+ && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
+ && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
+ && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
+ && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
+ && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
+ && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
+ && ((v->HostVMEnable == false
+ && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
+ || v->ImmediateFlipSupportedForState[i][j] == true)
+ && FMTBufferExceeded == false) {
+ v->ModeSupport[i][j] = true;
+ } else {
+ v->ModeSupport[i][j] = false;
+ }
+ }
+ }
+
+ {
+ unsigned int MaximumMPCCombine = 0;
+
+ for (i = v->soc.num_states; i >= 0; i--) {
+ if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
+ v->VoltageLevel = i;
+ v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
+ if (v->ModeSupport[i][0] == true) {
+ MaximumMPCCombine = 0;
+ } else {
+ MaximumMPCCombine = 1;
+ }
+ }
+ }
+ v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
+ v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
+ v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
+ }
+ v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
+ v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
+ v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
+ v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
+ v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
+ v->maxMpcComb = MaximumMPCCombine;
+ }
+}
+
+static void CalculateWatermarksAndDRAMSpeedChangeSupport(
+ struct display_mode_lib *mode_lib,
+ unsigned int PrefetchMode,
+ double DCFCLK,
+ double ReturnBW,
+ double UrgentLatency,
+ double ExtraLatency,
+ double SOCCLK,
+ double DCFCLKDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerPlane[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+ bool UnboundedRequestEnabled,
+ unsigned int CompressedBufferSizeInkByte,
+ enum clock_change_support *DRAMClockChangeSupport,
+ double *StutterExitWatermark,
+ double *StutterEnterPlusExitWatermark,
+ double *Z8StutterExitWatermark,
+ double *Z8StutterEnterPlusExitWatermark)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ double EffectiveLBLatencyHidingY;
+ double EffectiveLBLatencyHidingC;
+ double LinesInDETY[DC__NUM_DPP__MAX];
+ double LinesInDETC;
+ unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETCRoundedDownToSwath;
+ double FullDETBufferingTimeY;
+ double FullDETBufferingTimeC;
+ double ActiveDRAMClockChangeLatencyMarginY;
+ double ActiveDRAMClockChangeLatencyMarginC;
+ double WritebackDRAMClockChangeLatencyMargin;
+ double PlaneWithMinActiveDRAMClockChangeMargin;
+ double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
+ double WritebackDRAMClockChangeLatencyHiding;
+ double TotalPixelBW = 0.0;
+ int k, j;
+
+ v->UrgentWatermark = UrgentLatency + ExtraLatency;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
+ dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
+ dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
+#endif
+
+ v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
+ dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
+#endif
+
+ v->TotalActiveWriteback = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k] == true) {
+ v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
+ }
+ }
+
+ if (v->TotalActiveWriteback <= 1) {
+ v->WritebackUrgentWatermark = v->WritebackLatency;
+ } else {
+ v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ }
+
+ if (v->TotalActiveWriteback <= 1) {
+ v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
+ } else {
+ v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ TotalPixelBW = TotalPixelBW
+ + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
+ / (v->HTotal[k] / v->PixelClock[k]);
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ double EffectiveDETBufferSizeY = DETBufferSizeY[k];
+
+ v->LBLatencyHidingSourceLinesY = dml_min(
+ (double) v->MaxLineBufferLines,
+ dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
+
+ v->LBLatencyHidingSourceLinesC = dml_min(
+ (double) v->MaxLineBufferLines,
+ dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
+
+ EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
+
+ EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
+
+ if (UnboundedRequestEnabled) {
+ EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
+ + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
+ }
+
+ LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
+ FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
+ if (BytePerPixelDETC[k] > 0) {
+ LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
+ LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
+ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
+ } else {
+ LinesInDETC = 0;
+ FullDETBufferingTimeC = 999999;
+ }
+
+ ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
+ - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
+
+ if (v->NumberOfActivePlanes > 1) {
+ ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
+ - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
+ }
+
+ if (BytePerPixelDETC[k] > 0) {
+ ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
+ - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
+
+ if (v->NumberOfActivePlanes > 1) {
+ ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
+ - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
+ }
+ v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
+ } else {
+ v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
+ }
+
+ if (v->WritebackEnable[k] == true) {
+ WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
+ / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
+ if (v->WritebackPixelFormat[k] == dm_444_64) {
+ WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
+ }
+ WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
+ v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
+ }
+ }
+
+ v->MinActiveDRAMClockChangeMargin = 999999;
+ PlaneWithMinActiveDRAMClockChangeMargin = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
+ v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
+ if (v->BlendingAndTiming[k] == k) {
+ PlaneWithMinActiveDRAMClockChangeMargin = k;
+ } else {
+ for (j = 0; j < v->NumberOfActivePlanes; ++j) {
+ if (v->BlendingAndTiming[k] == j) {
+ PlaneWithMinActiveDRAMClockChangeMargin = j;
+ }
+ }
+ }
+ }
+ }
+
+ v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
+
+ SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
+ && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
+ SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
+ }
+ }
+
+ v->TotalNumberOfActiveOTG = 0;
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k) {
+ v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
+ }
+ }
+
+ if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
+ } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
+ || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
+ } else {
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ }
+
+ *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
+ *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
+ *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+ *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
+ dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
+#endif
+}
+
+static void CalculateDCFCLKDeepSleep(
+ struct display_mode_lib *mode_lib,
+ unsigned int NumberOfActivePlanes,
+ int BytePerPixelY[],
+ int BytePerPixelC[],
+ double VRatio[],
+ double VRatioChroma[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerPlane[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double DPPCLK[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ int ReturnBusWidth,
+ double *DCFCLKDeepSleep)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ double DisplayPipeLineDeliveryTimeLuma;
+ double DisplayPipeLineDeliveryTimeChroma;
+ double ReadBandwidth = 0.0;
+ int k;
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+
+ if (VRatio[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
+ }
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChroma = 0;
+ } else {
+ if (VRatioChroma[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
+ }
+ }
+
+ if (BytePerPixelC[k] > 0) {
+ v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
+ __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
+ } else {
+ v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
+ }
+ v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
+
+ }
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+ }
+
+ *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
+ }
+}
+
+static void CalculateUrgentBurstFactor(
+ int swath_width_luma_ub,
+ int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double LineTime,
+ double UrgentLatency,
+ double CursorBufferSize,
+ unsigned int CursorWidth,
+ unsigned int CursorBPP,
+ double VRatio,
+ double VRatioC,
+ double BytePerPixelInDETY,
+ double BytePerPixelInDETC,
+ double DETBufferSizeY,
+ double DETBufferSizeC,
+ double *UrgentBurstFactorCursor,
+ double *UrgentBurstFactorLuma,
+ double *UrgentBurstFactorChroma,
+ bool *NotEnoughUrgentLatencyHiding)
+{
+ double LinesInDETLuma;
+ double LinesInDETChroma;
+ unsigned int LinesInCursorBuffer;
+ double CursorBufferSizeInTime;
+ double DETBufferSizeInTimeLuma;
+ double DETBufferSizeInTimeChroma;
+
+ *NotEnoughUrgentLatencyHiding = 0;
+
+ if (CursorWidth > 0) {
+ LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
+ if (VRatio > 0) {
+ CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
+ if (CursorBufferSizeInTime - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorCursor = 0;
+ } else {
+ *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorCursor = 1;
+ }
+ }
+
+ LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
+ if (VRatio > 0) {
+ DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
+ if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorLuma = 0;
+ } else {
+ *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorLuma = 1;
+ }
+
+ if (BytePerPixelInDETC > 0) {
+ LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
+ if (VRatio > 0) {
+ DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
+ if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorChroma = 0;
+ } else {
+ *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorChroma = 1;
+ }
+ }
+}
+
+static void CalculatePixelDeliveryTimes(
+ unsigned int NumberOfActivePlanes,
+ double VRatio[],
+ double VRatioChroma[],
+ double VRatioPrefetchY[],
+ double VRatioPrefetchC[],
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ unsigned int DPPPerPlane[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double DPPCLK[],
+ int BytePerPixelC[],
+ enum scan_direction_class SourceScan[],
+ unsigned int NumberOfCursors[],
+ unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
+ unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int BlockHeight256BytesC[],
+ double DisplayPipeLineDeliveryTimeLuma[],
+ double DisplayPipeLineDeliveryTimeChroma[],
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeLuma[],
+ double DisplayPipeRequestDeliveryTimeChroma[],
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ double CursorRequestDeliveryTime[],
+ double CursorRequestDeliveryTimePrefetch[])
+{
+ double req_per_swath_ub;
+ int k;
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (VRatio[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
+ }
+
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChroma[k] = 0;
+ } else {
+ if (VRatioChroma[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
+ }
+ }
+
+ if (VRatioPrefetchY[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
+ }
+
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
+ } else {
+ if (VRatioPrefetchC[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
+ }
+ }
+ }
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (SourceScan[k] != dm_vert) {
+ req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
+ } else {
+ req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
+ }
+ DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
+ DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeRequestDeliveryTimeChroma[k] = 0;
+ DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
+ } else {
+ if (SourceScan[k] != dm_vert) {
+ req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
+ } else {
+ req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
+ }
+ DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
+ DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
+ dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
+ dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
+ dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
+#endif
+ }
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ int cursor_req_per_width;
+
+ cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
+ if (NumberOfCursors[k] > 0) {
+ if (VRatio[k] <= 1) {
+ CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
+ } else {
+ CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
+ }
+ if (VRatioPrefetchY[k] <= 1) {
+ CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
+ } else {
+ CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
+ }
+ } else {
+ CursorRequestDeliveryTime[k] = 0;
+ CursorRequestDeliveryTimePrefetch[k] = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
+ dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
+ dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
+#endif
+ }
+}
+
+static void CalculateMetaAndPTETimes(
+ int NumberOfActivePlanes,
+ bool GPUVMEnable,
+ int MetaChunkSize,
+ int MinMetaChunkSizeBytes,
+ int HTotal[],
+ double VRatio[],
+ double VRatioChroma[],
+ double DestinationLinesToRequestRowInVBlank[],
+ double DestinationLinesToRequestRowInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ int BytePerPixelY[],
+ int BytePerPixelC[],
+ enum scan_direction_class SourceScan[],
+ int dpte_row_height[],
+ int dpte_row_height_chroma[],
+ int meta_row_width[],
+ int meta_row_width_chroma[],
+ int meta_row_height[],
+ int meta_row_height_chroma[],
+ int meta_req_width[],
+ int meta_req_width_chroma[],
+ int meta_req_height[],
+ int meta_req_height_chroma[],
+ int dpte_group_bytes[],
+ int PTERequestSizeY[],
+ int PTERequestSizeC[],
+ int PixelPTEReqWidthY[],
+ int PixelPTEReqHeightY[],
+ int PixelPTEReqWidthC[],
+ int PixelPTEReqHeightC[],
+ int dpte_row_width_luma_ub[],
+ int dpte_row_width_chroma_ub[],
+ double DST_Y_PER_PTE_ROW_NOM_L[],
+ double DST_Y_PER_PTE_ROW_NOM_C[],
+ double DST_Y_PER_META_ROW_NOM_L[],
+ double DST_Y_PER_META_ROW_NOM_C[],
+ double TimePerMetaChunkNominal[],
+ double TimePerChromaMetaChunkNominal[],
+ double TimePerMetaChunkVBlank[],
+ double TimePerChromaMetaChunkVBlank[],
+ double TimePerMetaChunkFlip[],
+ double TimePerChromaMetaChunkFlip[],
+ double time_per_pte_group_nom_luma[],
+ double time_per_pte_group_vblank_luma[],
+ double time_per_pte_group_flip_luma[],
+ double time_per_pte_group_nom_chroma[],
+ double time_per_pte_group_vblank_chroma[],
+ double time_per_pte_group_flip_chroma[])
+{
+ unsigned int meta_chunk_width;
+ unsigned int min_meta_chunk_width;
+ unsigned int meta_chunk_per_row_int;
+ unsigned int meta_row_remainder;
+ unsigned int meta_chunk_threshold;
+ unsigned int meta_chunks_per_row_ub;
+ unsigned int meta_chunk_width_chroma;
+ unsigned int min_meta_chunk_width_chroma;
+ unsigned int meta_chunk_per_row_int_chroma;
+ unsigned int meta_row_remainder_chroma;
+ unsigned int meta_chunk_threshold_chroma;
+ unsigned int meta_chunks_per_row_ub_chroma;
+ unsigned int dpte_group_width_luma;
+ unsigned int dpte_groups_per_row_luma_ub;
+ unsigned int dpte_group_width_chroma;
+ unsigned int dpte_groups_per_row_chroma_ub;
+ int k;
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
+ if (BytePerPixelC[k] == 0) {
+ DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
+ } else {
+ DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
+ }
+ DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
+ if (BytePerPixelC[k] == 0) {
+ DST_Y_PER_META_ROW_NOM_C[k] = 0;
+ } else {
+ DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
+ }
+ }
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (DCCEnable[k] == true) {
+ meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
+ min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
+ meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
+ meta_row_remainder = meta_row_width[k] % meta_chunk_width;
+ if (SourceScan[k] != dm_vert) {
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
+ } else {
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
+ }
+ if (meta_row_remainder <= meta_chunk_threshold) {
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+ } else {
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+ }
+ TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ if (BytePerPixelC[k] == 0) {
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ } else {
+ meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
+ min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
+ meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
+ meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
+ if (SourceScan[k] != dm_vert) {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
+ } else {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
+ }
+ if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
+ } else {
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
+ }
+ TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ }
+ } else {
+ TimePerMetaChunkNominal[k] = 0;
+ TimePerMetaChunkVBlank[k] = 0;
+ TimePerMetaChunkFlip[k] = 0;
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ }
+ }
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (GPUVMEnable == true) {
+ if (SourceScan[k] != dm_vert) {
+ dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
+ } else {
+ dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
+ }
+ dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
+ time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ if (BytePerPixelC[k] == 0) {
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ } else {
+ if (SourceScan[k] != dm_vert) {
+ dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
+ } else {
+ dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
+ }
+ dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
+ time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ }
+ } else {
+ time_per_pte_group_nom_luma[k] = 0;
+ time_per_pte_group_vblank_luma[k] = 0;
+ time_per_pte_group_flip_luma[k] = 0;
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ }
+ }
+}
+
+static void CalculateVMGroupAndRequestTimes(
+ unsigned int NumberOfActivePlanes,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int HTotal[],
+ int BytePerPixelC[],
+ double DestinationLinesToRequestVMInVBlank[],
+ double DestinationLinesToRequestVMInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ int dpte_row_width_luma_ub[],
+ int dpte_row_width_chroma_ub[],
+ int vm_group_bytes[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ int meta_pte_bytes_per_frame_ub_l[],
+ int meta_pte_bytes_per_frame_ub_c[],
+ double TimePerVMGroupVBlank[],
+ double TimePerVMGroupFlip[],
+ double TimePerVMRequestVBlank[],
+ double TimePerVMRequestFlip[])
+{
+ int num_group_per_lower_vm_stage;
+ int num_req_per_lower_vm_stage;
+ int k;
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
+ if (DCCEnable[k] == false) {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
+ + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
+ } else {
+ num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
+ }
+ } else {
+ if (GPUVMMaxPageTableLevels == 1) {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
+ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
+ } else {
+ num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
+ }
+ } else {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
+ + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
+ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
+ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
+ } else {
+ num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
+ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
+ }
+ }
+ }
+
+ if (DCCEnable[k] == false) {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
+ }
+ } else {
+ if (GPUVMMaxPageTableLevels == 1) {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
+ }
+ } else {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
+ + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
+ }
+ }
+ }
+
+ TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+ TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+ TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
+ TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
+
+ if (GPUVMMaxPageTableLevels > 2) {
+ TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
+ TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
+ TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
+ TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
+ }
+
+ } else {
+ TimePerVMGroupVBlank[k] = 0;
+ TimePerVMGroupFlip[k] = 0;
+ TimePerVMRequestVBlank[k] = 0;
+ TimePerVMRequestFlip[k] = 0;
+ }
+ }
+}
+
+static void CalculateStutterEfficiency(
+ struct display_mode_lib *mode_lib,
+ int CompressedBufferSizeInkByte,
+ bool UnboundedRequestEnabled,
+ int ConfigReturnBufferSizeInKByte,
+ int MetaFIFOSizeInKEntries,
+ int ZeroSizeBufferEntries,
+ int NumberOfActivePlanes,
+ int ROBBufferSizeInKByte,
+ double TotalDataReadBandwidth,
+ double DCFCLK,
+ double ReturnBW,
+ double COMPBUF_RESERVED_SPACE_64B,
+ double COMPBUF_RESERVED_SPACE_ZS,
+ double SRExitTime,
+ double SRExitZ8Time,
+ bool SynchronizedVBlank,
+ double Z8StutterEnterPlusExitWatermark,
+ double StutterEnterPlusExitWatermark,
+ bool ProgressiveToInterlaceUnitInOPP,
+ bool Interlace[],
+ double MinTTUVBlank[],
+ int DPPPerPlane[],
+ unsigned int DETBufferSizeY[],
+ int BytePerPixelY[],
+ double BytePerPixelDETY[],
+ double SwathWidthY[],
+ int SwathHeightY[],
+ int SwathHeightC[],
+ double NetDCCRateLuma[],
+ double NetDCCRateChroma[],
+ double DCCFractionOfZeroSizeRequestsLuma[],
+ double DCCFractionOfZeroSizeRequestsChroma[],
+ int HTotal[],
+ int VTotal[],
+ double PixelClock[],
+ double VRatio[],
+ enum scan_direction_class SourceScan[],
+ int BlockHeight256BytesY[],
+ int BlockWidth256BytesY[],
+ int BlockHeight256BytesC[],
+ int BlockWidth256BytesC[],
+ int DCCYMaxUncompressedBlock[],
+ int DCCCMaxUncompressedBlock[],
+ int VActive[],
+ bool DCCEnable[],
+ bool WritebackEnable[],
+ double ReadBandwidthPlaneLuma[],
+ double ReadBandwidthPlaneChroma[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+ double *StutterEfficiencyNotIncludingVBlank,
+ double *StutterEfficiency,
+ int *NumberOfStutterBurstsPerFrame,
+ double *Z8StutterEfficiencyNotIncludingVBlank,
+ double *Z8StutterEfficiency,
+ int *Z8NumberOfStutterBurstsPerFrame,
+ double *StutterPeriod)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+
+ double DETBufferingTimeY;
+ double SwathWidthYCriticalPlane = 0;
+ double VActiveTimeCriticalPlane = 0;
+ double FrameTimeCriticalPlane = 0;
+ int BytePerPixelYCriticalPlane = 0;
+ double LinesToFinishSwathTransferStutterCriticalPlane = 0;
+ double MinTTUVBlankCriticalPlane = 0;
+ double TotalCompressedReadBandwidth;
+ double TotalRowReadBandwidth;
+ double AverageDCCCompressionRate;
+ double EffectiveCompressedBufferSize;
+ double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
+ double StutterBurstTime;
+ int TotalActiveWriteback;
+ double LinesInDETY;
+ double LinesInDETYRoundedDownToSwath;
+ double MaximumEffectiveCompressionLuma;
+ double MaximumEffectiveCompressionChroma;
+ double TotalZeroSizeRequestReadBandwidth;
+ double TotalZeroSizeCompressedReadBandwidth;
+ double AverageDCCZeroSizeFraction;
+ double AverageZeroSizeCompressionRate;
+ int TotalNumberOfActiveOTG = 0;
+ double LastStutterPeriod = 0.0;
+ double LastZ8StutterPeriod = 0.0;
+ int k;
+
+ TotalZeroSizeRequestReadBandwidth = 0;
+ TotalZeroSizeCompressedReadBandwidth = 0;
+ TotalRowReadBandwidth = 0;
+ TotalCompressedReadBandwidth = 0;
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (DCCEnable[k] == true) {
+ if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
+ || DCCYMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionLuma = 2;
+ } else {
+ MaximumEffectiveCompressionLuma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
+ if (ReadBandwidthPlaneChroma[k] > 0) {
+ if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
+ || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionChroma = 2;
+ } else {
+ MaximumEffectiveCompressionChroma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
+ + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
+ }
+ } else {
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
+ }
+ TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
+ }
+
+ AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
+ AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
+ dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
+ dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
+#endif
+
+ if (AverageDCCZeroSizeFraction == 1) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
+ } else if (AverageDCCZeroSizeFraction > 0) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = dml_min(
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
+ + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
+ (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+ dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print(
+ "DML::%s: min 2 = %f\n",
+ __func__,
+ MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
+ dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+ } else {
+ EffectiveCompressedBufferSize = dml_min(
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
+ dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
+ dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+#endif
+
+ *StutterPeriod = 0;
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
+ / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
+ DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
+ dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
+ dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
+ dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
+ dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
+ dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
+ dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
+ dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
+ dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
+#endif
+
+ if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
+ bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
+
+ *StutterPeriod = DETBufferingTimeY;
+ FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
+ VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
+ BytePerPixelYCriticalPlane = BytePerPixelY[k];
+ SwathWidthYCriticalPlane = SwathWidthY[k];
+ LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
+ MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+ dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
+ dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
+ dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
+ dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
+ dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
+ dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
+#endif
+ }
+ }
+
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
+ dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+ dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+ dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
+ dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+#endif
+
+ StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
+ + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
+ + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
+ dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
+ dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+#endif
+ StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
+
+ dml_print(
+ "DML::%s: Time to finish residue swath=%f\n",
+ __func__,
+ LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
+
+ TotalActiveWriteback = 0;
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (WritebackEnable[k]) {
+ TotalActiveWriteback = TotalActiveWriteback + 1;
+ }
+ }
+
+ if (TotalActiveWriteback == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
+ dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
+ dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+#endif
+ *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
+ *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
+ *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
+ *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
+ } else {
+ *StutterEfficiencyNotIncludingVBlank = 0.;
+ *Z8StutterEfficiencyNotIncludingVBlank = 0.;
+ *NumberOfStutterBurstsPerFrame = 0;
+ *Z8NumberOfStutterBurstsPerFrame = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k) {
+ TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
+ }
+ }
+
+ if (*StutterEfficiencyNotIncludingVBlank > 0) {
+ LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+
+ if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
+ *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
+ / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
+ } else {
+ *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *StutterEfficiency = 0;
+ }
+
+ if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
+ LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+ if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
+ *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
+ / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
+ } else {
+ *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *Z8StutterEfficiency = 0.;
+ }
+
+ dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+ dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
+ dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+}
+
+static void CalculateSwathAndDETConfiguration(
+ bool ForceSingleDPP,
+ int NumberOfActivePlanes,
+ unsigned int DETBufferSizeInKByte,
+ double MaximumSwathWidthLuma[],
+ double MaximumSwathWidthChroma[],
+ enum scan_direction_class SourceScan[],
+ enum source_format_class SourcePixelFormat[],
+ enum dm_swizzle_mode SurfaceTiling[],
+ int ViewportWidth[],
+ int ViewportHeight[],
+ int SurfaceWidthY[],
+ int SurfaceWidthC[],
+ int SurfaceHeightY[],
+ int SurfaceHeightC[],
+ int Read256BytesBlockHeightY[],
+ int Read256BytesBlockHeightC[],
+ int Read256BytesBlockWidthY[],
+ int Read256BytesBlockWidthC[],
+ enum odm_combine_mode ODMCombineEnabled[],
+ int BlendingAndTiming[],
+ int BytePerPixY[],
+ int BytePerPixC[],
+ double BytePerPixDETY[],
+ double BytePerPixDETC[],
+ int HActive[],
+ double HRatio[],
+ double HRatioChroma[],
+ int DPPPerPlane[],
+ int swath_width_luma_ub[],
+ int swath_width_chroma_ub[],
+ double SwathWidth[],
+ double SwathWidthChroma[],
+ int SwathHeightY[],
+ int SwathHeightC[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ bool ViewportSizeSupportPerPlane[],
+ bool *ViewportSizeSupport)
+{
+ int MaximumSwathHeightY[DC__NUM_DPP__MAX];
+ int MaximumSwathHeightC[DC__NUM_DPP__MAX];
+ int MinimumSwathHeightY;
+ int MinimumSwathHeightC;
+ int RoundedUpMaxSwathSizeBytesY;
+ int RoundedUpMaxSwathSizeBytesC;
+ int RoundedUpMinSwathSizeBytesY;
+ int RoundedUpMinSwathSizeBytesC;
+ int RoundedUpSwathSizeBytesY;
+ int RoundedUpSwathSizeBytesC;
+ double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
+ double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
+ int k;
+
+ CalculateSwathWidth(
+ ForceSingleDPP,
+ NumberOfActivePlanes,
+ SourcePixelFormat,
+ SourceScan,
+ ViewportWidth,
+ ViewportHeight,
+ SurfaceWidthY,
+ SurfaceWidthC,
+ SurfaceHeightY,
+ SurfaceHeightC,
+ ODMCombineEnabled,
+ BytePerPixY,
+ BytePerPixC,
+ Read256BytesBlockHeightY,
+ Read256BytesBlockHeightC,
+ Read256BytesBlockWidthY,
+ Read256BytesBlockWidthC,
+ BlendingAndTiming,
+ HActive,
+ HRatio,
+ DPPPerPlane,
+ SwathWidthSingleDPP,
+ SwathWidthSingleDPPChroma,
+ SwathWidth,
+ SwathWidthChroma,
+ MaximumSwathHeightY,
+ MaximumSwathHeightC,
+ swath_width_luma_ub,
+ swath_width_chroma_ub);
+
+ *ViewportSizeSupport = true;
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
+ || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
+ if (SurfaceTiling[k] == dm_sw_linear
+ || (SourcePixelFormat[k] == dm_444_64
+ && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
+ && SourceScan[k] != dm_vert)) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k];
+ } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k];
+ } else {
+ MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
+ }
+ MinimumSwathHeightC = MaximumSwathHeightC[k];
+ } else {
+ if (SurfaceTiling[k] == dm_sw_linear) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k];
+ MinimumSwathHeightC = MaximumSwathHeightC[k];
+ } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
+ MinimumSwathHeightC = MaximumSwathHeightC[k];
+ } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
+ MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
+ } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k];
+ MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
+ } else {
+ MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
+ MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
+ }
+ }
+
+ RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
+ RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
+ if (SourcePixelFormat[k] == dm_420_10) {
+ RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
+ RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
+ }
+ RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
+ RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
+ if (SourcePixelFormat[k] == dm_420_10) {
+ RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
+ RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
+ }
+
+ if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
+ } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
+ && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
+ SwathHeightY[k] = MinimumSwathHeightY;
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
+ } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
+ && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MinimumSwathHeightC;
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
+ RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
+ } else {
+ SwathHeightY[k] = MinimumSwathHeightY;
+ SwathHeightC[k] = MinimumSwathHeightC;
+ RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
+ RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
+ }
+ {
+ double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
+
+ if (SwathHeightC[k] == 0) {
+ DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
+ DETBufferSizeC[k] = 0;
+ } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
+ DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
+ DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
+ } else {
+ DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
+ DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
+ }
+
+ if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
+ || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
+ *ViewportSizeSupport = false;
+ ViewportSizeSupportPerPlane[k] = false;
+ } else {
+ ViewportSizeSupportPerPlane[k] = true;
+ }
+ }
+ }
+}
+
+static void CalculateSwathWidth(
+ bool ForceSingleDPP,
+ int NumberOfActivePlanes,
+ enum source_format_class SourcePixelFormat[],
+ enum scan_direction_class SourceScan[],
+ int ViewportWidth[],
+ int ViewportHeight[],
+ int SurfaceWidthY[],
+ int SurfaceWidthC[],
+ int SurfaceHeightY[],
+ int SurfaceHeightC[],
+ enum odm_combine_mode ODMCombineEnabled[],
+ int BytePerPixY[],
+ int BytePerPixC[],
+ int Read256BytesBlockHeightY[],
+ int Read256BytesBlockHeightC[],
+ int Read256BytesBlockWidthY[],
+ int Read256BytesBlockWidthC[],
+ int BlendingAndTiming[],
+ int HActive[],
+ double HRatio[],
+ int DPPPerPlane[],
+ double SwathWidthSingleDPPY[],
+ double SwathWidthSingleDPPC[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ int MaximumSwathHeightY[],
+ int MaximumSwathHeightC[],
+ int swath_width_luma_ub[],
+ int swath_width_chroma_ub[])
+{
+ enum odm_combine_mode MainPlaneODMCombine;
+ int j, k;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
+#endif
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (SourceScan[k] != dm_vert) {
+ SwathWidthSingleDPPY[k] = ViewportWidth[k];
+ } else {
+ SwathWidthSingleDPPY[k] = ViewportHeight[k];
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
+ dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
+#endif
+
+ MainPlaneODMCombine = ODMCombineEnabled[k];
+ for (j = 0; j < NumberOfActivePlanes; ++j) {
+ if (BlendingAndTiming[k] == j) {
+ MainPlaneODMCombine = ODMCombineEnabled[j];
+ }
+ }
+
+ if (MainPlaneODMCombine == dm_odm_combine_mode_4to1)
+ SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
+ else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1)
+ SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
+ else if (DPPPerPlane[k] == 2)
+ SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
+ else
+ SwathWidthY[k] = SwathWidthSingleDPPY[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
+ dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
+#endif
+
+ if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
+ SwathWidthC[k] = SwathWidthY[k] / 2;
+ SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
+ } else {
+ SwathWidthC[k] = SwathWidthY[k];
+ SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
+ }
+
+ if (ForceSingleDPP == true) {
+ SwathWidthY[k] = SwathWidthSingleDPPY[k];
+ SwathWidthC[k] = SwathWidthSingleDPPC[k];
+ }
+ {
+ int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
+ int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
+#endif
+
+ if (SourceScan[k] != dm_vert) {
+ MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
+ swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
+ if (BytePerPixC[k] > 0) {
+ int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+
+ swath_width_chroma_ub[k] = dml_min(
+ surface_width_ub_c,
+ (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ } else {
+ MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
+ swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
+ if (BytePerPixC[k] > 0) {
+ int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+
+ swath_width_chroma_ub[k] = dml_min(
+ surface_height_ub_c,
+ (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ }
+ }
+ }
+}
+
+static double CalculateExtraLatency(
+ int RoundTripPingLatencyCycles,
+ int ReorderingBytes,
+ double DCFCLK,
+ int TotalNumberOfActiveDPP,
+ int PixelChunkSizeInKByte,
+ int TotalNumberOfDCCActiveDPP,
+ int MetaChunkSize,
+ double ReturnBW,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ int NumberOfActivePlanes,
+ int NumberOfDPP[],
+ int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ int HostVMMaxNonCachedPageTableLevels)
+{
+ double ExtraLatencyBytes;
+ double ExtraLatency;
+
+ ExtraLatencyBytes = CalculateExtraLatencyBytes(
+ ReorderingBytes,
+ TotalNumberOfActiveDPP,
+ PixelChunkSizeInKByte,
+ TotalNumberOfDCCActiveDPP,
+ MetaChunkSize,
+ GPUVMEnable,
+ HostVMEnable,
+ NumberOfActivePlanes,
+ NumberOfDPP,
+ dpte_group_bytes,
+ HostVMInefficiencyFactor,
+ HostVMMinPageSize,
+ HostVMMaxNonCachedPageTableLevels);
+
+ ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
+ dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
+ dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
+ dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
+ dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
+#endif
+
+ return ExtraLatency;
+}
+
+static double CalculateExtraLatencyBytes(
+ int ReorderingBytes,
+ int TotalNumberOfActiveDPP,
+ int PixelChunkSizeInKByte,
+ int TotalNumberOfDCCActiveDPP,
+ int MetaChunkSize,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ int NumberOfActivePlanes,
+ int NumberOfDPP[],
+ int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ int HostVMMaxNonCachedPageTableLevels)
+{
+ double ret;
+ int HostVMDynamicLevels = 0, k;
+
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ if (HostVMMinPageSize < 2048)
+ HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
+ else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+ else
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+ } else {
+ HostVMDynamicLevels = 0;
+ }
+
+ ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
+
+ if (GPUVMEnable == true) {
+ for (k = 0; k < NumberOfActivePlanes; ++k)
+ ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
+ }
+ return ret;
+}
+
+static double CalculateUrgentLatency(
+ double UrgentLatencyPixelDataOnly,
+ double UrgentLatencyPixelMixedWithVMData,
+ double UrgentLatencyVMDataOnly,
+ bool DoUrgentLatencyAdjustment,
+ double UrgentLatencyAdjustmentFabricClockComponent,
+ double UrgentLatencyAdjustmentFabricClockReference,
+ double FabricClock)
+{
+ double ret;
+
+ ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
+ if (DoUrgentLatencyAdjustment == true)
+ ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
+ return ret;
+}
+
+static void UseMinimumDCFCLK(
+ struct display_mode_lib *mode_lib,
+ int MaxPrefetchMode,
+ int ReorderingBytes)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ int dummy1, i, j, k;
+ double NormalEfficiency, dummy2, dummy3;
+ double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
+
+ NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
+ double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
+ double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
+ double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
+ double MinimumTWait;
+ double NonDPTEBandwidth;
+ double DPTEBandwidth;
+ double DCFCLKRequiredForAverageBandwidth;
+ double ExtraLatencyBytes;
+ double ExtraLatencyCycles;
+ double DCFCLKRequiredForPeakBandwidth;
+ int NoOfDPPState[DC__NUM_DPP__MAX];
+ double MinimumTvmPlus2Tr0;
+
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
+ + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
+ }
+
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
+ NoOfDPPState[k] = v->NoOfDPP[i][j][k];
+
+ MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
+ NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
+ DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
+ DCFCLKRequiredForAverageBandwidth = dml_max3(
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
+ / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
+ (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
+
+ ExtraLatencyBytes = CalculateExtraLatencyBytes(
+ ReorderingBytes,
+ v->TotalNumberOfActiveDPP[i][j],
+ v->PixelChunkSizeInKByte,
+ v->TotalNumberOfDCCActiveDPP[i][j],
+ v->MetaChunkSize,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->NumberOfActivePlanes,
+ NoOfDPPState,
+ v->dpte_group_bytes,
+ 1,
+ v->HostVMMinPageSize,
+ v->HostVMMaxNonCachedPageTableLevels);
+ ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ double DCFCLKCyclesRequiredInPrefetch;
+ double ExpectedPrefetchBWAcceleration;
+ double PrefetchTime;
+
+ PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
+ + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
+ DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
+ + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
+ + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
+ + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
+ PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
+ ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
+ / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
+ DynamicMetadataVMExtraLatency[k] =
+ (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
+ v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
+ PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
+ - v->UrgLatency[i]
+ * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
+ * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
+ - DynamicMetadataVMExtraLatency[k];
+
+ if (PrefetchTime > 0) {
+ double ExpectedVRatioPrefetch;
+
+ ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
+ / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
+ * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
+ if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
+ + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
+ }
+ } else {
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
+ }
+ if (v->DynamicMetadataEnable[k] == true) {
+ double TSetupPipe;
+ double TdmbfPipe;
+ double TdmsksPipe;
+ double TdmecPipe;
+ double AllowedTimeForUrgentExtraLatency;
+
+ CalculateVupdateAndDynamicMetadataParameters(
+ v->MaxInterDCNTileRepeaters,
+ v->RequiredDPPCLK[i][j][k],
+ v->RequiredDISPCLK[i][j],
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ v->PixelClock[k],
+ v->HTotal[k],
+ v->VTotal[k] - v->VActive[k],
+ v->DynamicMetadataTransmittedBytes[k],
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ &TSetupPipe,
+ &TdmbfPipe,
+ &TdmecPipe,
+ &TdmsksPipe,
+ &dummy1,
+ &dummy2,
+ &dummy3);
+ AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
+ - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
+ if (AllowedTimeForUrgentExtraLatency > 0) {
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
+ DCFCLKRequiredForPeakBandwidthPerPlane[k],
+ ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
+ } else {
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
+ }
+ }
+ }
+ DCFCLKRequiredForPeakBandwidth = 0;
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
+ DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
+
+ MinimumTvmPlus2Tr0 = v->UrgLatency[i]
+ * (v->GPUVMEnable == true ?
+ (v->HostVMEnable == true ?
+ (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
+ 0);
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ double MaximumTvmPlus2Tr0PlusTsw;
+
+ MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
+ if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
+ DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
+ } else {
+ DCFCLKRequiredForPeakBandwidth = dml_max3(
+ DCFCLKRequiredForPeakBandwidth,
+ 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
+ (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
+ }
+ }
+ v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
+ }
+ }
+}
+
+static void CalculateUnboundedRequestAndCompressedBufferSize(
+ unsigned int DETBufferSizeInKByte,
+ int ConfigReturnBufferSizeInKByte,
+ enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ int TotalActiveDPP,
+ bool NoChromaPlanes,
+ int MaxNumDPP,
+ int CompressedBufferSegmentSizeInkByteFinal,
+ enum output_encoder_class *Output,
+ bool *UnboundedRequestEnabled,
+ int *CompressedBufferSizeInkByte)
+{
+ double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
+
+ *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
+ *CompressedBufferSizeInkByte = (
+ *UnboundedRequestEnabled == true ?
+ ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
+ ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
+ *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
+ dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
+ dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
+ dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
+#endif
+}
+
+static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
+{
+ bool ret_val = false;
+
+ ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
+ if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
+ ret_val = false;
+ return ret_val;
+}
+
+static unsigned int CalculateMaxVStartup(
+ unsigned int VTotal,
+ unsigned int VActive,
+ unsigned int VBlankNom,
+ unsigned int HTotal,
+ double PixelClock,
+ bool ProgressiveTointerlaceUnitinOPP,
+ bool Interlace,
+ unsigned int VBlankNomDefaultUS,
+ double WritebackDelayTime)
+{
+ unsigned int MaxVStartup = 0;
+ unsigned int vblank_size = 0;
+ double line_time_us = HTotal / PixelClock;
+ unsigned int vblank_actual = VTotal - VActive;
+ unsigned int vblank_nom_default_in_line = dml_floor(VBlankNomDefaultUS / line_time_us, 1.0);
+ unsigned int vblank_nom_input = VBlankNom; //dml_min(VBlankNom, vblank_nom_default_in_line);
+ unsigned int vblank_avail = vblank_nom_input == 0 ? vblank_nom_default_in_line : vblank_nom_input;
+
+ vblank_size = (unsigned int) dml_min(vblank_actual, vblank_avail);
+ if (Interlace && !ProgressiveTointerlaceUnitinOPP)
+ MaxVStartup = dml_floor(vblank_size / 2.0, 1.0);
+ else
+ MaxVStartup = vblank_size - dml_max(1.0, dml_ceil(WritebackDelayTime / line_time_us, 1.0));
+ if (MaxVStartup > 1023)
+ MaxVStartup = 1023;
+ return MaxVStartup;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h
new file mode 100644
index 000000000000..a8199ab7d26a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML314_DISPLAY_MODE_VBA_H__
+#define __DML314_DISPLAY_MODE_VBA_H__
+
+void dml314_recalculate(struct display_mode_lib *mode_lib);
+void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib);
+double dml314_CalculateWriteBackDISPCLK(
+ enum source_format_class WritebackPixelFormat,
+ double PixelClock,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackHTaps,
+ unsigned int WritebackVTaps,
+ long WritebackSourceWidth,
+ long WritebackDestinationWidth,
+ unsigned int HTotal,
+ unsigned int WritebackLineBufferSize);
+
+#endif /* __DML314_DISPLAY_MODE_VBA_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
new file mode 100644
index 000000000000..61ee9ba063a7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
@@ -0,0 +1,1733 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "../display_mode_lib.h"
+#include "../display_mode_vba.h"
+#include "../dml_inline_defs.h"
+#include "display_rq_dlg_calc_314.h"
+
+static bool CalculateBytePerPixelAnd256BBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC)
+{
+ if (SourcePixelFormat == dm_444_64) {
+ *BytePerPixelDETY = 8;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 8;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_16) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 1;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 1;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 2;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 2;
+ } else if (SourcePixelFormat == dm_420_12) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 4;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ } else {
+ *BytePerPixelDETY = 4.0 / 3;
+ *BytePerPixelDETC = 8.0 / 3;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ }
+
+ if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
+ || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
+ if (SurfaceTiling == dm_sw_linear)
+ *BlockHeight256BytesY = 1;
+ else if (SourcePixelFormat == dm_444_64)
+ *BlockHeight256BytesY = 4;
+ else if (SourcePixelFormat == dm_444_8)
+ *BlockHeight256BytesY = 16;
+ else
+ *BlockHeight256BytesY = 8;
+
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockHeight256BytesC = 0;
+ *BlockWidth256BytesC = 0;
+ } else {
+ if (SurfaceTiling == dm_sw_linear) {
+ *BlockHeight256BytesY = 1;
+ *BlockHeight256BytesC = 1;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 16;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BlockHeight256BytesY = 16;
+ *BlockHeight256BytesC = 8;
+ } else {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 8;
+ }
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
+ }
+ return true;
+}
+
+static bool is_dual_plane(enum source_format_class source_format)
+{
+ bool ret_val = 0;
+
+ if ((source_format == dm_420_12) || (source_format == dm_420_8) || (source_format == dm_420_10) || (source_format == dm_rgbe_alpha))
+ ret_val = 1;
+
+ return ret_val;
+}
+
+static double get_refcyc_per_delivery(
+ struct display_mode_lib *mode_lib,
+ double refclk_freq_in_mhz,
+ double pclk_freq_in_mhz,
+ unsigned int odm_combine,
+ unsigned int recout_width,
+ unsigned int hactive,
+ double vratio,
+ double hscale_pixel_rate,
+ unsigned int delivery_width,
+ unsigned int req_per_swath_ub)
+{
+ double refcyc_per_delivery = 0.0;
+
+ if (vratio <= 1.0) {
+ if (odm_combine)
+ refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) ((unsigned int) odm_combine * 2)
+ * dml_min((double) recout_width, (double) hactive / ((unsigned int) odm_combine * 2)) / pclk_freq_in_mhz / (double) req_per_swath_ub;
+ else
+ refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width / pclk_freq_in_mhz / (double) req_per_swath_ub;
+ } else {
+ refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width / (double) hscale_pixel_rate / (double) req_per_swath_ub;
+ }
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width);
+ dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio);
+ dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub);
+ dml_print("DML_DLG: %s: hscale_pixel_rate = %3.2f\n", __func__, hscale_pixel_rate);
+ dml_print("DML_DLG: %s: delivery_width = %d\n", __func__, delivery_width);
+ dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery);
+#endif
+
+ return refcyc_per_delivery;
+
+}
+
+static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size)
+{
+ if (tile_size == dm_256k_tile)
+ return (256 * 1024);
+ else if (tile_size == dm_64k_tile)
+ return (64 * 1024);
+ else
+ return (4 * 1024);
+}
+
+static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib, display_data_rq_regs_st *rq_regs, const display_data_rq_sizing_params_st *rq_sizing)
+{
+ print__data_rq_sizing_params_st(mode_lib, rq_sizing);
+
+ rq_regs->chunk_size = dml_log2(rq_sizing->chunk_bytes) - 10;
+
+ if (rq_sizing->min_chunk_bytes == 0)
+ rq_regs->min_chunk_size = 0;
+ else
+ rq_regs->min_chunk_size = dml_log2(rq_sizing->min_chunk_bytes) - 8 + 1;
+
+ rq_regs->meta_chunk_size = dml_log2(rq_sizing->meta_chunk_bytes) - 10;
+ if (rq_sizing->min_meta_chunk_bytes == 0)
+ rq_regs->min_meta_chunk_size = 0;
+ else
+ rq_regs->min_meta_chunk_size = dml_log2(rq_sizing->min_meta_chunk_bytes) - 6 + 1;
+
+ rq_regs->dpte_group_size = dml_log2(rq_sizing->dpte_group_bytes) - 6;
+ rq_regs->mpte_group_size = dml_log2(rq_sizing->mpte_group_bytes) - 6;
+}
+
+static void extract_rq_regs(struct display_mode_lib *mode_lib, display_rq_regs_st *rq_regs, const display_rq_params_st *rq_param)
+{
+ unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
+ unsigned int detile_buf_plane1_addr = 0;
+
+ extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), &rq_param->sizing.rq_l);
+
+ rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_l.dpte_row_height), 1) - 3;
+
+ if (rq_param->yuv420) {
+ extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), &rq_param->sizing.rq_c);
+ rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_c.dpte_row_height), 1) - 3;
+ }
+
+ rq_regs->rq_regs_l.swath_height = dml_log2(rq_param->dlg.rq_l.swath_height);
+ rq_regs->rq_regs_c.swath_height = dml_log2(rq_param->dlg.rq_c.swath_height);
+
+ // FIXME: take the max between luma, chroma chunk size?
+ // okay for now, as we are setting chunk_bytes to 8kb anyways
+ if (rq_param->sizing.rq_l.chunk_bytes >= 32 * 1024 || (rq_param->yuv420 && rq_param->sizing.rq_c.chunk_bytes >= 32 * 1024)) { //32kb
+ rq_regs->drq_expansion_mode = 0;
+ } else {
+ rq_regs->drq_expansion_mode = 2;
+ }
+ rq_regs->prq_expansion_mode = 1;
+ rq_regs->mrq_expansion_mode = 1;
+ rq_regs->crq_expansion_mode = 1;
+
+ // Note: detile_buf_plane1_addr is in unit of 1KB
+ if (rq_param->yuv420) {
+ if ((double) rq_param->misc.rq_l.stored_swath_bytes / (double) rq_param->misc.rq_c.stored_swath_bytes <= 1.5) {
+ detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr);
+#endif
+ } else {
+ detile_buf_plane1_addr = dml_round_to_multiple((unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0; // 2/3 to luma
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d (1/3 chroma)\n", __func__, detile_buf_plane1_addr);
+#endif
+ }
+ }
+ rq_regs->plane1_base_address = detile_buf_plane1_addr;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %0d\n", __func__, detile_buf_size_in_bytes);
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d\n", __func__, detile_buf_plane1_addr);
+ dml_print("DML_DLG: %s: plane1_base_address = %0d\n", __func__, rq_regs->plane1_base_address);
+ dml_print("DML_DLG: %s: rq_l.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_l.stored_swath_bytes);
+ dml_print("DML_DLG: %s: rq_c.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_c.stored_swath_bytes);
+ dml_print("DML_DLG: %s: rq_l.swath_height = %0d\n", __func__, rq_param->dlg.rq_l.swath_height);
+ dml_print("DML_DLG: %s: rq_c.swath_height = %0d\n", __func__, rq_param->dlg.rq_c.swath_height);
+#endif
+}
+
+static void handle_det_buf_split(struct display_mode_lib *mode_lib, display_rq_params_st *rq_param, const display_pipe_source_params_st *pipe_src_param)
+{
+ unsigned int total_swath_bytes = 0;
+ unsigned int swath_bytes_l = 0;
+ unsigned int swath_bytes_c = 0;
+ unsigned int full_swath_bytes_packed_l = 0;
+ unsigned int full_swath_bytes_packed_c = 0;
+ bool req128_l = 0;
+ bool req128_c = 0;
+ bool surf_linear = (pipe_src_param->sw_mode == dm_sw_linear);
+ bool surf_vert = (pipe_src_param->source_scan == dm_vert);
+ unsigned int log2_swath_height_l = 0;
+ unsigned int log2_swath_height_c = 0;
+ unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
+
+ full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes;
+ full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", __func__, full_swath_bytes_packed_l);
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", __func__, full_swath_bytes_packed_c);
+#endif
+
+ if (rq_param->yuv420_10bpc) {
+ full_swath_bytes_packed_l = dml_round_to_multiple(rq_param->misc.rq_l.full_swath_bytes * 2.0 / 3.0, 256, 1) + 256;
+ full_swath_bytes_packed_c = dml_round_to_multiple(rq_param->misc.rq_c.full_swath_bytes * 2.0 / 3.0, 256, 1) + 256;
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d (3-2 packing)\n", __func__, full_swath_bytes_packed_l);
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d (3-2 packing)\n", __func__, full_swath_bytes_packed_c);
+#endif
+ }
+
+ if (rq_param->yuv420)
+ total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c;
+ else
+ total_swath_bytes = 2 * full_swath_bytes_packed_l;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: total_swath_bytes = %0d\n", __func__, total_swath_bytes);
+ dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %0d\n", __func__, detile_buf_size_in_bytes);
+#endif
+
+ if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request
+ req128_l = 0;
+ req128_c = 0;
+ swath_bytes_l = full_swath_bytes_packed_l;
+ swath_bytes_c = full_swath_bytes_packed_c;
+ } else if (!rq_param->yuv420) {
+ req128_l = 1;
+ req128_c = 0;
+ swath_bytes_c = full_swath_bytes_packed_c;
+ swath_bytes_l = full_swath_bytes_packed_l / 2;
+ } else if ((double) full_swath_bytes_packed_l / (double) full_swath_bytes_packed_c < 1.5) {
+ req128_l = 0;
+ req128_c = 1;
+ swath_bytes_l = full_swath_bytes_packed_l;
+ swath_bytes_c = full_swath_bytes_packed_c / 2;
+
+ total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c;
+
+ if (total_swath_bytes > detile_buf_size_in_bytes) {
+ req128_l = 1;
+ swath_bytes_l = full_swath_bytes_packed_l / 2;
+ }
+ } else {
+ req128_l = 1;
+ req128_c = 0;
+ swath_bytes_l = full_swath_bytes_packed_l / 2;
+ swath_bytes_c = full_swath_bytes_packed_c;
+
+ total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c;
+
+ if (total_swath_bytes > detile_buf_size_in_bytes) {
+ req128_c = 1;
+ swath_bytes_c = full_swath_bytes_packed_c / 2;
+ }
+ }
+
+ if (rq_param->yuv420)
+ total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c;
+ else
+ total_swath_bytes = 2 * swath_bytes_l;
+
+ rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l;
+ rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: total_swath_bytes = %0d\n", __func__, total_swath_bytes);
+ dml_print("DML_DLG: %s: rq_l.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_l.stored_swath_bytes);
+ dml_print("DML_DLG: %s: rq_c.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_c.stored_swath_bytes);
+#endif
+ if (surf_linear) {
+ log2_swath_height_l = 0;
+ log2_swath_height_c = 0;
+ } else {
+ unsigned int swath_height_l;
+ unsigned int swath_height_c;
+
+ if (!surf_vert) {
+ swath_height_l = rq_param->misc.rq_l.blk256_height;
+ swath_height_c = rq_param->misc.rq_c.blk256_height;
+ } else {
+ swath_height_l = rq_param->misc.rq_l.blk256_width;
+ swath_height_c = rq_param->misc.rq_c.blk256_width;
+ }
+
+ if (swath_height_l > 0)
+ log2_swath_height_l = dml_log2(swath_height_l);
+
+ if (req128_l && log2_swath_height_l > 0)
+ log2_swath_height_l -= 1;
+
+ if (swath_height_c > 0)
+ log2_swath_height_c = dml_log2(swath_height_c);
+
+ if (req128_c && log2_swath_height_c > 0)
+ log2_swath_height_c -= 1;
+ }
+
+ rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
+ rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l);
+ dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c);
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", __func__, full_swath_bytes_packed_l);
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", __func__, full_swath_bytes_packed_c);
+ dml_print("DML_DLG: %s: swath_height luma = %0d\n", __func__, rq_param->dlg.rq_l.swath_height);
+ dml_print("DML_DLG: %s: swath_height chroma = %0d\n", __func__, rq_param->dlg.rq_c.swath_height);
+#endif
+}
+
+static void get_meta_and_pte_attr(
+ struct display_mode_lib *mode_lib,
+ display_data_rq_dlg_params_st *rq_dlg_param,
+ display_data_rq_misc_params_st *rq_misc_param,
+ display_data_rq_sizing_params_st *rq_sizing_param,
+ unsigned int vp_width,
+ unsigned int vp_height,
+ unsigned int data_pitch,
+ unsigned int meta_pitch,
+ unsigned int source_format,
+ unsigned int tiling,
+ unsigned int macro_tile_size,
+ unsigned int source_scan,
+ unsigned int hostvm_enable,
+ unsigned int is_chroma,
+ unsigned int surface_height)
+{
+ bool surf_linear = (tiling == dm_sw_linear);
+ bool surf_vert = (source_scan == dm_vert);
+
+ unsigned int bytes_per_element;
+ unsigned int bytes_per_element_y;
+ unsigned int bytes_per_element_c;
+
+ unsigned int blk256_width = 0;
+ unsigned int blk256_height = 0;
+
+ unsigned int blk256_width_y = 0;
+ unsigned int blk256_height_y = 0;
+ unsigned int blk256_width_c = 0;
+ unsigned int blk256_height_c = 0;
+ unsigned int log2_bytes_per_element;
+ unsigned int log2_blk256_width;
+ unsigned int log2_blk256_height;
+ unsigned int blk_bytes;
+ unsigned int log2_blk_bytes;
+ unsigned int log2_blk_height;
+ unsigned int log2_blk_width;
+ unsigned int log2_meta_req_bytes;
+ unsigned int log2_meta_req_height;
+ unsigned int log2_meta_req_width;
+ unsigned int meta_req_width;
+ unsigned int meta_req_height;
+ unsigned int log2_meta_row_height;
+ unsigned int meta_row_width_ub;
+ unsigned int log2_meta_chunk_bytes;
+ unsigned int log2_meta_chunk_height;
+
+ //full sized meta chunk width in unit of data elements
+ unsigned int log2_meta_chunk_width;
+ unsigned int log2_min_meta_chunk_bytes;
+ unsigned int min_meta_chunk_width;
+ unsigned int meta_chunk_width;
+ unsigned int meta_chunk_per_row_int;
+ unsigned int meta_row_remainder;
+ unsigned int meta_chunk_threshold;
+ unsigned int meta_blk_height;
+ unsigned int meta_surface_bytes;
+ unsigned int vmpg_bytes;
+ unsigned int meta_pte_req_per_frame_ub;
+ unsigned int meta_pte_bytes_per_frame_ub;
+ const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.gpuvm_min_page_size_bytes);
+ const bool dual_plane_en = is_dual_plane((enum source_format_class) (source_format));
+ const unsigned int dpte_buf_in_pte_reqs =
+ dual_plane_en ? (is_chroma ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma) : (mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma
+ + mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
+
+ unsigned int log2_vmpg_height = 0;
+ unsigned int log2_vmpg_width = 0;
+ unsigned int log2_dpte_req_height_ptes = 0;
+ unsigned int log2_dpte_req_height = 0;
+ unsigned int log2_dpte_req_width = 0;
+ unsigned int log2_dpte_row_height_linear = 0;
+ unsigned int log2_dpte_row_height = 0;
+ unsigned int log2_dpte_group_width = 0;
+ unsigned int dpte_row_width_ub = 0;
+ unsigned int dpte_req_height = 0;
+ unsigned int dpte_req_width = 0;
+ unsigned int dpte_group_width = 0;
+ unsigned int log2_dpte_group_bytes = 0;
+ unsigned int log2_dpte_group_length = 0;
+ double byte_per_pixel_det_y;
+ double byte_per_pixel_det_c;
+
+ CalculateBytePerPixelAnd256BBlockSizes(
+ (enum source_format_class) (source_format),
+ (enum dm_swizzle_mode) (tiling),
+ &bytes_per_element_y,
+ &bytes_per_element_c,
+ &byte_per_pixel_det_y,
+ &byte_per_pixel_det_c,
+ &blk256_height_y,
+ &blk256_height_c,
+ &blk256_width_y,
+ &blk256_width_c);
+
+ if (!is_chroma) {
+ blk256_width = blk256_width_y;
+ blk256_height = blk256_height_y;
+ bytes_per_element = bytes_per_element_y;
+ } else {
+ blk256_width = blk256_width_c;
+ blk256_height = blk256_height_c;
+ bytes_per_element = bytes_per_element_c;
+ }
+
+ log2_bytes_per_element = dml_log2(bytes_per_element);
+
+ dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear);
+ dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert);
+ dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width);
+ dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height);
+
+ log2_blk256_width = dml_log2((double) blk256_width);
+ log2_blk256_height = dml_log2((double) blk256_height);
+ blk_bytes = surf_linear ? 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
+ log2_blk_bytes = dml_log2((double) blk_bytes);
+ log2_blk_height = 0;
+ log2_blk_width = 0;
+
+ // remember log rule
+ // "+" in log is multiply
+ // "-" in log is divide
+ // "/2" is like square root
+ // blk is vertical biased
+ if (tiling != dm_sw_linear)
+ log2_blk_height = log2_blk256_height + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1);
+ else
+ log2_blk_height = 0; // blk height of 1
+
+ log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height;
+
+ if (!surf_vert) {
+ unsigned int temp;
+
+ temp = dml_round_to_multiple(vp_width - 1, blk256_width, 1) + blk256_width;
+ if (data_pitch < blk256_width) {
+ dml_print("WARNING: DML_DLG: %s: swath_size calculation ignoring data_pitch=%u < blk256_width=%u\n", __func__, data_pitch, blk256_width);
+ } else {
+ if (temp > data_pitch) {
+ if (data_pitch >= vp_width)
+ temp = data_pitch;
+ else
+ dml_print("WARNING: DML_DLG: %s: swath_size calculation ignoring data_pitch=%u < vp_width=%u\n", __func__, data_pitch, vp_width);
+ }
+ }
+ rq_dlg_param->swath_width_ub = temp;
+ rq_dlg_param->req_per_swath_ub = temp >> log2_blk256_width;
+ } else {
+ unsigned int temp;
+
+ temp = dml_round_to_multiple(vp_height - 1, blk256_height, 1) + blk256_height;
+ if (surface_height < blk256_height) {
+ dml_print("WARNING: DML_DLG: %s swath_size calculation ignored surface_height=%u < blk256_height=%u\n", __func__, surface_height, blk256_height);
+ } else {
+ if (temp > surface_height) {
+ if (surface_height >= vp_height)
+ temp = surface_height;
+ else
+ dml_print("WARNING: DML_DLG: %s swath_size calculation ignored surface_height=%u < vp_height=%u\n", __func__, surface_height, vp_height);
+ }
+ }
+ rq_dlg_param->swath_width_ub = temp;
+ rq_dlg_param->req_per_swath_ub = temp >> log2_blk256_height;
+ }
+
+ if (!surf_vert)
+ rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height * bytes_per_element;
+ else
+ rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width * bytes_per_element;
+
+ rq_misc_param->blk256_height = blk256_height;
+ rq_misc_param->blk256_width = blk256_width;
+
+ // -------
+ // meta
+ // -------
+ log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element
+
+ // each 64b meta request for dcn is 8x8 meta elements and
+ // a meta element covers one 256b block of the data surface.
+ log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256
+ log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height;
+ meta_req_width = 1 << log2_meta_req_width;
+ meta_req_height = 1 << log2_meta_req_height;
+ log2_meta_row_height = 0;
+ meta_row_width_ub = 0;
+
+ // the dimensions of a meta row are meta_row_width x meta_row_height in elements.
+ // calculate upper bound of the meta_row_width
+ if (!surf_vert) {
+ log2_meta_row_height = log2_meta_req_height;
+ meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) + meta_req_width;
+ rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width;
+ } else {
+ log2_meta_row_height = log2_meta_req_width;
+ meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) + meta_req_height;
+ rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height;
+ }
+ rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64;
+
+ rq_dlg_param->meta_row_height = 1 << log2_meta_row_height;
+
+ log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes);
+ log2_meta_chunk_height = log2_meta_row_height;
+
+ //full sized meta chunk width in unit of data elements
+ log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element - log2_meta_chunk_height;
+ log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes);
+ min_meta_chunk_width = 1 << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element - log2_meta_chunk_height);
+ meta_chunk_width = 1 << log2_meta_chunk_width;
+ meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width);
+ meta_row_remainder = meta_row_width_ub % meta_chunk_width;
+ meta_chunk_threshold = 0;
+ meta_blk_height = blk256_height * 64;
+ meta_surface_bytes = meta_pitch * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) + meta_blk_height) * bytes_per_element / 256;
+ vmpg_bytes = mode_lib->soc.gpuvm_min_page_size_bytes;
+ meta_pte_req_per_frame_ub = (dml_round_to_multiple(meta_surface_bytes - vmpg_bytes, 8 * vmpg_bytes, 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes);
+ meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request
+ rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub;
+
+ dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height);
+ dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes);
+ dml_print("DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n", __func__, meta_pte_req_per_frame_ub);
+ dml_print("DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n", __func__, meta_pte_bytes_per_frame_ub);
+
+ if (!surf_vert)
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width;
+ else
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height;
+
+ if (meta_row_remainder <= meta_chunk_threshold)
+ rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+ else
+ rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+
+ // ------
+ // dpte
+ // ------
+ if (surf_linear)
+ log2_vmpg_height = 0; // one line high
+ else
+ log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height;
+
+ log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height;
+
+ // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4.
+ if (surf_linear) { //one 64B PTE request returns 8 PTEs
+ log2_dpte_req_height_ptes = 0;
+ log2_dpte_req_width = log2_vmpg_width + 3;
+ log2_dpte_req_height = 0;
+ } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size
+ //one 64B req gives 8x1 PTEs for 4KB tile
+ log2_dpte_req_height_ptes = 0;
+ log2_dpte_req_width = log2_blk_width + 3;
+ log2_dpte_req_height = log2_blk_height + 0;
+ } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB
+ //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB
+ log2_dpte_req_height_ptes = 4;
+ log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width
+ log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height
+ } else { //64KB page size and must 64KB tile block
+ //one 64B req gives 8x1 PTEs for 64KB tile
+ log2_dpte_req_height_ptes = 0;
+ log2_dpte_req_width = log2_blk_width + 3;
+ log2_dpte_req_height = log2_blk_height + 0;
+ }
+
+ // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height
+ // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent
+ // That depends on the pte shape (i.e. 8x1, 4x2, 2x4)
+ //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes;
+ //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes;
+ dpte_req_height = 1 << log2_dpte_req_height;
+ dpte_req_width = 1 << log2_dpte_req_width;
+
+ // calculate pitch dpte row buffer can hold
+ // round the result down to a power of two.
+ if (surf_linear) {
+ unsigned int dpte_row_height;
+
+ log2_dpte_row_height_linear = dml_floor(dml_log2(dpte_buf_in_pte_reqs * dpte_req_width / data_pitch), 1);
+
+ dml_print("DML_DLG: %s: is_chroma = %d\n", __func__, is_chroma);
+ dml_print("DML_DLG: %s: dpte_buf_in_pte_reqs = %d\n", __func__, dpte_buf_in_pte_reqs);
+ dml_print("DML_DLG: %s: log2_dpte_row_height_linear = %d\n", __func__, log2_dpte_row_height_linear);
+
+ ASSERT(log2_dpte_row_height_linear >= 3);
+
+ if (log2_dpte_row_height_linear > 7)
+ log2_dpte_row_height_linear = 7;
+
+ log2_dpte_row_height = log2_dpte_row_height_linear;
+ // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary.
+ // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering.
+ dpte_row_height = 1 << log2_dpte_row_height;
+ dpte_row_width_ub = dml_round_to_multiple(data_pitch * dpte_row_height - 1, dpte_req_width, 1) + dpte_req_width;
+ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
+ } else {
+ // the upper bound of the dpte_row_width without dependency on viewport position follows.
+ // for tiled mode, row height is the same as req height and row store up to vp size upper bound
+ if (!surf_vert) {
+ log2_dpte_row_height = log2_dpte_req_height;
+ dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) + dpte_req_width;
+ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
+ } else {
+ log2_dpte_row_height = (log2_blk_width < log2_dpte_req_width) ? log2_blk_width : log2_dpte_req_width;
+ dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) + dpte_req_height;
+ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height;
+ }
+ }
+ if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB
+ rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request
+ else
+ rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request
+
+ rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height;
+
+ // the dpte_group_bytes is reduced for the specific case of vertical
+ // access of a tile surface that has dpte request of 8x1 ptes.
+ if (hostvm_enable)
+ rq_sizing_param->dpte_group_bytes = 512;
+ else {
+ if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
+ rq_sizing_param->dpte_group_bytes = 512;
+ else
+ rq_sizing_param->dpte_group_bytes = 2048;
+ }
+
+ //since pte request size is 64byte, the number of data pte requests per full sized group is as follows.
+ log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes);
+ log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests
+
+ // full sized data pte group width in elements
+ if (!surf_vert)
+ log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width;
+ else
+ log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height;
+
+ //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B
+ if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB
+ log2_dpte_group_width = log2_dpte_group_width - 1;
+
+ dpte_group_width = 1 << log2_dpte_group_width;
+
+ // since dpte groups are only aligned to dpte_req_width and not dpte_group_width,
+ // the upper bound for the dpte groups per row is as follows.
+ rq_dlg_param->dpte_groups_per_row_ub = dml_ceil((double) dpte_row_width_ub / dpte_group_width, 1);
+}
+
+static void get_surf_rq_param(
+ struct display_mode_lib *mode_lib,
+ display_data_rq_sizing_params_st *rq_sizing_param,
+ display_data_rq_dlg_params_st *rq_dlg_param,
+ display_data_rq_misc_params_st *rq_misc_param,
+ const display_pipe_params_st *pipe_param,
+ bool is_chroma,
+ bool is_alpha)
+{
+ bool mode_422 = 0;
+ unsigned int vp_width = 0;
+ unsigned int vp_height = 0;
+ unsigned int data_pitch = 0;
+ unsigned int meta_pitch = 0;
+ unsigned int surface_height = 0;
+ unsigned int ppe = mode_422 ? 2 : 1;
+
+ // FIXME check if ppe apply for both luma and chroma in 422 case
+ if (is_chroma | is_alpha) {
+ vp_width = pipe_param->src.viewport_width_c / ppe;
+ vp_height = pipe_param->src.viewport_height_c;
+ data_pitch = pipe_param->src.data_pitch_c;
+ meta_pitch = pipe_param->src.meta_pitch_c;
+ surface_height = pipe_param->src.surface_height_y / 2.0;
+ } else {
+ vp_width = pipe_param->src.viewport_width / ppe;
+ vp_height = pipe_param->src.viewport_height;
+ data_pitch = pipe_param->src.data_pitch;
+ meta_pitch = pipe_param->src.meta_pitch;
+ surface_height = pipe_param->src.surface_height_y;
+ }
+
+ if (pipe_param->dest.odm_combine) {
+ unsigned int access_dir;
+ unsigned int full_src_vp_width;
+ unsigned int hactive_odm;
+ unsigned int src_hactive_odm;
+
+ access_dir = (pipe_param->src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
+ hactive_odm = pipe_param->dest.hactive / ((unsigned int) pipe_param->dest.odm_combine * 2);
+ if (is_chroma) {
+ full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio_c * pipe_param->dest.full_recout_width;
+ src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio_c * hactive_odm;
+ } else {
+ full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio * pipe_param->dest.full_recout_width;
+ src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio * hactive_odm;
+ }
+
+ if (access_dir == 0) {
+ vp_width = dml_min(full_src_vp_width, src_hactive_odm);
+ dml_print("DML_DLG: %s: vp_width = %d\n", __func__, vp_width);
+ } else {
+ vp_height = dml_min(full_src_vp_width, src_hactive_odm);
+ dml_print("DML_DLG: %s: vp_height = %d\n", __func__, vp_height);
+
+ }
+ dml_print("DML_DLG: %s: full_src_vp_width = %d\n", __func__, full_src_vp_width);
+ dml_print("DML_DLG: %s: hactive_odm = %d\n", __func__, hactive_odm);
+ dml_print("DML_DLG: %s: src_hactive_odm = %d\n", __func__, src_hactive_odm);
+ }
+
+ rq_sizing_param->chunk_bytes = 8192;
+
+ if (is_alpha)
+ rq_sizing_param->chunk_bytes = 4096;
+
+ if (rq_sizing_param->chunk_bytes == 64 * 1024)
+ rq_sizing_param->min_chunk_bytes = 0;
+ else
+ rq_sizing_param->min_chunk_bytes = 1024;
+
+ rq_sizing_param->meta_chunk_bytes = 2048;
+ rq_sizing_param->min_meta_chunk_bytes = 256;
+
+ if (pipe_param->src.hostvm)
+ rq_sizing_param->mpte_group_bytes = 512;
+ else
+ rq_sizing_param->mpte_group_bytes = 2048;
+
+ get_meta_and_pte_attr(
+ mode_lib,
+ rq_dlg_param,
+ rq_misc_param,
+ rq_sizing_param,
+ vp_width,
+ vp_height,
+ data_pitch,
+ meta_pitch,
+ pipe_param->src.source_format,
+ pipe_param->src.sw_mode,
+ pipe_param->src.macro_tile_size,
+ pipe_param->src.source_scan,
+ pipe_param->src.hostvm,
+ is_chroma,
+ surface_height);
+}
+
+static void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib, display_rq_params_st *rq_param, const display_pipe_params_st *pipe_param)
+{
+ // get param for luma surface
+ rq_param->yuv420 = pipe_param->src.source_format == dm_420_8 || pipe_param->src.source_format == dm_420_10 || pipe_param->src.source_format == dm_rgbe_alpha
+ || pipe_param->src.source_format == dm_420_12;
+
+ rq_param->yuv420_10bpc = pipe_param->src.source_format == dm_420_10;
+
+ rq_param->rgbe_alpha = (pipe_param->src.source_format == dm_rgbe_alpha) ? 1 : 0;
+
+ get_surf_rq_param(mode_lib, &(rq_param->sizing.rq_l), &(rq_param->dlg.rq_l), &(rq_param->misc.rq_l), pipe_param, 0, 0);
+
+ if (is_dual_plane((enum source_format_class) (pipe_param->src.source_format))) {
+ // get param for chroma surface
+ get_surf_rq_param(mode_lib, &(rq_param->sizing.rq_c), &(rq_param->dlg.rq_c), &(rq_param->misc.rq_c), pipe_param, 1, rq_param->rgbe_alpha);
+ }
+
+ // calculate how to split the det buffer space between luma and chroma
+ handle_det_buf_split(mode_lib, rq_param, &pipe_param->src);
+ print__rq_params_st(mode_lib, rq_param);
+}
+
+void dml314_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib, display_rq_regs_st *rq_regs, const display_pipe_params_st *pipe_param)
+{
+ display_rq_params_st rq_param = {0};
+
+ memset(rq_regs, 0, sizeof(*rq_regs));
+ dml_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param);
+ extract_rq_regs(mode_lib, rq_regs, &rq_param);
+
+ print__rq_regs_st(mode_lib, rq_regs);
+}
+
+static void calculate_ttu_cursor(
+ struct display_mode_lib *mode_lib,
+ double *refcyc_per_req_delivery_pre_cur,
+ double *refcyc_per_req_delivery_cur,
+ double refclk_freq_in_mhz,
+ double ref_freq_to_pix_freq,
+ double hscale_pixel_rate_l,
+ double hscl_ratio,
+ double vratio_pre_l,
+ double vratio_l,
+ unsigned int cur_width,
+ enum cursor_bpp cur_bpp)
+{
+ unsigned int cur_src_width = cur_width;
+ unsigned int cur_req_size = 0;
+ unsigned int cur_req_width = 0;
+ double cur_width_ub = 0.0;
+ double cur_req_per_width = 0.0;
+ double hactive_cur = 0.0;
+
+ ASSERT(cur_src_width <= 256);
+
+ *refcyc_per_req_delivery_pre_cur = 0.0;
+ *refcyc_per_req_delivery_cur = 0.0;
+ if (cur_src_width > 0) {
+ unsigned int cur_bit_per_pixel = 0;
+
+ if (cur_bpp == dm_cur_2bit) {
+ cur_req_size = 64; // byte
+ cur_bit_per_pixel = 2;
+ } else { // 32bit
+ cur_bit_per_pixel = 32;
+ if (cur_src_width >= 1 && cur_src_width <= 16)
+ cur_req_size = 64;
+ else if (cur_src_width >= 17 && cur_src_width <= 31)
+ cur_req_size = 128;
+ else
+ cur_req_size = 256;
+ }
+
+ cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0);
+ cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) * (double) cur_req_width;
+ cur_req_per_width = cur_width_ub / (double) cur_req_width;
+ hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor
+
+ if (vratio_pre_l <= 1.0)
+ *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq / (double) cur_req_per_width;
+ else
+ *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz * (double) cur_src_width / hscale_pixel_rate_l / (double) cur_req_per_width;
+
+ ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13));
+
+ if (vratio_l <= 1.0)
+ *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq / (double) cur_req_per_width;
+ else
+ *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz * (double) cur_src_width / hscale_pixel_rate_l / (double) cur_req_per_width;
+
+ dml_print("DML_DLG: %s: cur_req_width = %d\n", __func__, cur_req_width);
+ dml_print("DML_DLG: %s: cur_width_ub = %3.2f\n", __func__, cur_width_ub);
+ dml_print("DML_DLG: %s: cur_req_per_width = %3.2f\n", __func__, cur_req_per_width);
+ dml_print("DML_DLG: %s: hactive_cur = %3.2f\n", __func__, hactive_cur);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n", __func__, *refcyc_per_req_delivery_pre_cur);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n", __func__, *refcyc_per_req_delivery_cur);
+
+ ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13));
+ }
+}
+
+// Note: currently taken in as is.
+// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma.
+static void dml_rq_dlg_get_dlg_params(
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ display_dlg_regs_st *disp_dlg_regs,
+ display_ttu_regs_st *disp_ttu_regs,
+ const display_rq_dlg_params_st *rq_dlg_param,
+ const display_dlg_sys_params_st *dlg_sys_param,
+ const bool cstate_en,
+ const bool pstate_en,
+ const bool vm_en,
+ const bool ignore_viewport_pos,
+ const bool immediate_flip_support)
+{
+ const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
+ const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest;
+ const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout;
+ const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg;
+ const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth;
+ const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps;
+ unsigned int pipe_index_in_combine[DC__NUM_PIPES__MAX];
+
+ // -------------------------
+ // Section 1.15.2.1: OTG dependent Params
+ // -------------------------
+ // Timing
+ unsigned int htotal = dst->htotal;
+ unsigned int hblank_end = dst->hblank_end;
+ unsigned int vblank_start = dst->vblank_start;
+ unsigned int vblank_end = dst->vblank_end;
+
+ double dppclk_freq_in_mhz = clks->dppclk_mhz;
+ double refclk_freq_in_mhz = clks->refclk_mhz;
+ double pclk_freq_in_mhz = dst->pixel_rate_mhz;
+ bool interlaced = dst->interlaced;
+ double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz;
+ double min_ttu_vblank;
+ unsigned int dlg_vblank_start;
+ bool dual_plane;
+ bool mode_422;
+ unsigned int access_dir;
+ unsigned int vp_height_l;
+ unsigned int vp_width_l;
+ unsigned int vp_height_c;
+ unsigned int vp_width_c;
+
+ // Scaling
+ unsigned int htaps_l;
+ unsigned int htaps_c;
+ double hratio_l;
+ double hratio_c;
+ double vratio_l;
+ double vratio_c;
+ bool scl_enable;
+
+ unsigned int swath_width_ub_l;
+ unsigned int dpte_groups_per_row_ub_l;
+ unsigned int swath_width_ub_c;
+ unsigned int dpte_groups_per_row_ub_c;
+
+ unsigned int meta_chunks_per_row_ub_l;
+ unsigned int meta_chunks_per_row_ub_c;
+ unsigned int vupdate_offset;
+ unsigned int vupdate_width;
+ unsigned int vready_offset;
+
+ unsigned int dppclk_delay_subtotal;
+ unsigned int dispclk_delay_subtotal;
+
+ unsigned int vstartup_start;
+ unsigned int dst_x_after_scaler;
+ unsigned int dst_y_after_scaler;
+ double dst_y_prefetch;
+ double dst_y_per_vm_vblank;
+ double dst_y_per_row_vblank;
+ double dst_y_per_vm_flip;
+ double dst_y_per_row_flip;
+ double max_dst_y_per_vm_vblank;
+ double max_dst_y_per_row_vblank;
+ double vratio_pre_l;
+ double vratio_pre_c;
+ unsigned int req_per_swath_ub_l;
+ unsigned int req_per_swath_ub_c;
+ unsigned int meta_row_height_l;
+ unsigned int meta_row_height_c;
+ unsigned int swath_width_pixels_ub_l;
+ unsigned int swath_width_pixels_ub_c;
+ unsigned int scaler_rec_in_width_l;
+ unsigned int scaler_rec_in_width_c;
+ unsigned int dpte_row_height_l;
+ unsigned int dpte_row_height_c;
+ double hscale_pixel_rate_l;
+ double hscale_pixel_rate_c;
+ double min_hratio_fact_l;
+ double min_hratio_fact_c;
+ double refcyc_per_line_delivery_pre_l;
+ double refcyc_per_line_delivery_pre_c;
+ double refcyc_per_line_delivery_l;
+ double refcyc_per_line_delivery_c;
+
+ double refcyc_per_req_delivery_pre_l;
+ double refcyc_per_req_delivery_pre_c;
+ double refcyc_per_req_delivery_l;
+ double refcyc_per_req_delivery_c;
+
+ unsigned int full_recout_width;
+ double refcyc_per_req_delivery_pre_cur0;
+ double refcyc_per_req_delivery_cur0;
+ double refcyc_per_req_delivery_pre_cur1;
+ double refcyc_per_req_delivery_cur1;
+ unsigned int vba__min_dst_y_next_start = get_min_dst_y_next_start(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // FROM VBA
+ unsigned int vba__vready_after_vcount0 = get_vready_at_or_after_vsync(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ float vba__refcyc_per_line_delivery_pre_l = get_refcyc_per_line_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ float vba__refcyc_per_line_delivery_l = get_refcyc_per_line_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ float vba__refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ float vba__refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ int blank_lines = 0;
+
+ memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs));
+ memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs));
+
+ dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en);
+ dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en);
+ dml_print("DML_DLG: %s: vm_en = %d\n", __func__, vm_en);
+ dml_print("DML_DLG: %s: ignore_viewport_pos = %d\n", __func__, ignore_viewport_pos);
+ dml_print("DML_DLG: %s: immediate_flip_support = %d\n", __func__, immediate_flip_support);
+
+ dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); ASSERT(ref_freq_to_pix_freq < 4.0);
+
+ disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19));
+ disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal * dml_pow(2, 8));
+ disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits
+
+ //set_prefetch_mode(mode_lib, cstate_en, pstate_en, ignore_viewport_pos, immediate_flip_support);
+ min_ttu_vblank = get_min_ttu_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start;
+ disp_dlg_regs->optimized_min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start;
+ disp_dlg_regs->optimized_min_dst_y_next_start_us = 0;
+ disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2));
+ blank_lines = (dst->vblank_end + dst->vtotal_min - dst->vblank_start - dst->vstartup_start - 1);
+ if (blank_lines < 0)
+ blank_lines = 0;
+ if (blank_lines != 0) {
+ disp_dlg_regs->optimized_min_dst_y_next_start = vba__min_dst_y_next_start;
+ disp_dlg_regs->optimized_min_dst_y_next_start_us = (disp_dlg_regs->optimized_min_dst_y_next_start * dst->hactive) / (unsigned int) dst->pixel_rate_mhz;
+ disp_dlg_regs->min_dst_y_next_start = disp_dlg_regs->optimized_min_dst_y_next_start;
+ }
+ ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18));
+
+ dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank);
+ dml_print("DML_DLG: %s: min_dst_y_next_start = 0x%0x\n", __func__, disp_dlg_regs->min_dst_y_next_start);
+ dml_print("DML_DLG: %s: dlg_vblank_start = 0x%0x\n", __func__, dlg_vblank_start);
+ dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq);
+ dml_print("DML_DLG: %s: vba__min_dst_y_next_start = 0x%0x\n", __func__, vba__min_dst_y_next_start);
+
+ //old_impl_vs_vba_impl("min_dst_y_next_start", dlg_vblank_start, vba__min_dst_y_next_start);
+
+ // -------------------------
+ // Section 1.15.2.2: Prefetch, Active and TTU
+ // -------------------------
+ // Prefetch Calc
+ // Source
+ dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
+ mode_422 = 0;
+ access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
+ vp_height_l = src->viewport_height;
+ vp_width_l = src->viewport_width;
+ vp_height_c = src->viewport_height_c;
+ vp_width_c = src->viewport_width_c;
+
+ // Scaling
+ htaps_l = taps->htaps;
+ htaps_c = taps->htaps_c;
+ hratio_l = scl->hscl_ratio;
+ hratio_c = scl->hscl_ratio_c;
+ vratio_l = scl->vscl_ratio;
+ vratio_c = scl->vscl_ratio_c;
+ scl_enable = scl->scl_enable;
+
+ swath_width_ub_l = rq_dlg_param->rq_l.swath_width_ub;
+ dpte_groups_per_row_ub_l = rq_dlg_param->rq_l.dpte_groups_per_row_ub;
+ swath_width_ub_c = rq_dlg_param->rq_c.swath_width_ub;
+ dpte_groups_per_row_ub_c = rq_dlg_param->rq_c.dpte_groups_per_row_ub;
+
+ meta_chunks_per_row_ub_l = rq_dlg_param->rq_l.meta_chunks_per_row_ub;
+ meta_chunks_per_row_ub_c = rq_dlg_param->rq_c.meta_chunks_per_row_ub;
+ vupdate_offset = dst->vupdate_offset;
+ vupdate_width = dst->vupdate_width;
+ vready_offset = dst->vready_offset;
+
+ dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal;
+ dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal;
+
+ if (scl_enable)
+ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl;
+ else
+ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only;
+
+ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor;
+
+ if (dout->dsc_enable) {
+ double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // FROM VBA
+
+ dispclk_delay_subtotal += dsc_delay;
+ }
+
+ vstartup_start = dst->vstartup_start;
+ if (interlaced) {
+ if (vstartup_start / 2.0 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end / 2.0)
+ disp_dlg_regs->vready_after_vcount0 = 1;
+ else
+ disp_dlg_regs->vready_after_vcount0 = 0;
+ } else {
+ if (vstartup_start - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end)
+ disp_dlg_regs->vready_after_vcount0 = 1;
+ else
+ disp_dlg_regs->vready_after_vcount0 = 0;
+ }
+
+ dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0);
+ dml_print("DML_DLG: %s: vba__vready_after_vcount0 = %d\n", __func__, vba__vready_after_vcount0);
+ //old_impl_vs_vba_impl("vready_after_vcount0", disp_dlg_regs->vready_after_vcount0, vba__vready_after_vcount0);
+
+ if (interlaced)
+ vstartup_start = vstartup_start / 2;
+
+ dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ // do some adjustment on the dst_after scaler to account for odm combine mode
+ dml_print("DML_DLG: %s: input dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler);
+ dml_print("DML_DLG: %s: input dst_y_after_scaler = %d\n", __func__, dst_y_after_scaler);
+
+ // need to figure out which side of odm combine we're in
+ if (dst->odm_combine) {
+ // figure out which pipes go together
+ bool visited[DC__NUM_PIPES__MAX];
+ unsigned int i, j, k;
+
+ for (k = 0; k < num_pipes; ++k) {
+ visited[k] = false;
+ pipe_index_in_combine[k] = 0;
+ }
+
+ for (i = 0; i < num_pipes; i++) {
+ if (e2e_pipe_param[i].pipe.src.is_hsplit && !visited[i]) {
+
+ unsigned int grp = e2e_pipe_param[i].pipe.src.hsplit_grp;
+ unsigned int grp_idx = 0;
+
+ for (j = i; j < num_pipes; j++) {
+ if (e2e_pipe_param[j].pipe.src.hsplit_grp == grp && e2e_pipe_param[j].pipe.src.is_hsplit && !visited[j]) {
+ pipe_index_in_combine[j] = grp_idx;
+ dml_print("DML_DLG: %s: pipe[%d] is in grp %d idx %d\n", __func__, j, grp, grp_idx);
+ grp_idx++;
+ visited[j] = true;
+ }
+ }
+ }
+ }
+
+ }
+
+ if (dst->odm_combine == dm_odm_combine_mode_disabled) {
+ disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end * ref_freq_to_pix_freq);
+ } else {
+ unsigned int odm_combine_factor = (dst->odm_combine == dm_odm_combine_mode_2to1 ? 2 : 4); // TODO: We should really check that 4to1 is supported before setting it to 4
+ unsigned int odm_pipe_index = pipe_index_in_combine[pipe_idx];
+
+ disp_dlg_regs->refcyc_h_blank_end = (unsigned int) (((double) hblank_end + odm_pipe_index * (double) dst->hactive / odm_combine_factor) * ref_freq_to_pix_freq);
+ } ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13));
+
+ dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal);
+ dml_print("DML_DLG: %s: dst_x_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_x_after_scaler);
+ dml_print("DML_DLG: %s: dst_y_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_y_after_scaler);
+
+ dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ max_dst_y_per_vm_vblank = 32.0; //U5.2
+ max_dst_y_per_row_vblank = 16.0; //U4.2
+
+ // magic!
+ if (htotal <= 75) {
+ max_dst_y_per_vm_vblank = 100.0;
+ max_dst_y_per_row_vblank = 100.0;
+ }
+
+ dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch);
+ dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip);
+ dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank);
+
+ ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank); ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank);
+
+ ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank));
+
+ vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ dml_print("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, vratio_pre_l);
+ dml_print("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, vratio_pre_c);
+
+ // Active
+ req_per_swath_ub_l = rq_dlg_param->rq_l.req_per_swath_ub;
+ req_per_swath_ub_c = rq_dlg_param->rq_c.req_per_swath_ub;
+ meta_row_height_l = rq_dlg_param->rq_l.meta_row_height;
+ meta_row_height_c = rq_dlg_param->rq_c.meta_row_height;
+ swath_width_pixels_ub_l = 0;
+ swath_width_pixels_ub_c = 0;
+ scaler_rec_in_width_l = 0;
+ scaler_rec_in_width_c = 0;
+ dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
+ dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;
+
+ if (mode_422) {
+ swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
+ swath_width_pixels_ub_c = swath_width_ub_c * 2;
+ } else {
+ swath_width_pixels_ub_l = swath_width_ub_l * 1;
+ swath_width_pixels_ub_c = swath_width_ub_c * 1;
+ }
+
+ hscale_pixel_rate_l = 0.;
+ hscale_pixel_rate_c = 0.;
+ min_hratio_fact_l = 1.0;
+ min_hratio_fact_c = 1.0;
+
+ if (hratio_l <= 1)
+ min_hratio_fact_l = 2.0;
+ else if (htaps_l <= 6) {
+ if ((hratio_l * 2.0) > 4.0)
+ min_hratio_fact_l = 4.0;
+ else
+ min_hratio_fact_l = hratio_l * 2.0;
+ } else {
+ if (hratio_l > 4.0)
+ min_hratio_fact_l = 4.0;
+ else
+ min_hratio_fact_l = hratio_l;
+ }
+
+ hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz;
+
+ dml_print("DML_DLG: %s: hratio_l = %3.2f\n", __func__, hratio_l);
+ dml_print("DML_DLG: %s: min_hratio_fact_l = %3.2f\n", __func__, min_hratio_fact_l);
+ dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", __func__, hscale_pixel_rate_l);
+
+ if (hratio_c <= 1)
+ min_hratio_fact_c = 2.0;
+ else if (htaps_c <= 6) {
+ if ((hratio_c * 2.0) > 4.0)
+ min_hratio_fact_c = 4.0;
+ else
+ min_hratio_fact_c = hratio_c * 2.0;
+ } else {
+ if (hratio_c > 4.0)
+ min_hratio_fact_c = 4.0;
+ else
+ min_hratio_fact_c = hratio_c;
+ }
+
+ hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz;
+
+ refcyc_per_line_delivery_pre_l = 0.;
+ refcyc_per_line_delivery_pre_c = 0.;
+ refcyc_per_line_delivery_l = 0.;
+ refcyc_per_line_delivery_c = 0.;
+
+ refcyc_per_req_delivery_pre_l = 0.;
+ refcyc_per_req_delivery_pre_c = 0.;
+ refcyc_per_req_delivery_l = 0.;
+ refcyc_per_req_delivery_c = 0.;
+
+ full_recout_width = 0;
+ // In ODM
+ if (src->is_hsplit) {
+ // This "hack" is only allowed (and valid) for MPC combine. In ODM
+ // combine, you MUST specify the full_recout_width...according to Oswin
+ if (dst->full_recout_width == 0 && !dst->odm_combine) {
+ dml_print("DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", __func__);
+ full_recout_width = dst->recout_width * 2; // assume half split for dcn1
+ } else
+ full_recout_width = dst->full_recout_width;
+ } else
+ full_recout_width = dst->recout_width;
+
+ // As of DCN2, mpc_combine and odm_combine are mutually exclusive
+ refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_pre_l,
+ hscale_pixel_rate_l,
+ swath_width_pixels_ub_l,
+ 1); // per line
+
+ refcyc_per_line_delivery_l = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_l,
+ hscale_pixel_rate_l,
+ swath_width_pixels_ub_l,
+ 1); // per line
+
+ dml_print("DML_DLG: %s: full_recout_width = %d\n", __func__, full_recout_width);
+ dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", __func__, hscale_pixel_rate_l);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, refcyc_per_line_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, refcyc_per_line_delivery_l);
+ dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, vba__refcyc_per_line_delivery_pre_l);
+ dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_l = %3.2f\n", __func__, vba__refcyc_per_line_delivery_l);
+
+ //old_impl_vs_vba_impl("refcyc_per_line_delivery_pre_l", refcyc_per_line_delivery_pre_l, vba__refcyc_per_line_delivery_pre_l);
+ //old_impl_vs_vba_impl("refcyc_per_line_delivery_l", refcyc_per_line_delivery_l, vba__refcyc_per_line_delivery_l);
+
+ if (dual_plane) {
+ float vba__refcyc_per_line_delivery_pre_c = get_refcyc_per_line_delivery_pre_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ float vba__refcyc_per_line_delivery_c = get_refcyc_per_line_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_pre_c,
+ hscale_pixel_rate_c,
+ swath_width_pixels_ub_c,
+ 1); // per line
+
+ refcyc_per_line_delivery_c = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_c,
+ hscale_pixel_rate_c,
+ swath_width_pixels_ub_c,
+ 1); // per line
+
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, refcyc_per_line_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, refcyc_per_line_delivery_c);
+ dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, vba__refcyc_per_line_delivery_pre_c);
+ dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_c = %3.2f\n", __func__, vba__refcyc_per_line_delivery_c);
+
+ //old_impl_vs_vba_impl("refcyc_per_line_delivery_pre_c", refcyc_per_line_delivery_pre_c, vba__refcyc_per_line_delivery_pre_c);
+ //old_impl_vs_vba_impl("refcyc_per_line_delivery_c", refcyc_per_line_delivery_c, vba__refcyc_per_line_delivery_c);
+ }
+
+ if (src->dynamic_metadata_enable && src->gpuvm)
+ disp_dlg_regs->refcyc_per_vm_dmdata = get_refcyc_per_vm_dmdata_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ disp_dlg_regs->dmdata_dl_delta = get_dmdata_dl_delta_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ // TTU - Luma / Chroma
+ if (access_dir) { // vertical access
+ scaler_rec_in_width_l = vp_height_l;
+ scaler_rec_in_width_c = vp_height_c;
+ } else {
+ scaler_rec_in_width_l = vp_width_l;
+ scaler_rec_in_width_c = vp_width_c;
+ }
+
+ refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_pre_l,
+ hscale_pixel_rate_l,
+ scaler_rec_in_width_l,
+ req_per_swath_ub_l); // per req
+
+ refcyc_per_req_delivery_l = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_l,
+ hscale_pixel_rate_l,
+ scaler_rec_in_width_l,
+ req_per_swath_ub_l); // per req
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, refcyc_per_req_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, refcyc_per_req_delivery_l);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_l);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_l = %3.2f\n", __func__, vba__refcyc_per_req_delivery_l);
+
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_l", refcyc_per_req_delivery_pre_l, vba__refcyc_per_req_delivery_pre_l);
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_l", refcyc_per_req_delivery_l, vba__refcyc_per_req_delivery_l);
+
+ ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
+
+ if (dual_plane) {
+ float vba__refcyc_per_req_delivery_pre_c = get_refcyc_per_req_delivery_pre_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ float vba__refcyc_per_req_delivery_c = get_refcyc_per_req_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_pre_c,
+ hscale_pixel_rate_c,
+ scaler_rec_in_width_c,
+ req_per_swath_ub_c); // per req
+ refcyc_per_req_delivery_c = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_c,
+ hscale_pixel_rate_c,
+ scaler_rec_in_width_c,
+ req_per_swath_ub_c); // per req
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, refcyc_per_req_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, refcyc_per_req_delivery_c);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_c);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_c = %3.2f\n", __func__, vba__refcyc_per_req_delivery_c);
+
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_c", refcyc_per_req_delivery_pre_c, vba__refcyc_per_req_delivery_pre_c);
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_c", refcyc_per_req_delivery_c, vba__refcyc_per_req_delivery_c);
+
+ ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
+ }
+
+ // TTU - Cursor
+ refcyc_per_req_delivery_pre_cur0 = 0.0;
+ refcyc_per_req_delivery_cur0 = 0.0;
+
+ ASSERT(src->num_cursors <= 1);
+
+ if (src->num_cursors > 0) {
+ float vba__refcyc_per_req_delivery_pre_cur0;
+ float vba__refcyc_per_req_delivery_cur0;
+
+ calculate_ttu_cursor(
+ mode_lib,
+ &refcyc_per_req_delivery_pre_cur0,
+ &refcyc_per_req_delivery_cur0,
+ refclk_freq_in_mhz,
+ ref_freq_to_pix_freq,
+ hscale_pixel_rate_l,
+ scl->hscl_ratio,
+ vratio_pre_l,
+ vratio_l,
+ src->cur0_src_width,
+ (enum cursor_bpp) (src->cur0_bpp));
+
+ vba__refcyc_per_req_delivery_pre_cur0 = get_refcyc_per_cursor_req_delivery_pre_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ vba__refcyc_per_req_delivery_cur0 = get_refcyc_per_cursor_req_delivery_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_pre_cur0);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_cur0);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_cur0);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, vba__refcyc_per_req_delivery_cur0);
+
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_cur0", refcyc_per_req_delivery_pre_cur0, vba__refcyc_per_req_delivery_pre_cur0);
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_cur0", refcyc_per_req_delivery_cur0, vba__refcyc_per_req_delivery_cur0);
+ }
+
+ refcyc_per_req_delivery_pre_cur1 = 0.0;
+ refcyc_per_req_delivery_cur1 = 0.0;
+
+ // TTU - Misc
+ // all hard-coded
+
+ // Assignment to register structures
+ disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line
+ ASSERT(disp_dlg_regs->dst_y_after_scaler < 8);
+ disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk
+ ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13));
+ disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2));
+
+ disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19));
+ disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19));
+
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
+
+ // hack for FPGA
+ if (mode_lib->project == DML_PROJECT_DCN31_FPGA) {
+ if (disp_dlg_regs->vratio_prefetch >= (unsigned int) dml_pow(2, 22)) {
+ disp_dlg_regs->vratio_prefetch = (unsigned int) dml_pow(2, 22) - 1;
+ dml_print("vratio_prefetch exceed the max value, the register field is [21:0]\n");
+ }
+ }
+
+ disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l);
+ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13));
+
+ if (dual_plane) {
+ disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_c);
+ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)dml_pow(2, 13));
+ }
+
+ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l);
+ ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13));
+
+ disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now
+
+ disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l;
+ disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l;
+
+ if (dual_plane) {
+ disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c;
+ disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c;
+ }
+
+ disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA
+ disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA
+
+ // Clamp to max for now
+ if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1;
+
+ if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1;
+
+ if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1;
+
+ if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1;
+
+ disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l / (double) vratio_l * dml_pow(2, 2));
+ ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17));
+ if (dual_plane) {
+ disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c / (double) vratio_c * dml_pow(2, 2));
+ if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) {
+ dml_print(
+ "DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n",
+ __func__,
+ disp_dlg_regs->dst_y_per_pte_row_nom_c,
+ (unsigned int) dml_pow(2, 17) - 1);
+ }
+ }
+
+ disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l / (double) vratio_l * dml_pow(2, 2));
+ ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17));
+
+ disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int) ((double) meta_row_height_c / (double) vratio_c * dml_pow(2, 2));
+ ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_c < (unsigned int)dml_pow(2, 17));
+
+ disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
+ / (double) dpte_groups_per_row_ub_l);
+ if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1;
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
+ / (double) meta_chunks_per_row_ub_l);
+ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1;
+
+ if (dual_plane) {
+ disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int) ((double) dpte_row_height_c / (double) vratio_c * (double) htotal * ref_freq_to_pix_freq
+ / (double) dpte_groups_per_row_ub_c);
+ if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1;
+
+ // TODO: Is this the right calculation? Does htotal need to be halved?
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int) ((double) meta_row_height_c / (double) vratio_c * (double) htotal * ref_freq_to_pix_freq
+ / (double) meta_chunks_per_row_ub_c);
+ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1;
+ }
+
+ disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, 1);
+ disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, 1);
+ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13)); ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13));
+
+ disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, 1);
+ disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, 1);
+ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13)); ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13));
+
+ disp_dlg_regs->chunk_hdl_adjust_cur0 = 3;
+ disp_dlg_regs->dst_y_offset_cur0 = 0;
+ disp_dlg_regs->chunk_hdl_adjust_cur1 = 3;
+ disp_dlg_regs->dst_y_offset_cur1 = 0;
+
+ disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
+
+ disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1 * dml_pow(2, 10));
+
+ disp_ttu_regs->qos_level_low_wm = 0;
+ ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14));
+
+ disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal * ref_freq_to_pix_freq);
+ ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));
+
+ disp_ttu_regs->qos_level_flip = 14;
+ disp_ttu_regs->qos_level_fixed_l = 8;
+ disp_ttu_regs->qos_level_fixed_c = 8;
+ disp_ttu_regs->qos_level_fixed_cur0 = 8;
+ disp_ttu_regs->qos_ramp_disable_l = 0;
+ disp_ttu_regs->qos_ramp_disable_c = 0;
+ disp_ttu_regs->qos_ramp_disable_cur0 = 0;
+
+ disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz;
+ ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24));
+
+ print__ttu_regs_st(mode_lib, disp_ttu_regs);
+ print__dlg_regs_st(mode_lib, disp_dlg_regs);
+}
+
+void dml314_rq_dlg_get_dlg_reg(
+ struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+ const bool pstate_en,
+ const bool vm_en,
+ const bool ignore_viewport_pos,
+ const bool immediate_flip_support)
+{
+ display_rq_params_st rq_param = {0};
+ display_dlg_sys_params_st dlg_sys_param = {0};
+
+ // Get watermark and Tex.
+ dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes);
+
+ print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
+
+ // system parameter calculation done
+
+ dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx);
+ dml_rq_dlg_get_rq_params(mode_lib, &rq_param, &e2e_pipe_param[pipe_idx].pipe);
+ dml_rq_dlg_get_dlg_params(
+ mode_lib,
+ e2e_pipe_param,
+ num_pipes,
+ pipe_idx,
+ dlg_regs,
+ ttu_regs,
+ &rq_param.dlg,
+ &dlg_sys_param,
+ cstate_en,
+ pstate_en,
+ vm_en,
+ ignore_viewport_pos,
+ immediate_flip_support);
+ dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h
new file mode 100644
index 000000000000..49cb85d1056c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML314_DISPLAY_RQ_DLG_CALC_H__
+#define __DML314_DISPLAY_RQ_DLG_CALC_H__
+
+#include "../display_rq_dlg_helpers.h"
+
+struct display_mode_lib;
+
+// Function: dml_rq_dlg_get_rq_reg
+// Main entry point for test to get the register values out of this DML class.
+// This function calls <get_rq_param> and <extract_rq_regs> fucntions to calculate
+// and then populate the rq_regs struct
+// Input:
+// pipe_param - pipe source configuration (e.g. vp, pitch, scaling, dest, etc.)
+// Output:
+// rq_regs - struct that holds all the RQ registers field value.
+// See also: <display_rq_regs_st>
+void dml314_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
+ display_rq_regs_st *rq_regs,
+ const display_pipe_params_st *pipe_param);
+
+// Function: dml_rq_dlg_get_dlg_reg
+// Calculate and return DLG and TTU register struct given the system setting
+// Output:
+// dlg_regs - output DLG register struct
+// ttu_regs - output DLG TTU register struct
+// Input:
+// e2e_pipe_param - "compacted" array of e2e pipe param struct
+// num_pipes - num of active "pipe" or "route"
+// pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg
+// cstate - 0: when calculate min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered.
+// Added for legacy or unrealistic timing tests.
+void dml314_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+ const bool pstate_en,
+ const bool vm_en,
+ const bool ignore_viewport_pos,
+ const bool immediate_flip_support);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
new file mode 100644
index 000000000000..659323ebd79d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -0,0 +1,2527 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "dcn32_fpu.h"
+#include "dc_link_dp.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn20/dcn20_resource.h"
+#include "display_mode_vba_util_32.h"
+// We need this includes for WATERMARKS_* defines
+#include "clk_mgr/dcn32/dcn32_smu13_driver_if.h"
+#include "dcn30/dcn30_resource.h"
+
+#define DC_LOGGER_INIT(logger)
+
+struct _vcs_dpi_ip_params_st dcn3_2_ip = {
+ .gpuvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_enable = 0,
+ .rob_buffer_size_kbytes = 128,
+ .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE,
+ .config_return_buffer_size_in_kbytes = 1280,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 22,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .alpha_pixel_chunk_size_kbytes = 4,
+ .min_pixel_chunk_size_bytes = 1024,
+ .dcc_meta_buffer_size_bytes = 6272,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 12,
+ .maximum_pixels_per_line_per_dsc_unit = 6016,
+ .dsc422_native_support = true,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 57,
+ .line_buffer_size_bits = 1171920,
+ .max_line_buffer_lines = 32,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 47,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 28,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 125,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+ .max_num_dp2p0_outputs = 2,
+ .max_num_dp2p0_streams = 4,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 1564.0,
+ .fabricclk_mhz = 400.0,
+ .dispclk_mhz = 2150.0,
+ .dppclk_mhz = 2150.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .phyclk_d32_mhz = 625.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 716.667,
+ .dram_speed_mts = 16000.0,
+ .dtbclk_mhz = 1564.0,
+ },
+ },
+ .num_states = 1,
+ .sr_exit_time_us = 42.97,
+ .sr_enter_plus_exit_time_us = 49.94,
+ .sr_exit_z8_time_us = 285.0,
+ .sr_enter_plus_exit_z8_time_us = 320,
+ .writeback_latency_us = 12.0,
+ .round_trip_ping_latency_dcfclk_cycles = 263,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .fclk_change_latency_us = 20,
+ .usr_retraining_latency_us = 2,
+ .smn_latency_us = 2,
+ .mall_allocated_for_dcn_mbytes = 64,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 100.0,
+ .pct_ideal_fabric_bw_after_urgent = 67.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented
+ .pct_ideal_dram_bw_after_urgent_strobe = 67.0,
+ .max_avg_sdp_bw_use_normal_percent = 80.0,
+ .max_avg_fabric_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_strobe_percent = 50.0,
+ .max_avg_dram_bw_use_normal_percent = 15.0,
+ .num_chans = 8,
+ .dram_channel_width_bytes = 2,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .dram_clock_change_latency_us = 400,
+ .dispclk_dppclk_vco_speed_mhz = 4300.0,
+ .do_urgent_latency_adjustment = true,
+ .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+};
+
+void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr)
+{
+ /* defaults */
+ double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us;
+ double fclk_change_latency_us = clk_mgr->base.ctx->dc->dml.soc.fclk_change_latency_us;
+ double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us;
+ double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us;
+ /* For min clocks use as reported by PM FW and report those as min */
+ uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz;
+ uint16_t min_dcfclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz;
+ uint16_t setb_min_uclk_mhz = min_uclk_mhz;
+ uint16_t dcfclk_mhz_for_the_second_state = clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz;
+
+ dc_assert_fp_enabled();
+
+ /* For Set B ranges use min clocks state 2 when available, and report those to PM FW */
+ if (dcfclk_mhz_for_the_second_state)
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_mhz_for_the_second_state;
+ else
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz;
+
+ if (clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz)
+ setb_min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz;
+
+ /* Set A - Normal - default values */
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us = fclk_change_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
+
+ /* Set B - Performance - higher clocks, using DPM[2] DCFCLK and UCLK */
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us = fclk_change_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = setb_min_uclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF;
+
+ /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */
+ /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */
+ if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) {
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
+ clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16;
+ clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
+ clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16;
+ clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
+ clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16;
+ clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8;
+ clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[3].memclk_mhz * 16;
+ clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5;
+ }
+ /* Set D - MALL - SR enter and exit time specific to MALL, TBD after bringup or later phase for now use DRAM values / 2 */
+ /* For MALL DRAM clock change latency is N/A, for watermak calculations use lowest value dummy P state latency */
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us = fclk_change_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = sr_exit_time_us / 2; // TBD
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us / 2; // TBD
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
+}
+
+/*
+ * Finds dummy_latency_index when MCLK switching using firmware based
+ * vblank stretch is enabled. This function will iterate through the
+ * table of dummy pstate latencies until the lowest value that allows
+ * dm_allow_self_refresh_and_mclk_switch to happen is found
+ */
+int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ const int max_latency_table_entries = 4;
+ const struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+ int dummy_latency_index = 0;
+
+ dc_assert_fp_enabled();
+
+ while (dummy_latency_index < max_latency_table_entries) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+ dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
+
+ if (vlevel < context->bw_ctx.dml.vba.soc.num_states &&
+ vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported)
+ break;
+
+ dummy_latency_index++;
+ }
+
+ if (dummy_latency_index == max_latency_table_entries) {
+ ASSERT(dummy_latency_index != max_latency_table_entries);
+ /* If the execution gets here, it means dummy p_states are
+ * not possible. This should never happen and would mean
+ * something is severely wrong.
+ * Here we reset dummy_latency_index to 3, because it is
+ * better to have underflows than system crashes.
+ */
+ dummy_latency_index = max_latency_table_entries - 1;
+ }
+
+ return dummy_latency_index;
+}
+
+/**
+ * dcn32_helper_populate_phantom_dlg_params - Get DLG params for phantom pipes
+ * and populate pipe_ctx with those params.
+ * @dc: [in] current dc state
+ * @context: [in] new dc state
+ * @pipes: [in] DML pipe params array
+ * @pipe_cnt: [in] DML pipe count
+ *
+ * This function must be called AFTER the phantom pipes are added to context
+ * and run through DML (so that the DLG params for the phantom pipes can be
+ * populated), and BEFORE we program the timing for the phantom pipes.
+ */
+void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{
+ uint32_t i, pipe_idx;
+
+ dc_assert_fp_enabled();
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ pipes[pipe_idx].pipe.dest.vstartup_start =
+ get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_offset =
+ get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_width =
+ get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipes[pipe_idx].pipe.dest.vready_offset =
+ get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipe->pipe_dlg_param = pipes[pipe_idx].pipe.dest;
+ }
+ pipe_idx++;
+ }
+}
+
+/**
+ * dcn32_predict_pipe_split - Predict if pipe split will occur for a given DML pipe
+ * @context: [in] New DC state to be programmed
+ * @pipe_e2e: [in] DML pipe end to end context
+ *
+ * This function takes in a DML pipe (pipe_e2e) and predicts if pipe split is required (both
+ * ODM and MPC). For pipe split, ODM combine is determined by the ODM mode, and MPC combine is
+ * determined by DPPClk requirements
+ *
+ * This function follows the same policy as DML:
+ * - Check for ODM combine requirements / policy first
+ * - MPC combine is only chosen if there is no ODM combine requirements / policy in place, and
+ * MPC is required
+ *
+ * Return: Number of splits expected (1 for 2:1 split, 3 for 4:1 split, 0 for no splits).
+ */
+uint8_t dcn32_predict_pipe_split(struct dc_state *context,
+ display_e2e_pipe_params_st *pipe_e2e)
+{
+ double pscl_throughput;
+ double pscl_throughput_chroma;
+ double dpp_clk_single_dpp, clock;
+ double clk_frequency = 0.0;
+ double vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz;
+ bool total_available_pipes_support = false;
+ uint32_t number_of_dpp = 0;
+ enum odm_combine_mode odm_mode = dm_odm_combine_mode_disabled;
+ double req_dispclk_per_surface = 0;
+ uint8_t num_splits = 0;
+
+ dc_assert_fp_enabled();
+
+ dml32_CalculateODMMode(context->bw_ctx.dml.ip.maximum_pixels_per_line_per_dsc_unit,
+ pipe_e2e->pipe.dest.hactive,
+ pipe_e2e->dout.output_format,
+ pipe_e2e->dout.output_type,
+ pipe_e2e->pipe.dest.odm_combine_policy,
+ context->bw_ctx.dml.soc.clock_limits[context->bw_ctx.dml.soc.num_states - 1].dispclk_mhz,
+ context->bw_ctx.dml.soc.clock_limits[context->bw_ctx.dml.soc.num_states - 1].dispclk_mhz,
+ pipe_e2e->dout.dsc_enable != 0,
+ 0, /* TotalNumberOfActiveDPP can be 0 since we're predicting pipe split requirement */
+ context->bw_ctx.dml.ip.max_num_dpp,
+ pipe_e2e->pipe.dest.pixel_rate_mhz,
+ context->bw_ctx.dml.soc.dcn_downspread_percent,
+ context->bw_ctx.dml.ip.dispclk_ramp_margin_percent,
+ context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz,
+ pipe_e2e->dout.dsc_slices,
+ /* Output */
+ &total_available_pipes_support,
+ &number_of_dpp,
+ &odm_mode,
+ &req_dispclk_per_surface);
+
+ dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe_e2e->pipe.scale_ratio_depth.hscl_ratio,
+ pipe_e2e->pipe.scale_ratio_depth.hscl_ratio_c,
+ pipe_e2e->pipe.scale_ratio_depth.vscl_ratio,
+ pipe_e2e->pipe.scale_ratio_depth.vscl_ratio_c,
+ context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk,
+ context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk,
+ pipe_e2e->pipe.dest.pixel_rate_mhz,
+ pipe_e2e->pipe.src.source_format,
+ pipe_e2e->pipe.scale_taps.htaps,
+ pipe_e2e->pipe.scale_taps.htaps_c,
+ pipe_e2e->pipe.scale_taps.vtaps,
+ pipe_e2e->pipe.scale_taps.vtaps_c,
+ /* Output */
+ &pscl_throughput, &pscl_throughput_chroma,
+ &dpp_clk_single_dpp);
+
+ clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100);
+
+ if (clock > 0)
+ clk_frequency = vco_speed * 4.0 / ((int)(vco_speed * 4.0) / clock);
+
+ if (odm_mode == dm_odm_combine_mode_2to1)
+ num_splits = 1;
+ else if (odm_mode == dm_odm_combine_mode_4to1)
+ num_splits = 3;
+ else if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[context->bw_ctx.dml.soc.num_states - 1].dppclk_mhz)
+ num_splits = 1;
+
+ return num_splits;
+}
+
+static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry)
+{
+ float memory_bw_kbytes_sec;
+ float fabric_bw_kbytes_sec;
+ float sdp_bw_kbytes_sec;
+ float limiting_bw_kbytes_sec;
+
+ memory_bw_kbytes_sec = entry->dram_speed_mts *
+ dcn3_2_soc.num_chans *
+ dcn3_2_soc.dram_channel_width_bytes *
+ ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+
+ fabric_bw_kbytes_sec = entry->fabricclk_mhz *
+ dcn3_2_soc.return_bus_width_bytes *
+ ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100);
+
+ sdp_bw_kbytes_sec = entry->dcfclk_mhz *
+ dcn3_2_soc.return_bus_width_bytes *
+ ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100);
+
+ limiting_bw_kbytes_sec = memory_bw_kbytes_sec;
+
+ if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec)
+ limiting_bw_kbytes_sec = fabric_bw_kbytes_sec;
+
+ if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec)
+ limiting_bw_kbytes_sec = sdp_bw_kbytes_sec;
+
+ return limiting_bw_kbytes_sec;
+}
+
+static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry)
+{
+ if (entry->dcfclk_mhz > 0) {
+ float bw_on_sdp = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100);
+
+ entry->fabricclk_mhz = bw_on_sdp / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100));
+ entry->dram_speed_mts = bw_on_sdp / (dcn3_2_soc.num_chans *
+ dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
+ } else if (entry->fabricclk_mhz > 0) {
+ float bw_on_fabric = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100);
+
+ entry->dcfclk_mhz = bw_on_fabric / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100));
+ entry->dram_speed_mts = bw_on_fabric / (dcn3_2_soc.num_chans *
+ dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
+ } else if (entry->dram_speed_mts > 0) {
+ float bw_on_dram = entry->dram_speed_mts * dcn3_2_soc.num_chans *
+ dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+
+ entry->fabricclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100));
+ entry->dcfclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100));
+ }
+}
+
+void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
+ unsigned int *num_entries,
+ struct _vcs_dpi_voltage_scaling_st *entry)
+{
+ int i = 0;
+ int index = 0;
+ float net_bw_of_new_state = 0;
+
+ dc_assert_fp_enabled();
+
+ get_optimal_ntuple(entry);
+
+ if (*num_entries == 0) {
+ table[0] = *entry;
+ (*num_entries)++;
+ } else {
+ net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry);
+ while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) {
+ index++;
+ if (index >= *num_entries)
+ break;
+ }
+
+ for (i = *num_entries; i > index; i--)
+ table[i] = table[i - 1];
+
+ table[index] = *entry;
+ (*num_entries)++;
+ }
+}
+
+/**
+ * dcn32_set_phantom_stream_timing - Set timing params for the phantom stream
+ * @dc: current dc state
+ * @context: new dc state
+ * @ref_pipe: Main pipe for the phantom stream
+ * @phantom_stream: target phantom stream state
+ * @pipes: DML pipe params
+ * @pipe_cnt: number of DML pipes
+ * @dc_pipe_idx: DC pipe index for the main pipe (i.e. ref_pipe)
+ *
+ * Set timing params of the phantom stream based on calculated output from DML.
+ * This function first gets the DML pipe index using the DC pipe index, then
+ * calls into DML (get_subviewport_lines_needed_in_mall) to get the number of
+ * lines required for SubVP MCLK switching and assigns to the phantom stream
+ * accordingly.
+ *
+ * - The number of SubVP lines calculated in DML does not take into account
+ * FW processing delays and required pstate allow width, so we must include
+ * that separately.
+ *
+ * - Set phantom backporch = vstartup of main pipe
+ */
+void dcn32_set_phantom_stream_timing(struct dc *dc,
+ struct dc_state *context,
+ struct pipe_ctx *ref_pipe,
+ struct dc_stream_state *phantom_stream,
+ display_e2e_pipe_params_st *pipes,
+ unsigned int pipe_cnt,
+ unsigned int dc_pipe_idx)
+{
+ unsigned int i, pipe_idx;
+ struct pipe_ctx *pipe;
+ uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines;
+ unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel;
+ unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel];
+
+ dc_assert_fp_enabled();
+
+ // Find DML pipe index (pipe_idx) using dc_pipe_idx
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (i == dc_pipe_idx)
+ break;
+
+ pipe_idx++;
+ }
+
+ // Calculate lines required for pstate allow width and FW processing delays
+ pstate_width_fw_delay_lines = ((double)(dc->caps.subvp_fw_processing_delay_us +
+ dc->caps.subvp_pstate_allow_width_us) / 1000000) *
+ (ref_pipe->stream->timing.pix_clk_100hz * 100) /
+ (double)ref_pipe->stream->timing.h_total;
+
+ // Update clks_cfg for calling into recalculate
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = socclk;
+
+ // DML calculation for MALL region doesn't take into account FW delay
+ // and required pstate allow width for multi-display cases
+ /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned
+ * to 2 swaths (i.e. 16 lines)
+ */
+ phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) +
+ pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines;
+
+ // For backporch of phantom pipe, use vstartup of the main pipe
+ phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+ phantom_stream->dst.y = 0;
+ phantom_stream->dst.height = phantom_vactive;
+ phantom_stream->src.y = 0;
+ phantom_stream->src.height = phantom_vactive;
+
+ phantom_stream->timing.v_addressable = phantom_vactive;
+ phantom_stream->timing.v_front_porch = 1;
+ phantom_stream->timing.v_total = phantom_stream->timing.v_addressable +
+ phantom_stream->timing.v_front_porch +
+ phantom_stream->timing.v_sync_width +
+ phantom_bp;
+ phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing
+}
+
+/**
+ * dcn32_get_num_free_pipes - Calculate number of free pipes
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * This function assumes that a "used" pipe is a pipe that has
+ * both a stream and a plane assigned to it.
+ *
+ * Return: Number of free pipes available in the context
+ */
+static unsigned int dcn32_get_num_free_pipes(struct dc *dc, struct dc_state *context)
+{
+ unsigned int i;
+ unsigned int free_pipes = 0;
+ unsigned int num_pipes = 0;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream && !pipe->top_pipe) {
+ while (pipe) {
+ num_pipes++;
+ pipe = pipe->bottom_pipe;
+ }
+ }
+ }
+
+ free_pipes = dc->res_pool->pipe_count - num_pipes;
+ return free_pipes;
+}
+
+/**
+ * dcn32_assign_subvp_pipe - Function to decide which pipe will use Sub-VP.
+ * @dc: current dc state
+ * @context: new dc state
+ * @index: [out] dc pipe index for the pipe chosen to have phantom pipes assigned
+ *
+ * We enter this function if we are Sub-VP capable (i.e. enough pipes available)
+ * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if
+ * we are forcing SubVP P-State switching on the current config.
+ *
+ * The number of pipes used for the chosen surface must be less than or equal to the
+ * number of free pipes available.
+ *
+ * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK).
+ * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own
+ * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't
+ * support MCLK switching naturally [i.e. ACTIVE or VBLANK]).
+ *
+ * Return: True if a valid pipe assignment was found for Sub-VP. Otherwise false.
+ */
+static bool dcn32_assign_subvp_pipe(struct dc *dc,
+ struct dc_state *context,
+ unsigned int *index)
+{
+ unsigned int i, pipe_idx;
+ unsigned int max_frame_time = 0;
+ bool valid_assignment_found = false;
+ unsigned int free_pipes = dcn32_get_num_free_pipes(dc, context);
+ bool current_assignment_freesync = false;
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ unsigned int num_pipes = 0;
+ unsigned int refresh_rate = 0;
+
+ if (!pipe->stream)
+ continue;
+
+ // Round up
+ refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 +
+ pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1)
+ / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total);
+ /* SubVP pipe candidate requirements:
+ * - Refresh rate < 120hz
+ * - Not able to switch in vactive naturally (switching in active means the
+ * DET provides enough buffer to hide the P-State switch latency -- trying
+ * to combine this with SubVP can cause issues with the scheduling).
+ * - Not TMZ surface
+ */
+ if (pipe->plane_state && !pipe->top_pipe &&
+ pipe->stream->mall_stream_config.type == SUBVP_NONE && refresh_rate < 120 && !pipe->plane_state->address.tmz_surface &&
+ vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) {
+ while (pipe) {
+ num_pipes++;
+ pipe = pipe->bottom_pipe;
+ }
+
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (num_pipes <= free_pipes) {
+ struct dc_stream_state *stream = pipe->stream;
+ unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total /
+ (double)(stream->timing.pix_clk_100hz * 100)) * 1000000;
+ if (frame_us > max_frame_time && !stream->ignore_msa_timing_param) {
+ *index = i;
+ max_frame_time = frame_us;
+ valid_assignment_found = true;
+ current_assignment_freesync = false;
+ /* For the 2-Freesync display case, still choose the one with the
+ * longest frame time
+ */
+ } else if (stream->ignore_msa_timing_param && (!valid_assignment_found ||
+ (current_assignment_freesync && frame_us > max_frame_time))) {
+ *index = i;
+ valid_assignment_found = true;
+ current_assignment_freesync = true;
+ }
+ }
+ }
+ pipe_idx++;
+ }
+ return valid_assignment_found;
+}
+
+/**
+ * dcn32_enough_pipes_for_subvp - Function to check if there are "enough" pipes for SubVP.
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * This function returns true if there are enough free pipes
+ * to create the required phantom pipes for any given stream
+ * (that does not already have phantom pipe assigned).
+ *
+ * e.g. For a 2 stream config where the first stream uses one
+ * pipe and the second stream uses 2 pipes (i.e. pipe split),
+ * this function will return true because there is 1 remaining
+ * pipe which can be used as the phantom pipe for the non pipe
+ * split pipe.
+ *
+ * Return:
+ * True if there are enough free pipes to assign phantom pipes to at least one
+ * stream that does not already have phantom pipes assigned. Otherwise false.
+ */
+static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context)
+{
+ unsigned int i, split_cnt, free_pipes;
+ unsigned int min_pipe_split = dc->res_pool->pipe_count + 1; // init as max number of pipes + 1
+ bool subvp_possible = false;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ // Find the minimum pipe split count for non SubVP pipes
+ if (pipe->stream && !pipe->top_pipe &&
+ pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ split_cnt = 0;
+ while (pipe) {
+ split_cnt++;
+ pipe = pipe->bottom_pipe;
+ }
+
+ if (split_cnt < min_pipe_split)
+ min_pipe_split = split_cnt;
+ }
+ }
+
+ free_pipes = dcn32_get_num_free_pipes(dc, context);
+
+ // SubVP only possible if at least one pipe is being used (i.e. free_pipes
+ // should not equal to the pipe_count)
+ if (free_pipes >= min_pipe_split && free_pipes < dc->res_pool->pipe_count)
+ subvp_possible = true;
+
+ return subvp_possible;
+}
+
+/**
+ * subvp_subvp_schedulable - Determine if SubVP + SubVP config is schedulable
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * High level algorithm:
+ * 1. Find longest microschedule length (in us) between the two SubVP pipes
+ * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both
+ * pipes still allows for the maximum microschedule to fit in the active
+ * region for both pipes.
+ *
+ * Return: True if the SubVP + SubVP config is schedulable, false otherwise
+ */
+static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context)
+{
+ struct pipe_ctx *subvp_pipes[2];
+ struct dc_stream_state *phantom = NULL;
+ uint32_t microschedule_lines = 0;
+ uint32_t index = 0;
+ uint32_t i;
+ uint32_t max_microschedule_us = 0;
+ int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ uint32_t time_us = 0;
+
+ /* Loop to calculate the maximum microschedule time between the two SubVP pipes,
+ * and also to store the two main SubVP pipe pointers in subvp_pipes[2].
+ */
+ if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
+ pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+ phantom = pipe->stream->mall_stream_config.paired_stream;
+ microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) +
+ phantom->timing.v_addressable;
+
+ // Round up when calculating microschedule time (+ 1 at the end)
+ time_us = (microschedule_lines * phantom->timing.h_total) /
+ (double)(phantom->timing.pix_clk_100hz * 100) * 1000000 +
+ dc->caps.subvp_prefetch_end_to_mall_start_us +
+ dc->caps.subvp_fw_processing_delay_us + 1;
+ if (time_us > max_microschedule_us)
+ max_microschedule_us = time_us;
+
+ subvp_pipes[index] = pipe;
+ index++;
+
+ // Maximum 2 SubVP pipes
+ if (index == 2)
+ break;
+ }
+ }
+ vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) /
+ (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+ vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) /
+ (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+ vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) *
+ subvp_pipes[0]->stream->timing.h_total) /
+ (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+ vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) *
+ subvp_pipes[1]->stream->timing.h_total) /
+ (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+
+ if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us &&
+ (vactive2_us - vblank1_us) / 2 > max_microschedule_us)
+ return true;
+
+ return false;
+}
+
+/**
+ * subvp_drr_schedulable - Determine if SubVP + DRR config is schedulable
+ * @dc: current dc state
+ * @context: new dc state
+ * @drr_pipe: DRR pipe_ctx for the SubVP + DRR config
+ *
+ * High level algorithm:
+ * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe
+ * 2. Determine the frame time for the DRR display when adding required margin for MCLK switching
+ * (the margin is equal to the MALL region + DRR margin (500us))
+ * 3.If (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR frame))
+ * then report the configuration as supported
+ *
+ * Return: True if the SubVP + DRR config is schedulable, false otherwise
+ */
+static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context, struct pipe_ctx *drr_pipe)
+{
+ bool schedulable = false;
+ uint32_t i;
+ struct pipe_ctx *pipe = NULL;
+ struct dc_crtc_timing *main_timing = NULL;
+ struct dc_crtc_timing *phantom_timing = NULL;
+ struct dc_crtc_timing *drr_timing = NULL;
+ int16_t prefetch_us = 0;
+ int16_t mall_region_us = 0;
+ int16_t drr_frame_us = 0; // nominal frame time
+ int16_t subvp_active_us = 0;
+ int16_t stretched_drr_us = 0;
+ int16_t drr_stretched_vblank_us = 0;
+ int16_t max_vblank_mallregion = 0;
+
+ // Find SubVP pipe
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ // We check for master pipe, but it shouldn't matter since we only need
+ // the pipe for timing info (stream should be same for any pipe splits)
+ if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe)
+ continue;
+
+ // Find the SubVP pipe
+ if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+ break;
+ }
+
+ main_timing = &pipe->stream->timing;
+ phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing;
+ drr_timing = &drr_pipe->stream->timing;
+ prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+ (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+ dc->caps.subvp_prefetch_end_to_mall_start_us;
+ subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+ (double)(main_timing->pix_clk_100hz * 100) * 1000000;
+ drr_frame_us = drr_timing->v_total * drr_timing->h_total /
+ (double)(drr_timing->pix_clk_100hz * 100) * 1000000;
+ // P-State allow width and FW delays already included phantom_timing->v_addressable
+ mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+ (double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+ stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
+ drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
+ (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
+ max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+
+ /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the
+ * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis
+ * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time,
+ * and the max of (VBLANK blanking time, MALL region)).
+ */
+ if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 &&
+ subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0)
+ schedulable = true;
+
+ return schedulable;
+}
+
+
+/**
+ * subvp_vblank_schedulable - Determine if SubVP + VBLANK config is schedulable
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * High level algorithm:
+ * 1. Get timing for SubVP pipe, phantom pipe, and VBLANK pipe
+ * 2. If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time))
+ * then report the configuration as supported
+ * 3. If the VBLANK display is DRR, then take the DRR static schedulability path
+ *
+ * Return: True if the SubVP + VBLANK/DRR config is schedulable, false otherwise
+ */
+static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
+{
+ struct pipe_ctx *pipe = NULL;
+ struct pipe_ctx *subvp_pipe = NULL;
+ bool found = false;
+ bool schedulable = false;
+ uint32_t i = 0;
+ uint8_t vblank_index = 0;
+ uint16_t prefetch_us = 0;
+ uint16_t mall_region_us = 0;
+ uint16_t vblank_frame_us = 0;
+ uint16_t subvp_active_us = 0;
+ uint16_t vblank_blank_us = 0;
+ uint16_t max_vblank_mallregion = 0;
+ struct dc_crtc_timing *main_timing = NULL;
+ struct dc_crtc_timing *phantom_timing = NULL;
+ struct dc_crtc_timing *vblank_timing = NULL;
+
+ /* For SubVP + VBLANK/DRR cases, we assume there can only be
+ * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK
+ * is supported, it is either a single VBLANK case or two VBLANK
+ * displays which are synchronized (in which case they have identical
+ * timings).
+ */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ // We check for master pipe, but it shouldn't matter since we only need
+ // the pipe for timing info (stream should be same for any pipe splits)
+ if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe)
+ continue;
+
+ if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe).
+ vblank_index = i;
+ found = true;
+ }
+
+ if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+ subvp_pipe = pipe;
+ }
+ // Use ignore_msa_timing_param flag to identify as DRR
+ if (found && context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) {
+ // SUBVP + DRR case
+ schedulable = subvp_drr_schedulable(dc, context, &context->res_ctx.pipe_ctx[vblank_index]);
+ } else if (found) {
+ main_timing = &subvp_pipe->stream->timing;
+ phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+ vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing;
+ // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe
+ // Also include the prefetch end to mallstart delay time
+ prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+ (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+ dc->caps.subvp_prefetch_end_to_mall_start_us;
+ // P-State allow width and FW delays already included phantom_timing->v_addressable
+ mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+ (double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+ vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total /
+ (double)(vblank_timing->pix_clk_100hz * 100) * 1000000;
+ vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total /
+ (double)(vblank_timing->pix_clk_100hz * 100) * 1000000;
+ subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+ (double)(main_timing->pix_clk_100hz * 100) * 1000000;
+ max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us;
+
+ // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time,
+ // and the max of (VBLANK blanking time, MALL region)
+ // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0)
+ if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0)
+ schedulable = true;
+ }
+ return schedulable;
+}
+
+/**
+ * subvp_validate_static_schedulability - Check which SubVP case is calculated
+ * and handle static analysis based on the case.
+ * @dc: current dc state
+ * @context: new dc state
+ * @vlevel: Voltage level calculated by DML
+ *
+ * Three cases:
+ * 1. SubVP + SubVP
+ * 2. SubVP + VBLANK (DRR checked internally)
+ * 3. SubVP + VACTIVE (currently unsupported)
+ *
+ * Return: True if statically schedulable, false otherwise
+ */
+static bool subvp_validate_static_schedulability(struct dc *dc,
+ struct dc_state *context,
+ int vlevel)
+{
+ bool schedulable = true; // true by default for single display case
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+ uint32_t i, pipe_idx;
+ uint8_t subvp_count = 0;
+ uint8_t vactive_count = 0;
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (pipe->plane_state && !pipe->top_pipe &&
+ pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+ subvp_count++;
+
+ // Count how many planes that aren't SubVP/phantom are capable of VACTIVE
+ // switching (SubVP + VACTIVE unsupported). In situations where we force
+ // SubVP for a VACTIVE plane, we don't want to increment the vactive_count.
+ if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 &&
+ pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ vactive_count++;
+ }
+ pipe_idx++;
+ }
+
+ if (subvp_count == 2) {
+ // Static schedulability check for SubVP + SubVP case
+ schedulable = subvp_subvp_schedulable(dc, context);
+ } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp) {
+ // Static schedulability check for SubVP + VBLANK case. Also handle the case where
+ // DML outputs SubVP + VBLANK + VACTIVE (DML will report as SubVP + VBLANK)
+ if (vactive_count > 0)
+ schedulable = false;
+ else
+ schedulable = subvp_vblank_schedulable(dc, context);
+ } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vactive_w_mall_sub_vp &&
+ vactive_count > 0) {
+ // For single display SubVP cases, DML will output dm_dram_clock_change_vactive_w_mall_sub_vp by default.
+ // We tell the difference between SubVP vs. SubVP + VACTIVE by checking the vactive_count.
+ // SubVP + VACTIVE currently unsupported
+ schedulable = false;
+ }
+ return schedulable;
+}
+
+static void dcn32_full_validate_bw_helper(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *vlevel,
+ int *split,
+ bool *merge,
+ int *pipe_cnt)
+{
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+ unsigned int dc_pipe_idx = 0;
+ bool found_supported_config = false;
+ struct pipe_ctx *pipe = NULL;
+ uint32_t non_subvp_pipes = 0;
+ bool drr_pipe_found = false;
+ uint32_t drr_pipe_index = 0;
+ uint32_t i = 0;
+
+ dc_assert_fp_enabled();
+
+ /*
+ * DML favors voltage over p-state, but we're more interested in
+ * supporting p-state over voltage. We can't support p-state in
+ * prefetch mode > 0 so try capping the prefetch mode to start.
+ * Override present for testing.
+ */
+ if (dc->debug.dml_disallow_alternate_prefetch_modes)
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_uclk_fclk_and_stutter;
+ else
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
+
+ *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+ /* This may adjust vlevel and maxMpcComb */
+ if (*vlevel < context->bw_ctx.dml.soc.num_states) {
+ *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
+ vba->VoltageLevel = *vlevel;
+ }
+
+ /* Conditions for setting up phantom pipes for SubVP:
+ * 1. Not force disable SubVP
+ * 2. Full update (i.e. !fast_validate)
+ * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?)
+ * 4. Display configuration passes validation
+ * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch)
+ */
+ if (!dc->debug.force_disable_subvp && dcn32_all_pipes_have_stream_and_plane(dc, context) &&
+ !dcn32_mpo_in_use(context) && !dcn32_any_surfaces_rotated(dc, context) &&
+ (*vlevel == context->bw_ctx.dml.soc.num_states ||
+ vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported ||
+ dc->debug.force_subvp_mclk_switch)) {
+
+ dcn32_merge_pipes_for_subvp(dc, context);
+ memset(merge, 0, MAX_PIPES * sizeof(bool));
+
+ /* to re-initialize viewport after the pipe merge */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe_ctx->plane_state || !pipe_ctx->stream)
+ continue;
+
+ resource_build_scaling_params(pipe_ctx);
+ }
+
+ while (!found_supported_config && dcn32_enough_pipes_for_subvp(dc, context) &&
+ dcn32_assign_subvp_pipe(dc, context, &dc_pipe_idx)) {
+ /* For the case where *vlevel = num_states, bandwidth validation has failed for this config.
+ * Adding phantom pipes won't change the validation result, so change the DML input param
+ * for P-State support before adding phantom pipes and recalculating the DML result.
+ * However, this case is only applicable for SubVP + DRR cases because the prefetch mode
+ * will not allow for switch in VBLANK. The DRR display must have it's VBLANK stretched
+ * enough to support MCLK switching.
+ */
+ if (*vlevel == context->bw_ctx.dml.soc.num_states &&
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final ==
+ dm_prefetch_support_uclk_fclk_and_stutter) {
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_stutter;
+ /* There are params (such as FabricClock) that need to be recalculated
+ * after validation fails (otherwise it will be 0). Calculation for
+ * phantom vactive requires call into DML, so we must ensure all the
+ * vba params are valid otherwise we'll get incorrect phantom vactive.
+ */
+ *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+ }
+
+ dc->res_pool->funcs->add_phantom_pipes(dc, context, pipes, *pipe_cnt, dc_pipe_idx);
+
+ *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
+ // Populate dppclk to trigger a recalculate in dml_get_voltage_level
+ // so the phantom pipe DLG params can be assigned correctly.
+ pipes[0].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, *pipe_cnt, 0);
+ *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+
+ if (*vlevel < context->bw_ctx.dml.soc.num_states &&
+ vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported
+ && subvp_validate_static_schedulability(dc, context, *vlevel)) {
+ found_supported_config = true;
+ } else if (*vlevel < context->bw_ctx.dml.soc.num_states &&
+ vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) {
+ /* Case where 1 SubVP is added, and DML reports MCLK unsupported. This handles
+ * the case for SubVP + DRR, where the DRR display does not support MCLK switch
+ * at it's native refresh rate / timing.
+ */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
+ pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ non_subvp_pipes++;
+ // Use ignore_msa_timing_param flag to identify as DRR
+ if (pipe->stream->ignore_msa_timing_param) {
+ drr_pipe_found = true;
+ drr_pipe_index = i;
+ }
+ }
+ }
+ // If there is only 1 remaining non SubVP pipe that is DRR, check static
+ // schedulability for SubVP + DRR.
+ if (non_subvp_pipes == 1 && drr_pipe_found) {
+ found_supported_config = subvp_drr_schedulable(dc, context,
+ &context->res_ctx.pipe_ctx[drr_pipe_index]);
+ }
+ }
+ }
+
+ // If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
+ // remove phantom pipes and repopulate dml pipes
+ if (!found_supported_config) {
+ dc->res_pool->funcs->remove_phantom_pipes(dc, context);
+ vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported;
+ *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
+
+ *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+ /* This may adjust vlevel and maxMpcComb */
+ if (*vlevel < context->bw_ctx.dml.soc.num_states) {
+ *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
+ vba->VoltageLevel = *vlevel;
+ }
+ } else {
+ // Most populate phantom DLG params before programming hardware / timing for phantom pipe
+ DC_FP_START();
+ dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt);
+ DC_FP_END();
+
+ /* Call validate_apply_pipe_split flags after calling DML getters for
+ * phantom dlg params, or some of the VBA params indicating pipe split
+ * can be overwritten by the getters.
+ *
+ * When setting up SubVP config, all pipes are merged before attempting to
+ * add phantom pipes. If pipe split (ODM / MPC) is required, both the main
+ * and phantom pipes will be split in the regular pipe splitting sequence.
+ */
+ memset(split, 0, MAX_PIPES * sizeof(int));
+ memset(merge, 0, MAX_PIPES * sizeof(bool));
+ *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
+ vba->VoltageLevel = *vlevel;
+ // Note: We can't apply the phantom pipes to hardware at this time. We have to wait
+ // until driver has acquired the DMCUB lock to do it safely.
+ }
+ }
+}
+
+static bool is_dtbclk_required(struct dc *dc, struct dc_state *context)
+{
+ int i;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+ if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i]))
+ return true;
+ }
+ return false;
+}
+
+static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt, int vlevel)
+{
+ int i, pipe_idx;
+ bool usr_retraining_support = false;
+ bool unbounded_req_enabled = false;
+
+ dc_assert_fp_enabled();
+
+ /* Writeback MCIF_WB arbitration parameters */
+ dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt);
+
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16;
+ context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000;
+ context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000;
+ context->bw_ctx.bw.dcn.clk.p_state_change_support =
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
+ != dm_dram_clock_change_unsupported;
+ context->bw_ctx.bw.dcn.clk.num_ways = dcn32_helper_calculate_num_ways_for_subvp(dc, context);
+
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
+ context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = context->bw_ctx.dml.vba.DTBCLKPerState[vlevel] * 1000;
+ if (context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_fclock_change_unsupported)
+ context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false;
+ else
+ context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true;
+
+ usr_retraining_support = context->bw_ctx.dml.vba.USRRetrainingSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ ASSERT(usr_retraining_support);
+
+ if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz;
+
+ unbounded_req_enabled = get_unbounded_request_enabled(&context->bw_ctx.dml, pipes, pipe_cnt);
+
+ if (unbounded_req_enabled && pipe_cnt > 1) {
+ // Unbounded requesting should not ever be used when more than 1 pipe is enabled.
+ ASSERT(false);
+ unbounded_req_enabled = false;
+ }
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+ pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+ pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+
+ if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
+ context->res_ctx.pipe_ctx[i].unbounded_req = false;
+ } else {
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+ context->res_ctx.pipe_ctx[i].unbounded_req = unbounded_req_enabled;
+ }
+
+ if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
+ pipe_idx++;
+ }
+ /*save a original dppclock copy*/
+ context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
+ context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
+ context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz
+ * 1000;
+ context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz
+ * 1000;
+
+ context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (context->res_ctx.pipe_ctx[i].stream)
+ context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[i].det_buffer_size_kb;
+ }
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg_v2(&context->bw_ctx.dml,
+ &context->res_ctx.pipe_ctx[i].dlg_regs, &context->res_ctx.pipe_ctx[i].ttu_regs, pipes,
+ pipe_cnt, pipe_idx);
+
+ context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg_v2(&context->res_ctx.pipe_ctx[i].rq_regs,
+ &context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipe_idx++;
+ }
+}
+
+static struct pipe_ctx *dcn32_find_split_pipe(
+ struct dc *dc,
+ struct dc_state *context,
+ int old_index)
+{
+ struct pipe_ctx *pipe = NULL;
+ int i;
+
+ if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) {
+ pipe = &context->res_ctx.pipe_ctx[old_index];
+ pipe->pipe_idx = old_index;
+ }
+
+ if (!pipe)
+ for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) {
+ if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL
+ && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) {
+ if (context->res_ctx.pipe_ctx[i].stream == NULL) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ pipe->pipe_idx = i;
+ break;
+ }
+ }
+ }
+
+ /*
+ * May need to fix pipes getting tossed from 1 opp to another on flip
+ * Add for debugging transient underflow during topology updates:
+ * ASSERT(pipe);
+ */
+ if (!pipe)
+ for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) {
+ if (context->res_ctx.pipe_ctx[i].stream == NULL) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ pipe->pipe_idx = i;
+ break;
+ }
+ }
+
+ return pipe;
+}
+
+static bool dcn32_split_stream_for_mpc_or_odm(
+ const struct dc *dc,
+ struct resource_context *res_ctx,
+ struct pipe_ctx *pri_pipe,
+ struct pipe_ctx *sec_pipe,
+ bool odm)
+{
+ int pipe_idx = sec_pipe->pipe_idx;
+ const struct resource_pool *pool = dc->res_pool;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (odm && pri_pipe->plane_state) {
+ /* ODM + window MPO, where MPO window is on left half only */
+ if (pri_pipe->plane_state->clip_rect.x + pri_pipe->plane_state->clip_rect.width <=
+ pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) {
+
+ DC_LOG_SCALER("%s - ODM + window MPO(left). pri_pipe:%d\n",
+ __func__,
+ pri_pipe->pipe_idx);
+ return true;
+ }
+
+ /* ODM + window MPO, where MPO window is on right half only */
+ if (pri_pipe->plane_state->clip_rect.x >= pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) {
+
+ DC_LOG_SCALER("%s - ODM + window MPO(right). pri_pipe:%d\n",
+ __func__,
+ pri_pipe->pipe_idx);
+ return true;
+ }
+ }
+
+ *sec_pipe = *pri_pipe;
+
+ sec_pipe->pipe_idx = pipe_idx;
+ sec_pipe->plane_res.mi = pool->mis[pipe_idx];
+ sec_pipe->plane_res.hubp = pool->hubps[pipe_idx];
+ sec_pipe->plane_res.ipp = pool->ipps[pipe_idx];
+ sec_pipe->plane_res.xfm = pool->transforms[pipe_idx];
+ sec_pipe->plane_res.dpp = pool->dpps[pipe_idx];
+ sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst;
+ sec_pipe->stream_res.dsc = NULL;
+ if (odm) {
+ if (pri_pipe->next_odm_pipe) {
+ ASSERT(pri_pipe->next_odm_pipe != sec_pipe);
+ sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe;
+ sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe;
+ }
+ if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) {
+ pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe;
+ sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe;
+ }
+ if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) {
+ pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe;
+ sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe;
+ }
+ pri_pipe->next_odm_pipe = sec_pipe;
+ sec_pipe->prev_odm_pipe = pri_pipe;
+ ASSERT(sec_pipe->top_pipe == NULL);
+
+ if (!sec_pipe->top_pipe)
+ sec_pipe->stream_res.opp = pool->opps[pipe_idx];
+ else
+ sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp;
+ if (sec_pipe->stream->timing.flags.DSC == 1) {
+ dcn20_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx);
+ ASSERT(sec_pipe->stream_res.dsc);
+ if (sec_pipe->stream_res.dsc == NULL)
+ return false;
+ }
+ } else {
+ if (pri_pipe->bottom_pipe) {
+ ASSERT(pri_pipe->bottom_pipe != sec_pipe);
+ sec_pipe->bottom_pipe = pri_pipe->bottom_pipe;
+ sec_pipe->bottom_pipe->top_pipe = sec_pipe;
+ }
+ pri_pipe->bottom_pipe = sec_pipe;
+ sec_pipe->top_pipe = pri_pipe;
+
+ ASSERT(pri_pipe->plane_state);
+ }
+
+ return true;
+}
+
+bool dcn32_internal_validate_bw(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *pipe_cnt_out,
+ int *vlevel_out,
+ bool fast_validate)
+{
+ bool out = false;
+ bool repopulate_pipes = false;
+ int split[MAX_PIPES] = { 0 };
+ bool merge[MAX_PIPES] = { false };
+ bool newly_split[MAX_PIPES] = { false };
+ int pipe_cnt, i, pipe_idx;
+ int vlevel = context->bw_ctx.dml.soc.num_states;
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+
+ dc_assert_fp_enabled();
+
+ ASSERT(pipes);
+ if (!pipes)
+ return false;
+
+ // For each full update, remove all existing phantom pipes first
+ dc->res_pool->funcs->remove_phantom_pipes(dc, context);
+
+ dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
+
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+
+ if (!pipe_cnt) {
+ out = true;
+ goto validate_out;
+ }
+
+ dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt);
+
+ if (!fast_validate) {
+ DC_FP_START();
+ dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt);
+ DC_FP_END();
+ }
+
+ if (fast_validate ||
+ (dc->debug.dml_disallow_alternate_prefetch_modes &&
+ (vlevel == context->bw_ctx.dml.soc.num_states ||
+ vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported))) {
+ /*
+ * If dml_disallow_alternate_prefetch_modes is false, then we have already
+ * tried alternate prefetch modes during full validation.
+ *
+ * If mode is unsupported or there is no p-state support, then
+ * fall back to favouring voltage.
+ *
+ * If Prefetch mode 0 failed for this config, or passed with Max UCLK, then try
+ * to support with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2)
+ */
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_fclk_and_stutter;
+
+ vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+
+ /* Last attempt with Prefetch mode 2 (dm_prefetch_support_stutter == 3) */
+ if (vlevel == context->bw_ctx.dml.soc.num_states) {
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_stutter;
+ vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+ }
+
+ if (vlevel < context->bw_ctx.dml.soc.num_states) {
+ memset(split, 0, sizeof(split));
+ memset(merge, 0, sizeof(merge));
+ vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
+ // dcn20_validate_apply_pipe_split_flags can modify voltage level outside of DML
+ vba->VoltageLevel = vlevel;
+ }
+ }
+
+ dml_log_mode_support_params(&context->bw_ctx.dml);
+
+ if (vlevel == context->bw_ctx.dml.soc.num_states)
+ goto validate_fail;
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *mpo_pipe = pipe->bottom_pipe;
+
+ if (!pipe->stream)
+ continue;
+
+ if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled
+ && !dc->config.enable_windowed_mpo_odm
+ && pipe->plane_state && mpo_pipe
+ && memcmp(&mpo_pipe->plane_res.scl_data.recout,
+ &pipe->plane_res.scl_data.recout,
+ sizeof(struct rect)) != 0) {
+ ASSERT(mpo_pipe->plane_state != pipe->plane_state);
+ goto validate_fail;
+ }
+ pipe_idx++;
+ }
+
+ /* merge pipes if necessary */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ /*skip pipes that don't need merging*/
+ if (!merge[i])
+ continue;
+
+ /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */
+ if (pipe->prev_odm_pipe) {
+ /*split off odm pipe*/
+ pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe;
+ if (pipe->next_odm_pipe)
+ pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe;
+
+ /*2:1ODM+MPC Split MPO to Single Pipe + MPC Split MPO*/
+ if (pipe->bottom_pipe) {
+ if (pipe->bottom_pipe->prev_odm_pipe || pipe->bottom_pipe->next_odm_pipe) {
+ /*MPC split rules will handle this case*/
+ pipe->bottom_pipe->top_pipe = NULL;
+ } else {
+ /* when merging an ODM pipes, the bottom MPC pipe must now point to
+ * the previous ODM pipe and its associated stream assets
+ */
+ if (pipe->prev_odm_pipe->bottom_pipe) {
+ /* 3 plane MPO*/
+ pipe->bottom_pipe->top_pipe = pipe->prev_odm_pipe->bottom_pipe;
+ pipe->prev_odm_pipe->bottom_pipe->bottom_pipe = pipe->bottom_pipe;
+ } else {
+ /* 2 plane MPO*/
+ pipe->bottom_pipe->top_pipe = pipe->prev_odm_pipe;
+ pipe->prev_odm_pipe->bottom_pipe = pipe->bottom_pipe;
+ }
+
+ memcpy(&pipe->bottom_pipe->stream_res, &pipe->bottom_pipe->top_pipe->stream_res, sizeof(struct stream_resource));
+ }
+ }
+
+ if (pipe->top_pipe) {
+ pipe->top_pipe->bottom_pipe = NULL;
+ }
+
+ pipe->bottom_pipe = NULL;
+ pipe->next_odm_pipe = NULL;
+ pipe->plane_state = NULL;
+ pipe->stream = NULL;
+ pipe->top_pipe = NULL;
+ pipe->prev_odm_pipe = NULL;
+ if (pipe->stream_res.dsc)
+ dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc);
+ memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
+ memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
+ repopulate_pipes = true;
+ } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) {
+ struct pipe_ctx *top_pipe = pipe->top_pipe;
+ struct pipe_ctx *bottom_pipe = pipe->bottom_pipe;
+
+ top_pipe->bottom_pipe = bottom_pipe;
+ if (bottom_pipe)
+ bottom_pipe->top_pipe = top_pipe;
+
+ pipe->top_pipe = NULL;
+ pipe->bottom_pipe = NULL;
+ pipe->plane_state = NULL;
+ pipe->stream = NULL;
+ memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
+ memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
+ repopulate_pipes = true;
+ } else
+ ASSERT(0); /* Should never try to merge master pipe */
+
+ }
+
+ for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *hsplit_pipe = NULL;
+ bool odm;
+ int old_index = -1;
+
+ if (!pipe->stream || newly_split[i])
+ continue;
+
+ pipe_idx++;
+ odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled;
+
+ if (!pipe->plane_state && !odm)
+ continue;
+
+ if (split[i]) {
+ if (odm) {
+ if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
+ else if (old_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->pipe_idx;
+ } else {
+ if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx;
+ else if (old_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->pipe_idx;
+ }
+ hsplit_pipe = dcn32_find_split_pipe(dc, context, old_index);
+ ASSERT(hsplit_pipe);
+ if (!hsplit_pipe)
+ goto validate_fail;
+
+ if (!dcn32_split_stream_for_mpc_or_odm(
+ dc, &context->res_ctx,
+ pipe, hsplit_pipe, odm))
+ goto validate_fail;
+
+ newly_split[hsplit_pipe->pipe_idx] = true;
+ repopulate_pipes = true;
+ }
+ if (split[i] == 4) {
+ struct pipe_ctx *pipe_4to1;
+
+ if (odm && old_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->pipe_idx;
+ else if (!odm && old_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->pipe_idx;
+ else
+ old_index = -1;
+ pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index);
+ ASSERT(pipe_4to1);
+ if (!pipe_4to1)
+ goto validate_fail;
+ if (!dcn32_split_stream_for_mpc_or_odm(
+ dc, &context->res_ctx,
+ pipe, pipe_4to1, odm))
+ goto validate_fail;
+ newly_split[pipe_4to1->pipe_idx] = true;
+
+ if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe
+ && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
+ else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->bottom_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx;
+ else
+ old_index = -1;
+ pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index);
+ ASSERT(pipe_4to1);
+ if (!pipe_4to1)
+ goto validate_fail;
+ if (!dcn32_split_stream_for_mpc_or_odm(
+ dc, &context->res_ctx,
+ hsplit_pipe, pipe_4to1, odm))
+ goto validate_fail;
+ newly_split[pipe_4to1->pipe_idx] = true;
+ }
+ if (odm)
+ dcn20_build_mapped_resource(dc, context, pipe->stream);
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state) {
+ if (!resource_build_scaling_params(pipe))
+ goto validate_fail;
+ }
+ }
+
+ /* Actual dsc count per stream dsc validation*/
+ if (!dcn20_validate_dsc(dc, context)) {
+ vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE;
+ goto validate_fail;
+ }
+
+ if (repopulate_pipes) {
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+
+ /* repopulate_pipes = 1 means the pipes were either split or merged. In this case
+ * we have to re-calculate the DET allocation and run through DML once more to
+ * ensure all the params are calculated correctly. We do not need to run the
+ * pipe split check again after this call (pipes are already split / merged).
+ * */
+ if (!fast_validate) {
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
+ vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+ }
+ }
+ *vlevel_out = vlevel;
+ *pipe_cnt_out = pipe_cnt;
+
+ out = true;
+ goto validate_out;
+
+validate_fail:
+ out = false;
+
+validate_out:
+ return out;
+}
+
+
+void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ int i, pipe_idx, vlevel_temp = 0;
+ double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz;
+ double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ double dcfclk_from_fw_based_mclk_switching = dcfclk_from_validation;
+ bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] !=
+ dm_dram_clock_change_unsupported;
+ unsigned int dummy_latency_index = 0;
+ int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+ unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+ unsigned int min_dram_speed_mts_margin;
+
+ dc_assert_fp_enabled();
+
+ // Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK
+ if (!pstate_en && dcn32_subvp_in_use(dc, context)) {
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
+ pstate_en = true;
+ }
+
+ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false;
+
+ if (!pstate_en) {
+ /* only when the mclk switch can not be natural, is the fw based vblank stretch attempted */
+ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching =
+ dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context);
+
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+ dummy_latency_index = dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(dc,
+ context, pipes, pipe_cnt, vlevel);
+
+ /* After calling dcn30_find_dummy_latency_index_for_fw_based_mclk_switch
+ * we reinstate the original dram_clock_change_latency_us on the context
+ * and all variables that may have changed up to this point, except the
+ * newly found dummy_latency_index
+ */
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+ dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
+ maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+ dcfclk_from_fw_based_mclk_switching = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] !=
+ dm_dram_clock_change_unsupported;
+ }
+ }
+
+ /* Set B:
+ * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present,
+ * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark
+ * calculations to cover bootup clocks.
+ * DCFCLK: soc.clock_limits[2] when available
+ * UCLK: soc.clock_limits[2] when available
+ */
+ if (dcn3_2_soc.num_states > 2) {
+ vlevel_temp = 2;
+ dcfclk = dcn3_2_soc.clock_limits[2].dcfclk_mhz;
+ } else
+ dcfclk = 615; //DCFCLK Vmin_lv
+
+ pipes[0].clks_cfg.voltage = vlevel_temp;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz;
+
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
+ context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
+ }
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ /* Set D:
+ * All clocks min.
+ * DCFCLK: Min, as reported by PM FW when available
+ * UCLK : Min, as reported by PM FW when available
+ * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr)
+ */
+
+ if (dcn3_2_soc.num_states > 2) {
+ vlevel_temp = 0;
+ dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz;
+ } else
+ dcfclk = 615; //DCFCLK Vmin_lv
+
+ pipes[0].clks_cfg.voltage = vlevel_temp;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz;
+
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
+ context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
+ }
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ /* Set C, for Dummy P-State:
+ * All clocks min.
+ * DCFCLK: Min, as reported by PM FW, when available
+ * UCLK : Min, as reported by PM FW, when available
+ * pstate latency as per UCLK state dummy pstate latency
+ */
+
+ // For Set A and Set C use values from validation
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_fw_based_mclk_switching;
+ }
+
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
+ min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+ min_dram_speed_mts_margin = 160;
+
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us;
+
+ if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] ==
+ dm_dram_clock_change_unsupported) {
+ int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1;
+
+ min_dram_speed_mts =
+ dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16;
+ }
+
+ if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+ /* find largest table entry that is lower than dram speed,
+ * but lower than DPM0 still uses DPM0
+ */
+ for (dummy_latency_index = 3; dummy_latency_index > 0; dummy_latency_index--)
+ if (min_dram_speed_mts + min_dram_speed_mts_margin >
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dram_speed_mts)
+ break;
+ }
+
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+
+ context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
+ }
+
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ /* On DCN32/321, PMFW will set PSTATE_CHANGE_TYPE = 1 (FCLK) for UCLK dummy p-state.
+ * In this case we must program FCLK WM Set C to use the UCLK dummy p-state WM
+ * value.
+ */
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) {
+ /* The only difference between A and C is p-state latency, if p-state is not supported
+ * with full p-state latency we want to calculate DLG based on dummy p-state latency,
+ * Set A p-state watermark set to 0 on DCN30, when p-state unsupported, for now keep as DCN30.
+ */
+ context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0;
+ } else {
+ /* Set A:
+ * All clocks min.
+ * DCFCLK: Min, as reported by PM FW, when available
+ * UCLK: Min, as reported by PM FW, when available
+ */
+ dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ }
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+ if (dc->config.forced_clocks) {
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
+ }
+ if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
+ if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
+
+ pipe_idx++;
+ }
+
+ context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
+
+ dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+
+ if (!pstate_en)
+ /* Restore full p-state latency */
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching)
+ dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context);
+}
+
+static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+ unsigned int *optimal_dcfclk,
+ unsigned int *optimal_fclk)
+{
+ double bw_from_dram, bw_from_dram1, bw_from_dram2;
+
+ bw_from_dram1 = uclk_mts * dcn3_2_soc.num_chans *
+ dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_dram_bw_use_normal_percent / 100);
+ bw_from_dram2 = uclk_mts * dcn3_2_soc.num_chans *
+ dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100);
+
+ bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
+
+ if (optimal_fclk)
+ *optimal_fclk = bw_from_dram /
+ (dcn3_2_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100));
+
+ if (optimal_dcfclk)
+ *optimal_dcfclk = bw_from_dram /
+ (dcn3_2_soc.return_bus_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100));
+}
+
+static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
+ unsigned int index)
+{
+ int i;
+
+ if (*num_entries == 0)
+ return;
+
+ for (i = index; i < *num_entries - 1; i++) {
+ table[i] = table[i + 1];
+ }
+ memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st));
+}
+
+void dcn32_patch_dpm_table(struct clk_bw_params *bw_params)
+{
+ int i;
+ unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0,
+ max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz)
+ max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+ if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz)
+ max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz)
+ max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+
+ /* Scan through clock values we currently have and if they are 0,
+ * then populate it with dcn3_2_soc.clock_limits[] value.
+ *
+ * Do it for DCFCLK, DISPCLK, DTBCLK and UCLK as any of those being
+ * 0, will cause it to skip building the clock table.
+ */
+ if (max_dcfclk_mhz == 0)
+ bw_params->clk_table.entries[0].dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz;
+ if (max_dispclk_mhz == 0)
+ bw_params->clk_table.entries[0].dispclk_mhz = dcn3_2_soc.clock_limits[0].dispclk_mhz;
+ if (max_dtbclk_mhz == 0)
+ bw_params->clk_table.entries[0].dtbclk_mhz = dcn3_2_soc.clock_limits[0].dtbclk_mhz;
+ if (max_uclk_mhz == 0)
+ bw_params->clk_table.entries[0].memclk_mhz = dcn3_2_soc.clock_limits[0].dram_speed_mts / 16;
+}
+
+static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
+ struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+ int i, j;
+ struct _vcs_dpi_voltage_scaling_st entry = {0};
+
+ unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0,
+ max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0;
+
+ unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299;
+
+ static const unsigned int num_dcfclk_stas = 5;
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564};
+
+ unsigned int num_uclk_dpms = 0;
+ unsigned int num_fclk_dpms = 0;
+ unsigned int num_dcfclk_dpms = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz)
+ max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+ if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz)
+ max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz)
+ max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+
+ if (bw_params->clk_table.entries[i].memclk_mhz > 0)
+ num_uclk_dpms++;
+ if (bw_params->clk_table.entries[i].fclk_mhz > 0)
+ num_fclk_dpms++;
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > 0)
+ num_dcfclk_dpms++;
+ }
+
+ if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz)
+ return -1;
+
+ if (max_dppclk_mhz == 0)
+ max_dppclk_mhz = max_dispclk_mhz;
+
+ if (max_fclk_mhz == 0)
+ max_fclk_mhz = max_dcfclk_mhz * dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / dcn3_2_soc.pct_ideal_fabric_bw_after_urgent;
+
+ if (max_phyclk_mhz == 0)
+ max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz;
+
+ *num_entries = 0;
+ entry.dispclk_mhz = max_dispclk_mhz;
+ entry.dscclk_mhz = max_dispclk_mhz / 3;
+ entry.dppclk_mhz = max_dppclk_mhz;
+ entry.dtbclk_mhz = max_dtbclk_mhz;
+ entry.phyclk_mhz = max_phyclk_mhz;
+ entry.phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz;
+ entry.phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz;
+
+ // Insert all the DCFCLK STAs
+ for (i = 0; i < num_dcfclk_stas; i++) {
+ entry.dcfclk_mhz = dcfclk_sta_targets[i];
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = 0;
+
+ DC_FP_START();
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ DC_FP_END();
+ }
+
+ // Insert the max DCFCLK
+ entry.dcfclk_mhz = max_dcfclk_mhz;
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = 0;
+
+ DC_FP_START();
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ DC_FP_END();
+
+ // Insert the UCLK DPMS
+ for (i = 0; i < num_uclk_dpms; i++) {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16;
+
+ DC_FP_START();
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ DC_FP_END();
+ }
+
+ // If FCLK is coarse grained, insert individual DPMs.
+ if (num_fclk_dpms > 2) {
+ for (i = 0; i < num_fclk_dpms; i++) {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+ entry.dram_speed_mts = 0;
+
+ DC_FP_START();
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ DC_FP_END();
+ }
+ }
+ // If FCLK fine grained, only insert max
+ else {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = max_fclk_mhz;
+ entry.dram_speed_mts = 0;
+
+ DC_FP_START();
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ DC_FP_END();
+ }
+
+ // At this point, the table contains all "points of interest" based on
+ // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock
+ // ratios (by derate, are exact).
+
+ // Remove states that require higher clocks than are supported
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].dcfclk_mhz > max_dcfclk_mhz ||
+ table[i].fabricclk_mhz > max_fclk_mhz ||
+ table[i].dram_speed_mts > max_uclk_mhz * 16)
+ remove_entry_from_table_at_index(table, num_entries, i);
+ }
+
+ // At this point, the table only contains supported points of interest
+ // it could be used as is, but some states may be redundant due to
+ // coarse grained nature of some clocks, so we want to round up to
+ // coarse grained DPMs and remove duplicates.
+
+ // Round up UCLKs
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ for (j = 0; j < num_uclk_dpms; j++) {
+ if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) {
+ table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ }
+ }
+ }
+
+ // If FCLK is coarse grained, round up to next DPMs
+ if (num_fclk_dpms > 2) {
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ for (j = 0; j < num_fclk_dpms; j++) {
+ if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) {
+ table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz;
+ break;
+ }
+ }
+ }
+ }
+ // Otherwise, round up to minimum.
+ else {
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].fabricclk_mhz < min_fclk_mhz) {
+ table[i].fabricclk_mhz = min_fclk_mhz;
+ break;
+ }
+ }
+ }
+
+ // Round DCFCLKs up to minimum
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].dcfclk_mhz < min_dcfclk_mhz) {
+ table[i].dcfclk_mhz = min_dcfclk_mhz;
+ break;
+ }
+ }
+
+ // Remove duplicate states, note duplicate states are always neighbouring since table is sorted.
+ i = 0;
+ while (i < *num_entries - 1) {
+ if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz &&
+ table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz &&
+ table[i].dram_speed_mts == table[i + 1].dram_speed_mts)
+ remove_entry_from_table_at_index(table, num_entries, i + 1);
+ else
+ i++;
+ }
+
+ // Fix up the state indicies
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ table[i].state = i;
+ }
+
+ return 0;
+}
+
+/*
+ * dcn32_update_bw_bounding_box
+ *
+ * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from
+ * spreadsheet with actual values as per dGPU SKU:
+ * - with passed few options from dc->config
+ * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might
+ * need to get it from PM FW)
+ * - with passed latency values (passed in ns units) in dc-> bb override for
+ * debugging purposes
+ * - with passed latencies from VBIOS (in 100_ns units) if available for
+ * certain dGPU SKU
+ * - with number of DRAM channels from VBIOS (which differ for certain dGPU SKU
+ * of the same ASIC)
+ * - clocks levels with passed clk_table entries from Clk Mgr as reported by PM
+ * FW for different clocks (which might differ for certain dGPU SKU of the
+ * same ASIC)
+ */
+void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ dc_assert_fp_enabled();
+
+ if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
+ /* Overrides from dc->config options */
+ dcn3_2_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk;
+
+ /* Override from passed dc->bb_overrides if available*/
+ if ((int)(dcn3_2_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
+ && dc->bb_overrides.sr_exit_time_ns) {
+ dcn3_2_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_2_soc.sr_enter_plus_exit_time_us * 1000)
+ != dc->bb_overrides.sr_enter_plus_exit_time_ns
+ && dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+ dcn3_2_soc.sr_enter_plus_exit_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_2_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns
+ && dc->bb_overrides.urgent_latency_ns) {
+ dcn3_2_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ dcn3_2_soc.urgent_latency_pixel_data_only_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_2_soc.dram_clock_change_latency_us * 1000)
+ != dc->bb_overrides.dram_clock_change_latency_ns
+ && dc->bb_overrides.dram_clock_change_latency_ns) {
+ dcn3_2_soc.dram_clock_change_latency_us =
+ dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_2_soc.fclk_change_latency_us * 1000)
+ != dc->bb_overrides.fclk_clock_change_latency_ns
+ && dc->bb_overrides.fclk_clock_change_latency_ns) {
+ dcn3_2_soc.fclk_change_latency_us =
+ dc->bb_overrides.fclk_clock_change_latency_ns / 1000;
+ }
+
+ if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000)
+ != dc->bb_overrides.dummy_clock_change_latency_ns
+ && dc->bb_overrides.dummy_clock_change_latency_ns) {
+ dcn3_2_soc.dummy_pstate_latency_us =
+ dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0;
+ }
+
+ /* Override from VBIOS if VBIOS bb_info available */
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info) {
+ struct bp_soc_bb_info bb_info = {0};
+
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
+ if (bb_info.dram_clock_change_latency_100ns > 0)
+ dcn3_2_soc.dram_clock_change_latency_us =
+ bb_info.dram_clock_change_latency_100ns * 10;
+
+ if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+ dcn3_2_soc.sr_enter_plus_exit_time_us =
+ bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+ if (bb_info.dram_sr_exit_latency_100ns > 0)
+ dcn3_2_soc.sr_exit_time_us =
+ bb_info.dram_sr_exit_latency_100ns * 10;
+ }
+ }
+
+ /* Override from VBIOS for num_chan */
+ if (dc->ctx->dc_bios->vram_info.num_chans)
+ dcn3_2_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
+
+ if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+ dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+ }
+
+ /* DML DSC delay factor workaround */
+ dcn3_2_ip.dsc_delay_factor_wa = dc->debug.dsc_delay_factor_wa_x1000 / 1000.0;
+
+ dcn3_2_ip.min_prefetch_in_strobe_us = dc->debug.min_prefetch_in_strobe_ns / 1000.0;
+
+ /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */
+ dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+ /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */
+ if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) {
+ if (dc->debug.use_legacy_soc_bb_mechanism) {
+ unsigned int i = 0, j = 0, num_states = 0;
+
+ unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
+ unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
+ unsigned int min_dcfclk = UINT_MAX;
+ /* Set 199 as first value in STA target array to have a minimum DCFCLK value.
+ * For DCN32 we set min to 199 so minimum FCLK DPM0 (300Mhz can be achieved) */
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564};
+ unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0;
+ unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].dcfclk_mhz != 0 &&
+ bw_params->clk_table.entries[i].dcfclk_mhz < min_dcfclk)
+ min_dcfclk = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ }
+ if (min_dcfclk > dcfclk_sta_targets[0])
+ dcfclk_sta_targets[0] = min_dcfclk;
+ if (!max_dcfclk_mhz)
+ max_dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz;
+ if (!max_dispclk_mhz)
+ max_dispclk_mhz = dcn3_2_soc.clock_limits[0].dispclk_mhz;
+ if (!max_dppclk_mhz)
+ max_dppclk_mhz = dcn3_2_soc.clock_limits[0].dppclk_mhz;
+ if (!max_phyclk_mhz)
+ max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz;
+
+ if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array
+ dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
+ num_dcfclk_sta_targets++;
+ } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
+ dcfclk_sta_targets[i] = max_dcfclk_mhz;
+ break;
+ }
+ }
+ // Update size of array since we "removed" duplicates
+ num_dcfclk_sta_targets = i + 1;
+ }
+
+ num_uclk_states = bw_params->clk_table.num_entries;
+
+ // Calculate optimal dcfclk for each uclk
+ for (i = 0; i < num_uclk_states; i++) {
+ dcn32_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
+ &optimal_dcfclk_for_uclk[i], NULL);
+ if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) {
+ optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
+ }
+ }
+
+ // Calculate optimal uclk for each dcfclk sta target
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ for (j = 0; j < num_uclk_states; j++) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+ optimal_uclk_for_dcfclk_sta_targets[i] =
+ bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ }
+ }
+ }
+
+ i = 0;
+ j = 0;
+ // create the final dcfclk and uclk table
+ while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ } else {
+ if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ } else {
+ j = num_uclk_states;
+ }
+ }
+ }
+
+ while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ }
+
+ while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
+ optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ }
+
+ dcn3_2_soc.num_states = num_states;
+ for (i = 0; i < dcn3_2_soc.num_states; i++) {
+ dcn3_2_soc.clock_limits[i].state = i;
+ dcn3_2_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+ dcn3_2_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+
+ /* Fill all states with max values of all these clocks */
+ dcn3_2_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
+ dcn3_2_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
+ dcn3_2_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
+ dcn3_2_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3;
+
+ /* Populate from bw_params for DTBCLK, SOCCLK */
+ if (i > 0) {
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz) {
+ dcn3_2_soc.clock_limits[i].dtbclk_mhz = dcn3_2_soc.clock_limits[i-1].dtbclk_mhz;
+ } else {
+ dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+ } else if (bw_params->clk_table.entries[i].dtbclk_mhz) {
+ dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+
+ if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+ dcn3_2_soc.clock_limits[i].socclk_mhz = dcn3_2_soc.clock_limits[i-1].socclk_mhz;
+ else
+ dcn3_2_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
+
+ if (!dram_speed_mts[i] && i > 0)
+ dcn3_2_soc.clock_limits[i].dram_speed_mts = dcn3_2_soc.clock_limits[i-1].dram_speed_mts;
+ else
+ dcn3_2_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+ /* These clocks cannot come from bw_params, always fill from dcn3_2_soc[0] */
+ /* PHYCLK_D18, PHYCLK_D32 */
+ dcn3_2_soc.clock_limits[i].phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz;
+ dcn3_2_soc.clock_limits[i].phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz;
+ }
+ } else {
+ build_synthetic_soc_states(bw_params, dcn3_2_soc.clock_limits, &dcn3_2_soc.num_states);
+ }
+
+ /* Re-init DML with updated bb */
+ dml_init_instance(&dc->dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32);
+ if (dc->current_state)
+ dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32);
+ }
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
new file mode 100644
index 000000000000..3a3dc2ce4c73
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN32_FPU_H__
+#define __DCN32_FPU_H__
+
+#include "clk_mgr_internal.h"
+
+void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr);
+
+void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
+uint8_t dcn32_predict_pipe_split(struct dc_state *context,
+ display_e2e_pipe_params_st *pipe_e2e);
+
+void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
+ unsigned int *num_entries,
+ struct _vcs_dpi_voltage_scaling_st *entry);
+
+void dcn32_set_phantom_stream_timing(struct dc *dc,
+ struct dc_state *context,
+ struct pipe_ctx *ref_pipe,
+ struct dc_stream_state *phantom_stream,
+ display_e2e_pipe_params_st *pipes,
+ unsigned int pipe_cnt,
+ unsigned int dc_pipe_idx);
+
+bool dcn32_internal_validate_bw(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *pipe_cnt_out,
+ int *vlevel_out,
+ bool fast_validate);
+
+void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+
+void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
+
+int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+
+void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
new file mode 100644
index 000000000000..244fd15d24b4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -0,0 +1,3686 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dc.h"
+#include "dc_link.h"
+#include "../display_mode_lib.h"
+#include "display_mode_vba_32.h"
+#include "../dml_inline_defs.h"
+#include "display_mode_vba_util_32.h"
+
+void dml32_recalculate(struct display_mode_lib *mode_lib);
+static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
+ struct display_mode_lib *mode_lib);
+void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib);
+
+void dml32_recalculate(struct display_mode_lib *mode_lib)
+{
+ ModeSupportAndSystemConfiguration(mode_lib);
+
+ dml32_CalculateMaxDETAndMinCompressedBufferSize(mode_lib->vba.ConfigReturnBufferSizeInKByte,
+ mode_lib->vba.ROBBufferSizeInKByte,
+ DC__NUM_DPP,
+ false, //mode_lib->vba.override_setting.nomDETInKByteOverrideEnable,
+ 0, //mode_lib->vba.override_setting.nomDETInKByteOverrideValue,
+
+ /* Output */
+ &mode_lib->vba.MaxTotalDETInKByte, &mode_lib->vba.nomDETInKByte,
+ &mode_lib->vba.MinCompressedBufferSizeInKByte);
+
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
+#endif
+ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
+}
+
+static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
+ struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ unsigned int j, k;
+ bool ImmediateFlipRequirementFinal;
+ int iteration;
+ double MaxTotalRDBandwidth;
+ unsigned int NextPrefetchMode;
+ double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
+ bool DestinationLineTimesForPrefetchLessThan2 = false;
+ bool VRatioPrefetchMoreThanMax = false;
+ double TWait;
+ double TotalWRBandwidth = 0;
+ double WRBandwidth = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- START ---\n", __func__);
+ dml_print("DML::%s: mode_lib->vba.PrefetchMode = %d\n", __func__, mode_lib->vba.PrefetchMode);
+ dml_print("DML::%s: mode_lib->vba.ImmediateFlipSupport = %d\n", __func__, mode_lib->vba.ImmediateFlipSupport);
+ dml_print("DML::%s: mode_lib->vba.VoltageLevel = %d\n", __func__, mode_lib->vba.VoltageLevel);
+#endif
+
+ v->WritebackDISPCLK = 0.0;
+ v->GlobalDPPCLK = 0.0;
+
+ // DISPCLK and DPPCLK Calculation
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.WritebackEnable[k]) {
+ v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
+ dml32_CalculateWriteBackDISPCLK(
+ mode_lib->vba.WritebackPixelFormat[k],
+ mode_lib->vba.PixelClock[k], mode_lib->vba.WritebackHRatio[k],
+ mode_lib->vba.WritebackVRatio[k],
+ mode_lib->vba.WritebackHTaps[k],
+ mode_lib->vba.WritebackVTaps[k],
+ mode_lib->vba.WritebackSourceWidth[k],
+ mode_lib->vba.WritebackDestinationWidth[k],
+ mode_lib->vba.HTotal[k], mode_lib->vba.WritebackLineBufferSize,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed));
+ }
+ }
+
+ v->DISPCLK_calculated = v->WritebackDISPCLK;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ v->DISPCLK_calculated = dml_max(v->DISPCLK_calculated,
+ dml32_CalculateRequiredDispclk(
+ mode_lib->vba.ODMCombineEnabled[k],
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKRampingMargin,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed,
+ mode_lib->vba.MaxDppclk[v->soc.num_states - 1]));
+ }
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(mode_lib->vba.HRatio[k],
+ mode_lib->vba.HRatioChroma[k],
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ mode_lib->vba.MaxDCHUBToPSCLThroughput,
+ mode_lib->vba.MaxPSCLToLBThroughput,
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.htaps[k],
+ mode_lib->vba.HTAPsChroma[k],
+ mode_lib->vba.vtaps[k],
+ mode_lib->vba.VTAPsChroma[k],
+
+ /* Output */
+ &v->PSCL_THROUGHPUT_LUMA[k], &v->PSCL_THROUGHPUT_CHROMA[k],
+ &v->DPPCLKUsingSingleDPP[k]);
+ }
+
+ dml32_CalculateDPPCLK(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed, v->DPPCLKUsingSingleDPP, mode_lib->vba.DPPPerPlane,
+ /* Output */
+ &v->GlobalDPPCLK, v->DPPCLK);
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ v->DPPCLK_calculated[k] = v->DPPCLK[k];
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateBytePerPixelAndBlockSizes(
+ mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.SurfaceTiling[k],
+
+ /* Output */
+ &v->BytePerPixelY[k],
+ &v->BytePerPixelC[k],
+ &v->BytePerPixelDETY[k],
+ &v->BytePerPixelDETC[k],
+ &v->BlockHeight256BytesY[k],
+ &v->BlockHeight256BytesC[k],
+ &v->BlockWidth256BytesY[k],
+ &v->BlockWidth256BytesC[k],
+ &v->BlockHeightY[k],
+ &v->BlockHeightC[k],
+ &v->BlockWidthY[k],
+ &v->BlockWidthC[k]);
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: %d\n", __func__, __LINE__);
+#endif
+ dml32_CalculateSwathWidth(
+ false, // ForceSingleDPP
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.SourcePixelFormat,
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ mode_lib->vba.ODMCombineEnabled,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BlockHeight256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesY,
+ v->BlockWidth256BytesC,
+ mode_lib->vba.BlendingAndTiming,
+ mode_lib->vba.HActive,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.DPPPerPlane,
+
+ /* Output */
+ v->SwathWidthSingleDPPY, v->SwathWidthSingleDPPC, v->SwathWidthY, v->SwathWidthC,
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_integer_array[0], // Integer MaximumSwathHeightY[]
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_integer_array[1], // Integer MaximumSwathHeightC[]
+ v->swath_width_luma_ub, v->swath_width_chroma_ub);
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->ReadBandwidthSurfaceLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k]
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
+ v->ReadBandwidthSurfaceChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k]
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
+ * mode_lib->vba.VRatioChroma[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n",
+ __func__, k, v->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n",
+ __func__, k, v->ReadBandwidthSurfaceChroma[k]);
+#endif
+ }
+
+ {
+ // VBA_DELTA
+ // Calculate DET size, swath height
+ dml32_CalculateSwathAndDETConfiguration(
+ mode_lib->vba.DETSizeOverride,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.ConfigReturnBufferSizeInKByte,
+ mode_lib->vba.MaxTotalDETInKByte,
+ mode_lib->vba.MinCompressedBufferSizeInKByte,
+ false, /* ForceSingleDPP */
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.nomDETInKByte,
+ mode_lib->vba.UseUnboundedRequesting,
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ mode_lib->vba.ip.pixel_chunk_size_kbytes,
+ mode_lib->vba.ip.rob_buffer_size_kbytes,
+ mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal,
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_output_encoder_array, /* output_encoder_class Output[] */
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_single_array[0], /* Single MaximumSwathWidthLuma[] */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_single_array[1], /* Single MaximumSwathWidthChroma[] */
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.SourcePixelFormat,
+ mode_lib->vba.SurfaceTiling,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ v->BlockHeight256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesY,
+ v->BlockWidth256BytesC,
+ mode_lib->vba.ODMCombineEnabled,
+ mode_lib->vba.BlendingAndTiming,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BytePerPixelDETY,
+ v->BytePerPixelDETC,
+ mode_lib->vba.HActive,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.DPPPerPlane,
+
+ /* Output */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_long_array[0], /* Long swath_width_luma_ub[] */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_long_array[1], /* Long swath_width_chroma_ub[] */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_double_array[0], /* Long SwathWidth[] */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_double_array[1], /* Long SwathWidthChroma[] */
+ mode_lib->vba.SwathHeightY,
+ mode_lib->vba.SwathHeightC,
+ mode_lib->vba.DETBufferSizeInKByte,
+ mode_lib->vba.DETBufferSizeY,
+ mode_lib->vba.DETBufferSizeC,
+ &v->UnboundedRequestEnabled,
+ &v->CompressedBufferSizeInkByte,
+ &v->CompBufReservedSpaceKBytes,
+ &v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_boolean, /* bool *CompBufReservedSpaceNeedAjustment */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_boolean_array, /* bool ViewportSizeSupportPerSurface[] */
+ &v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_boolean); /* bool *ViewportSizeSupport */
+ }
+
+ v->CompBufReservedSpaceZs = v->CompBufReservedSpaceKBytes * 1024.0 / 256.0;
+ v->CompBufReservedSpace64B = v->CompBufReservedSpaceKBytes * 1024.0 / 64.0;
+
+ // DCFCLK Deep Sleep
+ dml32_CalculateDCFCLKDeepSleep(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.PixelClock,
+ v->PSCL_THROUGHPUT_LUMA,
+ v->PSCL_THROUGHPUT_CHROMA,
+ mode_lib->vba.DPPCLK,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ mode_lib->vba.ReturnBusWidth,
+
+ /* Output */
+ &v->DCFCLKDeepSleep);
+
+ // DSCCLK
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) {
+ v->DSCCLK_calculated[k] = 0.0;
+ } else {
+ if (mode_lib->vba.OutputFormat[k] == dm_420)
+ mode_lib->vba.DSCFormatFactor = 2;
+ else if (mode_lib->vba.OutputFormat[k] == dm_444)
+ mode_lib->vba.DSCFormatFactor = 1;
+ else if (mode_lib->vba.OutputFormat[k] == dm_n422)
+ mode_lib->vba.DSCFormatFactor = 2;
+ else
+ mode_lib->vba.DSCFormatFactor = 1;
+ if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
+ v->DSCCLK_calculated[k] = mode_lib->vba.PixelClockBackEnd[k] / 12
+ / mode_lib->vba.DSCFormatFactor
+ / (1 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ else if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
+ v->DSCCLK_calculated[k] = mode_lib->vba.PixelClockBackEnd[k] / 6
+ / mode_lib->vba.DSCFormatFactor
+ / (1 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ else
+ v->DSCCLK_calculated[k] = mode_lib->vba.PixelClockBackEnd[k] / 3
+ / mode_lib->vba.DSCFormatFactor
+ / (1 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ }
+ }
+
+ // DSC Delay
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->DSCDelay[k] = dml32_DSCDelayRequirement(mode_lib->vba.DSCEnabled[k],
+ mode_lib->vba.ODMCombineEnabled[k], mode_lib->vba.DSCInputBitPerComponent[k],
+ mode_lib->vba.OutputBppPerState[mode_lib->vba.VoltageLevel][k],
+ mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k],
+ mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.Output[k], mode_lib->vba.PixelClock[k],
+ mode_lib->vba.PixelClockBackEnd[k], mode_lib->vba.ip.dsc_delay_factor_wa);
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) // NumberOfSurfaces
+ if (j != k && mode_lib->vba.BlendingAndTiming[k] == j && mode_lib->vba.DSCEnabled[j])
+ v->DSCDelay[k] = v->DSCDelay[j];
+
+ //Immediate Flip
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->ImmediateFlipSupportedSurface[k] = mode_lib->vba.ImmediateFlipSupport
+ && (mode_lib->vba.ImmediateFlipRequirement[k] != dm_immediate_flip_not_required);
+ }
+
+ // Prefetch
+ dml32_CalculateSurfaceSizeInMall(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.MALLAllocatedForDCNFinal,
+ mode_lib->vba.UseMALLForStaticScreen,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ v->BytePerPixelY,
+ mode_lib->vba.ViewportWidthChroma,
+ mode_lib->vba.ViewportHeightChroma,
+ v->BytePerPixelC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ v->BlockWidth256BytesY,
+ v->BlockWidth256BytesC,
+ v->BlockHeight256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidthY,
+ v->BlockWidthC,
+ v->BlockHeightY,
+ v->BlockHeightC,
+
+ /* Output */
+ v->SurfaceSizeInMALL,
+ &v->dummy_vars.
+ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_boolean2); /* Boolean *ExceededMALLSize */
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].PixelClock = mode_lib->vba.PixelClock[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DPPPerSurface = mode_lib->vba.DPPPerPlane[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SourceRotation = mode_lib->vba.SourceRotation[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportHeight = mode_lib->vba.ViewportHeight[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportHeightChroma = mode_lib->vba.ViewportHeightChroma[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidth256BytesY = v->BlockWidth256BytesY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeight256BytesY = v->BlockHeight256BytesY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidth256BytesC = v->BlockWidth256BytesC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeight256BytesC = v->BlockHeight256BytesC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidthY = v->BlockWidthY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeightY = v->BlockHeightY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidthC = v->BlockWidthC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeightC = v->BlockHeightC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].InterlaceEnable = mode_lib->vba.Interlace[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].HTotal = mode_lib->vba.HTotal[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DCCEnable = mode_lib->vba.DCCEnable[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SurfaceTiling = mode_lib->vba.SurfaceTiling[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BytePerPixelY = v->BytePerPixelY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BytePerPixelC = v->BytePerPixelC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VRatio = mode_lib->vba.VRatio[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VRatioChroma = mode_lib->vba.VRatioChroma[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VTaps = mode_lib->vba.vtaps[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VTapsChroma = mode_lib->vba.VTAPsChroma[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].PitchY = mode_lib->vba.PitchY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DCCMetaPitchY = mode_lib->vba.DCCMetaPitchY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].PitchC = mode_lib->vba.PitchC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DCCMetaPitchC = mode_lib->vba.DCCMetaPitchC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportStationary = mode_lib->vba.ViewportStationary[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportXStart = mode_lib->vba.ViewportXStartY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportYStart = mode_lib->vba.ViewportYStartY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportXStartC = mode_lib->vba.ViewportXStartC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportYStartC = mode_lib->vba.ViewportYStartC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->vba.ForceOneRowForFrame[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SwathHeightY = mode_lib->vba.SwathHeightY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SwathHeightC = mode_lib->vba.SwathHeightC[k];
+ }
+
+ {
+
+ dml32_CalculateVMRowAndSwath(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters,
+ v->SurfaceSizeInMALL,
+ mode_lib->vba.PTEBufferSizeInRequestsLuma,
+ mode_lib->vba.PTEBufferSizeInRequestsChroma,
+ mode_lib->vba.DCCMetaBufferSizeBytes,
+ mode_lib->vba.UseMALLForStaticScreen,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.MALLAllocatedForDCNFinal,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.GPUVMMinPageSizeKBytes,
+ mode_lib->vba.HostVMMinPageSize,
+
+ /* Output */
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean_array2[0], // Boolean PTEBufferSizeNotExceeded[]
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean_array2[1], // Boolean DCCMetaBufferSizeNotExceeded[]
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+ v->dpte_row_height,
+ v->dpte_row_height_chroma,
+ v->dpte_row_height_linear,
+ v->dpte_row_height_linear_chroma,
+ v->meta_req_width,
+ v->meta_req_width_chroma,
+ v->meta_req_height,
+ v->meta_req_height_chroma,
+ v->meta_row_width,
+ v->meta_row_width_chroma,
+ v->meta_row_height,
+ v->meta_row_height_chroma,
+ v->vm_group_bytes,
+ v->dpte_group_bytes,
+ v->PixelPTEReqWidthY,
+ v->PixelPTEReqHeightY,
+ v->PTERequestSizeY,
+ v->PixelPTEReqWidthC,
+ v->PixelPTEReqHeightC,
+ v->PTERequestSizeC,
+ v->dpde0_bytes_per_frame_ub_l,
+ v->meta_pte_bytes_per_frame_ub_l,
+ v->dpde0_bytes_per_frame_ub_c,
+ v->meta_pte_bytes_per_frame_ub_c,
+ v->PrefetchSourceLinesY,
+ v->PrefetchSourceLinesC,
+ v->VInitPreFillY, v->VInitPreFillC,
+ v->MaxNumSwathY,
+ v->MaxNumSwathC,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->PixelPTEBytesPerRow,
+ v->PDEAndMetaPTEBytesFrame,
+ v->MetaRowByte,
+ v->Use_One_Row_For_Frame,
+ v->Use_One_Row_For_Frame_Flip,
+ v->UsesMALLForStaticScreen,
+ v->PTE_BUFFER_MODE,
+ v->BIGK_FRAGMENT_SIZE);
+ }
+
+
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.ReorderBytes = mode_lib->vba.NumberOfChannels
+ * dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
+ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
+ mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
+
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.VMDataOnlyReturnBW = dml32_get_return_bw_mbps_vm_only(
+ &mode_lib->vba.soc,
+ mode_lib->vba.VoltageLevel,
+ mode_lib->vba.DCFCLK,
+ mode_lib->vba.FabricClock,
+ mode_lib->vba.DRAMSpeed);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: mode_lib->vba.ReturnBusWidth = %f\n", __func__, mode_lib->vba.ReturnBusWidth);
+ dml_print("DML::%s: mode_lib->vba.DCFCLK = %f\n", __func__, mode_lib->vba.DCFCLK);
+ dml_print("DML::%s: mode_lib->vba.FabricClock = %f\n", __func__, mode_lib->vba.FabricClock);
+ dml_print("DML::%s: mode_lib->vba.FabricDatapathToDCNDataReturn = %f\n", __func__,
+ mode_lib->vba.FabricDatapathToDCNDataReturn);
+ dml_print("DML::%s: mode_lib->vba.PercentOfIdealSDPPortBWReceivedAfterUrgLatency = %f\n",
+ __func__, mode_lib->vba.PercentOfIdealSDPPortBWReceivedAfterUrgLatency);
+ dml_print("DML::%s: mode_lib->vba.DRAMSpeed = %f\n", __func__, mode_lib->vba.DRAMSpeed);
+ dml_print("DML::%s: mode_lib->vba.NumberOfChannels = %f\n", __func__, mode_lib->vba.NumberOfChannels);
+ dml_print("DML::%s: mode_lib->vba.DRAMChannelWidth = %f\n", __func__, mode_lib->vba.DRAMChannelWidth);
+ dml_print("DML::%s: mode_lib->vba.PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n",
+ __func__, mode_lib->vba.PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
+ dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->vba.ReturnBW);
+#endif
+
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor = 1.0;
+
+ if (mode_lib->vba.GPUVMEnable && mode_lib->vba.HostVMEnable)
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .HostVMInefficiencyFactor =
+ mode_lib->vba.ReturnBW / v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .VMDataOnlyReturnBW;
+
+ mode_lib->vba.TotalDCCActiveDPP = 0;
+ mode_lib->vba.TotalActiveDPP = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + mode_lib->vba.DPPPerPlane[k];
+ if (mode_lib->vba.DCCEnable[k])
+ mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP
+ + mode_lib->vba.DPPPerPlane[k];
+ }
+
+ v->UrgentExtraLatency = dml32_CalculateExtraLatency(
+ mode_lib->vba.RoundTripPingLatencyCycles,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.ReorderBytes,
+ mode_lib->vba.DCFCLK,
+ mode_lib->vba.TotalActiveDPP,
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.TotalDCCActiveDPP,
+ mode_lib->vba.MetaChunkSize,
+ mode_lib->vba.ReturnBW,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.DPPPerPlane,
+ v->dpte_group_bytes,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
+ mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels);
+
+ mode_lib->vba.TCalc = 24.0 / v->DCFCLKDeepSleep;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ if (mode_lib->vba.WritebackEnable[k] == true) {
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][k] = mode_lib->vba.WritebackLatency
+ + dml32_CalculateWriteBackDelay(
+ mode_lib->vba.WritebackPixelFormat[k],
+ mode_lib->vba.WritebackHRatio[k],
+ mode_lib->vba.WritebackVRatio[k],
+ mode_lib->vba.WritebackVTaps[k],
+ mode_lib->vba.WritebackDestinationWidth[k],
+ mode_lib->vba.WritebackDestinationHeight[k],
+ mode_lib->vba.WritebackSourceHeight[k],
+ mode_lib->vba.HTotal[k]) / mode_lib->vba.DISPCLK;
+ } else
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0;
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) {
+ if (mode_lib->vba.BlendingAndTiming[j] == k &&
+ mode_lib->vba.WritebackEnable[j] == true) {
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
+ dml_max(v->WritebackDelay[mode_lib->vba.VoltageLevel][k],
+ mode_lib->vba.WritebackLatency +
+ dml32_CalculateWriteBackDelay(
+ mode_lib->vba.WritebackPixelFormat[j],
+ mode_lib->vba.WritebackHRatio[j],
+ mode_lib->vba.WritebackVRatio[j],
+ mode_lib->vba.WritebackVTaps[j],
+ mode_lib->vba.WritebackDestinationWidth[j],
+ mode_lib->vba.WritebackDestinationHeight[j],
+ mode_lib->vba.WritebackSourceHeight[j],
+ mode_lib->vba.HTotal[k]) / mode_lib->vba.DISPCLK);
+ }
+ }
+ }
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j)
+ if (mode_lib->vba.BlendingAndTiming[k] == j)
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][j];
+
+ v->UrgentLatency = dml32_CalculateUrgentLatency(mode_lib->vba.UrgentLatencyPixelDataOnly,
+ mode_lib->vba.UrgentLatencyPixelMixedWithVMData,
+ mode_lib->vba.UrgentLatencyVMDataOnly,
+ mode_lib->vba.DoUrgentLatencyAdjustment,
+ mode_lib->vba.UrgentLatencyAdjustmentFabricClockComponent,
+ mode_lib->vba.UrgentLatencyAdjustmentFabricClockReference,
+ mode_lib->vba.FabricClock);
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateUrgentBurstFactor(mode_lib->vba.UsesMALLForPStateChange[k],
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ mode_lib->vba.SwathHeightY[k],
+ mode_lib->vba.SwathHeightC[k],
+ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ v->UrgentLatency,
+ mode_lib->vba.CursorBufferSize,
+ mode_lib->vba.CursorWidth[k][0],
+ mode_lib->vba.CursorBPP[k][0],
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ mode_lib->vba.DETBufferSizeY[k],
+ mode_lib->vba.DETBufferSizeC[k],
+
+ /* output */
+ &v->UrgBurstFactorCursor[k],
+ &v->UrgBurstFactorLuma[k],
+ &v->UrgBurstFactorChroma[k],
+ &v->NoUrgentLatencyHiding[k]);
+
+ v->cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / 8 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->MaxVStartupLines[k] = ((mode_lib->vba.Interlace[k] &&
+ !mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ?
+ dml_floor((mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]) / 2.0, 1.0) :
+ mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]) - dml_max(1.0,
+ dml_ceil((double) v->WritebackDelay[mode_lib->vba.VoltageLevel][k]
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1));
+
+ // Clamp to max OTG vstartup register limit
+ if (v->MaxVStartupLines[k] > 1023)
+ v->MaxVStartupLines[k] = 1023;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, mode_lib->vba.VoltageLevel);
+ dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__,
+ k, v->WritebackDelay[mode_lib->vba.VoltageLevel][k]);
+#endif
+ }
+
+ v->MaximumMaxVStartupLines = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
+
+ ImmediateFlipRequirementFinal = false;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ ImmediateFlipRequirementFinal = ImmediateFlipRequirementFinal
+ || (mode_lib->vba.ImmediateFlipRequirement[k] == dm_immediate_flip_required);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ImmediateFlipRequirementFinal = %d\n", __func__, ImmediateFlipRequirementFinal);
+#endif
+ // ModeProgramming will not repeat the schedule calculation using different prefetch mode,
+ //it is just calcualated once with given prefetch mode
+ dml32_CalculateMinAndMaxPrefetchMode(
+ mode_lib->vba.AllowForPStateChangeOrStutterInVBlankFinal,
+ &mode_lib->vba.MinPrefetchMode,
+ &mode_lib->vba.MaxPrefetchMode);
+
+ v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
+
+ iteration = 0;
+ MaxTotalRDBandwidth = 0;
+ NextPrefetchMode = mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb];
+
+ do {
+ MaxTotalRDBandwidth = 0;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, mode_lib->vba.VStartupLines);
+#endif
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ /* NOTE PerfetchMode variable is invalid in DAL as per the input received.
+ * Hence the direction is to use PrefetchModePerState.
+ */
+ TWait = dml32_CalculateTWait(
+ mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
+ mode_lib->vba.UsesMALLForPStateChange[k],
+ mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->vba.DRRDisplay[k],
+ mode_lib->vba.DRAMClockChangeLatency,
+ mode_lib->vba.FCLKChangeLatency, v->UrgentLatency,
+ mode_lib->vba.SREnterPlusExitTime);
+
+ memset(&v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, 0, sizeof(DmlPipe));
+
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dppclk = mode_lib->vba.DPPCLK[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dispclk = mode_lib->vba.DISPCLK;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.PixelClock = mode_lib->vba.PixelClock[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HTotal = mode_lib->vba.HTotal[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HActive = mode_lib->vba.HActive[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
+ v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(
+ v,
+ k,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe,
+ v->DSCDelay[k],
+ (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
+ dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
+ v->MaxVStartupLines[k],
+ v->UrgentLatency,
+ v->UrgentExtraLatency,
+ v->TCalc,
+ v->PDEAndMetaPTEBytesFrame[k],
+ v->MetaRowByte[k],
+ v->PixelPTEBytesPerRow[k],
+ v->PrefetchSourceLinesY[k],
+ v->SwathWidthY[k],
+ v->VInitPreFillY[k],
+ v->MaxNumSwathY[k],
+ v->PrefetchSourceLinesC[k],
+ v->SwathWidthC[k],
+ v->VInitPreFillC[k],
+ v->MaxNumSwathC[k],
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ v->SwathHeightY[k],
+ v->SwathHeightC[k],
+ TWait,
+ v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ ?
+ mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
+ /* Output */
+ &v->DSTXAfterScaler[k],
+ &v->DSTYAfterScaler[k],
+ &v->DestinationLinesForPrefetch[k],
+ &v->PrefetchBandwidth[k],
+ &v->DestinationLinesToRequestVMInVBlank[k],
+ &v->DestinationLinesToRequestRowInVBlank[k],
+ &v->VRatioPrefetchY[k],
+ &v->VRatioPrefetchC[k],
+ &v->RequiredPrefetchPixDataBWLuma[k],
+ &v->RequiredPrefetchPixDataBWChroma[k],
+ &v->NotEnoughTimeForDynamicMetadata[k],
+ &v->Tno_bw[k], &v->prefetch_vmrow_bw[k],
+ &v->Tdmdl_vm[k],
+ &v->Tdmdl[k],
+ &v->TSetup[k],
+ &v->VUpdateOffsetPix[k],
+ &v->VUpdateWidthPix[k],
+ &v->VReadyOffsetPix[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d Prefetch calculation errResult=%0d\n",
+ __func__, k, mode_lib->vba.ErrorResult[k]);
+#endif
+ v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateUrgentBurstFactor(mode_lib->vba.UsesMALLForPStateChange[k],
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ mode_lib->vba.SwathHeightY[k],
+ mode_lib->vba.SwathHeightC[k],
+ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ v->UrgentLatency,
+ mode_lib->vba.CursorBufferSize,
+ mode_lib->vba.CursorWidth[k][0],
+ mode_lib->vba.CursorBPP[k][0],
+ v->VRatioPrefetchY[k],
+ v->VRatioPrefetchC[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ mode_lib->vba.DETBufferSizeY[k],
+ mode_lib->vba.DETBufferSizeC[k],
+ /* Output */
+ &v->UrgBurstFactorCursorPre[k],
+ &v->UrgBurstFactorLumaPre[k],
+ &v->UrgBurstFactorChromaPre[k],
+ &v->NoUrgentLatencyHidingPre[k]);
+
+ v->cursor_bw_pre[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] /
+ 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * v->VRatioPrefetchY[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DPPPerSurface=%d\n", __func__, k, mode_lib->vba.DPPPerPlane[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k,
+ v->UrgBurstFactorLumaPre[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k,
+ v->UrgBurstFactorChromaPre[k]);
+
+ dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
+ dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, mode_lib->vba.VRatio[k]);
+
+ dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthSurfaceLuma=%f\n", __func__, k,
+ v->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthSurfaceChroma=%f\n", __func__, k,
+ v->ReadBandwidthSurfaceChroma[k]);
+ dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
+ dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
+ dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
+ dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k,
+ v->RequiredPrefetchPixDataBWLuma[k]);
+ dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k,
+ v->RequiredPrefetchPixDataBWChroma[k]);
+ dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
+ dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k,
+ MaxTotalRDBandwidthNoUrgentBurst);
+#endif
+ if (v->DestinationLinesForPrefetch[k] < 2)
+ DestinationLineTimesForPrefetchLessThan2 = true;
+
+ if (v->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE__
+ || v->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE__)
+ VRatioPrefetchMoreThanMax = true;
+
+ //bool DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = false;
+ //bool DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = false;
+ //if (v->DestinationLinesToRequestVMInVBlank[k] >= 32) {
+ // DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = true;
+ //}
+
+ //if (v->DestinationLinesToRequestRowInVBlank[k] >= 16) {
+ // DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = true;
+ //}
+ }
+
+ v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n",
+ __func__, MaxTotalRDBandwidthNoUrgentBurst);
+ dml_print("DML::%s: ReturnBW=%f\n", __func__, mode_lib->vba.ReturnBW);
+ dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n",
+ __func__, mode_lib->vba.FractionOfUrgentBandwidth);
+#endif
+
+ {
+ dml32_CalculatePrefetchBandwithSupport(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ v->NoUrgentLatencyHidingPre,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->cursor_bw_pre,
+ v->prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ v->UrgBurstFactorLuma,
+ v->UrgBurstFactorChroma,
+ v->UrgBurstFactorCursor,
+ v->UrgBurstFactorLumaPre,
+ v->UrgBurstFactorChromaPre,
+ v->UrgBurstFactorCursorPre,
+
+ /* output */
+ &MaxTotalRDBandwidth,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0],
+ &v->PrefetchModeSupported);
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector[k] = 1.0;
+
+ {
+ dml32_CalculatePrefetchBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ v->NoUrgentLatencyHidingPre,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->cursor_bw_pre,
+ v->prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+
+ /* output */
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0],
+ &v->FractionOfUrgentBandwidth,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean);
+ }
+
+ if (VRatioPrefetchMoreThanMax != false || DestinationLineTimesForPrefetchLessThan2 != false) {
+ v->PrefetchModeSupported = false;
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k]) {
+ v->PrefetchModeSupported = false;
+ }
+ }
+
+ if (v->PrefetchModeSupported == true && mode_lib->vba.ImmediateFlipSupport == true) {
+ mode_lib->vba.BandwidthAvailableForImmediateFlip = dml32_CalculateBandwidthAvailableForImmediateFlip(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->cursor_bw_pre,
+ mode_lib->vba.DPPPerPlane,
+ v->UrgBurstFactorLuma,
+ v->UrgBurstFactorChroma,
+ v->UrgBurstFactorCursor,
+ v->UrgBurstFactorLumaPre,
+ v->UrgBurstFactorChromaPre,
+ v->UrgBurstFactorCursorPre);
+
+ mode_lib->vba.TotImmediateFlipBytes = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
+ mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes
+ + mode_lib->vba.DPPPerPlane[k]
+ * (v->PDEAndMetaPTEBytesFrame[k]
+ + v->MetaRowByte[k]);
+ if (v->use_one_row_for_frame_flip[k][0][0]) {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes
+ + 2 * v->PixelPTEBytesPerRow[k];
+ } else {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes
+ + v->PixelPTEBytesPerRow[k];
+ }
+ }
+ }
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateFlipSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
+ v->UrgentExtraLatency,
+ v->UrgentLatency,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMMinPageSize,
+ v->PDEAndMetaPTEBytesFrame[k],
+ v->MetaRowByte[k],
+ v->PixelPTEBytesPerRow[k],
+ mode_lib->vba.BandwidthAvailableForImmediateFlip,
+ mode_lib->vba.TotImmediateFlipBytes,
+ mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ v->Tno_bw[k],
+ mode_lib->vba.DCCEnable[k],
+ v->dpte_row_height[k],
+ v->meta_row_height[k],
+ v->dpte_row_height_chroma[k],
+ v->meta_row_height_chroma[k],
+ v->Use_One_Row_For_Frame_Flip[k],
+
+ /* Output */
+ &v->DestinationLinesToRequestVMInImmediateFlip[k],
+ &v->DestinationLinesToRequestRowInImmediateFlip[k],
+ &v->final_flip_bw[k],
+ &v->ImmediateFlipSupportedForPipe[k]);
+ }
+
+ {
+ dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ mode_lib->vba.ImmediateFlipRequirement,
+ v->final_flip_bw,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->cursor_bw_pre,
+ v->prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ v->UrgBurstFactorLuma,
+ v->UrgBurstFactorChroma,
+ v->UrgBurstFactorCursor,
+ v->UrgBurstFactorLumaPre,
+ v->UrgBurstFactorChromaPre,
+ v->UrgBurstFactorCursorPre,
+
+ /* output */
+ &v->total_dcn_read_bw_with_flip, // Single *TotalBandwidth
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], // Single *FractionOfUrgentBandwidth
+ &v->ImmediateFlipSupported); // Boolean *ImmediateFlipBandwidthSupport
+
+ dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ mode_lib->vba.ImmediateFlipRequirement,
+ v->final_flip_bw,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->cursor_bw_pre,
+ v->prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+
+ /* output */
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], // Single *TotalBandwidth
+ &v->FractionOfUrgentBandwidthImmediateFlip, // Single *FractionOfUrgentBandwidth
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); // Boolean *ImmediateFlipBandwidthSupport
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.ImmediateFlipRequirement[k] != dm_immediate_flip_not_required && v->ImmediateFlipSupportedForPipe[k] == false) {
+ v->ImmediateFlipSupported = false;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
+#endif
+ }
+ }
+ } else {
+ v->ImmediateFlipSupported = false;
+ }
+
+ /* consider flip support is okay if the flip bw is ok or (when user does't require a iflip and there is no host vm) */
+ v->PrefetchAndImmediateFlipSupported = (v->PrefetchModeSupported == true &&
+ ((!mode_lib->vba.ImmediateFlipSupport && !mode_lib->vba.HostVMEnable && !ImmediateFlipRequirementFinal) ||
+ v->ImmediateFlipSupported)) ? true : false;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PrefetchModeSupported = %d\n", __func__, locals->PrefetchModeSupported);
+ for (uint k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ dml_print("DML::%s: ImmediateFlipRequirement[%d] = %d\n", __func__, k, mode_lib->vba.ImmediateFlipRequirement[k] == dm_immediate_flip_required);
+ dml_print("DML::%s: ImmediateFlipSupported = %d\n", __func__, locals->ImmediateFlipSupported);
+ dml_print("DML::%s: ImmediateFlipSupport = %d\n", __func__, mode_lib->vba.ImmediateFlipSupport);
+ dml_print("DML::%s: HostVMEnable = %d\n", __func__, mode_lib->vba.HostVMEnable);
+ dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %d\n", __func__, locals->PrefetchAndImmediateFlipSupported);
+ dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup=%d\n", __func__, locals->VStartupLines, locals->MaximumMaxVStartupLines);
+#endif
+
+ v->VStartupLines = v->VStartupLines + 1;
+
+ if (v->VStartupLines > v->MaximumMaxVStartupLines) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Vstartup exceeds max vstartup, exiting loop\n", __func__);
+#endif
+ break; // VBA_DELTA: Implementation divergence! Gabe is *still* iterating across prefetch modes which we don't care to do
+ }
+ iteration++;
+ if (iteration > 2500) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: too many errors, exit now\n", __func__);
+ assert(0);
+#endif
+ }
+ } while (!(v->PrefetchAndImmediateFlipSupported || NextPrefetchMode > mode_lib->vba.MaxPrefetchMode));
+
+
+ if (v->VStartupLines <= v->MaximumMaxVStartupLines) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Good, Prefetch and flip scheduling found solution at VStartupLines=%d\n", __func__, locals->VStartupLines-1);
+#endif
+ }
+
+
+ //Watermarks and NB P-State/DRAM Clock Change Support
+ {
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.UrgentLatency = v->UrgentLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.ExtraLatency = v->UrgentExtraLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency;
+
+ dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ v,
+ v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb],
+ v->DCFCLK,
+ v->ReturnBW,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters,
+ v->SOCCLK,
+ v->DCFCLKDeepSleep,
+ v->DETBufferSizeY,
+ v->DETBufferSizeC,
+ v->SwathHeightY,
+ v->SwathHeightC,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ v->DPPPerPlane,
+ v->BytePerPixelDETY,
+ v->BytePerPixelDETC,
+ v->DSTXAfterScaler,
+ v->DSTYAfterScaler,
+ v->UnboundedRequestEnabled,
+ v->CompressedBufferSizeInkByte,
+
+ /* Output */
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_dramchange_support,
+ v->MaxActiveDRAMClockChangeLatencySupported,
+ v->SubViewportLinesNeededInMALL,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_fclkchange_support,
+ &v->MinActiveFCLKChangeLatencySupported,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_USRRetrainingSupport,
+ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin);
+
+ /* DCN32 has a new struct Watermarks (typedef) which is used to store
+ * calculated WM values. Copy over values from struct to vba varaibles
+ * to ensure that the DCN32 getters return the correct value.
+ */
+ v->UrgentWatermark = v->Watermark.UrgentWatermark;
+ v->WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark;
+ v->DRAMClockChangeWatermark = v->Watermark.DRAMClockChangeWatermark;
+ v->WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark;
+ v->StutterExitWatermark = v->Watermark.StutterExitWatermark;
+ v->StutterEnterPlusExitWatermark = v->Watermark.StutterEnterPlusExitWatermark;
+ v->Z8StutterExitWatermark = v->Watermark.Z8StutterExitWatermark;
+ v->Z8StutterEnterPlusExitWatermark = v->Watermark.Z8StutterEnterPlusExitWatermark;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.WritebackEnable[k] == true) {
+ v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
+ v->VStartup[k] * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]
+ - v->Watermark.WritebackDRAMClockChangeWatermark);
+ v->WritebackAllowFCLKChangeEndPosition[k] = dml_max(0,
+ v->VStartup[k] * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]
+ - v->Watermark.WritebackFCLKChangeWatermark);
+ } else {
+ v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
+ v->WritebackAllowFCLKChangeEndPosition[k] = 0;
+ }
+ }
+ }
+
+ //Display Pipeline Delivery Time in Prefetch, Groups
+ dml32_CalculatePixelDeliveryTimes(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ v->VRatioPrefetchY,
+ v->VRatioPrefetchC,
+ v->swath_width_luma_ub,
+ v->swath_width_chroma_ub,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.PixelClock,
+ v->PSCL_THROUGHPUT_LUMA,
+ v->PSCL_THROUGHPUT_CHROMA,
+ mode_lib->vba.DPPCLK,
+ v->BytePerPixelC,
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.NumberOfCursors,
+ mode_lib->vba.CursorWidth,
+ mode_lib->vba.CursorBPP,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesC,
+ v->BlockHeight256BytesC,
+
+ /* Output */
+ v->DisplayPipeLineDeliveryTimeLuma,
+ v->DisplayPipeLineDeliveryTimeChroma,
+ v->DisplayPipeLineDeliveryTimeLumaPrefetch,
+ v->DisplayPipeLineDeliveryTimeChromaPrefetch,
+ v->DisplayPipeRequestDeliveryTimeLuma,
+ v->DisplayPipeRequestDeliveryTimeChroma,
+ v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
+ v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
+ v->CursorRequestDeliveryTime,
+ v->CursorRequestDeliveryTimePrefetch);
+
+ dml32_CalculateMetaAndPTETimes(v->Use_One_Row_For_Frame,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.MetaChunkSize,
+ mode_lib->vba.MinMetaChunkSizeBytes,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ v->DestinationLinesToRequestRowInVBlank,
+ v->DestinationLinesToRequestRowInImmediateFlip,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.PixelClock,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ mode_lib->vba.SourceRotation,
+ v->dpte_row_height,
+ v->dpte_row_height_chroma,
+ v->meta_row_width,
+ v->meta_row_width_chroma,
+ v->meta_row_height,
+ v->meta_row_height_chroma,
+ v->meta_req_width,
+ v->meta_req_width_chroma,
+ v->meta_req_height,
+ v->meta_req_height_chroma,
+ v->dpte_group_bytes,
+ v->PTERequestSizeY,
+ v->PTERequestSizeC,
+ v->PixelPTEReqWidthY,
+ v->PixelPTEReqHeightY,
+ v->PixelPTEReqWidthC,
+ v->PixelPTEReqHeightC,
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+
+ /* Output */
+ v->DST_Y_PER_PTE_ROW_NOM_L,
+ v->DST_Y_PER_PTE_ROW_NOM_C,
+ v->DST_Y_PER_META_ROW_NOM_L,
+ v->DST_Y_PER_META_ROW_NOM_C,
+ v->TimePerMetaChunkNominal,
+ v->TimePerChromaMetaChunkNominal,
+ v->TimePerMetaChunkVBlank,
+ v->TimePerChromaMetaChunkVBlank,
+ v->TimePerMetaChunkFlip,
+ v->TimePerChromaMetaChunkFlip,
+ v->time_per_pte_group_nom_luma,
+ v->time_per_pte_group_vblank_luma,
+ v->time_per_pte_group_flip_luma,
+ v->time_per_pte_group_nom_chroma,
+ v->time_per_pte_group_vblank_chroma,
+ v->time_per_pte_group_flip_chroma);
+
+ dml32_CalculateVMGroupAndRequestTimes(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.HTotal,
+ v->BytePerPixelC,
+ v->DestinationLinesToRequestVMInVBlank,
+ v->DestinationLinesToRequestVMInImmediateFlip,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.PixelClock,
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+ v->vm_group_bytes,
+ v->dpde0_bytes_per_frame_ub_l,
+ v->dpde0_bytes_per_frame_ub_c,
+ v->meta_pte_bytes_per_frame_ub_l,
+ v->meta_pte_bytes_per_frame_ub_c,
+
+ /* Output */
+ v->TimePerVMGroupVBlank,
+ v->TimePerVMGroupFlip,
+ v->TimePerVMRequestVBlank,
+ v->TimePerVMRequestFlip);
+
+ // Min TTUVBlank
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
+ v->MinTTUVBlank[k] = dml_max4(v->Watermark.DRAMClockChangeWatermark,
+ v->Watermark.FCLKChangeWatermark, v->Watermark.StutterEnterPlusExitWatermark,
+ v->Watermark.UrgentWatermark);
+ } else if (mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]
+ == 1) {
+ v->MinTTUVBlank[k] = dml_max3(v->Watermark.FCLKChangeWatermark,
+ v->Watermark.StutterEnterPlusExitWatermark, v->Watermark.UrgentWatermark);
+ } else if (mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]
+ == 2) {
+ v->MinTTUVBlank[k] = dml_max(v->Watermark.StutterEnterPlusExitWatermark,
+ v->Watermark.UrgentWatermark);
+ } else {
+ v->MinTTUVBlank[k] = v->Watermark.UrgentWatermark;
+ }
+ if (!mode_lib->vba.DynamicMetadataEnable[k])
+ v->MinTTUVBlank[k] = mode_lib->vba.TCalc + v->MinTTUVBlank[k];
+ }
+
+ // DCC Configuration
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calculate DCC configuration for surface k=%d\n", __func__, k);
+#endif
+ dml32_CalculateDCCConfiguration(
+ mode_lib->vba.DCCEnable[k],
+ mode_lib->vba.DCCProgrammingAssumesScanDirectionUnknownFinal,
+ mode_lib->vba.SourcePixelFormat[k], mode_lib->vba.SurfaceWidthY[k],
+ mode_lib->vba.SurfaceWidthC[k],
+ mode_lib->vba.SurfaceHeightY[k],
+ mode_lib->vba.SurfaceHeightC[k],
+ mode_lib->vba.nomDETInKByte,
+ v->BlockHeight256BytesY[k],
+ v->BlockHeight256BytesC[k],
+ mode_lib->vba.SurfaceTiling[k],
+ v->BytePerPixelY[k],
+ v->BytePerPixelC[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ (enum dm_rotation_angle) mode_lib->vba.SourceScan[k],
+ /* Output */
+ &v->DCCYMaxUncompressedBlock[k],
+ &v->DCCCMaxUncompressedBlock[k],
+ &v->DCCYMaxCompressedBlock[k],
+ &v->DCCCMaxCompressedBlock[k],
+ &v->DCCYIndependentBlock[k],
+ &v->DCCCIndependentBlock[k]);
+ }
+
+ // VStartup Adjustment
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ bool isInterlaceTiming;
+ double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * mode_lib->vba.HTotal[k]
+ / mode_lib->vba.PixelClock[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k,
+ v->MinTTUVBlank[k]);
+#endif
+
+ v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
+ dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
+#endif
+
+ v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
+ if (mode_lib->vba.DynamicMetadataEnable[k] && mode_lib->vba.DynamicMetadataVMEnabled)
+ v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
+
+ isInterlaceTiming = (mode_lib->vba.Interlace[k] &&
+ !mode_lib->vba.ProgressiveToInterlaceUnitInOPP);
+
+ v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((mode_lib->vba.VTotal[k] -
+ mode_lib->vba.VFrontPorch[k]) / 2.0, 1.0) :
+ mode_lib->vba.VTotal[k]) - mode_lib->vba.VFrontPorch[k])
+ + dml_max(1.0,
+ dml_ceil(v->WritebackDelay[mode_lib->vba.VoltageLevel][k]
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0))
+ + dml_floor(4.0 * v->TSetup[k] / (mode_lib->vba.HTotal[k]
+ / mode_lib->vba.PixelClock[k]), 1.0) / 4.0;
+
+ v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
+
+ if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k])
+ / mode_lib->vba.HTotal[k]) <= (isInterlaceTiming ? dml_floor((mode_lib->vba.VTotal[k]
+ - mode_lib->vba.VActive[k] - mode_lib->vba.VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
+ (int) (mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
+ - mode_lib->vba.VFrontPorch[k] - v->VStartup[k]))) {
+ v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
+ } else {
+ v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
+ dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
+ dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
+ dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, mode_lib->vba.HTotal[k]);
+ dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, mode_lib->vba.VTotal[k]);
+ dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, mode_lib->vba.VActive[k]);
+ dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, mode_lib->vba.VFrontPorch[k]);
+ dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, TSetup = %f\n", __func__, k, v->TSetup[k]);
+ dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
+ dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k,
+ v->VREADY_AT_OR_AFTER_VSYNC[k]);
+#endif
+ }
+
+ {
+ //Maximum Bandwidth Used
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.WritebackEnable[k] == true
+ && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
+ WRBandwidth = mode_lib->vba.WritebackDestinationWidth[k]
+ * mode_lib->vba.WritebackDestinationHeight[k]
+ / (mode_lib->vba.HTotal[k] * mode_lib->vba.WritebackSourceHeight[k]
+ / mode_lib->vba.PixelClock[k]) * 4;
+ } else if (mode_lib->vba.WritebackEnable[k] == true) {
+ WRBandwidth = mode_lib->vba.WritebackDestinationWidth[k]
+ * mode_lib->vba.WritebackDestinationHeight[k]
+ / (mode_lib->vba.HTotal[k] * mode_lib->vba.WritebackSourceHeight[k]
+ / mode_lib->vba.PixelClock[k]) * 8;
+ }
+ TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
+ }
+
+ v->TotalDataReadBandwidth = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthSurfaceLuma[k]
+ + v->ReadBandwidthSurfaceChroma[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, TotalDataReadBandwidth = %f\n",
+ __func__, k, v->TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%d, ReadBandwidthSurfaceLuma = %f\n",
+ __func__, k, v->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%d, ReadBandwidthSurfaceChroma = %f\n",
+ __func__, k, v->ReadBandwidthSurfaceChroma[k]);
+#endif
+ }
+ }
+
+ // Stutter Efficiency
+ dml32_CalculateStutterEfficiency(v->CompressedBufferSizeInkByte,
+ mode_lib->vba.UsesMALLForPStateChange,
+ v->UnboundedRequestEnabled,
+ mode_lib->vba.MetaFIFOSizeInKEntries,
+ mode_lib->vba.ZeroSizeBufferEntries,
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ROBBufferSizeInKByte,
+ v->TotalDataReadBandwidth,
+ mode_lib->vba.DCFCLK,
+ mode_lib->vba.ReturnBW,
+ v->CompbufReservedSpace64B,
+ v->CompbufReservedSpaceZs,
+ mode_lib->vba.SRExitTime,
+ mode_lib->vba.SRExitZ8Time,
+ mode_lib->vba.SynchronizeTimingsFinal,
+ mode_lib->vba.BlendingAndTiming,
+ v->Watermark.StutterEnterPlusExitWatermark,
+ v->Watermark.Z8StutterEnterPlusExitWatermark,
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
+ mode_lib->vba.Interlace,
+ v->MinTTUVBlank, mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.DETBufferSizeY,
+ v->BytePerPixelY,
+ v->BytePerPixelDETY,
+ v->SwathWidthY,
+ mode_lib->vba.SwathHeightY,
+ mode_lib->vba.SwathHeightC,
+ mode_lib->vba.DCCRateLuma,
+ mode_lib->vba.DCCRateChroma,
+ mode_lib->vba.DCCFractionOfZeroSizeRequestsLuma,
+ mode_lib->vba.DCCFractionOfZeroSizeRequestsChroma,
+ mode_lib->vba.HTotal, mode_lib->vba.VTotal,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.SourceRotation,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesC,
+ v->DCCYMaxUncompressedBlock,
+ v->DCCCMaxUncompressedBlock,
+ mode_lib->vba.VActive,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.WritebackEnable,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ /* Output */
+ &v->StutterEfficiencyNotIncludingVBlank,
+ &v->StutterEfficiency,
+ &v->NumberOfStutterBurstsPerFrame,
+ &v->Z8StutterEfficiencyNotIncludingVBlank,
+ &v->Z8StutterEfficiency,
+ &v->Z8NumberOfStutterBurstsPerFrame,
+ &v->StutterPeriod,
+ &v->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ {
+ unsigned int dummy_integer[1];
+
+ // Calculate z8 stutter eff assuming 0 reserved space
+ dml32_CalculateStutterEfficiency(v->CompressedBufferSizeInkByte,
+ mode_lib->vba.UsesMALLForPStateChange,
+ v->UnboundedRequestEnabled,
+ mode_lib->vba.MetaFIFOSizeInKEntries,
+ mode_lib->vba.ZeroSizeBufferEntries,
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ROBBufferSizeInKByte,
+ v->TotalDataReadBandwidth,
+ mode_lib->vba.DCFCLK,
+ mode_lib->vba.ReturnBW,
+ 0, //CompbufReservedSpace64B,
+ 0, //CompbufReservedSpaceZs,
+ mode_lib->vba.SRExitTime,
+ mode_lib->vba.SRExitZ8Time,
+ mode_lib->vba.SynchronizeTimingsFinal,
+ mode_lib->vba.BlendingAndTiming,
+ v->Watermark.StutterEnterPlusExitWatermark,
+ v->Watermark.Z8StutterEnterPlusExitWatermark,
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
+ mode_lib->vba.Interlace,
+ v->MinTTUVBlank,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.DETBufferSizeY,
+ v->BytePerPixelY, v->BytePerPixelDETY,
+ v->SwathWidthY, mode_lib->vba.SwathHeightY,
+ mode_lib->vba.SwathHeightC,
+ mode_lib->vba.DCCRateLuma,
+ mode_lib->vba.DCCRateChroma,
+ mode_lib->vba.DCCFractionOfZeroSizeRequestsLuma,
+ mode_lib->vba.DCCFractionOfZeroSizeRequestsChroma,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.VTotal,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.SourceRotation,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesC,
+ v->DCCYMaxUncompressedBlock,
+ v->DCCCMaxUncompressedBlock,
+ mode_lib->vba.VActive,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.WritebackEnable,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->meta_row_bw, v->dpte_row_bw,
+
+ /* Output */
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0],
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1],
+ &dummy_integer[0],
+ &v->Z8StutterEfficiencyNotIncludingVBlankBestCase,
+ &v->Z8StutterEfficiencyBestCase,
+ &v->Z8NumberOfStutterBurstsPerFrameBestCase,
+ &v->StutterPeriodBestCase,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean);
+ }
+#else
+ v->Z8StutterEfficiencyNotIncludingVBlankBestCase = v->Z8StutterEfficiencyNotIncludingVBlank;
+ v->Z8StutterEfficiencyBestCase = v->Z8StutterEfficiency;
+ v->Z8NumberOfStutterBurstsPerFrameBestCase = v->Z8NumberOfStutterBurstsPerFrame;
+ v->StutterPeriodBestCase = v->StutterPeriod;
+#endif
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- END ---\n", __func__);
+#endif
+}
+
+static void mode_support_configuration(struct vba_vars_st *v,
+ struct display_mode_lib *mode_lib)
+{
+ int i, j;
+
+ for (i = v->soc.num_states - 1; i >= 0; i--) {
+ for (j = 0; j < 2; j++) {
+ if (mode_lib->vba.ScaleRatioAndTapsSupport == true
+ && mode_lib->vba.SourceFormatPixelAndScanSupport == true
+ && mode_lib->vba.ViewportSizeSupport[i][j] == true
+ && !mode_lib->vba.LinkRateDoesNotMatchDPVersion
+ && !mode_lib->vba.LinkRateForMultistreamNotIndicated
+ && !mode_lib->vba.BPPForMultistreamNotIndicated
+ && !mode_lib->vba.MultistreamWithHDMIOreDP
+ && !mode_lib->vba.ExceededMultistreamSlots[i]
+ && !mode_lib->vba.MSOOrODMSplitWithNonDPLink
+ && !mode_lib->vba.NotEnoughLanesForMSO
+ && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420
+ //&& !mode_lib->vba.DSCOnlyIfNecessaryWithBPP
+ && !mode_lib->vba.DSC422NativeNotSupported
+ && !mode_lib->vba.MPCCombineMethodIncompatible
+ && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true
+ && mode_lib->vba.ODMCombine4To1SupportCheckOK[i] == true
+ && mode_lib->vba.NotEnoughDSCUnits[i] == false
+ && !mode_lib->vba.NotEnoughDSCSlices[i]
+ && !mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
+ && !mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen
+ && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false
+ && mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i]
+ && mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] == false
+ && !mode_lib->vba.InvalidCombinationOfMALLUseForPState
+ && !mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
+ && mode_lib->vba.ROBSupport[i][j] == true
+ && mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] == true
+ && mode_lib->vba.TotalAvailablePipesSupport[i][j] == true
+ && mode_lib->vba.NumberOfOTGSupport == true
+ && mode_lib->vba.NumberOfHDMIFRLSupport == true
+ && mode_lib->vba.EnoughWritebackUnits == true
+ && mode_lib->vba.WritebackLatencySupport == true
+ && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true
+ && mode_lib->vba.CursorSupport == true && mode_lib->vba.PitchSupport == true
+ && mode_lib->vba.ViewportExceedsSurface == false
+ && mode_lib->vba.PrefetchSupported[i][j] == true
+ && mode_lib->vba.VActiveBandwithSupport[i][j] == true
+ && mode_lib->vba.DynamicMetadataSupported[i][j] == true
+ && mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] == true
+ && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true
+ && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true
+ && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true
+ && mode_lib->vba.NonsupportedDSCInputBPC == false
+ && !mode_lib->vba.ExceededMALLSize
+ && ((mode_lib->vba.HostVMEnable == false
+ && !mode_lib->vba.ImmediateFlipRequiredFinal)
+ || mode_lib->vba.ImmediateFlipSupportedForState[i][j])
+ && (!mode_lib->vba.DRAMClockChangeRequirementFinal
+ || i == v->soc.num_states - 1
+ || mode_lib->vba.DRAMClockChangeSupport[i][j] != dm_dram_clock_change_unsupported)
+ && (!mode_lib->vba.FCLKChangeRequirementFinal || i == v->soc.num_states - 1
+ || mode_lib->vba.FCLKChangeSupport[i][j] != dm_fclock_change_unsupported)
+ && (!mode_lib->vba.USRRetrainingRequiredFinal
+ || mode_lib->vba.USRRetrainingSupport[i][j])) {
+ mode_lib->vba.ModeSupport[i][j] = true;
+ } else {
+ mode_lib->vba.ModeSupport[i][j] = false;
+ }
+ }
+ }
+}
+
+void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ int i, j;
+ unsigned int k, m;
+ unsigned int MaximumMPCCombine;
+ unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth;
+ unsigned int TotalSlots;
+ bool CompBufReservedSpaceNeedAdjustment;
+ bool CompBufReservedSpaceNeedAdjustmentSingleDPP;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: called\n", __func__);
+#endif
+
+ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
+
+ /*Scale Ratio, taps Support Check*/
+
+ mode_lib->vba.ScaleRatioAndTapsSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.ScalerEnabled[k] == false
+ && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe_alpha)
+ || mode_lib->vba.HRatio[k] != 1.0 || mode_lib->vba.htaps[k] != 1.0
+ || mode_lib->vba.VRatio[k] != 1.0 || mode_lib->vba.vtaps[k] != 1.0)) {
+ mode_lib->vba.ScaleRatioAndTapsSupport = false;
+ } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0 || mode_lib->vba.htaps[k] < 1.0
+ || mode_lib->vba.htaps[k] > 8.0
+ || (mode_lib->vba.htaps[k] > 1.0 && (mode_lib->vba.htaps[k] % 2) == 1)
+ || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio
+ || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio
+ || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k]
+ || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k]
+ || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe
+ && (mode_lib->vba.VTAPsChroma[k] < 1
+ || mode_lib->vba.VTAPsChroma[k] > 8
+ || mode_lib->vba.HTAPsChroma[k] < 1
+ || mode_lib->vba.HTAPsChroma[k] > 8
+ || (mode_lib->vba.HTAPsChroma[k] > 1
+ && mode_lib->vba.HTAPsChroma[k] % 2
+ == 1)
+ || mode_lib->vba.HRatioChroma[k]
+ > mode_lib->vba.MaxHSCLRatio
+ || mode_lib->vba.VRatioChroma[k]
+ > mode_lib->vba.MaxVSCLRatio
+ || mode_lib->vba.HRatioChroma[k]
+ > mode_lib->vba.HTAPsChroma[k]
+ || mode_lib->vba.VRatioChroma[k]
+ > mode_lib->vba.VTAPsChroma[k]))) {
+ mode_lib->vba.ScaleRatioAndTapsSupport = false;
+ }
+ }
+
+ /*Source Format, Pixel Format and Scan Support Check*/
+ mode_lib->vba.SourceFormatPixelAndScanSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
+ && (!(!IsVertical((enum dm_rotation_angle) mode_lib->vba.SourceScan[k]))
+ || mode_lib->vba.DCCEnable[k] == true)) {
+ mode_lib->vba.SourceFormatPixelAndScanSupport = false;
+ }
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ dml32_CalculateBytePerPixelAndBlockSizes(
+ mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.SurfaceTiling[k],
+
+ /* Output */
+ &mode_lib->vba.BytePerPixelY[k],
+ &mode_lib->vba.BytePerPixelC[k],
+ &mode_lib->vba.BytePerPixelInDETY[k],
+ &mode_lib->vba.BytePerPixelInDETC[k],
+ &mode_lib->vba.Read256BlockHeightY[k],
+ &mode_lib->vba.Read256BlockHeightC[k],
+ &mode_lib->vba.Read256BlockWidthY[k],
+ &mode_lib->vba.Read256BlockWidthC[k],
+ &mode_lib->vba.MacroTileHeightY[k],
+ &mode_lib->vba.MacroTileHeightC[k],
+ &mode_lib->vba.MacroTileWidthY[k],
+ &mode_lib->vba.MacroTileWidthC[k]);
+ }
+
+ /*Bandwidth Support Check*/
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (!IsVertical(mode_lib->vba.SourceRotation[k])) {
+ v->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k];
+ v->SwathWidthCSingleDPP[k] = mode_lib->vba.ViewportWidthChroma[k];
+ } else {
+ v->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k];
+ v->SwathWidthCSingleDPP[k] = mode_lib->vba.ViewportHeightChroma[k];
+ }
+ }
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
+ v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]
+ / 2.0;
+ }
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.WritebackEnable[k] == true && mode_lib->vba.WritebackPixelFormat[k] == dm_444_64) {
+ v->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
+ * mode_lib->vba.WritebackDestinationHeight[k]
+ / (mode_lib->vba.WritebackSourceHeight[k] * mode_lib->vba.HTotal[k]
+ / mode_lib->vba.PixelClock[k]) * 8.0;
+ } else if (mode_lib->vba.WritebackEnable[k] == true) {
+ v->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
+ * mode_lib->vba.WritebackDestinationHeight[k]
+ / (mode_lib->vba.WritebackSourceHeight[k] * mode_lib->vba.HTotal[k]
+ / mode_lib->vba.PixelClock[k]) * 4.0;
+ } else {
+ v->WriteBandwidth[k] = 0.0;
+ }
+ }
+
+ /*Writeback Latency support check*/
+
+ mode_lib->vba.WritebackLatencySupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.WritebackEnable[k] == true
+ && (v->WriteBandwidth[k]
+ > mode_lib->vba.WritebackInterfaceBufferSize * 1024
+ / mode_lib->vba.WritebackLatency)) {
+ mode_lib->vba.WritebackLatencySupport = false;
+ }
+ }
+
+ /*Writeback Mode Support Check*/
+ mode_lib->vba.EnoughWritebackUnits = true;
+ mode_lib->vba.TotalNumberOfActiveWriteback = 0;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.WritebackEnable[k] == true)
+ mode_lib->vba.TotalNumberOfActiveWriteback = mode_lib->vba.TotalNumberOfActiveWriteback + 1;
+ }
+
+ if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback)
+ mode_lib->vba.EnoughWritebackUnits = false;
+
+ /*Writeback Scale Ratio and Taps Support Check*/
+ mode_lib->vba.WritebackScaleRatioAndTapsSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.WritebackEnable[k] == true) {
+ if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio
+ || mode_lib->vba.WritebackVRatio[k] > mode_lib->vba.WritebackMaxVSCLRatio
+ || mode_lib->vba.WritebackHRatio[k] < mode_lib->vba.WritebackMinHSCLRatio
+ || mode_lib->vba.WritebackVRatio[k] < mode_lib->vba.WritebackMinVSCLRatio
+ || mode_lib->vba.WritebackHTaps[k] > mode_lib->vba.WritebackMaxHSCLTaps
+ || mode_lib->vba.WritebackVTaps[k] > mode_lib->vba.WritebackMaxVSCLTaps
+ || mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackHTaps[k]
+ || mode_lib->vba.WritebackVRatio[k] > mode_lib->vba.WritebackVTaps[k]
+ || (mode_lib->vba.WritebackHTaps[k] > 2.0
+ && ((mode_lib->vba.WritebackHTaps[k] % 2) == 1))) {
+ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
+ }
+ if (2.0 * mode_lib->vba.WritebackDestinationWidth[k] * (mode_lib->vba.WritebackVTaps[k] - 1)
+ * 57 > mode_lib->vba.WritebackLineBufferSize) {
+ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
+ }
+ }
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(mode_lib->vba.HRatio[k], mode_lib->vba.HRatioChroma[k],
+ mode_lib->vba.VRatio[k], mode_lib->vba.VRatioChroma[k],
+ mode_lib->vba.MaxDCHUBToPSCLThroughput, mode_lib->vba.MaxPSCLToLBThroughput,
+ mode_lib->vba.PixelClock[k], mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.htaps[k], mode_lib->vba.HTAPsChroma[k], mode_lib->vba.vtaps[k],
+ mode_lib->vba.VTAPsChroma[k],
+ /* Output */
+ &mode_lib->vba.PSCL_FACTOR[k], &mode_lib->vba.PSCL_FACTOR_CHROMA[k],
+ &mode_lib->vba.MinDPPCLKUsingSingleDPP[k]);
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+
+ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 8192;
+ } else if (!IsVertical(mode_lib->vba.SourceRotation[k]) && v->BytePerPixelC[k] > 0
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe_alpha) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 7680;
+ } else if (IsVertical(mode_lib->vba.SourceRotation[k]) && v->BytePerPixelC[k] > 0
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe_alpha) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 4320;
+ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_rgbe_alpha) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 3840;
+ } else if (IsVertical(mode_lib->vba.SourceRotation[k]) && v->BytePerPixelY[k] == 8 &&
+ mode_lib->vba.DCCEnable[k] == true) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 3072;
+ } else {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 6144;
+ }
+
+ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 || mode_lib->vba.SourcePixelFormat[k] == dm_420_10
+ || mode_lib->vba.SourcePixelFormat[k] == dm_420_12) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportChroma = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma / 2.0;
+ } else {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportChroma = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma;
+ }
+ v->MaximumSwathWidthInLineBufferLuma = mode_lib->vba.LineBufferSizeFinal
+ * dml_max(mode_lib->vba.HRatio[k], 1.0) / mode_lib->vba.LBBitPerPixel[k]
+ / (mode_lib->vba.vtaps[k] + dml_max(dml_ceil(mode_lib->vba.VRatio[k], 1.0) - 2, 0.0));
+ if (v->BytePerPixelC[k] == 0.0) {
+ v->MaximumSwathWidthInLineBufferChroma = 0;
+ } else {
+ v->MaximumSwathWidthInLineBufferChroma = mode_lib->vba.LineBufferSizeFinal
+ * dml_max(mode_lib->vba.HRatioChroma[k], 1.0) / mode_lib->vba.LBBitPerPixel[k]
+ / (mode_lib->vba.VTAPsChroma[k]
+ + dml_max(dml_ceil(mode_lib->vba.VRatioChroma[k], 1.0) - 2,
+ 0.0));
+ }
+ v->MaximumSwathWidthLuma[k] = dml_min(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma,
+ v->MaximumSwathWidthInLineBufferLuma);
+ v->MaximumSwathWidthChroma[k] = dml_min(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportChroma,
+ v->MaximumSwathWidthInLineBufferChroma);
+ }
+
+ dml32_CalculateSwathAndDETConfiguration(
+ mode_lib->vba.DETSizeOverride,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.ConfigReturnBufferSizeInKByte,
+ mode_lib->vba.MaxTotalDETInKByte,
+ mode_lib->vba.MinCompressedBufferSizeInKByte,
+ 1, /* ForceSingleDPP */
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.nomDETInKByte,
+ mode_lib->vba.UseUnboundedRequesting,
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ mode_lib->vba.ip.pixel_chunk_size_kbytes,
+ mode_lib->vba.ip.rob_buffer_size_kbytes,
+ mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal,
+ mode_lib->vba.Output,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.MaximumSwathWidthLuma,
+ mode_lib->vba.MaximumSwathWidthChroma,
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.SourcePixelFormat,
+ mode_lib->vba.SurfaceTiling,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ mode_lib->vba.Read256BlockHeightY,
+ mode_lib->vba.Read256BlockHeightC,
+ mode_lib->vba.Read256BlockWidthY,
+ mode_lib->vba.Read256BlockWidthC,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_odm_mode,
+ mode_lib->vba.BlendingAndTiming,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.BytePerPixelInDETY,
+ mode_lib->vba.BytePerPixelInDETC,
+ mode_lib->vba.HActive,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0], /* Integer DPPPerSurface[] */
+
+ /* Output */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1], /* Long swath_width_luma_ub[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[2], /* Long swath_width_chroma_ub[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_double_array[0], /* Long SwathWidth[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_double_array[1], /* Long SwathWidthChroma[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[3], /* Integer SwathHeightY[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[4], /* Integer SwathHeightC[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[5], /* Long DETBufferSizeInKByte[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[6], /* Long DETBufferSizeY[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[7], /* Long DETBufferSizeC[] */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0][0], /* bool *UnboundedRequestEnabled */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0][0], /* Long *CompressedBufferSizeInkByte */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1][0], /* Long *CompBufReservedSpaceKBytes */
+ &CompBufReservedSpaceNeedAdjustmentSingleDPP,
+ mode_lib->vba.SingleDPPViewportSizeSupportPerSurface,/* bool ViewportSizeSupportPerSurface[] */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1][0]); /* bool *ViewportSizeSupport */
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = false;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_reduce_voltage_and_clocks)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = true;
+ if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_always_when_possible)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = true;
+ }
+ mode_lib->vba.MPCCombineMethodIncompatible = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage
+ && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible;
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] = 0;
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = true;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC = dm_odm_combine_mode_disabled;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC = dm_odm_combine_mode_disabled;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateODMMode(
+ mode_lib->vba.MaximumPixelsPerLinePerDSCUnit,
+ mode_lib->vba.HActive[k],
+ mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.Output[k],
+ mode_lib->vba.ODMUse[k],
+ mode_lib->vba.MaxDispclk[i],
+ mode_lib->vba.MaxDispclk[v->soc.num_states - 1],
+ false,
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j],
+ mode_lib->vba.MaxNumDPP,
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKRampingMargin,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed,
+ mode_lib->vba.NumberOfDSCSlices[k],
+
+ /* Output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC);
+
+ dml32_CalculateODMMode(
+ mode_lib->vba.MaximumPixelsPerLinePerDSCUnit,
+ mode_lib->vba.HActive[k],
+ mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.Output[k],
+ mode_lib->vba.ODMUse[k],
+ mode_lib->vba.MaxDispclk[i],
+ mode_lib->vba.MaxDispclk[v->soc.num_states - 1],
+ true,
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j],
+ mode_lib->vba.MaxNumDPP,
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKRampingMargin,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed,
+ mode_lib->vba.NumberOfDSCSlices[k],
+
+ /* Output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC);
+
+ dml32_CalculateOutputLink(
+ mode_lib->vba.PHYCLKPerState[i],
+ mode_lib->vba.PHYCLKD18PerState[i],
+ mode_lib->vba.PHYCLKD32PerState[i],
+ mode_lib->vba.Downspreading,
+ (mode_lib->vba.BlendingAndTiming[k] == k),
+ mode_lib->vba.Output[k],
+ mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.HTotal[k],
+ mode_lib->vba.HActive[k],
+ mode_lib->vba.PixelClockBackEnd[k],
+ mode_lib->vba.ForcedOutputLinkBPP[k],
+ mode_lib->vba.DSCInputBitPerComponent[k],
+ mode_lib->vba.NumberOfDSCSlices[k],
+ mode_lib->vba.AudioSampleRate[k],
+ mode_lib->vba.AudioSampleLayout[k],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC,
+ mode_lib->vba.DSCEnable[k],
+ mode_lib->vba.OutputLinkDPLanes[k],
+ mode_lib->vba.OutputLinkDPRate[k],
+
+ /* Output */
+ &mode_lib->vba.RequiresDSC[i][k],
+ &mode_lib->vba.RequiresFEC[i][k],
+ &mode_lib->vba.OutputBppPerState[i][k],
+ &mode_lib->vba.OutputTypePerState[i][k],
+ &mode_lib->vba.OutputRatePerState[i][k],
+ &mode_lib->vba.RequiredSlots[i][k]);
+
+ if (mode_lib->vba.RequiresDSC[i][k] == false) {
+ mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC;
+ mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] =
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC;
+ if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC)
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = false;
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC;
+ } else {
+ mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC;
+ mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] =
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC;
+ if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC)
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = false;
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC;
+ }
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 4;
+ } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 2;
+ } else if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_never) {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 1;
+ } else if (dml32_RoundToDFSGranularity(
+ mode_lib->vba.MinDPPCLKUsingSingleDPP[k]
+ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
+ / 100), 1,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed) <= mode_lib->vba.MaxDppclk[i] &&
+ mode_lib->vba.SingleDPPViewportSizeSupportPerSurface[k] == true) {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 1;
+ } else if (mode_lib->vba.TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP) {
+ mode_lib->vba.MPCCombine[i][j][k] = true;
+ mode_lib->vba.NoOfDPP[i][j][k] = 2;
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] + 1;
+ } else {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 1;
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = false;
+ }
+ }
+
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] = 0;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = true;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.NoOfDPP[i][j][k] == 1)
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] =
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] + 1;
+ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
+ || mode_lib->vba.SourcePixelFormat[k] == dm_420_10
+ || mode_lib->vba.SourcePixelFormat[k] == dm_420_12
+ || mode_lib->vba.SourcePixelFormat[k] == dm_rgbe_alpha) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = false;
+ }
+ }
+
+ // if TotalNumberOfActiveDPP is > 1, then there should be no unbounded req mode (hw limitation), the comp buf reserved adjustment is not needed regardless
+ // if TotalNumberOfActiveDPP is == 1, then will use the SingleDPP version of unbounded_req for the decision
+ CompBufReservedSpaceNeedAdjustment = (mode_lib->vba.TotalNumberOfActiveDPP[i][j] > 1) ? 0 : CompBufReservedSpaceNeedAdjustmentSingleDPP;
+
+
+
+ if (j == 1 && !dml32_UnboundedRequest(mode_lib->vba.UseUnboundedRequesting,
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j], v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma,
+ mode_lib->vba.Output[0],
+ mode_lib->vba.SurfaceTiling[0],
+ CompBufReservedSpaceNeedAdjustment,
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)) {
+ while (!(mode_lib->vba.TotalNumberOfActiveDPP[i][j] >= mode_lib->vba.MaxNumDPP
+ || mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] == 0)) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = 0;
+ NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.MPCCombineUse[k]
+ != dm_mpc_never &&
+ mode_lib->vba.MPCCombineUse[k] != dm_mpc_reduce_voltage &&
+ mode_lib->vba.ReadBandwidthLuma[k] +
+ mode_lib->vba.ReadBandwidthChroma[k] >
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth &&
+ (mode_lib->vba.ODMCombineEnablePerState[i][k] !=
+ dm_odm_combine_mode_2to1 &&
+ mode_lib->vba.ODMCombineEnablePerState[i][k] !=
+ dm_odm_combine_mode_4to1) &&
+ mode_lib->vba.MPCCombine[i][j][k] == false) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth =
+ mode_lib->vba.ReadBandwidthLuma[k]
+ + mode_lib->vba.ReadBandwidthChroma[k];
+ NumberOfNonCombinedSurfaceOfMaximumBandwidth = k;
+ }
+ }
+ mode_lib->vba.MPCCombine[i][j][NumberOfNonCombinedSurfaceOfMaximumBandwidth] =
+ true;
+ mode_lib->vba.NoOfDPP[i][j][NumberOfNonCombinedSurfaceOfMaximumBandwidth] = 2;
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] + 1;
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] =
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] - 1;
+ }
+ }
+
+ //DISPCLK/DPPCLK
+ mode_lib->vba.WritebackRequiredDISPCLK = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.WritebackEnable[k]) {
+ mode_lib->vba.WritebackRequiredDISPCLK = dml_max(
+ mode_lib->vba.WritebackRequiredDISPCLK,
+ dml32_CalculateWriteBackDISPCLK(
+ mode_lib->vba.WritebackPixelFormat[k],
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.WritebackHRatio[k],
+ mode_lib->vba.WritebackVRatio[k],
+ mode_lib->vba.WritebackHTaps[k],
+ mode_lib->vba.WritebackVTaps[k],
+ mode_lib->vba.WritebackSourceWidth[k],
+ mode_lib->vba.WritebackDestinationWidth[k],
+ mode_lib->vba.HTotal[k],
+ mode_lib->vba.WritebackLineBufferSize,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed));
+ }
+ }
+
+ mode_lib->vba.RequiredDISPCLK[i][j] = mode_lib->vba.WritebackRequiredDISPCLK;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.RequiredDISPCLK[i][j] = dml_max(mode_lib->vba.RequiredDISPCLK[i][j],
+ mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k]);
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k];
+
+ dml32_CalculateDPPCLK(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed, mode_lib->vba.MinDPPCLKUsingSingleDPP,
+ mode_lib->vba.NoOfDPPThisState,
+ /* Output */
+ &mode_lib->vba.GlobalDPPCLK, mode_lib->vba.RequiredDPPCLKThisState);
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ mode_lib->vba.RequiredDPPCLK[i][j][k] = mode_lib->vba.RequiredDPPCLKThisState[k];
+
+ mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] = !((mode_lib->vba.RequiredDISPCLK[i][j]
+ > mode_lib->vba.MaxDispclk[i])
+ || (mode_lib->vba.GlobalDPPCLK > mode_lib->vba.MaxDppclk[i]));
+
+ if (mode_lib->vba.TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP)
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = false;
+ } // j
+ } // i (VOLTAGE_STATE)
+
+ /* Total Available OTG, HDMIFRL, DP Support Check */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = 0;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL = 0;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = 0;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = 0;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG + 1;
+ if (mode_lib->vba.Output[k] == dm_dp2p0) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 + 1;
+ if (mode_lib->vba.OutputMultistreamId[k]
+ == k || mode_lib->vba.OutputMultistreamEn[k] == false) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs + 1;
+ }
+ }
+ }
+ }
+
+ mode_lib->vba.NumberOfOTGSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG);
+ mode_lib->vba.NumberOfHDMIFRLSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs);
+ mode_lib->vba.NumberOfDP2p0Support = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams
+ && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs);
+
+ /* Display IO and DSC Support Check */
+ mode_lib->vba.NonsupportedDSCInputBPC = false;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0
+ || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0
+ || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)
+ || mode_lib->vba.DSCInputBitPerComponent[k] > mode_lib->vba.MaximumDSCBitsPerComponent) {
+ mode_lib->vba.NonsupportedDSCInputBPC = true;
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ mode_lib->vba.ExceededMultistreamSlots[i] = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == k) {
+ TotalSlots = mode_lib->vba.RequiredSlots[i][k];
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) {
+ if (mode_lib->vba.OutputMultistreamId[j] == k)
+ TotalSlots = TotalSlots + mode_lib->vba.RequiredSlots[i][j];
+ }
+ if (mode_lib->vba.Output[k] == dm_dp && TotalSlots > 63)
+ mode_lib->vba.ExceededMultistreamSlots[i] = true;
+ if (mode_lib->vba.Output[k] == dm_dp2p0 && TotalSlots > 64)
+ mode_lib->vba.ExceededMultistreamSlots[i] = true;
+ }
+ }
+ mode_lib->vba.LinkCapacitySupport[i] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0
+ || mode_lib->vba.Output[k] == dm_edp
+ || mode_lib->vba.Output[k] == dm_hdmi)
+ && mode_lib->vba.OutputBppPerState[i][k] == 0) {
+ mode_lib->vba.LinkCapacitySupport[i] = false;
+ }
+ }
+ }
+
+ mode_lib->vba.P2IWith420 = false;
+ mode_lib->vba.DSCOnlyIfNecessaryWithBPP = false;
+ mode_lib->vba.DSC422NativeNotSupported = false;
+ mode_lib->vba.LinkRateDoesNotMatchDPVersion = false;
+ mode_lib->vba.LinkRateForMultistreamNotIndicated = false;
+ mode_lib->vba.BPPForMultistreamNotIndicated = false;
+ mode_lib->vba.MultistreamWithHDMIOreDP = false;
+ mode_lib->vba.MSOOrODMSplitWithNonDPLink = false;
+ mode_lib->vba.NotEnoughLanesForMSO = false;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0
+ || mode_lib->vba.Output[k] == dm_edp
+ || mode_lib->vba.Output[k] == dm_hdmi)) {
+ if (mode_lib->vba.OutputFormat[k]
+ == dm_420 && mode_lib->vba.Interlace[k] == 1 &&
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true)
+ mode_lib->vba.P2IWith420 = true;
+
+ if (mode_lib->vba.DSCEnable[k] && mode_lib->vba.ForcedOutputLinkBPP[k] != 0)
+ mode_lib->vba.DSCOnlyIfNecessaryWithBPP = true;
+ if ((mode_lib->vba.DSCEnable[k] || mode_lib->vba.DSCEnable[k])
+ && mode_lib->vba.OutputFormat[k] == dm_n422
+ && !mode_lib->vba.DSC422NativeSupport)
+ mode_lib->vba.DSC422NativeNotSupported = true;
+
+ if (((mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_hbr
+ || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_hbr2
+ || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_hbr3)
+ && mode_lib->vba.Output[k] != dm_dp && mode_lib->vba.Output[k] != dm_edp)
+ || ((mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_uhbr10
+ || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5
+ || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_uhbr20)
+ && mode_lib->vba.Output[k] != dm_dp2p0))
+ mode_lib->vba.LinkRateDoesNotMatchDPVersion = true;
+
+ if (mode_lib->vba.OutputMultistreamEn[k] == true) {
+ if (mode_lib->vba.OutputMultistreamId[k] == k
+ && mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_na)
+ mode_lib->vba.LinkRateForMultistreamNotIndicated = true;
+ if (mode_lib->vba.OutputMultistreamId[k] == k && mode_lib->vba.ForcedOutputLinkBPP[k] == 0)
+ mode_lib->vba.BPPForMultistreamNotIndicated = true;
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) {
+ if (mode_lib->vba.OutputMultistreamId[k] == j
+ && mode_lib->vba.ForcedOutputLinkBPP[k] == 0)
+ mode_lib->vba.BPPForMultistreamNotIndicated = true;
+ }
+ }
+
+ if ((mode_lib->vba.Output[k] == dm_edp || mode_lib->vba.Output[k] == dm_hdmi)) {
+ if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == k)
+ mode_lib->vba.MultistreamWithHDMIOreDP = true;
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) {
+ if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == j)
+ mode_lib->vba.MultistreamWithHDMIOreDP = true;
+ }
+ }
+
+ if (mode_lib->vba.Output[k] != dm_dp
+ && (mode_lib->vba.ODMUse[k] == dm_odm_split_policy_1to2
+ || mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to2
+ || mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to4))
+ mode_lib->vba.MSOOrODMSplitWithNonDPLink = true;
+
+ if ((mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to2
+ && mode_lib->vba.OutputLinkDPLanes[k] < 2)
+ || (mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to4
+ && mode_lib->vba.OutputLinkDPLanes[k] < 4))
+ mode_lib->vba.NotEnoughLanesForMSO = true;
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && dml32_RequiredDTBCLK(mode_lib->vba.RequiresDSC[i][k],
+ mode_lib->vba.PixelClockBackEnd[k],
+ mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.OutputBppPerState[i][k],
+ mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.HTotal[k],
+ mode_lib->vba.HActive[k], mode_lib->vba.AudioSampleRate[k],
+ mode_lib->vba.AudioSampleLayout[k])
+ > mode_lib->vba.DTBCLKPerState[i]) {
+ mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] = true;
+ }
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ mode_lib->vba.ODMCombine2To1SupportCheckOK[i] = true;
+ mode_lib->vba.ODMCombine4To1SupportCheckOK[i] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1
+ && mode_lib->vba.Output[k] == dm_hdmi) {
+ mode_lib->vba.ODMCombine2To1SupportCheckOK[i] = false;
+ }
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
+ && (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_edp
+ || mode_lib->vba.Output[k] == dm_hdmi)) {
+ mode_lib->vba.ODMCombine4To1SupportCheckOK[i] = false;
+ }
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = false;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ if (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0
+ || mode_lib->vba.Output[k] == dm_edp) {
+ if (mode_lib->vba.OutputFormat[k] == dm_420) {
+ mode_lib->vba.DSCFormatFactor = 2;
+ } else if (mode_lib->vba.OutputFormat[k] == dm_444) {
+ mode_lib->vba.DSCFormatFactor = 1;
+ } else if (mode_lib->vba.OutputFormat[k] == dm_n422) {
+ mode_lib->vba.DSCFormatFactor = 2;
+ } else {
+ mode_lib->vba.DSCFormatFactor = 1;
+ }
+ if (mode_lib->vba.RequiresDSC[i][k] == true) {
+ if (mode_lib->vba.ODMCombineEnablePerState[i][k]
+ == dm_odm_combine_mode_4to1) {
+ if (mode_lib->vba.PixelClockBackEnd[k] / 12.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i])
+ mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = true;
+ } else if (mode_lib->vba.ODMCombineEnablePerState[i][k]
+ == dm_odm_combine_mode_2to1) {
+ if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i])
+ mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = true;
+ } else {
+ if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i])
+ mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = true;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /* Check DSC Unit and Slices Support */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0;
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ mode_lib->vba.NotEnoughDSCUnits[i] = false;
+ mode_lib->vba.NotEnoughDSCSlices[i] = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0;
+ mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.RequiresDSC[i][k] == true) {
+ if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
+ if (mode_lib->vba.HActive[k]
+ > 4 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit)
+ mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 4;
+ if (mode_lib->vba.NumberOfDSCSlices[k] > 16)
+ mode_lib->vba.NotEnoughDSCSlices[i] = true;
+ } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ if (mode_lib->vba.HActive[k]
+ > 2 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit)
+ mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 2;
+ if (mode_lib->vba.NumberOfDSCSlices[k] > 8)
+ mode_lib->vba.NotEnoughDSCSlices[i] = true;
+ } else {
+ if (mode_lib->vba.HActive[k] > mode_lib->vba.MaximumPixelsPerLinePerDSCUnit)
+ mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 1;
+ if (mode_lib->vba.NumberOfDSCSlices[k] > 4)
+ mode_lib->vba.NotEnoughDSCSlices[i] = true;
+ }
+ }
+ }
+ if (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC)
+ mode_lib->vba.NotEnoughDSCUnits[i] = true;
+ }
+
+ /*DSC Delay per state*/
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.DSCDelayPerState[i][k] = dml32_DSCDelayRequirement(
+ mode_lib->vba.RequiresDSC[i][k], mode_lib->vba.ODMCombineEnablePerState[i][k],
+ mode_lib->vba.DSCInputBitPerComponent[k],
+ mode_lib->vba.OutputBppPerState[i][k], mode_lib->vba.HActive[k],
+ mode_lib->vba.HTotal[k], mode_lib->vba.NumberOfDSCSlices[k],
+ mode_lib->vba.OutputFormat[k], mode_lib->vba.Output[k],
+ mode_lib->vba.PixelClock[k], mode_lib->vba.PixelClockBackEnd[k],
+ mode_lib->vba.ip.dsc_delay_factor_wa);
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ for (m = 0; m <= mode_lib->vba.NumberOfActiveSurfaces - 1; m++) {
+ for (j = 0; j <= mode_lib->vba.NumberOfActiveSurfaces - 1; j++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == m &&
+ mode_lib->vba.RequiresDSC[i][m] == true) {
+ mode_lib->vba.DSCDelayPerState[i][k] =
+ mode_lib->vba.DSCDelayPerState[i][m];
+ }
+ }
+ }
+ }
+ }
+
+ //Calculate Swath, DET Configuration, DCFCLKDeepSleep
+ //
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.RequiredDPPCLKThisState[k] = mode_lib->vba.RequiredDPPCLK[i][j][k];
+ mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k];
+ mode_lib->vba.ODMCombineEnableThisState[k] =
+ mode_lib->vba.ODMCombineEnablePerState[i][k];
+ }
+
+ dml32_CalculateSwathAndDETConfiguration(
+ mode_lib->vba.DETSizeOverride,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.ConfigReturnBufferSizeInKByte,
+ mode_lib->vba.MaxTotalDETInKByte,
+ mode_lib->vba.MinCompressedBufferSizeInKByte,
+ false, /* ForceSingleDPP */
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.nomDETInKByte,
+ mode_lib->vba.UseUnboundedRequesting,
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ mode_lib->vba.ip.pixel_chunk_size_kbytes,
+ mode_lib->vba.ip.rob_buffer_size_kbytes,
+ mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal,
+ mode_lib->vba.Output,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.MaximumSwathWidthLuma,
+ mode_lib->vba.MaximumSwathWidthChroma,
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.SourcePixelFormat,
+ mode_lib->vba.SurfaceTiling,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ mode_lib->vba.Read256BlockHeightY,
+ mode_lib->vba.Read256BlockHeightC,
+ mode_lib->vba.Read256BlockWidthY,
+ mode_lib->vba.Read256BlockWidthC,
+ mode_lib->vba.ODMCombineEnableThisState,
+ mode_lib->vba.BlendingAndTiming,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.BytePerPixelInDETY,
+ mode_lib->vba.BytePerPixelInDETC,
+ mode_lib->vba.HActive,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.NoOfDPPThisState,
+ /* Output */
+ mode_lib->vba.swath_width_luma_ub_this_state,
+ mode_lib->vba.swath_width_chroma_ub_this_state,
+ mode_lib->vba.SwathWidthYThisState,
+ mode_lib->vba.SwathWidthCThisState,
+ mode_lib->vba.SwathHeightYThisState,
+ mode_lib->vba.SwathHeightCThisState,
+ mode_lib->vba.DETBufferSizeInKByteThisState,
+ mode_lib->vba.DETBufferSizeYThisState,
+ mode_lib->vba.DETBufferSizeCThisState,
+ &mode_lib->vba.UnboundedRequestEnabledThisState,
+ &mode_lib->vba.CompressedBufferSizeInkByteThisState,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], /* Long CompBufReservedSpaceKBytes */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0],
+ &mode_lib->vba.ViewportSizeSupport[i][j]);
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.swath_width_luma_ub_all_states[i][j][k] =
+ mode_lib->vba.swath_width_luma_ub_this_state[k];
+ mode_lib->vba.swath_width_chroma_ub_all_states[i][j][k] =
+ mode_lib->vba.swath_width_chroma_ub_this_state[k];
+ mode_lib->vba.SwathWidthYAllStates[i][j][k] = mode_lib->vba.SwathWidthYThisState[k];
+ mode_lib->vba.SwathWidthCAllStates[i][j][k] = mode_lib->vba.SwathWidthCThisState[k];
+ mode_lib->vba.SwathHeightYAllStates[i][j][k] = mode_lib->vba.SwathHeightYThisState[k];
+ mode_lib->vba.SwathHeightCAllStates[i][j][k] = mode_lib->vba.SwathHeightCThisState[k];
+ mode_lib->vba.UnboundedRequestEnabledAllStates[i][j] =
+ mode_lib->vba.UnboundedRequestEnabledThisState;
+ mode_lib->vba.CompressedBufferSizeInkByteAllStates[i][j] =
+ mode_lib->vba.CompressedBufferSizeInkByteThisState;
+ mode_lib->vba.DETBufferSizeInKByteAllStates[i][j][k] =
+ mode_lib->vba.DETBufferSizeInKByteThisState[k];
+ mode_lib->vba.DETBufferSizeYAllStates[i][j][k] =
+ mode_lib->vba.DETBufferSizeYThisState[k];
+ mode_lib->vba.DETBufferSizeCAllStates[i][j][k] =
+ mode_lib->vba.DETBufferSizeCThisState[k];
+ }
+ }
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0]
+ * mode_lib->vba.CursorBPP[k][0] / 8.0
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
+ }
+
+ dml32_CalculateSurfaceSizeInMall(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.MALLAllocatedForDCNFinal,
+ mode_lib->vba.UseMALLForStaticScreen,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.ViewportWidthChroma,
+ mode_lib->vba.ViewportHeightChroma,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ mode_lib->vba.Read256BlockWidthY,
+ mode_lib->vba.Read256BlockWidthC,
+ mode_lib->vba.Read256BlockHeightY,
+ mode_lib->vba.Read256BlockHeightC,
+ mode_lib->vba.MacroTileWidthY,
+ mode_lib->vba.MacroTileWidthC,
+ mode_lib->vba.MacroTileHeightY,
+ mode_lib->vba.MacroTileHeightC,
+
+ /* Output */
+ mode_lib->vba.SurfaceSizeInMALL,
+ &mode_lib->vba.ExceededMALLSize);
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.swath_width_luma_ub_this_state[k] =
+ mode_lib->vba.swath_width_luma_ub_all_states[i][j][k];
+ mode_lib->vba.swath_width_chroma_ub_this_state[k] =
+ mode_lib->vba.swath_width_chroma_ub_all_states[i][j][k];
+ mode_lib->vba.SwathWidthYThisState[k] = mode_lib->vba.SwathWidthYAllStates[i][j][k];
+ mode_lib->vba.SwathWidthCThisState[k] = mode_lib->vba.SwathWidthCAllStates[i][j][k];
+ mode_lib->vba.SwathHeightYThisState[k] = mode_lib->vba.SwathHeightYAllStates[i][j][k];
+ mode_lib->vba.SwathHeightCThisState[k] = mode_lib->vba.SwathHeightCAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeInKByteThisState[k] =
+ mode_lib->vba.DETBufferSizeInKByteAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeYThisState[k] =
+ mode_lib->vba.DETBufferSizeYAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeCThisState[k] =
+ mode_lib->vba.DETBufferSizeCAllStates[i][j][k];
+ mode_lib->vba.RequiredDPPCLKThisState[k] = mode_lib->vba.RequiredDPPCLK[i][j][k];
+ mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k];
+ }
+
+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.DCCEnable[k] == true) {
+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j]
+ + mode_lib->vba.NoOfDPP[i][j][k];
+ }
+ }
+
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].PixelClock = mode_lib->vba.PixelClock[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SourceRotation = mode_lib->vba.SourceRotation[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportHeight = mode_lib->vba.ViewportHeight[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportHeightChroma = mode_lib->vba.ViewportHeightChroma[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthY = mode_lib->vba.MacroTileWidthY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightY = mode_lib->vba.MacroTileHeightY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthC = mode_lib->vba.MacroTileWidthC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightC = mode_lib->vba.MacroTileHeightC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].InterlaceEnable = mode_lib->vba.Interlace[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].HTotal = mode_lib->vba.HTotal[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCEnable = mode_lib->vba.DCCEnable[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SurfaceTiling = mode_lib->vba.SurfaceTiling[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BytePerPixelY = mode_lib->vba.BytePerPixelY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BytePerPixelC = mode_lib->vba.BytePerPixelC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ProgressiveToInterlaceUnitInOPP =
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VRatio = mode_lib->vba.VRatio[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VRatioChroma = mode_lib->vba.VRatioChroma[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VTaps = mode_lib->vba.vtaps[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VTapsChroma = mode_lib->vba.VTAPsChroma[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].PitchY = mode_lib->vba.PitchY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCMetaPitchY = mode_lib->vba.DCCMetaPitchY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].PitchC = mode_lib->vba.PitchC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCMetaPitchC = mode_lib->vba.DCCMetaPitchC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportStationary = mode_lib->vba.ViewportStationary[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportXStart = mode_lib->vba.ViewportXStartY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportYStart = mode_lib->vba.ViewportYStartY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportXStartC = mode_lib->vba.ViewportXStartC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportYStartC = mode_lib->vba.ViewportYStartC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->vba.ForceOneRowForFrame[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SwathHeightY = mode_lib->vba.SwathHeightYThisState[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SwathHeightC = mode_lib->vba.SwathHeightCThisState[k];
+ }
+
+ {
+ dml32_CalculateVMRowAndSwath(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters,
+ mode_lib->vba.SurfaceSizeInMALL,
+ mode_lib->vba.PTEBufferSizeInRequestsLuma,
+ mode_lib->vba.PTEBufferSizeInRequestsChroma,
+ mode_lib->vba.DCCMetaBufferSizeBytes,
+ mode_lib->vba.UseMALLForStaticScreen,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.MALLAllocatedForDCNFinal,
+ mode_lib->vba.SwathWidthYThisState,
+ mode_lib->vba.SwathWidthCThisState,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.GPUVMMinPageSizeKBytes,
+ mode_lib->vba.HostVMMinPageSize,
+
+ /* Output */
+ mode_lib->vba.PTEBufferSizeNotExceededPerState,
+ mode_lib->vba.DCCMetaBufferSizeNotExceededPerState,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1],
+ mode_lib->vba.dpte_row_height,
+ mode_lib->vba.dpte_row_height_chroma,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[2],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[3],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[4],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[5],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[6],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[7],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[8],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[9],
+ mode_lib->vba.meta_row_height,
+ mode_lib->vba.meta_row_height_chroma,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[10],
+ mode_lib->vba.dpte_group_bytes,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[11],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[12],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[13],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[14],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[15],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[16],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[17],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[18],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[19],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[20],
+ mode_lib->vba.PrefetchLinesYThisState,
+ mode_lib->vba.PrefetchLinesCThisState,
+ mode_lib->vba.PrefillY,
+ mode_lib->vba.PrefillC,
+ mode_lib->vba.MaxNumSwY,
+ mode_lib->vba.MaxNumSwC,
+ mode_lib->vba.meta_row_bandwidth_this_state,
+ mode_lib->vba.dpte_row_bandwidth_this_state,
+ mode_lib->vba.DPTEBytesPerRowThisState,
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrameThisState,
+ mode_lib->vba.MetaRowBytesThisState,
+ mode_lib->vba.use_one_row_for_frame_this_state,
+ mode_lib->vba.use_one_row_for_frame_flip_this_state,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0], // Boolean UsesMALLForStaticScreen[]
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1], // Boolean PTE_BUFFER_MODE[]
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[21]); // Long BIGK_FRAGMENT_SIZE[]
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.PrefetchLinesY[i][j][k] = mode_lib->vba.PrefetchLinesYThisState[k];
+ mode_lib->vba.PrefetchLinesC[i][j][k] = mode_lib->vba.PrefetchLinesCThisState[k];
+ mode_lib->vba.meta_row_bandwidth[i][j][k] =
+ mode_lib->vba.meta_row_bandwidth_this_state[k];
+ mode_lib->vba.dpte_row_bandwidth[i][j][k] =
+ mode_lib->vba.dpte_row_bandwidth_this_state[k];
+ mode_lib->vba.DPTEBytesPerRow[i][j][k] = mode_lib->vba.DPTEBytesPerRowThisState[k];
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k] =
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrameThisState[k];
+ mode_lib->vba.MetaRowBytes[i][j][k] = mode_lib->vba.MetaRowBytesThisState[k];
+ mode_lib->vba.use_one_row_for_frame[i][j][k] =
+ mode_lib->vba.use_one_row_for_frame_this_state[k];
+ mode_lib->vba.use_one_row_for_frame_flip[i][j][k] =
+ mode_lib->vba.use_one_row_for_frame_flip_this_state[k];
+ }
+
+ mode_lib->vba.PTEBufferSizeNotExceeded[i][j] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.PTEBufferSizeNotExceededPerState[k] == false)
+ mode_lib->vba.PTEBufferSizeNotExceeded[i][j] = false;
+ }
+
+ mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.DCCMetaBufferSizeNotExceededPerState[k] == false)
+ mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] = false;
+ }
+
+ mode_lib->vba.UrgLatency[i] = dml32_CalculateUrgentLatency(
+ mode_lib->vba.UrgentLatencyPixelDataOnly,
+ mode_lib->vba.UrgentLatencyPixelMixedWithVMData,
+ mode_lib->vba.UrgentLatencyVMDataOnly, mode_lib->vba.DoUrgentLatencyAdjustment,
+ mode_lib->vba.UrgentLatencyAdjustmentFabricClockComponent,
+ mode_lib->vba.UrgentLatencyAdjustmentFabricClockReference,
+ mode_lib->vba.FabricClockPerState[i]);
+
+ //bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateUrgentBurstFactor(
+ mode_lib->vba.UsesMALLForPStateChange[k],
+ mode_lib->vba.swath_width_luma_ub_this_state[k],
+ mode_lib->vba.swath_width_chroma_ub_this_state[k],
+ mode_lib->vba.SwathHeightYThisState[k],
+ mode_lib->vba.SwathHeightCThisState[k],
+ (double) mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ mode_lib->vba.UrgLatency[i],
+ mode_lib->vba.CursorBufferSize,
+ mode_lib->vba.CursorWidth[k][0],
+ mode_lib->vba.CursorBPP[k][0],
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ mode_lib->vba.BytePerPixelInDETY[k],
+ mode_lib->vba.BytePerPixelInDETC[k],
+ mode_lib->vba.DETBufferSizeYThisState[k],
+ mode_lib->vba.DETBufferSizeCThisState[k],
+ /* Output */
+ &mode_lib->vba.UrgentBurstFactorCursor[k],
+ &mode_lib->vba.UrgentBurstFactorLuma[k],
+ &mode_lib->vba.UrgentBurstFactorChroma[k],
+ &mode_lib->vba.NoUrgentLatencyHiding[k]);
+ }
+
+ dml32_CalculateDCFCLKDeepSleep(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ mode_lib->vba.SwathWidthYThisState,
+ mode_lib->vba.SwathWidthCThisState,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.PSCL_FACTOR,
+ mode_lib->vba.PSCL_FACTOR_CHROMA,
+ mode_lib->vba.RequiredDPPCLKThisState,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.ReturnBusWidth,
+
+ /* Output */
+ &mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]);
+ }
+ }
+
+ //Calculate Return BW
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ if (mode_lib->vba.WritebackEnable[k] == true) {
+ mode_lib->vba.WritebackDelayTime[k] =
+ mode_lib->vba.WritebackLatency
+ + dml32_CalculateWriteBackDelay(
+ mode_lib->vba.WritebackPixelFormat[k],
+ mode_lib->vba.WritebackHRatio[k],
+ mode_lib->vba.WritebackVRatio[k],
+ mode_lib->vba.WritebackVTaps[k],
+ mode_lib->vba.WritebackDestinationWidth[k],
+ mode_lib->vba.WritebackDestinationHeight[k],
+ mode_lib->vba.WritebackSourceHeight[k],
+ mode_lib->vba.HTotal[k])
+ / mode_lib->vba.RequiredDISPCLK[i][j];
+ } else {
+ mode_lib->vba.WritebackDelayTime[k] = 0.0;
+ }
+ for (m = 0; m <= mode_lib->vba.NumberOfActiveSurfaces - 1; m++) {
+ if (mode_lib->vba.BlendingAndTiming[m]
+ == k && mode_lib->vba.WritebackEnable[m] == true) {
+ mode_lib->vba.WritebackDelayTime[k] =
+ dml_max(mode_lib->vba.WritebackDelayTime[k],
+ mode_lib->vba.WritebackLatency
+ + dml32_CalculateWriteBackDelay(
+ mode_lib->vba.WritebackPixelFormat[m],
+ mode_lib->vba.WritebackHRatio[m],
+ mode_lib->vba.WritebackVRatio[m],
+ mode_lib->vba.WritebackVTaps[m],
+ mode_lib->vba.WritebackDestinationWidth[m],
+ mode_lib->vba.WritebackDestinationHeight[m],
+ mode_lib->vba.WritebackSourceHeight[m],
+ mode_lib->vba.HTotal[m]) /
+ mode_lib->vba.RequiredDISPCLK[i][j]);
+ }
+ }
+ }
+ }
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ for (m = 0; m <= mode_lib->vba.NumberOfActiveSurfaces - 1; m++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == m) {
+ mode_lib->vba.WritebackDelayTime[k] =
+ mode_lib->vba.WritebackDelayTime[m];
+ }
+ }
+ }
+ mode_lib->vba.MaxMaxVStartup[i][j] = 0;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.MaximumVStartup[i][j][k] = ((mode_lib->vba.Interlace[k] &&
+ !mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ?
+ dml_floor((mode_lib->vba.VTotal[k] -
+ mode_lib->vba.VActive[k]) / 2.0, 1.0) :
+ mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k])
+ - dml_max(1.0, dml_ceil(1.0 *
+ mode_lib->vba.WritebackDelayTime[k] /
+ (mode_lib->vba.HTotal[k] /
+ mode_lib->vba.PixelClock[k]), 1.0));
+
+ // Clamp to max OTG vstartup register limit
+ if (mode_lib->vba.MaximumVStartup[i][j][k] > 1023)
+ mode_lib->vba.MaximumVStartup[i][j][k] = 1023;
+
+ mode_lib->vba.MaxMaxVStartup[i][j] = dml_max(mode_lib->vba.MaxMaxVStartup[i][j],
+ mode_lib->vba.MaximumVStartup[i][j][k]);
+ }
+ }
+ }
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes = mode_lib->vba.NumberOfChannels
+ * dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
+ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
+ mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
+
+ dml32_CalculateMinAndMaxPrefetchMode(mode_lib->vba.AllowForPStateChangeOrStutterInVBlankFinal,
+ &mode_lib->vba.MinPrefetchMode,
+ &mode_lib->vba.MaxPrefetchMode);
+
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j)
+ mode_lib->vba.DCFCLKState[i][j] = mode_lib->vba.DCFCLKPerState[i];
+ }
+
+ /* Immediate Flip and MALL parameters */
+ mode_lib->vba.ImmediateFlipRequiredFinal = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.ImmediateFlipRequiredFinal = mode_lib->vba.ImmediateFlipRequiredFinal
+ || (mode_lib->vba.ImmediateFlipRequirement[k] == dm_immediate_flip_required);
+ }
+
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified =
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
+ || ((mode_lib->vba.ImmediateFlipRequirement[k]
+ != dm_immediate_flip_required)
+ && (mode_lib->vba.ImmediateFlipRequirement[k]
+ != dm_immediate_flip_not_required));
+ }
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified =
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
+ && mode_lib->vba.ImmediateFlipRequiredFinal;
+
+ mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
+ mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe ||
+ ((mode_lib->vba.HostVMEnable == true || mode_lib->vba.ImmediateFlipRequirement[k] !=
+ dm_immediate_flip_not_required) &&
+ (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame ||
+ mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe));
+ }
+
+ mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen =
+ mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen
+ || ((mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable
+ || mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_optimize)
+ && (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe))
+ || ((mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_disable
+ || mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_optimize)
+ && (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame));
+ }
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = false;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = true;
+ if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = true;
+ if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = true;
+ }
+ mode_lib->vba.InvalidCombinationOfMALLUseForPState = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod
+ != v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod) || (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod);
+
+ if (mode_lib->vba.UseMinimumRequiredDCFCLK == true) {
+ dml32_UseMinimumDCFCLK(
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.DRRDisplay,
+ mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->vba.MaxInterDCNTileRepeaters,
+ mode_lib->vba.MaxPrefetchMode,
+ mode_lib->vba.DRAMClockChangeLatency,
+ mode_lib->vba.FCLKChangeLatency,
+ mode_lib->vba.SREnterPlusExitTime,
+ mode_lib->vba.ReturnBusWidth,
+ mode_lib->vba.RoundTripPingLatencyCycles,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes,
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.MetaChunkSize,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.DynamicMetadataVMEnabled,
+ mode_lib->vba.ImmediateFlipRequiredFinal,
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
+ mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
+ mode_lib->vba.PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
+ mode_lib->vba.VTotal,
+ mode_lib->vba.VActive,
+ mode_lib->vba.DynamicMetadataTransmittedBytes,
+ mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired,
+ mode_lib->vba.Interlace,
+ mode_lib->vba.RequiredDPPCLK,
+ mode_lib->vba.RequiredDISPCLK,
+ mode_lib->vba.UrgLatency,
+ mode_lib->vba.NoOfDPP,
+ mode_lib->vba.ProjectedDCFCLKDeepSleep,
+ mode_lib->vba.MaximumVStartup,
+ mode_lib->vba.TotalNumberOfActiveDPP,
+ mode_lib->vba.TotalNumberOfDCCActiveDPP,
+ mode_lib->vba.dpte_group_bytes,
+ mode_lib->vba.PrefetchLinesY,
+ mode_lib->vba.PrefetchLinesC,
+ mode_lib->vba.swath_width_luma_ub_all_states,
+ mode_lib->vba.swath_width_chroma_ub_all_states,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrame,
+ mode_lib->vba.DPTEBytesPerRow,
+ mode_lib->vba.MetaRowBytes,
+ mode_lib->vba.DynamicMetadataEnable,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.DCFCLKPerState,
+
+ /* Output */
+ mode_lib->vba.DCFCLKState);
+ } // UseMinimumRequiredDCFCLK == true
+
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ mode_lib->vba.ReturnBWPerState[i][j] = dml32_get_return_bw_mbps(&mode_lib->vba.soc, i,
+ mode_lib->vba.HostVMEnable, mode_lib->vba.DCFCLKState[i][j],
+ mode_lib->vba.FabricClockPerState[i], mode_lib->vba.DRAMSpeedPerState[i]);
+ }
+ }
+
+ //Re-ordering Buffer Support Check
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024
+ / mode_lib->vba.ReturnBWPerState[i][j]
+ > (mode_lib->vba.RoundTripPingLatencyCycles + 32)
+ / mode_lib->vba.DCFCLKState[i][j]
+ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) {
+ mode_lib->vba.ROBSupport[i][j] = true;
+ } else {
+ mode_lib->vba.ROBSupport[i][j] = false;
+ }
+ }
+ }
+
+ //Vertical Active BW support check
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth = 0;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth += mode_lib->vba.ReadBandwidthLuma[k]
+ + mode_lib->vba.ReadBandwidthChroma[k];
+ }
+
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][j] =
+ dml_min3(mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKState[i][j]
+ * mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
+ mode_lib->vba.FabricClockPerState[i]
+ * mode_lib->vba.FabricDatapathToDCNDataReturn
+ * mode_lib->vba.MaxAveragePercentOfIdealFabricBWDisplayCanUseInNormalSystemOperation / 100,
+ mode_lib->vba.DRAMSpeedPerState[i]
+ * mode_lib->vba.NumberOfChannels
+ * mode_lib->vba.DRAMChannelWidth
+ * (i < 2 ? mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE : mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation) / 100);
+
+ if (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth
+ <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
+ mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] = true;
+ } else {
+ mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] = false;
+ }
+ }
+ }
+
+ /* Prefetch Check */
+
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+
+ mode_lib->vba.TimeCalc = 24 / mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j];
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k];
+ mode_lib->vba.swath_width_luma_ub_this_state[k] =
+ mode_lib->vba.swath_width_luma_ub_all_states[i][j][k];
+ mode_lib->vba.swath_width_chroma_ub_this_state[k] =
+ mode_lib->vba.swath_width_chroma_ub_all_states[i][j][k];
+ mode_lib->vba.SwathWidthYThisState[k] = mode_lib->vba.SwathWidthYAllStates[i][j][k];
+ mode_lib->vba.SwathWidthCThisState[k] = mode_lib->vba.SwathWidthCAllStates[i][j][k];
+ mode_lib->vba.SwathHeightYThisState[k] = mode_lib->vba.SwathHeightYAllStates[i][j][k];
+ mode_lib->vba.SwathHeightCThisState[k] = mode_lib->vba.SwathHeightCAllStates[i][j][k];
+ mode_lib->vba.UnboundedRequestEnabledThisState =
+ mode_lib->vba.UnboundedRequestEnabledAllStates[i][j];
+ mode_lib->vba.CompressedBufferSizeInkByteThisState =
+ mode_lib->vba.CompressedBufferSizeInkByteAllStates[i][j];
+ mode_lib->vba.DETBufferSizeInKByteThisState[k] =
+ mode_lib->vba.DETBufferSizeInKByteAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeYThisState[k] =
+ mode_lib->vba.DETBufferSizeYAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeCThisState[k] =
+ mode_lib->vba.DETBufferSizeCAllStates[i][j][k];
+ }
+
+ mode_lib->vba.VActiveBandwithSupport[i][j] = dml32_CalculateVActiveBandwithSupport(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.NoUrgentLatencyHiding,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.cursor_bw,
+ mode_lib->vba.meta_row_bandwidth_this_state,
+ mode_lib->vba.dpte_row_bandwidth_this_state,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.UrgentBurstFactorLuma,
+ mode_lib->vba.UrgentBurstFactorChroma,
+ mode_lib->vba.UrgentBurstFactorCursor);
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i,
+ mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i],
+ mode_lib->vba.DRAMSpeedPerState[i]);
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = 1;
+
+ if (mode_lib->vba.GPUVMEnable && mode_lib->vba.HostVMEnable)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j]
+ / v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState;
+
+ mode_lib->vba.ExtraLatency = dml32_CalculateExtraLatency(
+ mode_lib->vba.RoundTripPingLatencyCycles, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes,
+ mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.TotalNumberOfActiveDPP[i][j],
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j], mode_lib->vba.MetaChunkSize,
+ mode_lib->vba.ReturnBWPerState[i][j], mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMEnable, mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.NoOfDPPThisState, mode_lib->vba.dpte_group_bytes,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels);
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = mode_lib->vba.MinPrefetchMode;
+
+ mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j];
+
+ do {
+ mode_lib->vba.PrefetchModePerState[i][j] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState;
+ mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup;
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.TWait = dml32_CalculateTWait(
+ mode_lib->vba.PrefetchModePerState[i][j],
+ mode_lib->vba.UsesMALLForPStateChange[k],
+ mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->vba.DRRDisplay[k],
+ mode_lib->vba.DRAMClockChangeLatency,
+ mode_lib->vba.FCLKChangeLatency, mode_lib->vba.UrgLatency[i],
+ mode_lib->vba.SREnterPlusExitTime);
+
+ memset(&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull, 0, sizeof(DmlPipe));
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.PixelClock = mode_lib->vba.PixelClock[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HTotal = mode_lib->vba.HTotal[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HActive = mode_lib->vba.HActive[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ODMMode = mode_lib->vba.ODMCombineEnablePerState[i][k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ProgressiveToInterlaceUnitInOPP =
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
+
+ mode_lib->vba.NoTimeForPrefetch[i][j][k] =
+ dml32_CalculatePrefetchSchedule(
+ v,
+ k,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe,
+ v->DSCDelayPerState[i][k],
+ v->SwathWidthYThisState[k] / v->HRatio[k],
+ dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
+ v->MaximumVStartup[i][j][k],
+ v->UrgLatency[i],
+ v->ExtraLatency,
+ v->TimeCalc,
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k],
+ v->MetaRowBytes[i][j][k],
+ v->DPTEBytesPerRow[i][j][k],
+ v->PrefetchLinesY[i][j][k],
+ v->SwathWidthYThisState[k],
+ v->PrefillY[k],
+ v->MaxNumSwY[k],
+ v->PrefetchLinesC[i][j][k],
+ v->SwathWidthCThisState[k],
+ v->PrefillC[k],
+ v->MaxNumSwC[k],
+ v->swath_width_luma_ub_this_state[k],
+ v->swath_width_chroma_ub_this_state[k],
+ v->SwathHeightYThisState[k],
+ v->SwathHeightCThisState[k], v->TWait,
+ v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ ?
+ mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
+
+ /* Output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k],
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler[k],
+ &v->LineTimesForPrefetch[k],
+ &v->PrefetchBW[k],
+ &v->LinesForMetaPTE[k],
+ &v->LinesForMetaAndDPTERow[k],
+ &v->VRatioPreY[i][j][k],
+ &v->VRatioPreC[i][j][k],
+ &v->RequiredPrefetchPixelDataBWLuma[0][0][k],
+ &v->RequiredPrefetchPixelDataBWChroma[0][0][k],
+ &v->NoTimeForDynamicMetadata[i][j][k],
+ &v->Tno_bw[k],
+ &v->prefetch_vmrow_bw[k],
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // double *Tdmdl_vm
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // double *Tdmdl
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[2], // double *TSetup
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // unsigned int *VUpdateOffsetPix
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[3], // unsigned int *VUpdateWidthPix
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[4]); // unsigned int *VReadyOffsetPix
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ dml32_CalculateUrgentBurstFactor(
+ mode_lib->vba.UsesMALLForPStateChange[k],
+ mode_lib->vba.swath_width_luma_ub_this_state[k],
+ mode_lib->vba.swath_width_chroma_ub_this_state[k],
+ mode_lib->vba.SwathHeightYThisState[k],
+ mode_lib->vba.SwathHeightCThisState[k],
+ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ mode_lib->vba.UrgLatency[i], mode_lib->vba.CursorBufferSize,
+ mode_lib->vba.CursorWidth[k][0], mode_lib->vba.CursorBPP[k][0],
+ mode_lib->vba.VRatioPreY[i][j][k],
+ mode_lib->vba.VRatioPreC[i][j][k],
+ mode_lib->vba.BytePerPixelInDETY[k],
+ mode_lib->vba.BytePerPixelInDETC[k],
+ mode_lib->vba.DETBufferSizeYThisState[k],
+ mode_lib->vba.DETBufferSizeCThisState[k],
+ /* Output */
+ &mode_lib->vba.UrgentBurstFactorCursorPre[k],
+ &mode_lib->vba.UrgentBurstFactorLumaPre[k],
+ &mode_lib->vba.UrgentBurstFactorChroma[k],
+ &mode_lib->vba.NotUrgentLatencyHidingPre[k]);
+ }
+
+ {
+ dml32_CalculatePrefetchBandwithSupport(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.NotUrgentLatencyHidingPre,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0],
+ mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0],
+ mode_lib->vba.cursor_bw,
+ mode_lib->vba.meta_row_bandwidth_this_state,
+ mode_lib->vba.dpte_row_bandwidth_this_state,
+ mode_lib->vba.cursor_bw_pre,
+ mode_lib->vba.prefetch_vmrow_bw,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.UrgentBurstFactorLuma,
+ mode_lib->vba.UrgentBurstFactorChroma,
+ mode_lib->vba.UrgentBurstFactorCursor,
+ mode_lib->vba.UrgentBurstFactorLumaPre,
+ mode_lib->vba.UrgentBurstFactorChromaPre,
+ mode_lib->vba.UrgentBurstFactorCursorPre,
+
+ /* output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // Single *PrefetchBandwidth
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // Single *FractionOfUrgentBandwidth
+ &mode_lib->vba.PrefetchSupported[i][j]);
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.LineTimesForPrefetch[k]
+ < 2.0 || mode_lib->vba.LinesForMetaPTE[k] >= 32.0
+ || mode_lib->vba.LinesForMetaAndDPTERow[k] >= 16.0
+ || mode_lib->vba.NoTimeForPrefetch[i][j][k] == true) {
+ mode_lib->vba.PrefetchSupported[i][j] = false;
+ }
+ }
+
+ mode_lib->vba.DynamicMetadataSupported[i][j] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.NoTimeForDynamicMetadata[i][j][k] == true)
+ mode_lib->vba.DynamicMetadataSupported[i][j] = false;
+ }
+
+ mode_lib->vba.VRatioInPrefetchSupported[i][j] = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.VRatioPreY[i][j][k] > __DML_MAX_VRATIO_PRE__
+ || mode_lib->vba.VRatioPreC[i][j][k] > __DML_MAX_VRATIO_PRE__
+ || mode_lib->vba.NoTimeForPrefetch[i][j][k] == true) {
+ mode_lib->vba.VRatioInPrefetchSupported[i][j] = false;
+ }
+ }
+ mode_lib->vba.AnyLinesForVMOrRowTooLarge = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.LinesForMetaAndDPTERow[k] >= 16
+ || mode_lib->vba.LinesForMetaPTE[k] >= 32) {
+ mode_lib->vba.AnyLinesForVMOrRowTooLarge = true;
+ }
+ }
+
+ if (mode_lib->vba.PrefetchSupported[i][j] == true
+ && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) {
+ mode_lib->vba.BandwidthAvailableForImmediateFlip =
+ dml32_CalculateBandwidthAvailableForImmediateFlip(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0],
+ mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0],
+ mode_lib->vba.cursor_bw,
+ mode_lib->vba.cursor_bw_pre,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.UrgentBurstFactorLuma,
+ mode_lib->vba.UrgentBurstFactorChroma,
+ mode_lib->vba.UrgentBurstFactorCursor,
+ mode_lib->vba.UrgentBurstFactorLumaPre,
+ mode_lib->vba.UrgentBurstFactorChromaPre,
+ mode_lib->vba.UrgentBurstFactorCursorPre);
+
+ mode_lib->vba.TotImmediateFlipBytes = 0.0;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (!(mode_lib->vba.ImmediateFlipRequirement[k] ==
+ dm_immediate_flip_not_required)) {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes
+ + mode_lib->vba.NoOfDPP[i][j][k]
+ * mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k]
+ + mode_lib->vba.MetaRowBytes[i][j][k];
+ if (mode_lib->vba.use_one_row_for_frame_flip[i][j][k]) {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes + 2
+ * mode_lib->vba.DPTEBytesPerRow[i][j][k];
+ } else {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes
+ + mode_lib->vba.DPTEBytesPerRow[i][j][k];
+ }
+ }
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ dml32_CalculateFlipSchedule(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor,
+ mode_lib->vba.ExtraLatency,
+ mode_lib->vba.UrgLatency[i],
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k],
+ mode_lib->vba.MetaRowBytes[i][j][k],
+ mode_lib->vba.DPTEBytesPerRow[i][j][k],
+ mode_lib->vba.BandwidthAvailableForImmediateFlip,
+ mode_lib->vba.TotImmediateFlipBytes,
+ mode_lib->vba.SourcePixelFormat[k],
+ (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]),
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ mode_lib->vba.Tno_bw[k],
+ mode_lib->vba.DCCEnable[k],
+ mode_lib->vba.dpte_row_height[k],
+ mode_lib->vba.meta_row_height[k],
+ mode_lib->vba.dpte_row_height_chroma[k],
+ mode_lib->vba.meta_row_height_chroma[k],
+ mode_lib->vba.use_one_row_for_frame_flip[i][j][k], // 24
+
+ /* Output */
+ &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k],
+ &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k],
+ &mode_lib->vba.final_flip_bw[k],
+ &mode_lib->vba.ImmediateFlipSupportedForPipe[k]);
+ }
+
+ {
+ dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.ImmediateFlipRequirement,
+ mode_lib->vba.final_flip_bw,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0],
+ mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0],
+ mode_lib->vba.cursor_bw,
+ mode_lib->vba.meta_row_bandwidth_this_state,
+ mode_lib->vba.dpte_row_bandwidth_this_state,
+ mode_lib->vba.cursor_bw_pre,
+ mode_lib->vba.prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.UrgentBurstFactorLuma,
+ mode_lib->vba.UrgentBurstFactorChroma,
+ mode_lib->vba.UrgentBurstFactorCursor,
+ mode_lib->vba.UrgentBurstFactorLumaPre,
+ mode_lib->vba.UrgentBurstFactorChromaPre,
+ mode_lib->vba.UrgentBurstFactorCursorPre,
+
+ /* output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // Single *TotalBandwidth
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // Single *FractionOfUrgentBandwidth
+ &mode_lib->vba.ImmediateFlipSupportedForState[i][j]); // Boolean *ImmediateFlipBandwidthSupport
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (!(mode_lib->vba.ImmediateFlipRequirement[k]
+ == dm_immediate_flip_not_required)
+ && (mode_lib->vba.ImmediateFlipSupportedForPipe[k]
+ == false))
+ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false;
+ }
+ } else { // if prefetch not support, assume iflip not supported
+ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false;
+ }
+
+ if (mode_lib->vba.MaxVStartup <= __DML_VBA_MIN_VSTARTUP__
+ || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) {
+ mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState + 1;
+ } else {
+ mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1;
+ }
+ } while (!((mode_lib->vba.PrefetchSupported[i][j] == true
+ && mode_lib->vba.DynamicMetadataSupported[i][j] == true
+ && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true &&
+ // consider flip support is okay if when there is no hostvm and the
+ // user does't require a iflip OR the flip bw is ok
+ // If there is hostvm, DCN needs to support iflip for invalidation
+ ((mode_lib->vba.HostVMEnable == false
+ && !mode_lib->vba.ImmediateFlipRequiredFinal)
+ || mode_lib->vba.ImmediateFlipSupportedForState[i][j] == true))
+ || (mode_lib->vba.NextMaxVStartup == mode_lib->vba.MaxMaxVStartup[i][j]
+ && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode)));
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.use_one_row_for_frame_this_state[k] =
+ mode_lib->vba.use_one_row_for_frame[i][j][k];
+ }
+
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.UrgentLatency = mode_lib->vba.UrgLatency[i];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.ExtraLatency = mode_lib->vba.ExtraLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SRExitTime = mode_lib->vba.SRExitTime;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SMNLatency = mode_lib->vba.SMNLatency;
+
+ {
+ dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ v,
+ v->PrefetchModePerState[i][j],
+ v->DCFCLKState[i][j],
+ v->ReturnBWPerState[i][j],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters,
+ v->SOCCLKPerState[i],
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ v->DETBufferSizeYThisState,
+ v->DETBufferSizeCThisState,
+ v->SwathHeightYThisState,
+ v->SwathHeightCThisState,
+ v->SwathWidthYThisState, // 24
+ v->SwathWidthCThisState,
+ v->NoOfDPPThisState,
+ v->BytePerPixelInDETY,
+ v->BytePerPixelInDETC,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler,
+ v->UnboundedRequestEnabledThisState,
+ v->CompressedBufferSizeInkByteThisState,
+
+ /* Output */
+ &v->DRAMClockChangeSupport[i][j],
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[0], // double *MaxActiveDRAMClockChangeLatencySupported
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // Long SubViewportLinesNeededInMALL[]
+ &v->FCLKChangeSupport[i][j],
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[1], // double *MinActiveFCLKChangeLatencySupported
+ &mode_lib->vba.USRRetrainingSupport[i][j],
+ mode_lib->vba.ActiveDRAMClockChangeLatencyMarginPerState[i][j]);
+ }
+ }
+ } // End of Prefetch Check
+
+ /*Cursor Support Check*/
+ mode_lib->vba.CursorSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.CursorWidth[k][0] > 0.0) {
+ if (mode_lib->vba.CursorBPP[k][0] == 64 && mode_lib->vba.Cursor64BppSupport == false)
+ mode_lib->vba.CursorSupport = false;
+ }
+ }
+
+ /*Valid Pitch Check*/
+ mode_lib->vba.PitchSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.AlignedYPitch[k] = dml_ceil(
+ dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.SurfaceWidthY[k]),
+ mode_lib->vba.MacroTileWidthY[k]);
+ if (mode_lib->vba.DCCEnable[k] == true) {
+ mode_lib->vba.AlignedDCCMetaPitchY[k] = dml_ceil(
+ dml_max(mode_lib->vba.DCCMetaPitchY[k], mode_lib->vba.SurfaceWidthY[k]),
+ 64.0 * mode_lib->vba.Read256BlockWidthY[k]);
+ } else {
+ mode_lib->vba.AlignedDCCMetaPitchY[k] = mode_lib->vba.DCCMetaPitchY[k];
+ }
+ if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) {
+ mode_lib->vba.AlignedCPitch[k] = dml_ceil(
+ dml_max(mode_lib->vba.PitchC[k], mode_lib->vba.SurfaceWidthC[k]),
+ mode_lib->vba.MacroTileWidthC[k]);
+ if (mode_lib->vba.DCCEnable[k] == true) {
+ mode_lib->vba.AlignedDCCMetaPitchC[k] = dml_ceil(
+ dml_max(mode_lib->vba.DCCMetaPitchC[k],
+ mode_lib->vba.SurfaceWidthC[k]),
+ 64.0 * mode_lib->vba.Read256BlockWidthC[k]);
+ } else {
+ mode_lib->vba.AlignedDCCMetaPitchC[k] = mode_lib->vba.DCCMetaPitchC[k];
+ }
+ } else {
+ mode_lib->vba.AlignedCPitch[k] = mode_lib->vba.PitchC[k];
+ mode_lib->vba.AlignedDCCMetaPitchC[k] = mode_lib->vba.DCCMetaPitchC[k];
+ }
+ if (mode_lib->vba.AlignedYPitch[k] > mode_lib->vba.PitchY[k]
+ || mode_lib->vba.AlignedCPitch[k] > mode_lib->vba.PitchC[k]
+ || mode_lib->vba.AlignedDCCMetaPitchY[k] > mode_lib->vba.DCCMetaPitchY[k]
+ || mode_lib->vba.AlignedDCCMetaPitchC[k] > mode_lib->vba.DCCMetaPitchC[k]) {
+ mode_lib->vba.PitchSupport = false;
+ }
+ }
+
+ mode_lib->vba.ViewportExceedsSurface = false;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.ViewportWidth[k] > mode_lib->vba.SurfaceWidthY[k]
+ || mode_lib->vba.ViewportHeight[k] > mode_lib->vba.SurfaceHeightY[k]) {
+ mode_lib->vba.ViewportExceedsSurface = true;
+ if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_8
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe) {
+ if (mode_lib->vba.ViewportWidthChroma[k] > mode_lib->vba.SurfaceWidthC[k]
+ || mode_lib->vba.ViewportHeightChroma[k]
+ > mode_lib->vba.SurfaceHeightC[k]) {
+ mode_lib->vba.ViewportExceedsSurface = true;
+ }
+ }
+ }
+ }
+
+ /*Mode Support, Voltage State and SOC Configuration*/
+ mode_support_configuration(v, mode_lib);
+
+ MaximumMPCCombine = 0;
+
+ for (i = v->soc.num_states; i >= 0; i--) {
+ if (i == v->soc.num_states || mode_lib->vba.ModeSupport[i][0] == true ||
+ mode_lib->vba.ModeSupport[i][1] == true) {
+ mode_lib->vba.VoltageLevel = i;
+ mode_lib->vba.ModeIsSupported = mode_lib->vba.ModeSupport[i][0] == true
+ || mode_lib->vba.ModeSupport[i][1] == true;
+
+ if (mode_lib->vba.ModeSupport[i][0] == true)
+ MaximumMPCCombine = 0;
+ else
+ MaximumMPCCombine = 1;
+ }
+ }
+
+ mode_lib->vba.ImmediateFlipSupport =
+ mode_lib->vba.ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.UnboundedRequestEnabled =
+ mode_lib->vba.UnboundedRequestEnabledAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.CompressedBufferSizeInkByte =
+ mode_lib->vba.CompressedBufferSizeInkByteAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; // Not used, informational
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.MPCCombineEnable[k] =
+ mode_lib->vba.MPCCombine[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.DPPPerPlane[k] = mode_lib->vba.NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.SwathHeightY[k] =
+ mode_lib->vba.SwathHeightYAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.SwathHeightC[k] =
+ mode_lib->vba.SwathHeightCAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.DETBufferSizeInKByte[k] =
+ mode_lib->vba.DETBufferSizeInKByteAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.DETBufferSizeY[k] =
+ mode_lib->vba.DETBufferSizeYAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.DETBufferSizeC[k] =
+ mode_lib->vba.DETBufferSizeCAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.OutputType[k] = mode_lib->vba.OutputTypePerState[mode_lib->vba.VoltageLevel][k];
+ mode_lib->vba.OutputRate[k] = mode_lib->vba.OutputRatePerState[mode_lib->vba.VoltageLevel][k];
+ }
+
+ mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
+ mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
+ mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
+ mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.DISPCLK = mode_lib->vba.RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.maxMpcComb = MaximumMPCCombine;
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ mode_lib->vba.ODMCombineEnabled[k] =
+ mode_lib->vba.ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k];
+ } else {
+ mode_lib->vba.ODMCombineEnabled[k] = dm_odm_combine_mode_disabled;
+ }
+
+ mode_lib->vba.DSCEnabled[k] = mode_lib->vba.RequiresDSC[mode_lib->vba.VoltageLevel][k];
+ mode_lib->vba.FECEnable[k] = mode_lib->vba.RequiresFEC[mode_lib->vba.VoltageLevel][k];
+ mode_lib->vba.OutputBpp[k] = mode_lib->vba.OutputBppPerState[mode_lib->vba.VoltageLevel][k];
+ }
+
+ mode_lib->vba.UrgentWatermark = mode_lib->vba.Watermark.UrgentWatermark;
+ mode_lib->vba.StutterEnterPlusExitWatermark = mode_lib->vba.Watermark.StutterEnterPlusExitWatermark;
+ mode_lib->vba.StutterExitWatermark = mode_lib->vba.Watermark.StutterExitWatermark;
+ mode_lib->vba.WritebackDRAMClockChangeWatermark = mode_lib->vba.Watermark.WritebackDRAMClockChangeWatermark;
+ mode_lib->vba.DRAMClockChangeWatermark = mode_lib->vba.Watermark.DRAMClockChangeWatermark;
+ mode_lib->vba.UrgentLatency = mode_lib->vba.UrgLatency[mode_lib->vba.VoltageLevel];
+ mode_lib->vba.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+
+ /* VBA has Error type to Error Msg output here, but not necessary for DML-C */
+} // ModeSupportAndSystemConfigurationFull
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h
new file mode 100644
index 000000000000..f82e14cd9d8a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML32_DISPLAY_MODE_VBA_H__
+#define __DML32_DISPLAY_MODE_VBA_H__
+
+#include "../display_mode_enums.h"
+
+// To enable a lot of debug msg
+//#define __DML_VBA_DEBUG__
+// For DML-C changes that hasn't been propagated to VBA yet
+//#define __DML_VBA_ALLOW_DELTA__
+
+// Move these to ip parameters/constant
+// At which vstartup the DML start to try if the mode can be supported
+#define __DML_VBA_MIN_VSTARTUP__ 9
+
+// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
+#define __DML_ARB_TO_RET_DELAY__ 7 + 95
+
+// fudge factor for min dcfclk calclation
+#define __DML_MIN_DCFCLK_FACTOR__ 1.15
+
+// Prefetch schedule max vratio
+#define __DML_MAX_VRATIO_PRE__ 4.0
+
+#define BPP_INVALID 0
+#define BPP_BLENDED_PIPE 0xffffffff
+
+#define MEM_STROBE_FREQ_MHZ 1600
+#define MEM_STROBE_MAX_DELIVERY_TIME_US 60.0
+
+struct display_mode_lib;
+
+void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib);
+void dml32_recalculate(struct display_mode_lib *mode_lib);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
new file mode 100644
index 000000000000..635fc54338fa
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -0,0 +1,6231 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "display_mode_vba_util_32.h"
+#include "../dml_inline_defs.h"
+#include "display_mode_vba_32.h"
+#include "../display_mode_lib.h"
+
+#define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
+
+unsigned int dml32_dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum output_format_class pixelFormat,
+ enum output_encoder_class Output)
+{
+ // valid bpc = source bits per component in the set of {8, 10, 12}
+ // valid bpp = increments of 1/16 of a bit
+ // min = 6/7/8 in N420/N422/444, respectively
+ // max = such that compression is 1:1
+ //valid sliceWidth = number of pixels per slice line,
+ // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
+ //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
+ //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
+
+ // fixed value
+ unsigned int rcModelSize = 8192;
+
+ // N422/N420 operate at 2 pixels per clock
+ unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
+ Delay, pixels;
+
+ if (pixelFormat == dm_420)
+ pixelsPerClock = 2;
+ else if (pixelFormat == dm_n422)
+ pixelsPerClock = 2;
+ // #all other modes operate at 1 pixel per clock
+ else
+ pixelsPerClock = 1;
+
+ //initial transmit delay as per PPS
+ initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
+
+ //compute ssm delay
+ if (bpc == 8)
+ D = 81;
+ else if (bpc == 10)
+ D = 89;
+ else
+ D = 113;
+
+ //divide by pixel per cycle to compute slice width as seen by DSC
+ w = sliceWidth / pixelsPerClock;
+
+ //422 mode has an additional cycle of delay
+ if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
+ s = 0;
+ else
+ s = 1;
+
+ //main calculation for the dscce
+ ix = initalXmitDelay + 45;
+ wx = (w + 2) / 3;
+ p = 3 * wx - w;
+ l0 = ix / w;
+ a = ix + p * l0;
+ ax = (a + 2) / 3 + D + 6 + 1;
+ L = (ax + wx - 1) / wx;
+ if ((ix % w) == 0 && p != 0)
+ lstall = 1;
+ else
+ lstall = 0;
+ Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
+
+ //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
+ pixels = Delay * 3 * pixelsPerClock;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: bpc: %d\n", __func__, bpc);
+ dml_print("DML::%s: BPP: %f\n", __func__, BPP);
+ dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
+ dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
+ dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
+ dml_print("DML::%s: Output: %d\n", __func__, Output);
+ dml_print("DML::%s: pixels: %d\n", __func__, pixels);
+#endif
+
+ return pixels;
+}
+
+unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
+{
+ unsigned int Delay = 0;
+
+ if (pixelFormat == dm_420) {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 0;
+ // dscc - input deserializer
+ Delay = Delay + 3;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 2;
+ // dscc - input cdc fifo
+ Delay = Delay + 12;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 13;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 7;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 3;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // sft
+ Delay = Delay + 1;
+ } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 1;
+ // dscc - input deserializer
+ Delay = Delay + 5;
+ // dscc - input cdc fifo
+ Delay = Delay + 25;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 10;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // sft
+ Delay = Delay + 1;
+ } else {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 0;
+ // dscc - input deserializer
+ Delay = Delay + 3;
+ // dscc - input cdc fifo
+ Delay = Delay + 12;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 7;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // sft
+ Delay = Delay + 1;
+ }
+
+ return Delay;
+}
+
+
+bool IsVertical(enum dm_rotation_angle Scan)
+{
+ bool is_vert = false;
+
+ if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
+ is_vert = true;
+ else
+ is_vert = false;
+ return is_vert;
+}
+
+void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
+ double HRatio,
+ double HRatioChroma,
+ double VRatio,
+ double VRatioChroma,
+ double MaxDCHUBToPSCLThroughput,
+ double MaxPSCLToLBThroughput,
+ double PixelClock,
+ enum source_format_class SourcePixelFormat,
+ unsigned int HTaps,
+ unsigned int HTapsChroma,
+ unsigned int VTaps,
+ unsigned int VTapsChroma,
+
+ /* output */
+ double *PSCL_THROUGHPUT,
+ double *PSCL_THROUGHPUT_CHROMA,
+ double *DPPCLKUsingSingleDPP)
+{
+ double DPPCLKUsingSingleDPPLuma;
+ double DPPCLKUsingSingleDPPChroma;
+
+ if (HRatio > 1) {
+ *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
+ dml_ceil((double) HTaps / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+
+ DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
+ *PSCL_THROUGHPUT, 1);
+
+ if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
+ DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
+
+ if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
+ SourcePixelFormat != dm_rgbe_alpha)) {
+ *PSCL_THROUGHPUT_CHROMA = 0;
+ *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
+ } else {
+ if (HRatioChroma > 1) {
+ *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
+ HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+ DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
+ HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
+ if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
+ DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
+ *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
+ }
+}
+
+void dml32_CalculateBytePerPixelAndBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+
+ /* Output */
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC,
+ unsigned int *MacroTileHeightY,
+ unsigned int *MacroTileHeightC,
+ unsigned int *MacroTileWidthY,
+ unsigned int *MacroTileWidthC)
+{
+ if (SourcePixelFormat == dm_444_64) {
+ *BytePerPixelDETY = 8;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 8;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_16) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 1;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 1;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 2;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 2;
+ } else if (SourcePixelFormat == dm_420_12) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 4;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ } else {
+ *BytePerPixelDETY = 4.0 / 3;
+ *BytePerPixelDETC = 8.0 / 3;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
+ dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
+ dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY);
+ dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC);
+#endif
+ if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
+ || SourcePixelFormat == dm_444_16
+ || SourcePixelFormat == dm_444_8
+ || SourcePixelFormat == dm_mono_16
+ || SourcePixelFormat == dm_mono_8
+ || SourcePixelFormat == dm_rgbe)) {
+ if (SurfaceTiling == dm_sw_linear)
+ *BlockHeight256BytesY = 1;
+ else if (SourcePixelFormat == dm_444_64)
+ *BlockHeight256BytesY = 4;
+ else if (SourcePixelFormat == dm_444_8)
+ *BlockHeight256BytesY = 16;
+ else
+ *BlockHeight256BytesY = 8;
+
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockHeight256BytesC = 0;
+ *BlockWidth256BytesC = 0;
+ } else {
+ if (SurfaceTiling == dm_sw_linear) {
+ *BlockHeight256BytesY = 1;
+ *BlockHeight256BytesC = 1;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 16;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BlockHeight256BytesY = 16;
+ *BlockHeight256BytesC = 8;
+ } else {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 8;
+ }
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY);
+ dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
+ dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC);
+ dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
+#endif
+
+ if (SurfaceTiling == dm_sw_linear) {
+ *MacroTileHeightY = *BlockHeight256BytesY;
+ *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
+ *MacroTileHeightC = *BlockHeight256BytesC;
+ if (*MacroTileHeightC == 0)
+ *MacroTileWidthC = 0;
+ else
+ *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
+ } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
+ SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
+ *MacroTileHeightY = 16 * *BlockHeight256BytesY;
+ *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
+ *MacroTileHeightC = 16 * *BlockHeight256BytesC;
+ if (*MacroTileHeightC == 0)
+ *MacroTileWidthC = 0;
+ else
+ *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
+ } else {
+ *MacroTileHeightY = 32 * *BlockHeight256BytesY;
+ *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
+ *MacroTileHeightC = 32 * *BlockHeight256BytesC;
+ if (*MacroTileHeightC == 0)
+ *MacroTileWidthC = 0;
+ else
+ *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY);
+ dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
+ dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC);
+ dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
+#endif
+} // CalculateBytePerPixelAndBlockSizes
+
+void dml32_CalculateSwathAndDETConfiguration(
+ unsigned int DETSizeOverride[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int MaxTotalDETInKByte,
+ unsigned int MinCompressedBufferSizeInKByte,
+ double ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int nomDETInKByte,
+ enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ unsigned int PixelChunkSizeKBytes,
+ unsigned int ROBSizeKBytes,
+ unsigned int CompressedBufferSegmentSizeInkByteFinal,
+ enum output_encoder_class Output[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double MaximumSwathWidthLuma[],
+ double MaximumSwathWidthChroma[],
+ enum dm_rotation_angle SourceRotation[],
+ bool ViewportStationary[],
+ enum source_format_class SourcePixelFormat[],
+ enum dm_swizzle_mode SurfaceTiling[],
+ unsigned int ViewportWidth[],
+ unsigned int ViewportHeight[],
+ unsigned int ViewportXStart[],
+ unsigned int ViewportYStart[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ enum odm_combine_mode ODMMode[],
+ unsigned int BlendingAndTiming[],
+ unsigned int BytePerPixY[],
+ unsigned int BytePerPixC[],
+ double BytePerPixDETY[],
+ double BytePerPixDETC[],
+ unsigned int HActive[],
+ double HRatio[],
+ double HRatioChroma[],
+ unsigned int DPPPerSurface[],
+
+ /* Output */
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ double SwathWidth[],
+ double SwathWidthChroma[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int DETBufferSizeInKByte[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ bool *UnboundedRequestEnabled,
+ unsigned int *CompressedBufferSizeInkByte,
+ unsigned int *CompBufReservedSpaceKBytes,
+ bool *CompBufReservedSpaceNeedAdjustment,
+ bool ViewportSizeSupportPerSurface[],
+ bool *ViewportSizeSupport)
+{
+ unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
+ unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpSwathSizeBytesY;
+ unsigned int RoundedUpSwathSizeBytesC;
+ double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
+ double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
+ unsigned int k;
+ unsigned int TotalActiveDPP = 0;
+ bool NoChromaSurfaces = true;
+ unsigned int DETBufferSizeInKByteForSwathCalculation;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
+ dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
+ dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
+#endif
+ dml32_CalculateSwathWidth(ForceSingleDPP,
+ NumberOfActiveSurfaces,
+ SourcePixelFormat,
+ SourceRotation,
+ ViewportStationary,
+ ViewportWidth,
+ ViewportHeight,
+ ViewportXStart,
+ ViewportYStart,
+ ViewportXStartC,
+ ViewportYStartC,
+ SurfaceWidthY,
+ SurfaceWidthC,
+ SurfaceHeightY,
+ SurfaceHeightC,
+ ODMMode,
+ BytePerPixY,
+ BytePerPixC,
+ Read256BytesBlockHeightY,
+ Read256BytesBlockHeightC,
+ Read256BytesBlockWidthY,
+ Read256BytesBlockWidthC,
+ BlendingAndTiming,
+ HActive,
+ HRatio,
+ DPPPerSurface,
+
+ /* Output */
+ SwathWidthdoubleDPP,
+ SwathWidthdoubleDPPChroma,
+ SwathWidth,
+ SwathWidthChroma,
+ MaximumSwathHeightY,
+ MaximumSwathHeightC,
+ swath_width_luma_ub,
+ swath_width_chroma_ub);
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
+ RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
+ dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
+ dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
+ RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
+ dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
+ RoundedUpMaxSwathSizeBytesC[k]);
+#endif
+
+ if (SourcePixelFormat[k] == dm_420_10) {
+ RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
+ RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
+ if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
+ SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
+ NoChromaSurfaces = false;
+ }
+ }
+
+ // By default, just set the reserved space to 2 pixel chunks size
+ *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
+
+ // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
+ // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
+ // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
+ *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
+
+ if (*CompBufReservedSpaceNeedAdjustment == 1) {
+ *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
+ }
+
+ #ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes);
+ dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
+ #endif
+
+ *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+
+ dml32_CalculateDETBufferSize(DETSizeOverride,
+ UseMALLForPStateChange,
+ ForceSingleDPP,
+ NumberOfActiveSurfaces,
+ *UnboundedRequestEnabled,
+ nomDETInKByte,
+ MaxTotalDETInKByte,
+ ConfigReturnBufferSizeInKByte,
+ MinCompressedBufferSizeInKByte,
+ CompressedBufferSegmentSizeInkByteFinal,
+ SourcePixelFormat,
+ ReadBandwidthLuma,
+ ReadBandwidthChroma,
+ RoundedUpMaxSwathSizeBytesY,
+ RoundedUpMaxSwathSizeBytesC,
+ DPPPerSurface,
+
+ /* Output */
+ DETBufferSizeInKByte, // per hubp pipe
+ CompressedBufferSizeInkByte);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
+ dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
+ dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
+#endif
+
+ *ViewportSizeSupport = true;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+ DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
+ dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
+ DETBufferSizeInKByteForSwathCalculation);
+#endif
+
+ if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+ RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+ RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
+ } else {
+ SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
+ }
+
+ if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
+ || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
+ SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
+ *ViewportSizeSupport = false;
+ ViewportSizeSupportPerSurface[k] = false;
+ } else {
+ ViewportSizeSupportPerSurface[k] = true;
+ }
+
+ if (SwathHeightC[k] == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
+#endif
+ DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
+ DETBufferSizeC[k] = 0;
+ } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
+#endif
+ DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
+ DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
+ } else {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
+#endif
+ DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
+ DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
+ dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
+ k, RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
+ k, RoundedUpMaxSwathSizeBytesC[k]);
+ dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
+ dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
+ dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
+ dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
+ dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
+ ViewportSizeSupportPerSurface[k]);
+#endif
+
+ }
+} // CalculateSwathAndDETConfiguration
+
+void dml32_CalculateSwathWidth(
+ bool ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ enum source_format_class SourcePixelFormat[],
+ enum dm_rotation_angle SourceRotation[],
+ bool ViewportStationary[],
+ unsigned int ViewportWidth[],
+ unsigned int ViewportHeight[],
+ unsigned int ViewportXStart[],
+ unsigned int ViewportYStart[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ enum odm_combine_mode ODMMode[],
+ unsigned int BytePerPixY[],
+ unsigned int BytePerPixC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ unsigned int BlendingAndTiming[],
+ unsigned int HActive[],
+ double HRatio[],
+ unsigned int DPPPerSurface[],
+
+ /* Output */
+ double SwathWidthdoubleDPPY[],
+ double SwathWidthdoubleDPPC[],
+ double SwathWidthY[], // per-pipe
+ double SwathWidthC[], // per-pipe
+ unsigned int MaximumSwathHeightY[],
+ unsigned int MaximumSwathHeightC[],
+ unsigned int swath_width_luma_ub[], // per-pipe
+ unsigned int swath_width_chroma_ub[]) // per-pipe
+{
+ unsigned int k, j;
+ enum odm_combine_mode MainSurfaceODMMode;
+
+ unsigned int surface_width_ub_l;
+ unsigned int surface_height_ub_l;
+ unsigned int surface_width_ub_c = 0;
+ unsigned int surface_height_ub_c = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
+ dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
+#endif
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (!IsVertical(SourceRotation[k]))
+ SwathWidthdoubleDPPY[k] = ViewportWidth[k];
+ else
+ SwathWidthdoubleDPPY[k] = ViewportHeight[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
+ dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
+#endif
+
+ MainSurfaceODMMode = ODMMode[k];
+ for (j = 0; j < NumberOfActiveSurfaces; ++j) {
+ if (BlendingAndTiming[k] == j)
+ MainSurfaceODMMode = ODMMode[j];
+ }
+
+ if (ForceSingleDPP) {
+ SwathWidthY[k] = SwathWidthdoubleDPPY[k];
+ } else {
+ if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
+ SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
+ dml_round(HActive[k] / 4.0 * HRatio[k]));
+ } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
+ SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
+ dml_round(HActive[k] / 2.0 * HRatio[k]));
+ } else if (DPPPerSurface[k] == 2) {
+ SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
+ } else {
+ SwathWidthY[k] = SwathWidthdoubleDPPY[k];
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
+ dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
+ dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
+ dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
+ dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
+#endif
+
+ if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
+ SourcePixelFormat[k] == dm_420_12) {
+ SwathWidthC[k] = SwathWidthY[k] / 2;
+ SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
+ } else {
+ SwathWidthC[k] = SwathWidthY[k];
+ SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
+ }
+
+ if (ForceSingleDPP == true) {
+ SwathWidthY[k] = SwathWidthdoubleDPPY[k];
+ SwathWidthC[k] = SwathWidthdoubleDPPC[k];
+ }
+
+ surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
+ surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
+
+ if (!IsVertical(SourceRotation[k])) {
+ MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
+ dml_floor(ViewportXStart[k] +
+ SwathWidthY[k] +
+ Read256BytesBlockWidthY[k] - 1,
+ Read256BytesBlockWidthY[k]) -
+ dml_floor(ViewportXStart[k],
+ Read256BytesBlockWidthY[k]));
+ } else {
+ swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
+ dml_ceil(SwathWidthY[k] - 1,
+ Read256BytesBlockWidthY[k]) +
+ Read256BytesBlockWidthY[k]);
+ }
+ if (BytePerPixC[k] > 0) {
+ surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
+ dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
+ Read256BytesBlockWidthC[k] - 1,
+ Read256BytesBlockWidthC[k]) -
+ dml_floor(ViewportXStartC[k],
+ Read256BytesBlockWidthC[k]));
+ } else {
+ swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
+ dml_ceil(SwathWidthC[k] - 1,
+ Read256BytesBlockWidthC[k]) +
+ Read256BytesBlockWidthC[k]);
+ }
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ } else {
+ MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
+
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
+ SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
+ Read256BytesBlockHeightY[k]) -
+ dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
+ } else {
+ swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
+ Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
+ }
+ if (BytePerPixC[k] > 0) {
+ surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
+ dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
+ Read256BytesBlockHeightC[k] - 1,
+ Read256BytesBlockHeightC[k]) -
+ dml_floor(ViewportYStartC[k],
+ Read256BytesBlockHeightC[k]));
+ } else {
+ swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
+ dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
+ Read256BytesBlockHeightC[k]);
+ }
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
+ dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
+ dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
+ dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
+ dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
+ dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
+ dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
+ dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
+ dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
+ dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
+ dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
+ dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
+#endif
+
+ }
+} // CalculateSwathWidth
+
+bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ unsigned int TotalNumberOfActiveDPP,
+ bool NoChroma,
+ enum output_encoder_class Output,
+ enum dm_swizzle_mode SurfaceTiling,
+ bool CompBufReservedSpaceNeedAdjustment,
+ bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
+{
+ bool ret_val = false;
+
+ ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
+ TotalNumberOfActiveDPP == 1 && NoChroma);
+ if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
+ ret_val = false;
+
+ if (SurfaceTiling == dm_sw_linear)
+ ret_val = false;
+
+ if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
+ ret_val = false;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment);
+ dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+ dml_print("DML::%s: ret_val = %d\n", __func__, ret_val);
+#endif
+
+ return (ret_val);
+}
+
+void dml32_CalculateDETBufferSize(
+ unsigned int DETSizeOverride[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ bool UnboundedRequestEnabled,
+ unsigned int nomDETInKByte,
+ unsigned int MaxTotalDETInKByte,
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int MinCompressedBufferSizeInKByte,
+ unsigned int CompressedBufferSegmentSizeInkByteFinal,
+ enum source_format_class SourcePixelFormat[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int RoundedUpMaxSwathSizeBytesY[],
+ unsigned int RoundedUpMaxSwathSizeBytesC[],
+ unsigned int DPPPerSurface[],
+ /* Output */
+ unsigned int DETBufferSizeInKByte[],
+ unsigned int *CompressedBufferSizeInkByte)
+{
+ unsigned int DETBufferSizePoolInKByte;
+ unsigned int NextDETBufferPieceInKByte;
+ bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
+ bool NextPotentialSurfaceToAssignDETPieceFound;
+ unsigned int NextSurfaceToAssignDETPiece;
+ double TotalBandwidth;
+ double BandwidthOfSurfacesNotAssignedDETPiece;
+ unsigned int max_minDET;
+ unsigned int minDET;
+ unsigned int minDET_pipe;
+ unsigned int j, k;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
+ dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
+ dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
+ dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
+ dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
+ dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
+ CompressedBufferSegmentSizeInkByteFinal);
+#endif
+
+ // Note: Will use default det size if that fits 2 swaths
+ if (UnboundedRequestEnabled) {
+ if (DETSizeOverride[0] > 0) {
+ DETBufferSizeInKByte[0] = DETSizeOverride[0];
+ } else {
+ DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
+ ((double) RoundedUpMaxSwathSizeBytesY[0] +
+ (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
+ }
+ *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
+ } else {
+ DETBufferSizePoolInKByte = MaxTotalDETInKByte;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ DETBufferSizeInKByte[k] = nomDETInKByte;
+ if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
+ SourcePixelFormat[k] == dm_420_12) {
+ max_minDET = nomDETInKByte - 64;
+ } else {
+ max_minDET = nomDETInKByte;
+ }
+ minDET = 128;
+ minDET_pipe = 0;
+
+ // add DET resource until can hold 2 full swaths
+ while (minDET <= max_minDET && minDET_pipe == 0) {
+ if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
+ (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
+ minDET_pipe = minDET;
+ minDET = minDET + 64;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET);
+ dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET);
+ dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
+ RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
+ RoundedUpMaxSwathSizeBytesC[k]);
+#endif
+
+ if (minDET_pipe == 0) {
+ minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
+ (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
+ __func__, k, minDET_pipe);
+#endif
+ }
+
+ if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
+ DETBufferSizeInKByte[k] = 0;
+ } else if (DETSizeOverride[k] > 0) {
+ DETBufferSizeInKByte[k] = DETSizeOverride[k];
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
+ (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
+ } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
+ DETBufferSizeInKByte[k] = minDET_pipe;
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
+ (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
+ dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
+ dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
+#endif
+ }
+
+ TotalBandwidth = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
+ TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
+ for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
+ dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
+ dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
+ dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
+#endif
+ BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+ if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
+ DETPieceAssignedToThisSurfaceAlready[k] = true;
+ } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
+ (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
+ ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
+ DETPieceAssignedToThisSurfaceAlready[k] = true;
+ BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
+ ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
+ } else {
+ DETPieceAssignedToThisSurfaceAlready[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
+ DETPieceAssignedToThisSurfaceAlready[k]);
+ dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
+ BandwidthOfSurfacesNotAssignedDETPiece);
+#endif
+ }
+
+ for (j = 0; j < NumberOfActiveSurfaces; ++j) {
+ NextPotentialSurfaceToAssignDETPieceFound = false;
+ NextSurfaceToAssignDETPiece = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
+ ReadBandwidthLuma[k]);
+ dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
+ ReadBandwidthChroma[k]);
+ dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
+ ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
+ ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
+ NextSurfaceToAssignDETPiece);
+#endif
+ if (!DETPieceAssignedToThisSurfaceAlready[k] &&
+ (!NextPotentialSurfaceToAssignDETPieceFound ||
+ ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
+ ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
+ NextSurfaceToAssignDETPiece = k;
+ NextPotentialSurfaceToAssignDETPieceFound = true;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
+ __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
+ dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
+ __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
+#endif
+ }
+
+ if (NextPotentialSurfaceToAssignDETPieceFound) {
+ // Note: To show the banker's rounding behavior in VBA and also the fact
+ // that the DET buffer size varies due to precision issue
+ //
+ //double tmp1 = ((double) DETBufferSizePoolInKByte *
+ // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
+ // BandwidthOfSurfacesNotAssignedDETPiece /
+ // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+ //double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
+ // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
+ //BandwidthOfSurfacesNotAssignedDETPiece /
+ // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+ //
+ //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
+ //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
+
+ NextDETBufferPieceInKByte = dml_min(
+ dml_round((double) DETBufferSizePoolInKByte *
+ (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
+ BandwidthOfSurfacesNotAssignedDETPiece /
+ ((ForceSingleDPP ? 1 :
+ DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
+ (ForceSingleDPP ? 1 :
+ DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
+ dml_floor((double) DETBufferSizePoolInKByte,
+ (ForceSingleDPP ? 1 :
+ DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+
+ // Above calculation can assign the entire DET buffer allocation to a single pipe.
+ // We should limit the per-pipe DET size to the nominal / max per pipe.
+ if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
+ if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
+ nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
+ NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
+ } else {
+ // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
+ // already has the max per-pipe value
+ NextDETBufferPieceInKByte = 0;
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
+ DETBufferSizePoolInKByte);
+ dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
+ NextSurfaceToAssignDETPiece);
+ dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
+ NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
+ NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
+ __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
+ dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
+ NextDETBufferPieceInKByte);
+ dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
+ __func__, j, NextSurfaceToAssignDETPiece,
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
+#endif
+
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
+ + NextDETBufferPieceInKByte
+ / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
+#endif
+
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
+ DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
+ BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
+ (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ }
+ }
+ *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
+ }
+ *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
+ for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
+ dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
+ __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
+ }
+#endif
+} // CalculateDETBufferSize
+
+void dml32_CalculateODMMode(
+ unsigned int MaximumPixelsPerLinePerDSCUnit,
+ unsigned int HActive,
+ enum output_format_class OutFormat,
+ enum output_encoder_class Output,
+ enum odm_combine_policy ODMUse,
+ double StateDispclk,
+ double MaxDispclk,
+ bool DSCEnable,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int MaxNumDPP,
+ double PixelClock,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKRampingMargin,
+ double DISPCLKDPPCLKVCOSpeed,
+ unsigned int NumberOfDSCSlices,
+
+ /* Output */
+ bool *TotalAvailablePipesSupport,
+ unsigned int *NumberOfDPP,
+ enum odm_combine_mode *ODMMode,
+ double *RequiredDISPCLKPerSurface)
+{
+
+ double SurfaceRequiredDISPCLKWithoutODMCombine;
+ double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+
+ SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
+ PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
+ MaxDispclk);
+ SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
+ PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
+ MaxDispclk);
+ SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
+ PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
+ MaxDispclk);
+ *TotalAvailablePipesSupport = true;
+ *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
+
+ if (ODMUse == dm_odm_combine_policy_none)
+ *ODMMode = dm_odm_combine_mode_disabled;
+
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
+ *NumberOfDPP = 0;
+
+ // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
+ // (ODMUse == "" || ODMUse == "CombineAsNeeded")
+
+ if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
+ ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
+ (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
+ || NumberOfDSCSlices > 8)))) {
+ if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
+ *ODMMode = dm_odm_combine_mode_4to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+ *NumberOfDPP = 4;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
+ (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
+ SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
+ (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
+ || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
+ if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
+ *ODMMode = dm_odm_combine_mode_2to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ *NumberOfDPP = 2;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ } else {
+ if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
+ *NumberOfDPP = 1;
+ else
+ *TotalAvailablePipesSupport = false;
+ }
+ if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
+ ODMUse != dm_odm_combine_policy_4to1) {
+ if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
+ *ODMMode = dm_odm_combine_mode_disabled;
+ *NumberOfDPP = 0;
+ *TotalAvailablePipesSupport = false;
+ } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
+ *ODMMode == dm_odm_combine_mode_4to1) {
+ *ODMMode = dm_odm_combine_mode_4to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+ *NumberOfDPP = 4;
+ } else {
+ *ODMMode = dm_odm_combine_mode_2to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ *NumberOfDPP = 2;
+ }
+ }
+ if (Output == dm_hdmi && OutFormat == dm_420 &&
+ HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
+ *ODMMode = dm_odm_combine_mode_disabled;
+ *NumberOfDPP = 0;
+ *TotalAvailablePipesSupport = false;
+ }
+}
+
+double dml32_CalculateRequiredDispclk(
+ enum odm_combine_mode ODMMode,
+ double PixelClock,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKRampingMargin,
+ double DISPCLKDPPCLKVCOSpeed,
+ double MaxDispclk)
+{
+ double RequiredDispclk = 0.;
+ double PixelClockAfterODM;
+ double DISPCLKWithRampingRoundedToDFSGranularity;
+ double DISPCLKWithoutRampingRoundedToDFSGranularity;
+ double MaxDispclkRoundedDownToDFSGranularity;
+
+ if (ODMMode == dm_odm_combine_mode_4to1)
+ PixelClockAfterODM = PixelClock / 4;
+ else if (ODMMode == dm_odm_combine_mode_2to1)
+ PixelClockAfterODM = PixelClock / 2;
+ else
+ PixelClockAfterODM = PixelClock;
+
+
+ DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
+ PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
+ * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
+
+ DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
+ PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
+
+ MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
+
+ if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
+ RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
+ else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
+ RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
+ else
+ RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
+
+ return RequiredDispclk;
+}
+
+double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
+{
+ if (Clock <= 0.0)
+ return 0.0;
+
+ if (round_up)
+ return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0);
+ else
+ return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);
+}
+
+void dml32_CalculateOutputLink(
+ double PHYCLKPerState,
+ double PHYCLKD18PerState,
+ double PHYCLKD32PerState,
+ double Downspreading,
+ bool IsMainSurfaceUsingTheIndicatedTiming,
+ enum output_encoder_class Output,
+ enum output_format_class OutputFormat,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClockBackEnd,
+ double ForcedOutputLinkBPP,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int NumberOfDSCSlices,
+ double AudioSampleRate,
+ unsigned int AudioSampleLayout,
+ enum odm_combine_mode ODMModeNoDSC,
+ enum odm_combine_mode ODMModeDSC,
+ bool DSCEnable,
+ unsigned int OutputLinkDPLanes,
+ enum dm_output_link_dp_rate OutputLinkDPRate,
+
+ /* Output */
+ bool *RequiresDSC,
+ double *RequiresFEC,
+ double *OutBpp,
+ enum dm_output_type *OutputType,
+ enum dm_output_rate *OutputRate,
+ unsigned int *RequiredSlots)
+{
+ bool LinkDSCEnable;
+ unsigned int dummy;
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = 0;
+ *OutputType = dm_output_type_unknown;
+ *OutputRate = dm_output_rate_unknown;
+
+ if (IsMainSurfaceUsingTheIndicatedTiming) {
+ if (Output == dm_hdmi) {
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
+ PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = "HDMI";
+ *OutputType = dm_output_type_hdmi;
+
+ } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
+ if (DSCEnable == true) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp || Output == dm_dp2p0)
+ *RequiresFEC = true;
+ else
+ *RequiresFEC = false;
+ } else {
+ *RequiresDSC = false;
+ LinkDSCEnable = false;
+ if (Output == dm_dp2p0)
+ *RequiresFEC = true;
+ else
+ *RequiresFEC = false;
+ }
+ if (Output == dm_dp2p0) {
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
+ PHYCLKD32PerState >= 10000 / 32) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR10";
+ *OutputType = dm_output_type_dp2p0;
+ *OutputRate = dm_output_rate_dp_rate_uhbr10;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
+ *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR13p5";
+ *OutputType = dm_output_type_dp2p0;
+ *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
+ *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR20";
+ *OutputType = dm_output_type_dp2p0;
+ *OutputRate = dm_output_rate_dp_rate_uhbr20;
+ }
+ } else {
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
+ PHYCLKPerState >= 270) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp)
+ *RequiresFEC = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR";
+ *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
+ *OutputRate = dm_output_rate_dp_rate_hbr;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
+ *OutBpp == 0 && PHYCLKPerState >= 540) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp)
+ *RequiresFEC = true;
+
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR2";
+ *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
+ *OutputRate = dm_output_rate_dp_rate_hbr2;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
+ AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
+ RequiredSlots);
+
+ if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp)
+ *RequiresFEC = true;
+
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR3";
+ *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
+ *OutputRate = dm_output_rate_dp_rate_hbr3;
+ }
+ }
+ }
+ }
+}
+
+void dml32_CalculateDPPCLK(
+ unsigned int NumberOfActiveSurfaces,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKDPPCLKVCOSpeed,
+ double DPPCLKUsingSingleDPP[],
+ unsigned int DPPPerSurface[],
+
+ /* output */
+ double *GlobalDPPCLK,
+ double Dppclk[])
+{
+ unsigned int k;
+ *GlobalDPPCLK = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]);
+ }
+ *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);
+ for (k = 0; k < NumberOfActiveSurfaces; ++k)
+ Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0);
+}
+
+double dml32_TruncToValidBPP(
+ double LinkBitRate,
+ unsigned int Lanes,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClock,
+ double DesiredBPP,
+ bool DSCEnable,
+ enum output_encoder_class Output,
+ enum output_format_class Format,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int DSCSlices,
+ unsigned int AudioRate,
+ unsigned int AudioLayout,
+ enum odm_combine_mode ODMModeNoDSC,
+ enum odm_combine_mode ODMModeDSC,
+ /* Output */
+ unsigned int *RequiredSlots)
+{
+ double MaxLinkBPP;
+ unsigned int MinDSCBPP;
+ double MaxDSCBPP;
+ unsigned int NonDSCBPP0;
+ unsigned int NonDSCBPP1;
+ unsigned int NonDSCBPP2;
+ unsigned int NonDSCBPP3;
+
+ if (Format == dm_420) {
+ NonDSCBPP0 = 12;
+ NonDSCBPP1 = 15;
+ NonDSCBPP2 = 18;
+ MinDSCBPP = 6;
+ MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
+ } else if (Format == dm_444) {
+ NonDSCBPP0 = 18;
+ NonDSCBPP1 = 24;
+ NonDSCBPP2 = 30;
+ NonDSCBPP3 = 36;
+ MinDSCBPP = 8;
+ MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
+ } else {
+ if (Output == dm_hdmi) {
+ NonDSCBPP0 = 24;
+ NonDSCBPP1 = 24;
+ NonDSCBPP2 = 24;
+ } else {
+ NonDSCBPP0 = 16;
+ NonDSCBPP1 = 20;
+ NonDSCBPP2 = 24;
+ }
+ if (Format == dm_n422) {
+ MinDSCBPP = 7;
+ MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
+ } else {
+ MinDSCBPP = 8;
+ MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
+ }
+ }
+ if (Output == dm_dp2p0) {
+ MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
+ } else if (DSCEnable && Output == dm_dp) {
+ MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
+ } else {
+ MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
+ }
+
+ if (DSCEnable) {
+ if (ODMModeDSC == dm_odm_combine_mode_4to1)
+ MaxLinkBPP = dml_min(MaxLinkBPP, 16);
+ else if (ODMModeDSC == dm_odm_combine_mode_2to1)
+ MaxLinkBPP = dml_min(MaxLinkBPP, 32);
+ else if (ODMModeDSC == dm_odm_split_mode_1to2)
+ MaxLinkBPP = 2 * MaxLinkBPP;
+ } else {
+ if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
+ MaxLinkBPP = dml_min(MaxLinkBPP, 16);
+ else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
+ MaxLinkBPP = dml_min(MaxLinkBPP, 32);
+ else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
+ MaxLinkBPP = 2 * MaxLinkBPP;
+ }
+
+ if (DesiredBPP == 0) {
+ if (DSCEnable) {
+ if (MaxLinkBPP < MinDSCBPP)
+ return BPP_INVALID;
+ else if (MaxLinkBPP >= MaxDSCBPP)
+ return MaxDSCBPP;
+ else
+ return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
+ } else {
+ if (MaxLinkBPP >= NonDSCBPP3)
+ return NonDSCBPP3;
+ else if (MaxLinkBPP >= NonDSCBPP2)
+ return NonDSCBPP2;
+ else if (MaxLinkBPP >= NonDSCBPP1)
+ return NonDSCBPP1;
+ else if (MaxLinkBPP >= NonDSCBPP0)
+ return 16.0;
+ else
+ return BPP_INVALID;
+ }
+ } else {
+ if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
+ DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
+ (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
+ return BPP_INVALID;
+ else
+ return DesiredBPP;
+ }
+
+ *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
+
+ return BPP_INVALID;
+} // TruncToValidBPP
+
+double dml32_RequiredDTBCLK(
+ bool DSCEnable,
+ double PixelClock,
+ enum output_format_class OutputFormat,
+ double OutputBpp,
+ unsigned int DSCSlices,
+ unsigned int HTotal,
+ unsigned int HActive,
+ unsigned int AudioRate,
+ unsigned int AudioLayout)
+{
+ double PixelWordRate;
+ double HCActive;
+ double HCBlank;
+ double AverageTribyteRate;
+ double HActiveTribyteRate;
+
+ if (DSCEnable != true)
+ return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
+
+ PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2);
+ HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
+ dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
+ HCBlank = 64 + 32 *
+ dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
+ AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
+ HActiveTribyteRate = PixelWordRate * HCActive / HActive;
+ return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
+}
+
+unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
+ enum odm_combine_mode ODMMode,
+ unsigned int DSCInputBitPerComponent,
+ double OutputBpp,
+ unsigned int HActive,
+ unsigned int HTotal,
+ unsigned int NumberOfDSCSlices,
+ enum output_format_class OutputFormat,
+ enum output_encoder_class Output,
+ double PixelClock,
+ double PixelClockBackEnd,
+ double dsc_delay_factor_wa)
+{
+ unsigned int DSCDelayRequirement_val;
+
+ if (DSCEnabled == true && OutputBpp != 0) {
+ if (ODMMode == dm_odm_combine_mode_4to1) {
+ DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
+ dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
+ OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
+ } else if (ODMMode == dm_odm_combine_mode_2to1) {
+ DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
+ dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
+ OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
+ } else {
+ DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
+ dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
+ OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
+ }
+
+ DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
+ dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
+
+ DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
+
+ } else {
+ DSCDelayRequirement_val = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled);
+ dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
+ dml_print("DML::%s: HActive = %d\n", __func__, HActive);
+ dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat);
+ dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
+ dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices);
+ dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
+#endif
+
+ return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
+}
+
+void dml32_CalculateSurfaceSizeInMall(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MALLAllocatedForDCN,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ bool DCCEnable[],
+ bool ViewportStationary[],
+ unsigned int ViewportXStartY[],
+ unsigned int ViewportYStartY[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int ViewportWidthY[],
+ unsigned int ViewportHeightY[],
+ unsigned int BytesPerPixelY[],
+ unsigned int ViewportWidthC[],
+ unsigned int ViewportHeightC[],
+ unsigned int BytesPerPixelC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int ReadBlockWidthY[],
+ unsigned int ReadBlockWidthC[],
+ unsigned int ReadBlockHeightY[],
+ unsigned int ReadBlockHeightC[],
+
+ /* Output */
+ unsigned int SurfaceSizeInMALL[],
+ bool *ExceededMALLSize)
+{
+ unsigned int TotalSurfaceSizeInMALL = 0;
+ unsigned int k;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (ViewportStationary[k]) {
+ SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
+ dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
+ ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
+ ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
+ ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
+ ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
+ dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
+
+ if (ReadBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
+ dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
+ ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
+ dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
+ dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
+ dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
+ ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
+ dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
+ BytesPerPixelC[k];
+ }
+ if (DCCEnable[k] == true) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
+ dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
+ Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
+ - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
+ * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
+ Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
+ ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
+ Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
+ * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
+ if (Read256BytesBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthC[k], 8 *
+ Read256BytesBlockWidthC[k]),
+ dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
+ * Read256BytesBlockWidthC[k] - 1, 8 *
+ Read256BytesBlockWidthC[k]) -
+ dml_floor(ViewportXStartC[k], 8 *
+ Read256BytesBlockWidthC[k])) *
+ dml_min(dml_ceil(SurfaceHeightC[k], 8 *
+ Read256BytesBlockHeightC[k]),
+ dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
+ 8 * Read256BytesBlockHeightC[k] - 1, 8 *
+ Read256BytesBlockHeightC[k]) -
+ dml_floor(ViewportYStartC[k], 8 *
+ Read256BytesBlockHeightC[k])) *
+ BytesPerPixelC[k] / 256;
+ }
+ }
+ } else {
+ SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
+ ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
+ dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
+ ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
+ BytesPerPixelY[k];
+ if (ReadBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
+ ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
+ dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
+ ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
+ BytesPerPixelC[k];
+ }
+ if (DCCEnable[k] == true) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
+ Read256BytesBlockWidthY[k] - 1), 8 *
+ Read256BytesBlockWidthY[k]) *
+ dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
+ Read256BytesBlockHeightY[k] - 1), 8 *
+ Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
+
+ if (Read256BytesBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
+ Read256BytesBlockWidthC[k] - 1), 8 *
+ Read256BytesBlockWidthC[k]) *
+ dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
+ Read256BytesBlockHeightC[k] - 1), 8 *
+ Read256BytesBlockHeightC[k]) *
+ BytesPerPixelC[k] / 256;
+ }
+ }
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
+ TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
+ }
+ *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024);
+} // CalculateSurfaceSizeInMall
+
+void dml32_CalculateVMRowAndSwath(
+ unsigned int NumberOfActiveSurfaces,
+ DmlPipe myPipe[],
+ unsigned int SurfaceSizeInMALL[],
+ unsigned int PTEBufferSizeInRequestsLuma,
+ unsigned int PTEBufferSizeInRequestsChroma,
+ unsigned int DCCMetaBufferSizeBytes,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int MALLAllocatedForDCN,
+ double SwathWidthY[],
+ double SwathWidthC[],
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int GPUVMMinPageSizeKBytes[],
+ unsigned int HostVMMinPageSize,
+
+ /* Output */
+ bool PTEBufferSizeNotExceeded[],
+ bool DCCMetaBufferSizeNotExceeded[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+ unsigned int dpte_row_height_luma[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
+ unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int vm_group_bytes[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int PTERequestSizeC[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int meta_pte_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ unsigned int meta_pte_bytes_per_frame_ub_c[],
+ double PrefetchSourceLinesY[],
+ double PrefetchSourceLinesC[],
+ double VInitPreFillY[],
+ double VInitPreFillC[],
+ unsigned int MaxNumSwathY[],
+ unsigned int MaxNumSwathC[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+ double PixelPTEBytesPerRow[],
+ double PDEAndMetaPTEBytesFrame[],
+ double MetaRowByte[],
+ bool use_one_row_for_frame[],
+ bool use_one_row_for_frame_flip[],
+ bool UsesMALLForStaticScreen[],
+ bool PTE_BUFFER_MODE[],
+ unsigned int BIGK_FRAGMENT_SIZE[])
+{
+ unsigned int k;
+ unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
+ unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
+ unsigned int PDEAndMetaPTEBytesFrameY;
+ unsigned int PDEAndMetaPTEBytesFrameC;
+ unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
+ unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
+ bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (HostVMEnable == true) {
+ vm_group_bytes[k] = 512;
+ dpte_group_bytes[k] = 512;
+ } else if (GPUVMEnable == true) {
+ vm_group_bytes[k] = 2048;
+ if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
+ dpte_group_bytes[k] = 512;
+ else
+ dpte_group_bytes[k] = 2048;
+ } else {
+ vm_group_bytes[k] = 0;
+ dpte_group_bytes[k] = 0;
+ }
+
+ if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
+ myPipe[k].SourcePixelFormat == dm_420_12 ||
+ myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
+ if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
+ !IsVertical(myPipe[k].SourceRotation)) {
+ PTEBufferSizeInRequestsForLuma[k] =
+ (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
+ PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
+ } else {
+ PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
+ PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
+ }
+
+ PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
+ myPipe[k].ViewportStationary,
+ myPipe[k].DCCEnable,
+ myPipe[k].DPPPerSurface,
+ myPipe[k].BlockHeight256BytesC,
+ myPipe[k].BlockWidth256BytesC,
+ myPipe[k].SourcePixelFormat,
+ myPipe[k].SurfaceTiling,
+ myPipe[k].BytePerPixelC,
+ myPipe[k].SourceRotation,
+ SwathWidthC[k],
+ myPipe[k].ViewportHeightChroma,
+ myPipe[k].ViewportXStartC,
+ myPipe[k].ViewportYStartC,
+ GPUVMEnable,
+ HostVMEnable,
+ HostVMMaxNonCachedPageTableLevels,
+ GPUVMMaxPageTableLevels,
+ GPUVMMinPageSizeKBytes[k],
+ HostVMMinPageSize,
+ PTEBufferSizeInRequestsForChroma[k],
+ myPipe[k].PitchC,
+ myPipe[k].DCCMetaPitchC,
+ myPipe[k].BlockWidthC,
+ myPipe[k].BlockHeightC,
+
+ /* Output */
+ &MetaRowByteC[k],
+ &PixelPTEBytesPerRowC[k],
+ &dpte_row_width_chroma_ub[k],
+ &dpte_row_height_chroma[k],
+ &dpte_row_height_linear_chroma[k],
+ &PixelPTEBytesPerRowC_one_row_per_frame[k],
+ &dpte_row_width_chroma_ub_one_row_per_frame[k],
+ &dpte_row_height_chroma_one_row_per_frame[k],
+ &meta_req_width_chroma[k],
+ &meta_req_height_chroma[k],
+ &meta_row_width_chroma[k],
+ &meta_row_height_chroma[k],
+ &PixelPTEReqWidthC[k],
+ &PixelPTEReqHeightC[k],
+ &PTERequestSizeC[k],
+ &dpde0_bytes_per_frame_ub_c[k],
+ &meta_pte_bytes_per_frame_ub_c[k]);
+
+ PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
+ myPipe[k].VRatioChroma,
+ myPipe[k].VTapsChroma,
+ myPipe[k].InterlaceEnable,
+ myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ myPipe[k].SwathHeightC,
+ myPipe[k].SourceRotation,
+ myPipe[k].ViewportStationary,
+ SwathWidthC[k],
+ myPipe[k].ViewportHeightChroma,
+ myPipe[k].ViewportXStartC,
+ myPipe[k].ViewportYStartC,
+
+ /* Output */
+ &VInitPreFillC[k],
+ &MaxNumSwathC[k]);
+ } else {
+ PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
+ PTEBufferSizeInRequestsForChroma[k] = 0;
+ PixelPTEBytesPerRowC[k] = 0;
+ PDEAndMetaPTEBytesFrameC = 0;
+ MetaRowByteC[k] = 0;
+ MaxNumSwathC[k] = 0;
+ PrefetchSourceLinesC[k] = 0;
+ dpte_row_height_chroma_one_row_per_frame[k] = 0;
+ dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
+ PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
+ }
+
+ PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
+ myPipe[k].ViewportStationary,
+ myPipe[k].DCCEnable,
+ myPipe[k].DPPPerSurface,
+ myPipe[k].BlockHeight256BytesY,
+ myPipe[k].BlockWidth256BytesY,
+ myPipe[k].SourcePixelFormat,
+ myPipe[k].SurfaceTiling,
+ myPipe[k].BytePerPixelY,
+ myPipe[k].SourceRotation,
+ SwathWidthY[k],
+ myPipe[k].ViewportHeight,
+ myPipe[k].ViewportXStart,
+ myPipe[k].ViewportYStart,
+ GPUVMEnable,
+ HostVMEnable,
+ HostVMMaxNonCachedPageTableLevels,
+ GPUVMMaxPageTableLevels,
+ GPUVMMinPageSizeKBytes[k],
+ HostVMMinPageSize,
+ PTEBufferSizeInRequestsForLuma[k],
+ myPipe[k].PitchY,
+ myPipe[k].DCCMetaPitchY,
+ myPipe[k].BlockWidthY,
+ myPipe[k].BlockHeightY,
+
+ /* Output */
+ &MetaRowByteY[k],
+ &PixelPTEBytesPerRowY[k],
+ &dpte_row_width_luma_ub[k],
+ &dpte_row_height_luma[k],
+ &dpte_row_height_linear_luma[k],
+ &PixelPTEBytesPerRowY_one_row_per_frame[k],
+ &dpte_row_width_luma_ub_one_row_per_frame[k],
+ &dpte_row_height_luma_one_row_per_frame[k],
+ &meta_req_width[k],
+ &meta_req_height[k],
+ &meta_row_width[k],
+ &meta_row_height[k],
+ &PixelPTEReqWidthY[k],
+ &PixelPTEReqHeightY[k],
+ &PTERequestSizeY[k],
+ &dpde0_bytes_per_frame_ub_l[k],
+ &meta_pte_bytes_per_frame_ub_l[k]);
+
+ PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
+ myPipe[k].VRatio,
+ myPipe[k].VTaps,
+ myPipe[k].InterlaceEnable,
+ myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ myPipe[k].SwathHeightY,
+ myPipe[k].SourceRotation,
+ myPipe[k].ViewportStationary,
+ SwathWidthY[k],
+ myPipe[k].ViewportHeight,
+ myPipe[k].ViewportXStart,
+ myPipe[k].ViewportYStart,
+
+ /* Output */
+ &VInitPreFillY[k],
+ &MaxNumSwathY[k]);
+
+ PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
+ MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
+
+ if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
+ PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
+ PTEBufferSizeNotExceeded[k] = true;
+ } else {
+ PTEBufferSizeNotExceeded[k] = false;
+ }
+
+ one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
+ PTEBufferSizeInRequestsForLuma[k] &&
+ PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
+ }
+
+ dml32_CalculateMALLUseForStaticScreen(
+ NumberOfActiveSurfaces,
+ MALLAllocatedForDCN,
+ UseMALLForStaticScreen, // mode
+ SurfaceSizeInMALL,
+ one_row_per_frame_fits_in_buffer,
+ /* Output */
+ UsesMALLForStaticScreen); // boolen
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
+ (GPUVMMinPageSizeKBytes[k] > 64);
+ BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]);
+ dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
+#endif
+ use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
+ (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
+
+ use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
+ !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
+
+ if (use_one_row_for_frame[k]) {
+ dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
+ dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
+ PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
+ dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
+ dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
+ PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
+ PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
+ }
+
+ if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
+ DCCMetaBufferSizeNotExceeded[k] = true;
+ else
+ DCCMetaBufferSizeNotExceeded[k] = false;
+
+ PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
+ if (use_one_row_for_frame[k])
+ PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
+
+ dml32_CalculateRowBandwidth(
+ GPUVMEnable,
+ myPipe[k].SourcePixelFormat,
+ myPipe[k].VRatio,
+ myPipe[k].VRatioChroma,
+ myPipe[k].DCCEnable,
+ myPipe[k].HTotal / myPipe[k].PixelClock,
+ MetaRowByteY[k], MetaRowByteC[k],
+ meta_row_height[k],
+ meta_row_height_chroma[k],
+ PixelPTEBytesPerRowY[k],
+ PixelPTEBytesPerRowC[k],
+ dpte_row_height_luma[k],
+ dpte_row_height_chroma[k],
+
+ /* Output */
+ &meta_row_bw[k],
+ &dpte_row_bw[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]);
+ dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",
+ __func__, k, use_one_row_for_frame_flip[k]);
+ dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",
+ __func__, k, UseMALLForPStateChange[k]);
+ dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
+ dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
+ __func__, k, dpte_row_width_luma_ub[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]);
+ dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
+ __func__, k, dpte_row_height_chroma[k]);
+ dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
+ __func__, k, dpte_row_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
+ dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
+ __func__, k, PTEBufferSizeNotExceeded[k]);
+ dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
+ dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
+#endif
+ }
+} // CalculateVMRowAndSwath
+
+unsigned int dml32_CalculateVMAndRowBytes(
+ bool ViewportStationary,
+ bool DCCEnable,
+ unsigned int NumberOfDPPs,
+ unsigned int BlockHeight256Bytes,
+ unsigned int BlockWidth256Bytes,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceTiling,
+ unsigned int BytePerPixel,
+ enum dm_rotation_angle SourceRotation,
+ double SwathWidth,
+ unsigned int ViewportHeight,
+ unsigned int ViewportXStart,
+ unsigned int ViewportYStart,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int GPUVMMinPageSizeKBytes,
+ unsigned int HostVMMinPageSize,
+ unsigned int PTEBufferSizeInRequests,
+ unsigned int Pitch,
+ unsigned int DCCMetaPitch,
+ unsigned int MacroTileWidth,
+ unsigned int MacroTileHeight,
+
+ /* Output */
+ unsigned int *MetaRowByte,
+ unsigned int *PixelPTEBytesPerRow,
+ unsigned int *dpte_row_width_ub,
+ unsigned int *dpte_row_height,
+ unsigned int *dpte_row_height_linear,
+ unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
+ unsigned int *dpte_row_width_ub_one_row_per_frame,
+ unsigned int *dpte_row_height_one_row_per_frame,
+ unsigned int *MetaRequestWidth,
+ unsigned int *MetaRequestHeight,
+ unsigned int *meta_row_width,
+ unsigned int *meta_row_height,
+ unsigned int *PixelPTEReqWidth,
+ unsigned int *PixelPTEReqHeight,
+ unsigned int *PTERequestSize,
+ unsigned int *DPDE0BytesFrame,
+ unsigned int *MetaPTEBytesFrame)
+{
+ unsigned int MPDEBytesFrame;
+ unsigned int DCCMetaSurfaceBytes;
+ unsigned int ExtraDPDEBytesFrame;
+ unsigned int PDEAndMetaPTEBytesFrame;
+ unsigned int HostVMDynamicLevels = 0;
+ unsigned int MacroTileSizeBytes;
+ unsigned int vp_height_meta_ub;
+ unsigned int vp_height_dpte_ub;
+ unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
+
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ if (HostVMMinPageSize < 2048)
+ HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
+ else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+ else
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+ }
+
+ *MetaRequestHeight = 8 * BlockHeight256Bytes;
+ *MetaRequestWidth = 8 * BlockWidth256Bytes;
+ if (SurfaceTiling == dm_sw_linear) {
+ *meta_row_height = 32;
+ *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
+ - dml_floor(ViewportXStart, *MetaRequestWidth);
+ } else if (!IsVertical(SourceRotation)) {
+ *meta_row_height = *MetaRequestHeight;
+ if (ViewportStationary && NumberOfDPPs == 1) {
+ *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
+ *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
+ } else {
+ *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
+ }
+ *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
+ } else {
+ *meta_row_height = *MetaRequestWidth;
+ if (ViewportStationary && NumberOfDPPs == 1) {
+ *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
+ *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
+ } else {
+ *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
+ }
+ *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
+ }
+
+ if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
+ vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
+ 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
+ } else if (!IsVertical(SourceRotation)) {
+ vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
+ } else {
+ vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
+ }
+
+ DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
+
+ if (GPUVMEnable == true) {
+ *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
+ (8 * 4.0 * 1024), 1) + 1) * 64;
+ MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
+ } else {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ }
+
+ if (DCCEnable != true) {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ *MetaRowByte = 0;
+ }
+
+ MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
+
+ if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
+ if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
+ vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
+ MacroTileHeight - 1, MacroTileHeight) -
+ dml_floor(ViewportYStart, MacroTileHeight);
+ } else if (!IsVertical(SourceRotation)) {
+ vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
+ } else {
+ vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
+ }
+ *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
+ (8 * 2097152), 1) + 1);
+ ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
+ } else {
+ *DPDE0BytesFrame = 0;
+ ExtraDPDEBytesFrame = 0;
+ vp_height_dpte_ub = 0;
+ }
+
+ PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
+ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+ dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
+ dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
+ dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
+ dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
+ dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
+ dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
+ dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
+ dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
+ dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
+ dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
+ dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
+ dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
+ dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
+#endif
+
+ if (HostVMEnable == true)
+ PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
+
+ if (SurfaceTiling == dm_sw_linear) {
+ *PixelPTEReqHeight = 1;
+ *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
+ PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
+ *PTERequestSize = 64;
+ } else if (GPUVMMinPageSizeKBytes == 4) {
+ *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
+ *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
+ *PTERequestSize = 128;
+ } else {
+ *PixelPTEReqHeight = MacroTileHeight;
+ *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
+ *PTERequestSize = 64;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
+ dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
+ dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
+ dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
+ dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
+#endif
+
+ *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
+ *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
+ (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
+ (double) *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
+ *PTERequestSize;
+
+ if (SurfaceTiling == dm_sw_linear) {
+ *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
+ *PixelPTEReqWidth / Pitch), 1));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
+ PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
+ dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
+ dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
+ dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
+ dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+ dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
+ 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
+ *PixelPTEReqWidth / Pitch), 1));
+ dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
+#endif
+ *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
+ (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
+
+ // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
+ *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
+ PixelPTEReqWidth_linear / Pitch), 1);
+ if (*dpte_row_height_linear > 128)
+ *dpte_row_height_linear = 128;
+
+ } else if (!IsVertical(SourceRotation)) {
+ *dpte_row_height = *PixelPTEReqHeight;
+
+ if (GPUVMMinPageSizeKBytes > 64) {
+ *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
+ *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
+ } else if (ViewportStationary && (NumberOfDPPs == 1)) {
+ *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
+ *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
+ dml_floor(ViewportXStart, *PixelPTEReqWidth);
+ } else {
+ *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
+ *PixelPTEReqWidth;
+ }
+
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+ } else {
+ *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
+
+ if (ViewportStationary && (NumberOfDPPs == 1)) {
+ *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
+ *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
+ } else {
+ *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
+ * *PixelPTEReqHeight;
+ }
+
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
+ }
+
+ if (GPUVMEnable != true)
+ *PixelPTEBytesPerRow = 0;
+ if (HostVMEnable == true)
+ *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
+ dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
+ dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
+ dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
+ dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
+ dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
+ dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
+ __func__, *dpte_row_width_ub_one_row_per_frame);
+ dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
+ __func__, *PixelPTEBytesPerRow_one_row_per_frame);
+ dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
+ *MetaPTEBytesFrame);
+#endif
+
+ return PDEAndMetaPTEBytesFrame;
+} // CalculateVMAndRowBytes
+
+double dml32_CalculatePrefetchSourceLines(
+ double VRatio,
+ unsigned int VTaps,
+ bool Interlace,
+ bool ProgressiveToInterlaceUnitInOPP,
+ unsigned int SwathHeight,
+ enum dm_rotation_angle SourceRotation,
+ bool ViewportStationary,
+ double SwathWidth,
+ unsigned int ViewportHeight,
+ unsigned int ViewportXStart,
+ unsigned int ViewportYStart,
+
+ /* Output */
+ double *VInitPreFill,
+ unsigned int *MaxNumSwath)
+{
+
+ unsigned int vp_start_rot;
+ unsigned int sw0_tmp;
+ unsigned int MaxPartialSwath;
+ double numLines;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
+ dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
+ dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
+ dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
+ dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
+ dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
+#endif
+ if (ProgressiveToInterlaceUnitInOPP)
+ *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
+ else
+ *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
+
+ if (ViewportStationary) {
+ if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
+ vp_start_rot = SwathHeight -
+ (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
+ } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
+ vp_start_rot = ViewportXStart;
+ } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
+ vp_start_rot = SwathHeight -
+ (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
+ } else {
+ vp_start_rot = ViewportYStart;
+ }
+ sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
+ if (sw0_tmp < *VInitPreFill)
+ *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
+ else
+ *MaxNumSwath = 1;
+ MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
+ } else {
+ *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
+ if (*VInitPreFill > 1)
+ MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
+ else
+ MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
+ }
+ numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
+ dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
+ dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
+ dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
+ dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
+#endif
+ return numLines;
+
+} // CalculatePrefetchSourceLines
+
+void dml32_CalculateMALLUseForStaticScreen(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MALLAllocatedForDCNFinal,
+ enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
+ unsigned int SurfaceSizeInMALL[],
+ bool one_row_per_frame_fits_in_buffer[],
+
+ /* output */
+ bool UsesMALLForStaticScreen[])
+{
+ unsigned int k;
+ unsigned int SurfaceToAddToMALL;
+ bool CanAddAnotherSurfaceToMALL;
+ unsigned int TotalSurfaceSizeInMALL;
+
+ TotalSurfaceSizeInMALL = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
+ if (UsesMALLForStaticScreen[k])
+ TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
+ dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL);
+#endif
+ }
+
+ SurfaceToAddToMALL = 0;
+ CanAddAnotherSurfaceToMALL = true;
+ while (CanAddAnotherSurfaceToMALL) {
+ CanAddAnotherSurfaceToMALL = false;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
+ !UsesMALLForStaticScreen[k] &&
+ UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
+ one_row_per_frame_fits_in_buffer[k] &&
+ (!CanAddAnotherSurfaceToMALL ||
+ SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
+ CanAddAnotherSurfaceToMALL = true;
+ SurfaceToAddToMALL = k;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
+ __func__, k, UseMALLForStaticScreen[k]);
+#endif
+ }
+ }
+ if (CanAddAnotherSurfaceToMALL) {
+ UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
+ TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL);
+ dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL);
+#endif
+
+ }
+ }
+}
+
+void dml32_CalculateRowBandwidth(
+ bool GPUVMEnable,
+ enum source_format_class SourcePixelFormat,
+ double VRatio,
+ double VRatioChroma,
+ bool DCCEnable,
+ double LineTime,
+ unsigned int MetaRowByteLuma,
+ unsigned int MetaRowByteChroma,
+ unsigned int meta_row_height_luma,
+ unsigned int meta_row_height_chroma,
+ unsigned int PixelPTEBytesPerRowLuma,
+ unsigned int PixelPTEBytesPerRowChroma,
+ unsigned int dpte_row_height_luma,
+ unsigned int dpte_row_height_chroma,
+ /* Output */
+ double *meta_row_bw,
+ double *dpte_row_bw)
+{
+ if (DCCEnable != true) {
+ *meta_row_bw = 0;
+ } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
+ SourcePixelFormat == dm_rgbe_alpha) {
+ *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
+ MetaRowByteChroma / (meta_row_height_chroma * LineTime);
+ } else {
+ *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
+ }
+
+ if (GPUVMEnable != true) {
+ *dpte_row_bw = 0;
+ } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
+ SourcePixelFormat == dm_rgbe_alpha) {
+ *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
+ VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
+ } else {
+ *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
+ }
+}
+
+double dml32_CalculateUrgentLatency(
+ double UrgentLatencyPixelDataOnly,
+ double UrgentLatencyPixelMixedWithVMData,
+ double UrgentLatencyVMDataOnly,
+ bool DoUrgentLatencyAdjustment,
+ double UrgentLatencyAdjustmentFabricClockComponent,
+ double UrgentLatencyAdjustmentFabricClockReference,
+ double FabricClock)
+{
+ double ret;
+
+ ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
+ if (DoUrgentLatencyAdjustment == true) {
+ ret = ret + UrgentLatencyAdjustmentFabricClockComponent *
+ (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
+ }
+ return ret;
+}
+
+void dml32_CalculateUrgentBurstFactor(
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ unsigned int swath_width_luma_ub,
+ unsigned int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double LineTime,
+ double UrgentLatency,
+ double CursorBufferSize,
+ unsigned int CursorWidth,
+ unsigned int CursorBPP,
+ double VRatio,
+ double VRatioC,
+ double BytePerPixelInDETY,
+ double BytePerPixelInDETC,
+ unsigned int DETBufferSizeY,
+ unsigned int DETBufferSizeC,
+ /* Output */
+ double *UrgentBurstFactorCursor,
+ double *UrgentBurstFactorLuma,
+ double *UrgentBurstFactorChroma,
+ bool *NotEnoughUrgentLatencyHiding)
+{
+ double LinesInDETLuma;
+ double LinesInDETChroma;
+ unsigned int LinesInCursorBuffer;
+ double CursorBufferSizeInTime;
+ double DETBufferSizeInTimeLuma;
+ double DETBufferSizeInTimeChroma;
+
+ *NotEnoughUrgentLatencyHiding = 0;
+
+ if (CursorWidth > 0) {
+ LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
+ (CursorWidth * CursorBPP / 8.0)), 1.0);
+ if (VRatio > 0) {
+ CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
+ if (CursorBufferSizeInTime - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorCursor = 0;
+ } else {
+ *UrgentBurstFactorCursor = CursorBufferSizeInTime /
+ (CursorBufferSizeInTime - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorCursor = 1;
+ }
+ }
+
+ LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
+ DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
+
+ if (VRatio > 0) {
+ DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
+ if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorLuma = 0;
+ } else {
+ *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorLuma = 1;
+ }
+
+ if (BytePerPixelInDETC > 0) {
+ LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
+ 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
+ / swath_width_chroma_ub;
+
+ if (VRatio > 0) {
+ DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
+ if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorChroma = 0;
+ } else {
+ *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
+ / (DETBufferSizeInTimeChroma - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorChroma = 1;
+ }
+ }
+} // CalculateUrgentBurstFactor
+
+void dml32_CalculateDCFCLKDeepSleep(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ double VRatio[],
+ double VRatioChroma[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerSurface[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double Dppclk[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int ReturnBusWidth,
+
+ /* Output */
+ double *DCFClkDeepSleep)
+{
+ unsigned int k;
+ double DisplayPipeLineDeliveryTimeLuma;
+ double DisplayPipeLineDeliveryTimeChroma;
+ double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
+ double ReadBandwidth = 0.0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+ if (VRatio[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
+ / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+ }
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChroma = 0;
+ } else {
+ if (VRatioChroma[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
+ DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
+ / Dppclk[k];
+ }
+ }
+
+ if (BytePerPixelC[k] > 0) {
+ DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
+ BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
+ __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
+ 32.0 / DisplayPipeLineDeliveryTimeChroma);
+ } else {
+ DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
+ 64.0 / DisplayPipeLineDeliveryTimeLuma;
+ }
+ DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
+ dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
+#endif
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k)
+ ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+
+ *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
+ dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
+ dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
+ dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
+#endif
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k)
+ *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
+#endif
+} // CalculateDCFCLKDeepSleep
+
+double dml32_CalculateWriteBackDelay(
+ enum source_format_class WritebackPixelFormat,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackVTaps,
+ unsigned int WritebackDestinationWidth,
+ unsigned int WritebackDestinationHeight,
+ unsigned int WritebackSourceHeight,
+ unsigned int HTotal)
+{
+ double CalculateWriteBackDelay;
+ double Line_length;
+ double Output_lines_last_notclamped;
+ double WritebackVInit;
+
+ WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
+ Line_length = dml_max((double) WritebackDestinationWidth,
+ dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
+ Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
+ dml_ceil(((double)WritebackSourceHeight -
+ (double) WritebackVInit) / (double)WritebackVRatio, 1.0);
+ if (Output_lines_last_notclamped < 0) {
+ CalculateWriteBackDelay = 0;
+ } else {
+ CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
+ (HTotal - WritebackDestinationWidth) + 80;
+ }
+ return CalculateWriteBackDelay;
+}
+
+void dml32_UseMinimumDCFCLK(
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool DRRDisplay[],
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int MaxPrefetchMode,
+ double DRAMClockChangeLatencyFinal,
+ double FCLKChangeLatency,
+ double SREnterPlusExitTime,
+ unsigned int ReturnBusWidth,
+ unsigned int RoundTripPingLatencyCycles,
+ unsigned int ReorderingBytes,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int MetaChunkSize,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool DynamicMetadataVMEnabled,
+ bool ImmediateFlipRequirement,
+ bool ProgressiveToInterlaceUnitInOPP,
+ double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
+ double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
+ unsigned int VTotal[],
+ unsigned int VActive[],
+ unsigned int DynamicMetadataTransmittedBytes[],
+ unsigned int DynamicMetadataLinesBeforeActiveRequired[],
+ bool Interlace[],
+ double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
+ double RequiredDISPCLK[][2],
+ double UrgLatency[],
+ unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
+ double ProjectedDCFClkDeepSleep[][2],
+ double MaximumVStartup[][2][DC__NUM_DPP__MAX],
+ unsigned int TotalNumberOfActiveDPP[][2],
+ unsigned int TotalNumberOfDCCActiveDPP[][2],
+ unsigned int dpte_group_bytes[],
+ double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
+ double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ unsigned int HTotal[],
+ double PixelClock[],
+ double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
+ double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
+ double MetaRowBytes[][2][DC__NUM_DPP__MAX],
+ bool DynamicMetadataEnable[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double DCFCLKPerState[],
+ /* Output */
+ double DCFCLKState[][2])
+{
+ unsigned int i, j, k;
+ unsigned int dummy1;
+ double dummy2, dummy3;
+ double NormalEfficiency;
+ double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
+
+ NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
+ for (i = 0; i < DC__VOLTAGE_STATES; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
+ double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
+ double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
+ double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
+ double MinimumTWait = 0.0;
+ double DPTEBandwidth;
+ double DCFCLKRequiredForAverageBandwidth;
+ unsigned int ExtraLatencyBytes;
+ double ExtraLatencyCycles;
+ double DCFCLKRequiredForPeakBandwidth;
+ unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
+ double MinimumTvmPlus2Tr0;
+
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
+ + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
+ / (15.75 * HTotal[k] / PixelClock[k]);
+ }
+
+ for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
+ NoOfDPPState[k] = NoOfDPP[i][j][k];
+
+ DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
+ DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
+
+ ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
+ TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
+ TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
+ NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
+ HostVMMaxNonCachedPageTableLevels);
+ ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
+ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ double DCFCLKCyclesRequiredInPrefetch;
+ double PrefetchTime;
+
+ PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
+ * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
+ + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
+ * BytePerPixelC[k]) / NormalEfficiency
+ / ReturnBusWidth;
+ DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
+ + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
+ / NormalEfficiency / ReturnBusWidth
+ * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
+ + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
+ / ReturnBusWidth
+ + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
+ + PixelDCFCLKCyclesRequiredInPrefetch[k];
+ PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
+ * HTotal[k] / PixelClock[k];
+ DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
+ DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
+ UrgLatency[i] * GPUVMMaxPageTableLevels *
+ (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
+
+ MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
+ UseMALLForPStateChange[k],
+ SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ DRRDisplay[k],
+ DRAMClockChangeLatencyFinal,
+ FCLKChangeLatency,
+ UrgLatency[i],
+ SREnterPlusExitTime);
+
+ PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
+ MinimumTWait - UrgLatency[i] *
+ ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
+ GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ?
+ HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
+ DynamicMetadataVMExtraLatency[k];
+
+ if (PrefetchTime > 0) {
+ double ExpectedVRatioPrefetch;
+
+ ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
+ PixelDCFCLKCyclesRequiredInPrefetch[k] /
+ DCFCLKCyclesRequiredInPrefetch);
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
+ PixelDCFCLKCyclesRequiredInPrefetch[k] /
+ PrefetchPixelLinesTime[k] *
+ dml_max(1.0, ExpectedVRatioPrefetch) *
+ dml_max(1.0, ExpectedVRatioPrefetch / 4);
+ if (HostVMEnable == true || ImmediateFlipRequirement == true) {
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] =
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] +
+ NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
+ NormalEfficiency / ReturnBusWidth;
+ }
+ } else {
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
+ }
+ if (DynamicMetadataEnable[k] == true) {
+ double TSetupPipe;
+ double TdmbfPipe;
+ double TdmsksPipe;
+ double TdmecPipe;
+ double AllowedTimeForUrgentExtraLatency;
+
+ dml32_CalculateVUpdateAndDynamicMetadataParameters(
+ MaxInterDCNTileRepeaters,
+ RequiredDPPCLKPerSurface[i][j][k],
+ RequiredDISPCLK[i][j],
+ ProjectedDCFClkDeepSleep[i][j],
+ PixelClock[k],
+ HTotal[k],
+ VTotal[k] - VActive[k],
+ DynamicMetadataTransmittedBytes[k],
+ DynamicMetadataLinesBeforeActiveRequired[k],
+ Interlace[k],
+ ProgressiveToInterlaceUnitInOPP,
+
+ /* output */
+ &TSetupPipe,
+ &TdmbfPipe,
+ &TdmecPipe,
+ &TdmsksPipe,
+ &dummy1,
+ &dummy2,
+ &dummy3);
+ AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
+ PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
+ TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
+ if (AllowedTimeForUrgentExtraLatency > 0)
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] =
+ dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
+ ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
+ else
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
+ }
+ }
+ DCFCLKRequiredForPeakBandwidth = 0;
+ for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
+ DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
+ DCFCLKRequiredForPeakBandwidthPerSurface[k];
+ }
+ MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
+ (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
+ (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ double MaximumTvmPlus2Tr0PlusTsw;
+
+ MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
+ PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
+ if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
+ DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
+ } else {
+ DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
+ 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
+ MinimumTvmPlus2Tr0 -
+ PrefetchPixelLinesTime[k] / 4),
+ (2 * ExtraLatencyCycles +
+ PixelDCFCLKCyclesRequiredInPrefetch[k]) /
+ (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
+ }
+ }
+ DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
+ dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
+ }
+ }
+}
+
+unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int TotalNumberOfDCCActiveDPP,
+ unsigned int MetaChunkSize,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int NumberOfDPP[],
+ unsigned int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels)
+{
+ unsigned int k;
+ double ret;
+ unsigned int HostVMDynamicLevels;
+
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ if (HostVMMinPageSize < 2048)
+ HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
+ else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+ else
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+ } else {
+ HostVMDynamicLevels = 0;
+ }
+
+ ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
+ TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
+
+ if (GPUVMEnable == true) {
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] *
+ (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
+ }
+ }
+ return ret;
+}
+
+void dml32_CalculateVUpdateAndDynamicMetadataParameters(
+ unsigned int MaxInterDCNTileRepeaters,
+ double Dppclk,
+ double Dispclk,
+ double DCFClkDeepSleep,
+ double PixelClock,
+ unsigned int HTotal,
+ unsigned int VBlank,
+ unsigned int DynamicMetadataTransmittedBytes,
+ unsigned int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int InterlaceEnable,
+ bool ProgressiveToInterlaceUnitInOPP,
+
+ /* output */
+ double *TSetup,
+ double *Tdmbf,
+ double *Tdmec,
+ double *Tdmsks,
+ unsigned int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix)
+{
+ double TotalRepeaterDelayTime;
+
+ TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
+ *VUpdateWidthPix =
+ dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
+ *VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
+ TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
+ *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
+ *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
+ *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
+ *Tdmec = HTotal / PixelClock;
+
+ if (DynamicMetadataLinesBeforeActiveRequired == 0)
+ *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
+ else
+ *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
+
+ if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
+ *Tdmsks = *Tdmsks / 2;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix);
+ dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix);
+ dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
+
+ dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
+ __func__, DynamicMetadataLinesBeforeActiveRequired);
+ dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
+ dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
+ dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
+ dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
+#endif
+}
+
+double dml32_CalculateTWait(
+ unsigned int PrefetchMode,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ bool DRRDisplay,
+ double DRAMClockChangeLatency,
+ double FCLKChangeLatency,
+ double UrgentLatency,
+ double SREnterPlusExitTime)
+{
+ double TWait = 0.0;
+
+ if (PrefetchMode == 0 &&
+ !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
+ !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
+ !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
+ !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
+ TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
+ } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
+ TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
+ } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
+ TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
+ } else {
+ TWait = UrgentLatency;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
+ dml_print("DML::%s: TWait = %f\n", __func__, TWait);
+#endif
+ return TWait;
+} // CalculateTWait
+
+// Function: get_return_bw_mbps
+// Megabyte per second
+double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
+ const int VoltageLevel,
+ const bool HostVMEnable,
+ const double DCFCLK,
+ const double FabricClock,
+ const double DRAMSpeed)
+{
+ double ReturnBW = 0.;
+ double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
+ double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
+ double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
+ double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
+ IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
+ IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
+ soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
+ double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
+ IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
+ IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
+ soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
+
+ if (HostVMEnable != true)
+ ReturnBW = PixelDataOnlyReturnBW;
+ else
+ ReturnBW = PixelMixedWithVMDataReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
+ dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+ dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+ dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+ dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
+ dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
+ dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
+ dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
+ dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
+ dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
+#endif
+ return ReturnBW;
+}
+
+// Function: get_return_bw_mbps_vm_only
+// Megabyte per second
+double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
+ const int VoltageLevel,
+ const double DCFCLK,
+ const double FabricClock,
+ const double DRAMSpeed)
+{
+ double VMDataOnlyReturnBW = dml_min3(
+ soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
+ FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
+ * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
+ DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
+ * (VoltageLevel < 2 ?
+ soc->pct_ideal_dram_bw_after_urgent_strobe :
+ soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+ dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+ dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+ dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
+#endif
+ return VMDataOnlyReturnBW;
+}
+
+double dml32_CalculateExtraLatency(
+ unsigned int RoundTripPingLatencyCycles,
+ unsigned int ReorderingBytes,
+ double DCFCLK,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int TotalNumberOfDCCActiveDPP,
+ unsigned int MetaChunkSize,
+ double ReturnBW,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int NumberOfDPP[],
+ unsigned int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels)
+{
+ double ExtraLatencyBytes;
+ double ExtraLatency;
+
+ ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
+ ReorderingBytes,
+ TotalNumberOfActiveDPP,
+ PixelChunkSizeInKByte,
+ TotalNumberOfDCCActiveDPP,
+ MetaChunkSize,
+ GPUVMEnable,
+ HostVMEnable,
+ NumberOfActiveSurfaces,
+ NumberOfDPP,
+ dpte_group_bytes,
+ HostVMInefficiencyFactor,
+ HostVMMinPageSize,
+ HostVMMaxNonCachedPageTableLevels);
+
+ ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
+ dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
+ dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
+ dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
+ dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
+#endif
+
+ return ExtraLatency;
+} // CalculateExtraLatency
+
+bool dml32_CalculatePrefetchSchedule(
+ struct vba_vars_st *v,
+ unsigned int k,
+ double HostVMInefficiencyFactor,
+ DmlPipe *myPipe,
+ unsigned int DSCDelay,
+ unsigned int DPP_RECOUT_WIDTH,
+ unsigned int VStartup,
+ unsigned int MaxVStartup,
+ double UrgentLatency,
+ double UrgentExtraLatency,
+ double TCalc,
+ unsigned int PDEAndMetaPTEBytesFrame,
+ unsigned int MetaRowByte,
+ unsigned int PixelPTEBytesPerRow,
+ double PrefetchSourceLinesY,
+ unsigned int SwathWidthY,
+ unsigned int VInitPreFillY,
+ unsigned int MaxNumSwathY,
+ double PrefetchSourceLinesC,
+ unsigned int SwathWidthC,
+ unsigned int VInitPreFillC,
+ unsigned int MaxNumSwathC,
+ unsigned int swath_width_luma_ub,
+ unsigned int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double TWait,
+ double TPreReq,
+ /* Output */
+ double *DSTXAfterScaler,
+ double *DSTYAfterScaler,
+ double *DestinationLinesForPrefetch,
+ double *PrefetchBandwidth,
+ double *DestinationLinesToRequestVMInVBlank,
+ double *DestinationLinesToRequestRowInVBlank,
+ double *VRatioPrefetchY,
+ double *VRatioPrefetchC,
+ double *RequiredPrefetchPixDataBWLuma,
+ double *RequiredPrefetchPixDataBWChroma,
+ bool *NotEnoughTimeForDynamicMetadata,
+ double *Tno_bw,
+ double *prefetch_vmrow_bw,
+ double *Tdmdl_vm,
+ double *Tdmdl,
+ double *TSetup,
+ unsigned int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix)
+{
+ double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
+ bool MyError = false;
+ unsigned int DPPCycles, DISPCLKCycles;
+ double DSTTotalPixelsAfterScaler;
+ double LineTime;
+ double dst_y_prefetch_equ;
+ double prefetch_bw_oto;
+ double Tvm_oto;
+ double Tr0_oto;
+ double Tvm_oto_lines;
+ double Tr0_oto_lines;
+ double dst_y_prefetch_oto;
+ double TimeForFetchingMetaPTE = 0;
+ double TimeForFetchingRowInVBlank = 0;
+ double LinesToRequestPrefetchPixelData = 0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double trip_to_mem;
+ double Tvm_trips;
+ double Tr0_trips;
+ double Tvm_trips_rounded;
+ double Tr0_trips_rounded;
+ double Lsw_oto;
+ double Tpre_rounded;
+ double prefetch_bw_equ;
+ double Tvm_equ;
+ double Tr0_equ;
+ double Tdmbf;
+ double Tdmec;
+ double Tdmsks;
+ double prefetch_sw_bytes;
+ double bytes_pp;
+ double dep_bytes;
+ unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
+ double min_Lsw;
+ double Tsw_est1 = 0;
+ double Tsw_est3 = 0;
+ double TPreMargin = 0;
+
+ if (v->GPUVMEnable == true && v->HostVMEnable == true)
+ HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
+ else
+ HostVMDynamicLevelsTrips = 0;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
+ dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
+ dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
+ dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
+ __func__, v->HostVMEnable, HostVMInefficiencyFactor);
+#endif
+ dml32_CalculateVUpdateAndDynamicMetadataParameters(
+ v->MaxInterDCNTileRepeaters,
+ myPipe->Dppclk,
+ myPipe->Dispclk,
+ myPipe->DCFClkDeepSleep,
+ myPipe->PixelClock,
+ myPipe->HTotal,
+ myPipe->VBlank,
+ v->DynamicMetadataTransmittedBytes[k],
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ myPipe->InterlaceEnable,
+ myPipe->ProgressiveToInterlaceUnitInOPP,
+ TSetup,
+
+ /* output */
+ &Tdmbf,
+ &Tdmec,
+ &Tdmsks,
+ VUpdateOffsetPix,
+ VUpdateWidthPix,
+ VReadyOffsetPix);
+
+ LineTime = myPipe->HTotal / myPipe->PixelClock;
+ trip_to_mem = UrgentLatency;
+ Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
+
+ if (v->DynamicMetadataVMEnabled == true)
+ *Tdmdl = TWait + Tvm_trips + trip_to_mem;
+ else
+ *Tdmdl = TWait + UrgentExtraLatency;
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ if (v->DynamicMetadataEnable[k] == false)
+ *Tdmdl = 0.0;
+#endif
+
+ if (v->DynamicMetadataEnable[k] == true) {
+ if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
+ *NotEnoughTimeForDynamicMetadata = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
+ __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
+ dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
+ __func__, Tdmsks);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
+ __func__, *Tdmdl);
+#endif
+ } else {
+ *NotEnoughTimeForDynamicMetadata = false;
+ }
+ } else {
+ *NotEnoughTimeForDynamicMetadata = false;
+ }
+
+ *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
+ v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
+
+ if (myPipe->ScalerEnabled)
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
+ else
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
+
+ DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
+
+ DISPCLKCycles = v->DISPCLKDelaySubtotal;
+
+ if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
+ return true;
+
+ *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
+ myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
+
+ *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
+ + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
+ + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
+ myPipe->HActive / 2 : 0)
+ + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
+ dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
+ dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
+ dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
+ dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
+ dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
+ dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
+ dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
+ dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
+#endif
+
+ if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
+ *DSTYAfterScaler = 1;
+ else
+ *DSTYAfterScaler = 0;
+
+ DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
+ *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
+ *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
+ dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
+#endif
+
+ MyError = false;
+
+ Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
+
+ if (v->GPUVMEnable == true) {
+ Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
+ if (v->GPUVMMaxPageTableLevels >= 3) {
+ *Tno_bw = UrgentExtraLatency + trip_to_mem *
+ (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
+ } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
+ Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
+ 4.0 * LineTime; // VBA_ERROR
+ *Tno_bw = UrgentExtraLatency;
+ } else {
+ *Tno_bw = 0;
+ }
+ } else if (myPipe->DCCEnable == true) {
+ Tvm_trips_rounded = LineTime / 4.0;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
+ *Tno_bw = 0;
+ } else {
+ Tvm_trips_rounded = LineTime / 4.0;
+ Tr0_trips_rounded = LineTime / 2.0;
+ *Tno_bw = 0;
+ }
+ Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
+ Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
+
+ if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
+ || myPipe->SourcePixelFormat == dm_420_12) {
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
+ } else {
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
+ }
+
+ prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
+ + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
+ prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
+ prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
+
+ min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
+ min_Lsw = dml_max(min_Lsw, 1.0);
+ Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
+
+ if (v->GPUVMEnable == true) {
+ Tvm_oto = dml_max3(
+ Tvm_trips,
+ *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
+ LineTime / 4.0);
+ } else
+ Tvm_oto = LineTime / 4.0;
+
+ if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ Tr0_oto = dml_max4(
+ Tr0_trips,
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
+ (LineTime - Tvm_oto)/2.0,
+ LineTime / 4.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
+ dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
+#endif
+ } else
+ Tr0_oto = (LineTime - Tvm_oto) / 2.0;
+
+ Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
+ Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
+ dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
+
+ dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
+ (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
+ dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
+ dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
+ dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
+ dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
+ dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
+ dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
+ dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
+ dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+ dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
+ dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
+ dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
+ dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
+ dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
+ dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
+ dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
+ dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
+#endif
+
+ dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
+ Tpre_rounded = dst_y_prefetch_equ * LineTime;
+
+ TPreMargin = Tpre_rounded - TPreReq;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
+ dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
+ dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
+ dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
+ __func__, VStartup * LineTime);
+ dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
+ dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
+ dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
+ dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
+ __func__, *DSTYAfterScaler);
+#endif
+ dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
+ MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
+
+ if (prefetch_sw_bytes < dep_bytes)
+ prefetch_sw_bytes = 2 * dep_bytes;
+
+ *PrefetchBandwidth = 0;
+ *DestinationLinesToRequestVMInVBlank = 0;
+ *DestinationLinesToRequestRowInVBlank = 0;
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ if (dst_y_prefetch_equ > 1 && TPreMargin > 0.0) {
+ double PrefetchBandwidth1;
+ double PrefetchBandwidth2;
+ double PrefetchBandwidth3;
+ double PrefetchBandwidth4;
+
+ if (Tpre_rounded - *Tno_bw > 0) {
+ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+ + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
+ Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
+ } else
+ PrefetchBandwidth1 = 0;
+
+ if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
+ && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
+ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
+ }
+
+ if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
+ PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
+ (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
+ else
+ PrefetchBandwidth2 = 0;
+
+ if (Tpre_rounded - Tvm_trips_rounded > 0) {
+ PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+ + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
+ Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
+ } else
+ PrefetchBandwidth3 = 0;
+
+
+ if (VStartup == MaxVStartup &&
+ (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
+ LineTime - Tvm_trips_rounded > 0) {
+ PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
+ }
+
+ if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
+ PrefetchBandwidth4 = prefetch_sw_bytes /
+ (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
+ } else {
+ PrefetchBandwidth4 = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
+ dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
+ dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
+ dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
+ dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
+ dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
+ dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
+ dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
+ dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
+#endif
+ {
+ bool Case1OK;
+ bool Case2OK;
+ bool Case3OK;
+
+ if (PrefetchBandwidth1 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
+ >= Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / PrefetchBandwidth1 >= Tr0_trips_rounded) {
+ Case1OK = true;
+ } else {
+ Case1OK = false;
+ }
+ } else {
+ Case1OK = false;
+ }
+
+ if (PrefetchBandwidth2 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
+ >= Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / PrefetchBandwidth2 < Tr0_trips_rounded) {
+ Case2OK = true;
+ } else {
+ Case2OK = false;
+ }
+ } else {
+ Case2OK = false;
+ }
+
+ if (PrefetchBandwidth3 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
+ Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
+ HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
+ Tr0_trips_rounded) {
+ Case3OK = true;
+ } else {
+ Case3OK = false;
+ }
+ } else {
+ Case3OK = false;
+ }
+
+ if (Case1OK)
+ prefetch_bw_equ = PrefetchBandwidth1;
+ else if (Case2OK)
+ prefetch_bw_equ = PrefetchBandwidth2;
+ else if (Case3OK)
+ prefetch_bw_equ = PrefetchBandwidth3;
+ else
+ prefetch_bw_equ = PrefetchBandwidth4;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
+ dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
+ dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
+ dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
+#endif
+
+ if (prefetch_bw_equ > 0) {
+ if (v->GPUVMEnable == true) {
+ Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
+ HostVMInefficiencyFactor / prefetch_bw_equ,
+ Tvm_trips, LineTime / 4);
+ } else {
+ Tvm_equ = LineTime / 4;
+ }
+
+ if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
+ HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
+ (LineTime - Tvm_equ) / 2, LineTime / 4);
+ } else {
+ Tr0_equ = (LineTime - Tvm_equ) / 2;
+ }
+ } else {
+ Tvm_equ = 0;
+ Tr0_equ = 0;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
+#endif
+ }
+ }
+
+ if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
+ if (dst_y_prefetch_oto * LineTime < TPreReq) {
+ *DestinationLinesForPrefetch = dst_y_prefetch_equ;
+ } else {
+ *DestinationLinesForPrefetch = dst_y_prefetch_oto;
+ }
+ TimeForFetchingMetaPTE = Tvm_oto;
+ TimeForFetchingRowInVBlank = Tr0_oto;
+ *PrefetchBandwidth = prefetch_bw_oto;
+ } else {
+ *DestinationLinesForPrefetch = dst_y_prefetch_equ;
+ TimeForFetchingMetaPTE = Tvm_equ;
+ TimeForFetchingRowInVBlank = Tr0_equ;
+ *PrefetchBandwidth = prefetch_bw_equ;
+ }
+
+ *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
+
+ *DestinationLinesToRequestRowInVBlank =
+ dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
+
+ LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
+ *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
+#endif
+
+ if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
+ *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+ dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
+ dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
+#endif
+ if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
+ *VRatioPrefetchY =
+ dml_max((double) PrefetchSourceLinesY /
+ LinesToRequestPrefetchPixelData,
+ (double) MaxNumSwathY * SwathHeightY /
+ (LinesToRequestPrefetchPixelData -
+ (VInitPreFillY - 3.0) / 2.0));
+ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+ } else {
+ MyError = true;
+ *VRatioPrefetchY = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
+#endif
+ }
+
+ *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+ dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
+ dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
+#endif
+ if ((SwathHeightC > 4)) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
+ *VRatioPrefetchC =
+ dml_max(*VRatioPrefetchC,
+ (double) MaxNumSwathC * SwathHeightC /
+ (LinesToRequestPrefetchPixelData -
+ (VInitPreFillC - 3.0) / 2.0));
+ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+ } else {
+ MyError = true;
+ *VRatioPrefetchC = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
+ dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
+#endif
+ }
+
+ *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
+ / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
+ / LineTime;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
+ dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+ dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
+ __func__, *RequiredPrefetchPixDataBWLuma);
+#endif
+ *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
+ LinesToRequestPrefetchPixelData
+ * myPipe->BytePerPixelC
+ * swath_width_chroma_ub / LineTime;
+ } else {
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
+ __func__, LinesToRequestPrefetchPixelData);
+#endif
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ *RequiredPrefetchPixDataBWChroma = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
+ (double)LinesToRequestPrefetchPixelData * LineTime +
+ 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
+ dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
+ dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
+ (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
+ dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
+ dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
+ TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
+ ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
+ dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
+ PixelPTEBytesPerRow);
+#endif
+ } else {
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
+ __func__, dst_y_prefetch_equ);
+#endif
+ }
+
+ {
+ double prefetch_vm_bw;
+ double prefetch_row_bw;
+
+ if (PDEAndMetaPTEBytesFrame == 0) {
+ prefetch_vm_bw = 0;
+ } else if (*DestinationLinesToRequestVMInVBlank > 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+#endif
+ prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
+ (*DestinationLinesToRequestVMInVBlank * LineTime);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
+#endif
+ } else {
+ prefetch_vm_bw = 0;
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
+ __func__, *DestinationLinesToRequestVMInVBlank);
+#endif
+ }
+
+ if (MetaRowByte + PixelPTEBytesPerRow == 0) {
+ prefetch_row_bw = 0;
+ } else if (*DestinationLinesToRequestRowInVBlank > 0) {
+ prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
+ (*DestinationLinesToRequestRowInVBlank * LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
+#endif
+ } else {
+ prefetch_row_bw = 0;
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
+ __func__, *DestinationLinesToRequestRowInVBlank);
+#endif
+ }
+
+ *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
+ }
+
+ if (MyError) {
+ *PrefetchBandwidth = 0;
+ TimeForFetchingMetaPTE = 0;
+ TimeForFetchingRowInVBlank = 0;
+ *DestinationLinesToRequestVMInVBlank = 0;
+ *DestinationLinesToRequestRowInVBlank = 0;
+ *DestinationLinesForPrefetch = 0;
+ LinesToRequestPrefetchPixelData = 0;
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ *RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ return MyError;
+} // CalculatePrefetchSchedule
+
+void dml32_CalculateFlipSchedule(
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool GPUVMEnable,
+ double HostVMMinPageSize,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+ double DPTEBytesPerRow,
+ double BandwidthAvailableForImmediateFlip,
+ unsigned int TotImmediateFlipBytes,
+ enum source_format_class SourcePixelFormat,
+ double LineTime,
+ double VRatio,
+ double VRatioChroma,
+ double Tno_bw,
+ bool DCCEnable,
+ unsigned int dpte_row_height,
+ unsigned int meta_row_height,
+ unsigned int dpte_row_height_chroma,
+ unsigned int meta_row_height_chroma,
+ bool use_one_row_for_frame_flip,
+
+ /* Output */
+ double *DestinationLinesToRequestVMInImmediateFlip,
+ double *DestinationLinesToRequestRowInImmediateFlip,
+ double *final_flip_bw,
+ bool *ImmediateFlipSupportedForPipe)
+{
+ double min_row_time = 0.0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double TimeForFetchingMetaPTEImmediateFlip;
+ double TimeForFetchingRowInVBlankImmediateFlip;
+ double ImmediateFlipBW;
+
+ if (GPUVMEnable == true && HostVMEnable == true)
+ HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ else
+ HostVMDynamicLevelsTrips = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
+ dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
+#endif
+
+ if (TotImmediateFlipBytes > 0) {
+ if (use_one_row_for_frame_flip) {
+ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
+ BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
+ } else {
+ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
+ BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
+ }
+ if (GPUVMEnable == true) {
+ TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
+ HostVMInefficiencyFactor / ImmediateFlipBW,
+ UrgentExtraLatency + UrgentLatency *
+ (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
+ LineTime / 4.0);
+ } else {
+ TimeForFetchingMetaPTEImmediateFlip = 0;
+ }
+ if ((GPUVMEnable == true || DCCEnable == true)) {
+ TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
+ UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
+ } else {
+ TimeForFetchingRowInVBlankImmediateFlip = 0;
+ }
+
+ *DestinationLinesToRequestVMInImmediateFlip =
+ dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
+ *DestinationLinesToRequestRowInImmediateFlip =
+ dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
+
+ if (GPUVMEnable == true) {
+ *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
+ (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
+ (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
+ } else if ((GPUVMEnable == true || DCCEnable == true)) {
+ *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
+ (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
+ } else {
+ *final_flip_bw = 0;
+ }
+ } else {
+ TimeForFetchingMetaPTEImmediateFlip = 0;
+ TimeForFetchingRowInVBlankImmediateFlip = 0;
+ *DestinationLinesToRequestVMInImmediateFlip = 0;
+ *DestinationLinesToRequestRowInImmediateFlip = 0;
+ *final_flip_bw = 0;
+ }
+
+ if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
+ if (GPUVMEnable == true && DCCEnable != true) {
+ min_row_time = dml_min(dpte_row_height *
+ LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
+ } else if (GPUVMEnable != true && DCCEnable == true) {
+ min_row_time = dml_min(meta_row_height *
+ LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
+ } else {
+ min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
+ LineTime / VRatio, dpte_row_height_chroma * LineTime /
+ VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
+ }
+ } else {
+ if (GPUVMEnable == true && DCCEnable != true) {
+ min_row_time = dpte_row_height * LineTime / VRatio;
+ } else if (GPUVMEnable != true && DCCEnable == true) {
+ min_row_time = meta_row_height * LineTime / VRatio;
+ } else {
+ min_row_time =
+ dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
+ }
+ }
+
+ if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
+ || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
+ > min_row_time) {
+ *ImmediateFlipSupportedForPipe = false;
+ } else {
+ *ImmediateFlipSupportedForPipe = true;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+ dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
+ dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
+ __func__, *DestinationLinesToRequestVMInImmediateFlip);
+ dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
+ __func__, *DestinationLinesToRequestRowInImmediateFlip);
+ dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
+ dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
+ __func__, TimeForFetchingRowInVBlankImmediateFlip);
+ dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
+ dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
+#endif
+} // CalculateFlipSchedule
+
+void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ struct vba_vars_st *v,
+ unsigned int PrefetchMode,
+ double DCFCLK,
+ double ReturnBW,
+ SOCParametersList mmSOCParameters,
+ double SOCCLK,
+ double DCFClkDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerSurface[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+ double DSTXAfterScaler[],
+ double DSTYAfterScaler[],
+ bool UnboundedRequestEnabled,
+ unsigned int CompressedBufferSizeInkByte,
+
+ /* Output */
+ enum clock_change_support *DRAMClockChangeSupport,
+ double MaxActiveDRAMClockChangeLatencySupported[],
+ unsigned int SubViewportLinesNeededInMALL[],
+ enum dm_fclock_change_support *FCLKChangeSupport,
+ double *MinActiveFCLKChangeLatencySupported,
+ bool *USRRetrainingSupport,
+ double ActiveDRAMClockChangeLatencyMargin[])
+{
+ unsigned int i, j, k;
+ unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
+ unsigned int DRAMClockChangeSupportNumber = 0;
+ unsigned int LastSurfaceWithoutMargin;
+ unsigned int DRAMClockChangeMethod = 0;
+ bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
+ double MinActiveFCLKChangeMargin = 0.;
+ double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
+ double ActiveClockChangeLatencyHidingY;
+ double ActiveClockChangeLatencyHidingC;
+ double ActiveClockChangeLatencyHiding;
+ double EffectiveDETBufferSizeY;
+ double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
+ double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
+ double TotalPixelBW = 0.0;
+ bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
+ double EffectiveLBLatencyHidingY;
+ double EffectiveLBLatencyHidingC;
+ double LinesInDETY[DC__NUM_DPP__MAX];
+ double LinesInDETC[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
+ double FullDETBufferingTimeY;
+ double FullDETBufferingTimeC;
+ double WritebackDRAMClockChangeLatencyMargin;
+ double WritebackFCLKChangeLatencyMargin;
+ double WritebackLatencyHiding;
+ bool SameTimingForFCLKChange;
+
+ unsigned int TotalActiveWriteback = 0;
+ unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
+ unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
+
+ v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
+ v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
+ + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
+ v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
+ v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
+ v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
+ + 10 / DCFClkDeepSleep;
+ v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
+ + 10 / DCFClkDeepSleep;
+ v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
+ + 10 / DCFClkDeepSleep;
+ v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
+ + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
+ dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
+ dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
+ dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
+ dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
+ dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
+ dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
+ dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
+ dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
+ __func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
+#endif
+
+
+ TotalActiveWriteback = 0;
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if (v->WritebackEnable[k] == true)
+ TotalActiveWriteback = TotalActiveWriteback + 1;
+ }
+
+ if (TotalActiveWriteback <= 1) {
+ v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
+ } else {
+ v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
+ + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ }
+ if (v->USRRetrainingRequiredFinal)
+ v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark
+ + mmSOCParameters.USRRetrainingLatency;
+
+ if (TotalActiveWriteback <= 1) {
+ v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
+ + mmSOCParameters.WritebackLatency;
+ v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
+ + mmSOCParameters.WritebackLatency;
+ } else {
+ v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
+ + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
+ + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
+ }
+
+ if (v->USRRetrainingRequiredFinal)
+ v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
+ + mmSOCParameters.USRRetrainingLatency;
+
+ if (v->USRRetrainingRequiredFinal)
+ v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
+ + mmSOCParameters.USRRetrainingLatency;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
+ __func__, v->Watermark.WritebackDRAMClockChangeWatermark);
+ dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
+ dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
+ dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
+ dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
+#endif
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
+ SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
+ }
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+
+ LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
+ LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
+
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
+ dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal);
+ dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]);
+ dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]);
+ dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]);
+#endif
+
+ EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
+ EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
+ EffectiveDETBufferSizeY = DETBufferSizeY[k];
+
+ if (UnboundedRequestEnabled) {
+ EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
+ + CompressedBufferSizeInkByte * 1024
+ * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
+ / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
+ }
+
+ LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
+ FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
+
+ ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
+ - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
+
+ if (v->NumberOfActiveSurfaces > 1) {
+ ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
+ - (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
+ / v->PixelClock[k] / v->VRatio[k];
+ }
+
+ if (BytePerPixelDETC[k] > 0) {
+ LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
+ LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
+ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
+ / v->VRatioChroma[k];
+ ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
+ - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
+ / v->PixelClock[k];
+ if (v->NumberOfActiveSurfaces > 1) {
+ ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
+ - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
+ / v->PixelClock[k] / v->VRatioChroma[k];
+ }
+ ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
+ ActiveClockChangeLatencyHidingC);
+ } else {
+ ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
+ }
+
+ ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
+ - v->Watermark.DRAMClockChangeWatermark;
+ ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
+ - v->Watermark.FCLKChangeWatermark;
+ USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
+
+ if (v->WritebackEnable[k]) {
+ WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
+ / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
+ if (v->WritebackPixelFormat[k] == dm_444_64)
+ WritebackLatencyHiding = WritebackLatencyHiding / 2;
+
+ WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
+ - v->Watermark.WritebackDRAMClockChangeWatermark;
+
+ WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
+ - v->Watermark.WritebackFCLKChangeWatermark;
+
+ ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
+ WritebackFCLKChangeLatencyMargin);
+ ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
+ WritebackDRAMClockChangeLatencyMargin);
+ }
+ MaxActiveDRAMClockChangeLatencySupported[k] =
+ (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
+ 0 :
+ (ActiveDRAMClockChangeLatencyMargin[k]
+ + mmSOCParameters.DRAMClockChangeLatency);
+ }
+
+ for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
+ for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
+ if (i == j ||
+ (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
+ (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
+ (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
+ (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
+ v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
+ v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
+ (v->DRRDisplay[i] || v->DRRDisplay[j]))) {
+ SynchronizedSurfaces[i][j] = true;
+ } else {
+ SynchronizedSurfaces[i][j] = false;
+ }
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
+ ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
+ FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
+ MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
+ SurfaceWithMinActiveFCLKChangeMargin = k;
+ }
+ }
+
+ *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
+
+ SameTimingForFCLKChange = true;
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
+ if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (SameTimingForFCLKChange ||
+ ActiveFCLKChangeLatencyMargin[k] <
+ SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
+ SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
+ }
+ SameTimingForFCLKChange = false;
+ }
+ }
+
+ if (MinActiveFCLKChangeMargin > 0) {
+ *FCLKChangeSupport = dm_fclock_change_vactive;
+ } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
+ (PrefetchMode <= 1)) {
+ *FCLKChangeSupport = dm_fclock_change_vblank;
+ } else {
+ *FCLKChangeSupport = dm_fclock_change_unsupported;
+ }
+
+ *USRRetrainingSupport = true;
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (USRRetrainingLatencyMargin[k] < 0)) {
+ *USRRetrainingSupport = false;
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
+ v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
+ v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
+ ActiveDRAMClockChangeLatencyMargin[k] < 0) {
+ if (PrefetchMode > 0) {
+ DRAMClockChangeSupportNumber = 2;
+ } else if (DRAMClockChangeSupportNumber == 0) {
+ DRAMClockChangeSupportNumber = 1;
+ LastSurfaceWithoutMargin = k;
+ } else if (DRAMClockChangeSupportNumber == 1 &&
+ !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
+ DRAMClockChangeSupportNumber = 2;
+ }
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
+ DRAMClockChangeMethod = 1;
+ else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
+ DRAMClockChangeMethod = 2;
+ }
+
+ if (DRAMClockChangeMethod == 0) {
+ if (DRAMClockChangeSupportNumber == 0)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
+ else if (DRAMClockChangeSupportNumber == 1)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
+ else
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ } else if (DRAMClockChangeMethod == 1) {
+ if (DRAMClockChangeSupportNumber == 0)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
+ else if (DRAMClockChangeSupportNumber == 1)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
+ else
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ } else {
+ if (DRAMClockChangeSupportNumber == 0)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
+ else if (DRAMClockChangeSupportNumber == 1)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
+ else
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ }
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ unsigned int dst_y_pstate;
+ unsigned int src_y_pstate_l;
+ unsigned int src_y_pstate_c;
+ unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
+
+ dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
+ src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
+ src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
+ sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
+
+#ifdef __DML_VBA_DEBUG__
+dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
+dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
+dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
+dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
+dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
+dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
+dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
+dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
+dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]);
+dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l);
+#endif
+ SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
+
+ if (BytePerPixelDETC[k] > 0) {
+ src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
+ src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
+ sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
+ SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
+
+#ifdef __DML_VBA_DEBUG__
+dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c);
+dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c);
+dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]);
+dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c);
+#endif
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
+ dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
+ dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
+ __func__, *MinActiveFCLKChangeLatencySupported);
+ dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
+#endif
+} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
+
+double dml32_CalculateWriteBackDISPCLK(
+ enum source_format_class WritebackPixelFormat,
+ double PixelClock,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackHTaps,
+ unsigned int WritebackVTaps,
+ unsigned int WritebackSourceWidth,
+ unsigned int WritebackDestinationWidth,
+ unsigned int HTotal,
+ unsigned int WritebackLineBufferSize,
+ double DISPCLKDPPCLKVCOSpeed)
+{
+ double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
+
+ DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
+ DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
+ DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
+ WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
+ return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
+}
+
+void dml32_CalculateMinAndMaxPrefetchMode(
+ enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
+ unsigned int *MinPrefetchMode,
+ unsigned int *MaxPrefetchMode)
+{
+ if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
+ *MinPrefetchMode = 3;
+ *MaxPrefetchMode = 3;
+ } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
+ *MinPrefetchMode = 2;
+ *MaxPrefetchMode = 2;
+ } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
+ *MinPrefetchMode = 1;
+ *MaxPrefetchMode = 1;
+ } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
+ *MinPrefetchMode = 0;
+ *MaxPrefetchMode = 0;
+ } else {
+ *MinPrefetchMode = 0;
+ *MaxPrefetchMode = 3;
+ }
+} // CalculateMinAndMaxPrefetchMode
+
+void dml32_CalculatePixelDeliveryTimes(
+ unsigned int NumberOfActiveSurfaces,
+ double VRatio[],
+ double VRatioChroma[],
+ double VRatioPrefetchY[],
+ double VRatioPrefetchC[],
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ unsigned int DPPPerSurface[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double Dppclk[],
+ unsigned int BytePerPixelC[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int NumberOfCursors[],
+ unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
+ unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int BlockHeight256BytesC[],
+
+ /* Output */
+ double DisplayPipeLineDeliveryTimeLuma[],
+ double DisplayPipeLineDeliveryTimeChroma[],
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeLuma[],
+ double DisplayPipeRequestDeliveryTimeChroma[],
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ double CursorRequestDeliveryTime[],
+ double CursorRequestDeliveryTimePrefetch[])
+{
+ double req_per_swath_ub;
+ unsigned int k;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
+ dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
+ dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
+ dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
+ dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
+ dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
+ dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
+ dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
+ dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
+#endif
+
+ if (VRatio[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLuma[k] =
+ swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+ }
+
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChroma[k] = 0;
+ } else {
+ if (VRatioChroma[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChroma[k] =
+ swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChroma[k] =
+ swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+ }
+ }
+
+ if (VRatioPrefetchY[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
+ swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
+ swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+ }
+
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
+ } else {
+ if (VRatioPrefetchC[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
+ DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
+ swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
+ __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
+ __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
+ __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
+ __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
+#endif
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (!IsVertical(SourceRotation[k]))
+ req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
+ else
+ req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
+#endif
+
+ DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
+ DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
+ DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeRequestDeliveryTimeChroma[k] = 0;
+ DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
+ } else {
+ if (!IsVertical(SourceRotation[k]))
+ req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
+ else
+ req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
+#endif
+ DisplayPipeRequestDeliveryTimeChroma[k] =
+ DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
+ DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
+ __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
+ __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
+ __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
+ __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
+#endif
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ unsigned int cursor_req_per_width;
+
+ cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
+ 256.0 / 8.0, 1.0);
+ if (NumberOfCursors[k] > 0) {
+ if (VRatio[k] <= 1) {
+ CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
+ HRatio[k] / PixelClock[k] / cursor_req_per_width;
+ } else {
+ CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
+ PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
+ }
+ if (VRatioPrefetchY[k] <= 1) {
+ CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
+ HRatio[k] / PixelClock[k] / cursor_req_per_width;
+ } else {
+ CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
+ PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
+ }
+ } else {
+ CursorRequestDeliveryTime[k] = 0;
+ CursorRequestDeliveryTimePrefetch[k] = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
+ __func__, k, NumberOfCursors[k]);
+ dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
+ __func__, k, CursorRequestDeliveryTime[k]);
+ dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
+ __func__, k, CursorRequestDeliveryTimePrefetch[k]);
+#endif
+ }
+} // CalculatePixelDeliveryTimes
+
+void dml32_CalculateMetaAndPTETimes(
+ bool use_one_row_for_frame[],
+ unsigned int NumberOfActiveSurfaces,
+ bool GPUVMEnable,
+ unsigned int MetaChunkSize,
+ unsigned int MinMetaChunkSizeBytes,
+ unsigned int HTotal[],
+ double VRatio[],
+ double VRatioChroma[],
+ double DestinationLinesToRequestRowInVBlank[],
+ double DestinationLinesToRequestRowInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int dpte_row_height[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PTERequestSizeC[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+
+ /* Output */
+ double DST_Y_PER_PTE_ROW_NOM_L[],
+ double DST_Y_PER_PTE_ROW_NOM_C[],
+ double DST_Y_PER_META_ROW_NOM_L[],
+ double DST_Y_PER_META_ROW_NOM_C[],
+ double TimePerMetaChunkNominal[],
+ double TimePerChromaMetaChunkNominal[],
+ double TimePerMetaChunkVBlank[],
+ double TimePerChromaMetaChunkVBlank[],
+ double TimePerMetaChunkFlip[],
+ double TimePerChromaMetaChunkFlip[],
+ double time_per_pte_group_nom_luma[],
+ double time_per_pte_group_vblank_luma[],
+ double time_per_pte_group_flip_luma[],
+ double time_per_pte_group_nom_chroma[],
+ double time_per_pte_group_vblank_chroma[],
+ double time_per_pte_group_flip_chroma[])
+{
+ unsigned int meta_chunk_width;
+ unsigned int min_meta_chunk_width;
+ unsigned int meta_chunk_per_row_int;
+ unsigned int meta_row_remainder;
+ unsigned int meta_chunk_threshold;
+ unsigned int meta_chunks_per_row_ub;
+ unsigned int meta_chunk_width_chroma;
+ unsigned int min_meta_chunk_width_chroma;
+ unsigned int meta_chunk_per_row_int_chroma;
+ unsigned int meta_row_remainder_chroma;
+ unsigned int meta_chunk_threshold_chroma;
+ unsigned int meta_chunks_per_row_ub_chroma;
+ unsigned int dpte_group_width_luma;
+ unsigned int dpte_groups_per_row_luma_ub;
+ unsigned int dpte_group_width_chroma;
+ unsigned int dpte_groups_per_row_chroma_ub;
+ unsigned int k;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
+ if (BytePerPixelC[k] == 0)
+ DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
+ else
+ DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
+ DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
+ if (BytePerPixelC[k] == 0)
+ DST_Y_PER_META_ROW_NOM_C[k] = 0;
+ else
+ DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (DCCEnable[k] == true) {
+ meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
+ min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
+ meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
+ meta_row_remainder = meta_row_width[k] % meta_chunk_width;
+ if (!IsVertical(SourceRotation[k]))
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
+ else
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
+
+ if (meta_row_remainder <= meta_chunk_threshold)
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+ else
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+
+ TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ if (BytePerPixelC[k] == 0) {
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ } else {
+ meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
+ meta_row_height_chroma[k];
+ min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
+ meta_row_height_chroma[k];
+ meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
+ meta_chunk_width_chroma;
+ meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
+ if (!IsVertical(SourceRotation[k])) {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
+ meta_req_width_chroma[k];
+ } else {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
+ meta_req_height_chroma[k];
+ }
+ if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
+ else
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
+
+ TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ }
+ } else {
+ TimePerMetaChunkNominal[k] = 0;
+ TimePerMetaChunkVBlank[k] = 0;
+ TimePerMetaChunkFlip[k] = 0;
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (GPUVMEnable == true) {
+ if (!IsVertical(SourceRotation[k])) {
+ dpte_group_width_luma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
+ } else {
+ dpte_group_width_luma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
+ }
+
+ if (use_one_row_for_frame[k]) {
+ dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
+ (double) dpte_group_width_luma / 2.0, 1.0);
+ } else {
+ dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
+ (double) dpte_group_width_luma, 1.0);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
+ __func__, k, use_one_row_for_frame[k]);
+ dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
+ __func__, k, dpte_group_bytes[k]);
+ dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
+ __func__, k, PTERequestSizeY[k]);
+ dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
+ __func__, k, PixelPTEReqWidthY[k]);
+ dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
+ __func__, k, PixelPTEReqHeightY[k]);
+ dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
+ __func__, k, dpte_row_width_luma_ub[k]);
+ dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
+ __func__, k, dpte_group_width_luma);
+ dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
+ __func__, k, dpte_groups_per_row_luma_ub);
+#endif
+
+ time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ if (BytePerPixelC[k] == 0) {
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ } else {
+ if (!IsVertical(SourceRotation[k])) {
+ dpte_group_width_chroma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
+ } else {
+ dpte_group_width_chroma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
+ }
+
+ if (use_one_row_for_frame[k]) {
+ dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
+ (double) dpte_group_width_chroma / 2.0, 1.0);
+ } else {
+ dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
+ (double) dpte_group_width_chroma, 1.0);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
+ __func__, k, dpte_row_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
+ __func__, k, dpte_group_width_chroma);
+ dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
+ __func__, k, dpte_groups_per_row_chroma_ub);
+#endif
+ time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ }
+ } else {
+ time_per_pte_group_nom_luma[k] = 0;
+ time_per_pte_group_vblank_luma[k] = 0;
+ time_per_pte_group_flip_luma[k] = 0;
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
+ __func__, k, DestinationLinesToRequestRowInVBlank[k]);
+ dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
+ __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
+ __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
+ __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
+ __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
+ __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
+ dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
+ __func__, k, TimePerMetaChunkNominal[k]);
+ dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
+ __func__, k, TimePerMetaChunkVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
+ __func__, k, TimePerMetaChunkFlip[k]);
+ dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
+ __func__, k, TimePerChromaMetaChunkNominal[k]);
+ dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
+ __func__, k, TimePerChromaMetaChunkVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
+ __func__, k, TimePerChromaMetaChunkFlip[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
+ __func__, k, time_per_pte_group_nom_luma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
+ __func__, k, time_per_pte_group_vblank_luma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
+ __func__, k, time_per_pte_group_flip_luma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
+ __func__, k, time_per_pte_group_nom_chroma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
+ __func__, k, time_per_pte_group_vblank_chroma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
+ __func__, k, time_per_pte_group_flip_chroma[k]);
+#endif
+ }
+} // CalculateMetaAndPTETimes
+
+void dml32_CalculateVMGroupAndRequestTimes(
+ unsigned int NumberOfActiveSurfaces,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int HTotal[],
+ unsigned int BytePerPixelC[],
+ double DestinationLinesToRequestVMInVBlank[],
+ double DestinationLinesToRequestVMInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+ unsigned int vm_group_bytes[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ unsigned int meta_pte_bytes_per_frame_ub_l[],
+ unsigned int meta_pte_bytes_per_frame_ub_c[],
+
+ /* Output */
+ double TimePerVMGroupVBlank[],
+ double TimePerVMGroupFlip[],
+ double TimePerVMRequestVBlank[],
+ double TimePerVMRequestFlip[])
+{
+ unsigned int k;
+ unsigned int num_group_per_lower_vm_stage;
+ unsigned int num_req_per_lower_vm_stage;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
+ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+#endif
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
+ dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
+ dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
+ __func__, k, dpde0_bytes_per_frame_ub_l[k]);
+ dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
+ __func__, k, dpde0_bytes_per_frame_ub_c[k]);
+ dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
+ __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
+ dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
+ __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
+#endif
+
+ if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
+ if (DCCEnable[k] == false) {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = dml_ceil(
+ (double) (dpde0_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1.0) +
+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
+ (double) (vm_group_bytes[k]), 1.0);
+ } else {
+ num_group_per_lower_vm_stage = dml_ceil(
+ (double) (dpde0_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1.0);
+ }
+ } else {
+ if (GPUVMMaxPageTableLevels == 1) {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = dml_ceil(
+ (double) (meta_pte_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1.0) +
+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
+ (double) (vm_group_bytes[k]), 1.0);
+ } else {
+ num_group_per_lower_vm_stage = dml_ceil(
+ (double) (meta_pte_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1.0);
+ }
+ } else {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = 2 + dml_ceil(
+ (double) (dpde0_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1) +
+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
+ (double) (vm_group_bytes[k]), 1) +
+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1) +
+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
+ (double) (vm_group_bytes[k]), 1);
+ } else {
+ num_group_per_lower_vm_stage = 1 + dml_ceil(
+ (double) (dpde0_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1) + dml_ceil(
+ (double) (meta_pte_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1);
+ }
+ }
+ }
+
+ if (DCCEnable[k] == false) {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
+ dpde0_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
+ }
+ } else {
+ if (GPUVMMaxPageTableLevels == 1) {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
+ meta_pte_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
+ }
+ } else {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
+ 64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
+ meta_pte_bytes_per_frame_ub_l[k] / 64 +
+ meta_pte_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
+ 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
+ }
+ }
+ }
+
+ TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
+ HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+ TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+ TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
+ HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
+ TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
+
+ if (GPUVMMaxPageTableLevels > 2) {
+ TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
+ TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
+ TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
+ TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
+ }
+
+ } else {
+ TimePerVMGroupVBlank[k] = 0;
+ TimePerVMGroupFlip[k] = 0;
+ TimePerVMRequestVBlank[k] = 0;
+ TimePerVMRequestFlip[k] = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
+ dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
+#endif
+ }
+} // CalculateVMGroupAndRequestTimes
+
+void dml32_CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ unsigned int nomDETInKByte,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dm_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum dm_rotation_angle SourceRotation,
+ /* Output */
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma)
+{
+ typedef enum {
+ REQ_256Bytes,
+ REQ_128BytesNonContiguous,
+ REQ_128BytesContiguous,
+ REQ_NA
+ } RequestType;
+
+ RequestType RequestLuma;
+ RequestType RequestChroma;
+
+ unsigned int segment_order_horz_contiguous_luma;
+ unsigned int segment_order_horz_contiguous_chroma;
+ unsigned int segment_order_vert_contiguous_luma;
+ unsigned int segment_order_vert_contiguous_chroma;
+ unsigned int req128_horz_wc_l;
+ unsigned int req128_horz_wc_c;
+ unsigned int req128_vert_wc_l;
+ unsigned int req128_vert_wc_c;
+ unsigned int MAS_vp_horz_limit;
+ unsigned int MAS_vp_vert_limit;
+ unsigned int max_vp_horz_width;
+ unsigned int max_vp_vert_height;
+ unsigned int eff_surf_width_l;
+ unsigned int eff_surf_width_c;
+ unsigned int eff_surf_height_l;
+ unsigned int eff_surf_height_c;
+ unsigned int full_swath_bytes_horz_wc_l;
+ unsigned int full_swath_bytes_horz_wc_c;
+ unsigned int full_swath_bytes_vert_wc_l;
+ unsigned int full_swath_bytes_vert_wc_c;
+ unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
+
+ unsigned int yuv420;
+ unsigned int horz_div_l;
+ unsigned int horz_div_c;
+ unsigned int vert_div_l;
+ unsigned int vert_div_c;
+
+ unsigned int swath_buf_size;
+ double detile_buf_vp_horz_limit;
+ double detile_buf_vp_vert_limit;
+
+ yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
+ SourcePixelFormat == dm_420_12) ? 1 : 0);
+ horz_div_l = 1;
+ horz_div_c = 1;
+ vert_div_l = 1;
+ vert_div_c = 1;
+
+ if (BytePerPixelY == 1)
+ vert_div_l = 0;
+ if (BytePerPixelC == 1)
+ vert_div_c = 0;
+
+ if (BytePerPixelC == 0) {
+ swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
+ detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
+ BytePerPixelY / (1 + horz_div_l));
+ detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
+ (1 + vert_div_l));
+ } else {
+ swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
+ detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
+ BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
+ BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
+ detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
+ (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
+ (1 + vert_div_c) / (1 + yuv420));
+ }
+
+ if (SourcePixelFormat == dm_420_10) {
+ detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
+ detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
+ }
+
+ detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
+ detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
+
+ MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
+ MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
+ max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
+ max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
+ eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
+ eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
+ eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
+ eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
+
+ full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
+ full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
+ if (BytePerPixelC > 0) {
+ full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
+ full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
+ } else {
+ full_swath_bytes_horz_wc_c = 0;
+ full_swath_bytes_vert_wc_c = 0;
+ }
+
+ if (SourcePixelFormat == dm_420_10) {
+ full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
+ full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
+ full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
+ full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
+ }
+
+ if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 0;
+ } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
+ full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 1;
+ } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
+ full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 0;
+ } else {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 1;
+ }
+
+ if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 0;
+ } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
+ full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 1;
+ } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
+ full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 0;
+ } else {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 1;
+ }
+
+ if (BytePerPixelY == 2) {
+ segment_order_horz_contiguous_luma = 0;
+ segment_order_vert_contiguous_luma = 1;
+ } else {
+ segment_order_horz_contiguous_luma = 1;
+ segment_order_vert_contiguous_luma = 0;
+ }
+
+ if (BytePerPixelC == 2) {
+ segment_order_horz_contiguous_chroma = 0;
+ segment_order_vert_contiguous_chroma = 1;
+ } else {
+ segment_order_horz_contiguous_chroma = 1;
+ segment_order_vert_contiguous_chroma = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
+ dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
+ dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
+ dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
+ dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
+ dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
+ dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
+ dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
+ dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
+ __func__, segment_order_horz_contiguous_chroma);
+#endif
+
+ if (DCCProgrammingAssumesScanDirectionUnknown == true) {
+ if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
+ RequestLuma = REQ_256Bytes;
+ else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
+ (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
+ RequestLuma = REQ_128BytesNonContiguous;
+ else
+ RequestLuma = REQ_128BytesContiguous;
+
+ if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
+ RequestChroma = REQ_256Bytes;
+ else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
+ (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
+ RequestChroma = REQ_128BytesNonContiguous;
+ else
+ RequestChroma = REQ_128BytesContiguous;
+
+ } else if (!IsVertical(SourceRotation)) {
+ if (req128_horz_wc_l == 0)
+ RequestLuma = REQ_256Bytes;
+ else if (segment_order_horz_contiguous_luma == 0)
+ RequestLuma = REQ_128BytesNonContiguous;
+ else
+ RequestLuma = REQ_128BytesContiguous;
+
+ if (req128_horz_wc_c == 0)
+ RequestChroma = REQ_256Bytes;
+ else if (segment_order_horz_contiguous_chroma == 0)
+ RequestChroma = REQ_128BytesNonContiguous;
+ else
+ RequestChroma = REQ_128BytesContiguous;
+
+ } else {
+ if (req128_vert_wc_l == 0)
+ RequestLuma = REQ_256Bytes;
+ else if (segment_order_vert_contiguous_luma == 0)
+ RequestLuma = REQ_128BytesNonContiguous;
+ else
+ RequestLuma = REQ_128BytesContiguous;
+
+ if (req128_vert_wc_c == 0)
+ RequestChroma = REQ_256Bytes;
+ else if (segment_order_vert_contiguous_chroma == 0)
+ RequestChroma = REQ_128BytesNonContiguous;
+ else
+ RequestChroma = REQ_128BytesContiguous;
+ }
+
+ if (RequestLuma == REQ_256Bytes) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 256;
+ *IndependentBlockLuma = 0;
+ } else if (RequestLuma == REQ_128BytesContiguous) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 128;
+ *IndependentBlockLuma = 128;
+ } else {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 64;
+ *IndependentBlockLuma = 64;
+ }
+
+ if (RequestChroma == REQ_256Bytes) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 256;
+ *IndependentBlockChroma = 0;
+ } else if (RequestChroma == REQ_128BytesContiguous) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 128;
+ *IndependentBlockChroma = 128;
+ } else {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 64;
+ *IndependentBlockChroma = 64;
+ }
+
+ if (DCCEnabled != true || BytePerPixelC == 0) {
+ *MaxUncompressedBlockChroma = 0;
+ *MaxCompressedBlockChroma = 0;
+ *IndependentBlockChroma = 0;
+ }
+
+ if (DCCEnabled != true) {
+ *MaxUncompressedBlockLuma = 0;
+ *MaxCompressedBlockLuma = 0;
+ *IndependentBlockLuma = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
+ dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
+ dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
+ dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
+ dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
+ dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
+#endif
+
+} // CalculateDCCConfiguration
+
+void dml32_CalculateStutterEfficiency(
+ unsigned int CompressedBufferSizeInkByte,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool UnboundedRequestEnabled,
+ unsigned int MetaFIFOSizeInKEntries,
+ unsigned int ZeroSizeBufferEntries,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int ROBBufferSizeInKByte,
+ double TotalDataReadBandwidth,
+ double DCFCLK,
+ double ReturnBW,
+ unsigned int CompbufReservedSpace64B,
+ unsigned int CompbufReservedSpaceZs,
+ double SRExitTime,
+ double SRExitZ8Time,
+ bool SynchronizeTimingsFinal,
+ unsigned int BlendingAndTiming[],
+ double StutterEnterPlusExitWatermark,
+ double Z8StutterEnterPlusExitWatermark,
+ bool ProgressiveToInterlaceUnitInOPP,
+ bool Interlace[],
+ double MinTTUVBlank[],
+ unsigned int DPPPerSurface[],
+ unsigned int DETBufferSizeY[],
+ unsigned int BytePerPixelY[],
+ double BytePerPixelDETY[],
+ double SwathWidthY[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double NetDCCRateLuma[],
+ double NetDCCRateChroma[],
+ double DCCFractionOfZeroSizeRequestsLuma[],
+ double DCCFractionOfZeroSizeRequestsChroma[],
+ unsigned int HTotal[],
+ unsigned int VTotal[],
+ double PixelClock[],
+ double VRatio[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesC[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int DCCYMaxUncompressedBlock[],
+ unsigned int DCCCMaxUncompressedBlock[],
+ unsigned int VActive[],
+ bool DCCEnable[],
+ bool WritebackEnable[],
+ double ReadBandwidthSurfaceLuma[],
+ double ReadBandwidthSurfaceChroma[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+
+ /* Output */
+ double *StutterEfficiencyNotIncludingVBlank,
+ double *StutterEfficiency,
+ unsigned int *NumberOfStutterBurstsPerFrame,
+ double *Z8StutterEfficiencyNotIncludingVBlank,
+ double *Z8StutterEfficiency,
+ unsigned int *Z8NumberOfStutterBurstsPerFrame,
+ double *StutterPeriod,
+ bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
+{
+
+ bool FoundCriticalSurface = false;
+ unsigned int SwathSizeCriticalSurface = 0;
+ unsigned int LastChunkOfSwathSize;
+ unsigned int MissingPartOfLastSwathOfDETSize;
+ double LastZ8StutterPeriod = 0.0;
+ double LastStutterPeriod = 0.0;
+ unsigned int TotalNumberOfActiveOTG = 0;
+ double doublePixelClock;
+ unsigned int doubleHTotal;
+ unsigned int doubleVTotal;
+ bool SameTiming = true;
+ double DETBufferingTimeY;
+ double SwathWidthYCriticalSurface = 0.0;
+ double SwathHeightYCriticalSurface = 0.0;
+ double VActiveTimeCriticalSurface = 0.0;
+ double FrameTimeCriticalSurface = 0.0;
+ unsigned int BytePerPixelYCriticalSurface = 0;
+ double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
+ unsigned int DETBufferSizeYCriticalSurface = 0;
+ double MinTTUVBlankCriticalSurface = 0.0;
+ unsigned int BlockWidth256BytesYCriticalSurface = 0;
+ bool doublePlaneCriticalSurface = 0;
+ bool doublePipeCriticalSurface = 0;
+ double TotalCompressedReadBandwidth;
+ double TotalRowReadBandwidth;
+ double AverageDCCCompressionRate;
+ double EffectiveCompressedBufferSize;
+ double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
+ double StutterBurstTime;
+ unsigned int TotalActiveWriteback;
+ double LinesInDETY;
+ double LinesInDETYRoundedDownToSwath;
+ double MaximumEffectiveCompressionLuma;
+ double MaximumEffectiveCompressionChroma;
+ double TotalZeroSizeRequestReadBandwidth;
+ double TotalZeroSizeCompressedReadBandwidth;
+ double AverageDCCZeroSizeFraction;
+ double AverageZeroSizeCompressionRate;
+ unsigned int k;
+
+ TotalZeroSizeRequestReadBandwidth = 0;
+ TotalZeroSizeCompressedReadBandwidth = 0;
+ TotalRowReadBandwidth = 0;
+ TotalCompressedReadBandwidth = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ if (DCCEnable[k] == true) {
+ if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
+ || (!IsVertical(SourceRotation[k])
+ && BlockHeight256BytesY[k] > SwathHeightY[k])
+ || DCCYMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionLuma = 2;
+ } else {
+ MaximumEffectiveCompressionLuma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
+ + ReadBandwidthSurfaceLuma[k]
+ / dml_min(NetDCCRateLuma[k],
+ MaximumEffectiveCompressionLuma);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
+ __func__, k, ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
+ __func__, k, NetDCCRateLuma[k]);
+ dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
+ __func__, k, MaximumEffectiveCompressionLuma);
+#endif
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
+ + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
+ / MaximumEffectiveCompressionLuma;
+
+ if (ReadBandwidthSurfaceChroma[k] > 0) {
+ if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
+ || (!IsVertical(SourceRotation[k])
+ && BlockHeight256BytesC[k] > SwathHeightC[k])
+ || DCCCMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionChroma = 2;
+ } else {
+ MaximumEffectiveCompressionChroma = 4;
+ }
+ TotalCompressedReadBandwidth =
+ TotalCompressedReadBandwidth
+ + ReadBandwidthSurfaceChroma[k]
+ / dml_min(NetDCCRateChroma[k],
+ MaximumEffectiveCompressionChroma);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
+ __func__, k, ReadBandwidthSurfaceChroma[k]);
+ dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
+ __func__, k, NetDCCRateChroma[k]);
+ dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
+ __func__, k, MaximumEffectiveCompressionChroma);
+#endif
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
+ + ReadBandwidthSurfaceChroma[k]
+ * DCCFractionOfZeroSizeRequestsChroma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthSurfaceChroma[k]
+ * DCCFractionOfZeroSizeRequestsChroma[k]
+ / MaximumEffectiveCompressionChroma;
+ }
+ } else {
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
+ + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
+ }
+ TotalRowReadBandwidth = TotalRowReadBandwidth
+ + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
+ }
+ }
+
+ AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
+ AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
+ dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
+ __func__, TotalZeroSizeCompressedReadBandwidth);
+ dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
+ dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
+ dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
+ dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
+#endif
+ if (AverageDCCZeroSizeFraction == 1) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
+ / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
+ * AverageZeroSizeCompressionRate
+ + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
+ * AverageZeroSizeCompressionRate;
+ } else if (AverageDCCZeroSizeFraction > 0) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
+ / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = dml_min(
+ (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ (double) MetaFIFOSizeInKEntries * 1024 * 64
+ / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
+ + 1 / AverageDCCCompressionRate))
+ + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
+ * AverageDCCCompressionRate,
+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
+ / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: min 1 = %f\n", __func__,
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
+ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
+ AverageDCCCompressionRate));
+ dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
+ CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
+ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+#endif
+ } else {
+ EffectiveCompressedBufferSize = dml_min(
+ (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
+ + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
+ * AverageDCCCompressionRate;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: min 1 = %f\n", __func__,
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__,
+ MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
+#endif
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
+ dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+#endif
+
+ *StutterPeriod = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ LinesInDETY = ((double) DETBufferSizeY[k]
+ + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
+ * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
+ / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
+ DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
+ / VRatio[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
+ dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
+ dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
+ __func__, k, ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
+ dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
+ __func__, k, LinesInDETYRoundedDownToSwath);
+ dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
+ dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
+ dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
+ dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
+ dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
+#endif
+
+ if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
+ bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
+
+ FoundCriticalSurface = true;
+ *StutterPeriod = DETBufferingTimeY;
+ FrameTimeCriticalSurface = (
+ isInterlaceTiming ?
+ dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
+ * (double) HTotal[k] / PixelClock[k];
+ VActiveTimeCriticalSurface = (
+ isInterlaceTiming ?
+ dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
+ * (double) HTotal[k] / PixelClock[k];
+ BytePerPixelYCriticalSurface = BytePerPixelY[k];
+ SwathWidthYCriticalSurface = SwathWidthY[k];
+ SwathHeightYCriticalSurface = SwathHeightY[k];
+ BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
+ LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
+ - (LinesInDETY - LinesInDETYRoundedDownToSwath);
+ DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
+ MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
+ doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
+ doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n",
+ __func__, k, FoundCriticalSurface);
+ dml_print("DML::%s: k=%0d, StutterPeriod = %f\n",
+ __func__, k, *StutterPeriod);
+ dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n",
+ __func__, k, MinTTUVBlankCriticalSurface);
+ dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n",
+ __func__, k, FrameTimeCriticalSurface);
+ dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n",
+ __func__, k, VActiveTimeCriticalSurface);
+ dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n",
+ __func__, k, BytePerPixelYCriticalSurface);
+ dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n",
+ __func__, k, SwathWidthYCriticalSurface);
+ dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n",
+ __func__, k, SwathHeightYCriticalSurface);
+ dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n",
+ __func__, k, BlockWidth256BytesYCriticalSurface);
+ dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n",
+ __func__, k, doublePlaneCriticalSurface);
+ dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n",
+ __func__, k, doublePipeCriticalSurface);
+ dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
+ __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
+#endif
+ }
+ }
+ }
+
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
+ EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
+ __func__, *StutterPeriod * TotalDataReadBandwidth);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+ dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+ dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
+ dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+#endif
+
+ StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
+ / ReturnBW
+ + (*StutterPeriod * TotalDataReadBandwidth
+ - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
+ + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
+ AverageDCCCompressionRate / ReturnBW);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
+ __func__, (*StutterPeriod * TotalDataReadBandwidth));
+ dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
+ dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+#endif
+ StutterBurstTime = dml_max(StutterBurstTime,
+ LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
+ * SwathWidthYCriticalSurface / ReturnBW);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Time to finish residue swath=%f\n",
+ __func__,
+ LinesToFinishSwathTransferStutterCriticalSurface *
+ BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
+#endif
+
+ TotalActiveWriteback = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (WritebackEnable[k])
+ TotalActiveWriteback = TotalActiveWriteback + 1;
+ }
+
+ if (TotalActiveWriteback == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
+ dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
+ dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+#endif
+ *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
+ 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
+ *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
+ 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
+ *NumberOfStutterBurstsPerFrame = (
+ *StutterEfficiencyNotIncludingVBlank > 0 ?
+ dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
+ *Z8NumberOfStutterBurstsPerFrame = (
+ *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
+ dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
+ } else {
+ *StutterEfficiencyNotIncludingVBlank = 0.;
+ *Z8StutterEfficiencyNotIncludingVBlank = 0.;
+ *NumberOfStutterBurstsPerFrame = 0;
+ *Z8NumberOfStutterBurstsPerFrame = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
+ __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
+ __func__, *Z8StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ if (BlendingAndTiming[k] == k) {
+ if (TotalNumberOfActiveOTG == 0) {
+ doublePixelClock = PixelClock[k];
+ doubleHTotal = HTotal[k];
+ doubleVTotal = VTotal[k];
+ } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
+ || doubleVTotal != VTotal[k]) {
+ SameTiming = false;
+ }
+ TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
+ }
+ }
+ }
+
+ if (*StutterEfficiencyNotIncludingVBlank > 0) {
+ LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+
+ if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
+ && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
+ *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
+ + StutterBurstTime * VActiveTimeCriticalSurface
+ / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
+ } else {
+ *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *StutterEfficiency = 0;
+ }
+
+ if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
+ LastZ8StutterPeriod = VActiveTimeCriticalSurface
+ - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+ if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
+ MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
+ *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
+ * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
+ } else {
+ *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *Z8StutterEfficiency = 0.;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+ dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
+ dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
+ __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
+ * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
+ LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
+ MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
+ - DETBufferSizeYCriticalSurface;
+
+ *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
+ && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
+ && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
+ && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
+ dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
+ dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
+ dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+#endif
+} // CalculateStutterEfficiency
+
+void dml32_CalculateMaxDETAndMinCompressedBufferSize(
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int ROBBufferSizeInKByte,
+ unsigned int MaxNumDPP,
+ bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
+ unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
+
+ /* Output */
+ unsigned int *MaxTotalDETInKByte,
+ unsigned int *nomDETInKByte,
+ unsigned int *MinCompressedBufferSizeInKByte)
+{
+ bool det_buff_size_override_en = nomDETInKByteOverrideEnable;
+ unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue;
+
+ *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte +
+ (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64);
+ *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
+ *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
+ dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
+ dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
+ dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
+ dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
+#endif
+
+ if (det_buff_size_override_en) {
+ *nomDETInKByte = det_buff_size_override_val;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
+#endif
+ }
+} // CalculateMaxDETAndMinCompressedBufferSize
+
+bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[])
+{
+ unsigned int k;
+ bool NotEnoughUrgentLatencyHiding = false;
+ bool CalculateVActiveBandwithSupport_val = false;
+ double VActiveBandwith = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (NotUrgentLatencyHiding[k]) {
+ NotEnoughUrgentLatencyHiding = true;
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k];
+ }
+
+ CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
+
+#ifdef __DML_VBA_DEBUG__
+dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, NotEnoughUrgentLatencyHiding);
+dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith);
+dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val);
+#endif
+ return CalculateVActiveBandwithSupport_val;
+}
+
+void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+
+ /* output */
+ double *PrefetchBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *PrefetchBandwidthSupport)
+{
+ unsigned int k;
+ bool NotEnoughUrgentLatencyHiding = false;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (NotUrgentLatencyHiding[k]) {
+ NotEnoughUrgentLatencyHiding = true;
+ }
+ }
+
+ *PrefetchBandwidth = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
+ ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]),
+ NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ }
+
+ *PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
+ *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
+}
+
+double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double cursor_bw_pre[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[])
+{
+ unsigned int k;
+ double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
+ NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ }
+
+ return CalculateBandwidthAvailableForImmediateFlip_val;
+}
+
+void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ enum immediate_flip_requirement ImmediateFlipRequirement[],
+ double final_flip_bw[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+
+ /* output */
+ double *TotalBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *ImmediateFlipBandwidthSupport)
+{
+ unsigned int k;
+ *TotalBandwidth = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
+ *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
+ NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
+ NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ } else {
+ *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
+ NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
+ NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ }
+ }
+ *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
+ *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
new file mode 100644
index 000000000000..3989c2a28fae
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -0,0 +1,1144 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML_DCN32_DISPLAY_MODE_VBA_UTIL_32_H__
+#define __DML_DCN32_DISPLAY_MODE_VBA_UTIL_32_H__
+
+#include "../display_mode_enums.h"
+#include "os_types.h"
+#include "../dc_features.h"
+#include "../display_mode_structs.h"
+#include "../display_mode_vba.h"
+
+unsigned int dml32_dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum output_format_class pixelFormat,
+ enum output_encoder_class Output);
+
+unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
+
+bool IsVertical(enum dm_rotation_angle Scan);
+
+void dml32_CalculateBytePerPixelAndBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+
+ /*Output*/
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC,
+ unsigned int *MacroTileHeightY,
+ unsigned int *MacroTileHeightC,
+ unsigned int *MacroTileWidthY,
+ unsigned int *MacroTileWidthC);
+
+void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
+ double HRatio,
+ double HRatioChroma,
+ double VRatio,
+ double VRatioChroma,
+ double MaxDCHUBToPSCLThroughput,
+ double MaxPSCLToLBThroughput,
+ double PixelClock,
+ enum source_format_class SourcePixelFormat,
+ unsigned int HTaps,
+ unsigned int HTapsChroma,
+ unsigned int VTaps,
+ unsigned int VTapsChroma,
+
+ /* output */
+ double *PSCL_THROUGHPUT,
+ double *PSCL_THROUGHPUT_CHROMA,
+ double *DPPCLKUsingSingleDPP);
+
+void dml32_CalculateSwathAndDETConfiguration(
+ unsigned int DETSizeOverride[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int MaxTotalDETInKByte,
+ unsigned int MinCompressedBufferSizeInKByte,
+ double ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int nomDETInKByte,
+ enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ unsigned int PixelChunkSizeKBytes,
+ unsigned int ROBSizeKBytes,
+ unsigned int CompressedBufferSegmentSizeInkByteFinal,
+ enum output_encoder_class Output[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double MaximumSwathWidthLuma[],
+ double MaximumSwathWidthChroma[],
+ enum dm_rotation_angle SourceRotation[],
+ bool ViewportStationary[],
+ enum source_format_class SourcePixelFormat[],
+ enum dm_swizzle_mode SurfaceTiling[],
+ unsigned int ViewportWidth[],
+ unsigned int ViewportHeight[],
+ unsigned int ViewportXStart[],
+ unsigned int ViewportYStart[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ enum odm_combine_mode ODMMode[],
+ unsigned int BlendingAndTiming[],
+ unsigned int BytePerPixY[],
+ unsigned int BytePerPixC[],
+ double BytePerPixDETY[],
+ double BytePerPixDETC[],
+ unsigned int HActive[],
+ double HRatio[],
+ double HRatioChroma[],
+ unsigned int DPPPerSurface[],
+
+ /* Output */
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ double SwathWidth[],
+ double SwathWidthChroma[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int DETBufferSizeInKByte[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ bool *UnboundedRequestEnabled,
+ unsigned int *CompressedBufferSizeInkByte,
+ unsigned int *CompBufReservedSpaceKBytes,
+ bool *CompBufReservedSpaceNeedAdjustment,
+ bool ViewportSizeSupportPerSurface[],
+ bool *ViewportSizeSupport);
+
+void dml32_CalculateSwathWidth(
+ bool ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ enum source_format_class SourcePixelFormat[],
+ enum dm_rotation_angle SourceScan[],
+ bool ViewportStationary[],
+ unsigned int ViewportWidth[],
+ unsigned int ViewportHeight[],
+ unsigned int ViewportXStart[],
+ unsigned int ViewportYStart[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ enum odm_combine_mode ODMMode[],
+ unsigned int BytePerPixY[],
+ unsigned int BytePerPixC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ unsigned int BlendingAndTiming[],
+ unsigned int HActive[],
+ double HRatio[],
+ unsigned int DPPPerSurface[],
+
+ /* Output */
+ double SwathWidthdoubleDPPY[],
+ double SwathWidthdoubleDPPC[],
+ double SwathWidthY[], // per-pipe
+ double SwathWidthC[], // per-pipe
+ unsigned int MaximumSwathHeightY[],
+ unsigned int MaximumSwathHeightC[],
+ unsigned int swath_width_luma_ub[], // per-pipe
+ unsigned int swath_width_chroma_ub[]);
+
+bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ unsigned int TotalNumberOfActiveDPP,
+ bool NoChroma,
+ enum output_encoder_class Output,
+ enum dm_swizzle_mode SurfaceTiling,
+ bool CompBufReservedSpaceNeedAdjustment,
+ bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+
+void dml32_CalculateDETBufferSize(
+ unsigned int DETSizeOverride[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ bool UnboundedRequestEnabled,
+ unsigned int nomDETInKByte,
+ unsigned int MaxTotalDETInKByte,
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int MinCompressedBufferSizeInKByte,
+ unsigned int CompressedBufferSegmentSizeInkByteFinal,
+ enum source_format_class SourcePixelFormat[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int RoundedUpMaxSwathSizeBytesY[],
+ unsigned int RoundedUpMaxSwathSizeBytesC[],
+ unsigned int DPPPerSurface[],
+ /* Output */
+ unsigned int DETBufferSizeInKByte[],
+ unsigned int *CompressedBufferSizeInkByte);
+
+void dml32_CalculateODMMode(
+ unsigned int MaximumPixelsPerLinePerDSCUnit,
+ unsigned int HActive,
+ enum output_format_class OutFormat,
+ enum output_encoder_class Output,
+ enum odm_combine_policy ODMUse,
+ double StateDispclk,
+ double MaxDispclk,
+ bool DSCEnable,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int MaxNumDPP,
+ double PixelClock,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKRampingMargin,
+ double DISPCLKDPPCLKVCOSpeed,
+ unsigned int NumberOfDSCSlices,
+
+ /* Output */
+ bool *TotalAvailablePipesSupport,
+ unsigned int *NumberOfDPP,
+ enum odm_combine_mode *ODMMode,
+ double *RequiredDISPCLKPerSurface);
+
+double dml32_CalculateRequiredDispclk(
+ enum odm_combine_mode ODMMode,
+ double PixelClock,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKRampingMargin,
+ double DISPCLKDPPCLKVCOSpeed,
+ double MaxDispclk);
+
+double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed);
+
+void dml32_CalculateOutputLink(
+ double PHYCLKPerState,
+ double PHYCLKD18PerState,
+ double PHYCLKD32PerState,
+ double Downspreading,
+ bool IsMainSurfaceUsingTheIndicatedTiming,
+ enum output_encoder_class Output,
+ enum output_format_class OutputFormat,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClockBackEnd,
+ double ForcedOutputLinkBPP,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int NumberOfDSCSlices,
+ double AudioSampleRate,
+ unsigned int AudioSampleLayout,
+ enum odm_combine_mode ODMModeNoDSC,
+ enum odm_combine_mode ODMModeDSC,
+ bool DSCEnable,
+ unsigned int OutputLinkDPLanes,
+ enum dm_output_link_dp_rate OutputLinkDPRate,
+
+ /* Output */
+ bool *RequiresDSC,
+ double *RequiresFEC,
+ double *OutBpp,
+ enum dm_output_type *OutputType,
+ enum dm_output_rate *OutputRate,
+ unsigned int *RequiredSlots);
+
+void dml32_CalculateDPPCLK(
+ unsigned int NumberOfActiveSurfaces,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKDPPCLKVCOSpeed,
+ double DPPCLKUsingSingleDPP[],
+ unsigned int DPPPerSurface[],
+
+ /* output */
+ double *GlobalDPPCLK,
+ double Dppclk[]);
+
+double dml32_TruncToValidBPP(
+ double LinkBitRate,
+ unsigned int Lanes,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClock,
+ double DesiredBPP,
+ bool DSCEnable,
+ enum output_encoder_class Output,
+ enum output_format_class Format,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int DSCSlices,
+ unsigned int AudioRate,
+ unsigned int AudioLayout,
+ enum odm_combine_mode ODMModeNoDSC,
+ enum odm_combine_mode ODMModeDSC,
+ /* Output */
+ unsigned int *RequiredSlots);
+
+double dml32_RequiredDTBCLK(
+ bool DSCEnable,
+ double PixelClock,
+ enum output_format_class OutputFormat,
+ double OutputBpp,
+ unsigned int DSCSlices,
+ unsigned int HTotal,
+ unsigned int HActive,
+ unsigned int AudioRate,
+ unsigned int AudioLayout);
+
+unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
+ enum odm_combine_mode ODMMode,
+ unsigned int DSCInputBitPerComponent,
+ double OutputBpp,
+ unsigned int HActive,
+ unsigned int HTotal,
+ unsigned int NumberOfDSCSlices,
+ enum output_format_class OutputFormat,
+ enum output_encoder_class Output,
+ double PixelClock,
+ double PixelClockBackEnd,
+ double dsc_delay_factor_wa);
+
+void dml32_CalculateSurfaceSizeInMall(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MALLAllocatedForDCN,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ bool DCCEnable[],
+ bool ViewportStationary[],
+ unsigned int ViewportXStartY[],
+ unsigned int ViewportYStartY[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int ViewportWidthY[],
+ unsigned int ViewportHeightY[],
+ unsigned int BytesPerPixelY[],
+ unsigned int ViewportWidthC[],
+ unsigned int ViewportHeightC[],
+ unsigned int BytesPerPixelC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int ReadBlockWidthY[],
+ unsigned int ReadBlockWidthC[],
+ unsigned int ReadBlockHeightY[],
+ unsigned int ReadBlockHeightC[],
+
+ /* Output */
+ unsigned int SurfaceSizeInMALL[],
+ bool *ExceededMALLSize);
+
+void dml32_CalculateVMRowAndSwath(
+ unsigned int NumberOfActiveSurfaces,
+ DmlPipe myPipe[],
+ unsigned int SurfaceSizeInMALL[],
+ unsigned int PTEBufferSizeInRequestsLuma,
+ unsigned int PTEBufferSizeInRequestsChroma,
+ unsigned int DCCMetaBufferSizeBytes,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int MALLAllocatedForDCN,
+ double SwathWidthY[],
+ double SwathWidthC[],
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int GPUVMMinPageSizeKBytes[],
+ unsigned int HostVMMinPageSize,
+
+ /* Output */
+ bool PTEBufferSizeNotExceeded[],
+ bool DCCMetaBufferSizeNotExceeded[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+ unsigned int dpte_row_height_luma[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
+ unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int vm_group_bytes[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int PTERequestSizeC[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int meta_pte_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ unsigned int meta_pte_bytes_per_frame_ub_c[],
+ double PrefetchSourceLinesY[],
+ double PrefetchSourceLinesC[],
+ double VInitPreFillY[],
+ double VInitPreFillC[],
+ unsigned int MaxNumSwathY[],
+ unsigned int MaxNumSwathC[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+ double PixelPTEBytesPerRow[],
+ double PDEAndMetaPTEBytesFrame[],
+ double MetaRowByte[],
+ bool use_one_row_for_frame[],
+ bool use_one_row_for_frame_flip[],
+ bool UsesMALLForStaticScreen[],
+ bool PTE_BUFFER_MODE[],
+ unsigned int BIGK_FRAGMENT_SIZE[]);
+
+unsigned int dml32_CalculateVMAndRowBytes(
+ bool ViewportStationary,
+ bool DCCEnable,
+ unsigned int NumberOfDPPs,
+ unsigned int BlockHeight256Bytes,
+ unsigned int BlockWidth256Bytes,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceTiling,
+ unsigned int BytePerPixel,
+ enum dm_rotation_angle SourceScan,
+ double SwathWidth,
+ unsigned int ViewportHeight,
+ unsigned int ViewportXStart,
+ unsigned int ViewportYStart,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int GPUVMMinPageSizeKBytes,
+ unsigned int HostVMMinPageSize,
+ unsigned int PTEBufferSizeInRequests,
+ unsigned int Pitch,
+ unsigned int DCCMetaPitch,
+ unsigned int MacroTileWidth,
+ unsigned int MacroTileHeight,
+
+ /* Output */
+ unsigned int *MetaRowByte,
+ unsigned int *PixelPTEBytesPerRow,
+ unsigned int *dpte_row_width_ub,
+ unsigned int *dpte_row_height,
+ unsigned int *dpte_row_height_linear,
+ unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
+ unsigned int *dpte_row_width_ub_one_row_per_frame,
+ unsigned int *dpte_row_height_one_row_per_frame,
+ unsigned int *MetaRequestWidth,
+ unsigned int *MetaRequestHeight,
+ unsigned int *meta_row_width,
+ unsigned int *meta_row_height,
+ unsigned int *PixelPTEReqWidth,
+ unsigned int *PixelPTEReqHeight,
+ unsigned int *PTERequestSize,
+ unsigned int *DPDE0BytesFrame,
+ unsigned int *MetaPTEBytesFrame);
+
+double dml32_CalculatePrefetchSourceLines(
+ double VRatio,
+ unsigned int VTaps,
+ bool Interlace,
+ bool ProgressiveToInterlaceUnitInOPP,
+ unsigned int SwathHeight,
+ enum dm_rotation_angle SourceRotation,
+ bool ViewportStationary,
+ double SwathWidth,
+ unsigned int ViewportHeight,
+ unsigned int ViewportXStart,
+ unsigned int ViewportYStart,
+
+ /* Output */
+ double *VInitPreFill,
+ unsigned int *MaxNumSwath);
+
+void dml32_CalculateMALLUseForStaticScreen(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MALLAllocatedForDCNFinal,
+ enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
+ unsigned int SurfaceSizeInMALL[],
+ bool one_row_per_frame_fits_in_buffer[],
+
+ /* output */
+ bool UsesMALLForStaticScreen[]);
+
+void dml32_CalculateRowBandwidth(
+ bool GPUVMEnable,
+ enum source_format_class SourcePixelFormat,
+ double VRatio,
+ double VRatioChroma,
+ bool DCCEnable,
+ double LineTime,
+ unsigned int MetaRowByteLuma,
+ unsigned int MetaRowByteChroma,
+ unsigned int meta_row_height_luma,
+ unsigned int meta_row_height_chroma,
+ unsigned int PixelPTEBytesPerRowLuma,
+ unsigned int PixelPTEBytesPerRowChroma,
+ unsigned int dpte_row_height_luma,
+ unsigned int dpte_row_height_chroma,
+ /* Output */
+ double *meta_row_bw,
+ double *dpte_row_bw);
+
+double dml32_CalculateUrgentLatency(
+ double UrgentLatencyPixelDataOnly,
+ double UrgentLatencyPixelMixedWithVMData,
+ double UrgentLatencyVMDataOnly,
+ bool DoUrgentLatencyAdjustment,
+ double UrgentLatencyAdjustmentFabricClockComponent,
+ double UrgentLatencyAdjustmentFabricClockReference,
+ double FabricClock);
+
+void dml32_CalculateUrgentBurstFactor(
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ unsigned int swath_width_luma_ub,
+ unsigned int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double LineTime,
+ double UrgentLatency,
+ double CursorBufferSize,
+ unsigned int CursorWidth,
+ unsigned int CursorBPP,
+ double VRatio,
+ double VRatioC,
+ double BytePerPixelInDETY,
+ double BytePerPixelInDETC,
+ unsigned int DETBufferSizeY,
+ unsigned int DETBufferSizeC,
+ /* Output */
+ double *UrgentBurstFactorCursor,
+ double *UrgentBurstFactorLuma,
+ double *UrgentBurstFactorChroma,
+ bool *NotEnoughUrgentLatencyHiding);
+
+void dml32_CalculateDCFCLKDeepSleep(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ double VRatio[],
+ double VRatioChroma[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerSurface[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double Dppclk[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int ReturnBusWidth,
+
+ /* Output */
+ double *DCFClkDeepSleep);
+
+double dml32_CalculateWriteBackDelay(
+ enum source_format_class WritebackPixelFormat,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackVTaps,
+ unsigned int WritebackDestinationWidth,
+ unsigned int WritebackDestinationHeight,
+ unsigned int WritebackSourceHeight,
+ unsigned int HTotal);
+
+void dml32_UseMinimumDCFCLK(
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool DRRDisplay[],
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int MaxPrefetchMode,
+ double DRAMClockChangeLatencyFinal,
+ double FCLKChangeLatency,
+ double SREnterPlusExitTime,
+ unsigned int ReturnBusWidth,
+ unsigned int RoundTripPingLatencyCycles,
+ unsigned int ReorderingBytes,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int MetaChunkSize,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool DynamicMetadataVMEnabled,
+ bool ImmediateFlipRequirement,
+ bool ProgressiveToInterlaceUnitInOPP,
+ double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
+ double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
+ unsigned int VTotal[],
+ unsigned int VActive[],
+ unsigned int DynamicMetadataTransmittedBytes[],
+ unsigned int DynamicMetadataLinesBeforeActiveRequired[],
+ bool Interlace[],
+ double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
+ double RequiredDISPCLK[][2],
+ double UrgLatency[],
+ unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
+ double ProjectedDCFClkDeepSleep[][2],
+ double MaximumVStartup[][2][DC__NUM_DPP__MAX],
+ unsigned int TotalNumberOfActiveDPP[][2],
+ unsigned int TotalNumberOfDCCActiveDPP[][2],
+ unsigned int dpte_group_bytes[],
+ double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
+ double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ unsigned int HTotal[],
+ double PixelClock[],
+ double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
+ double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
+ double MetaRowBytes[][2][DC__NUM_DPP__MAX],
+ bool DynamicMetadataEnable[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double DCFCLKPerState[],
+ /* Output */
+ double DCFCLKState[][2]);
+
+unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int TotalNumberOfDCCActiveDPP,
+ unsigned int MetaChunkSize,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int NumberOfDPP[],
+ unsigned int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels);
+
+void dml32_CalculateVUpdateAndDynamicMetadataParameters(
+ unsigned int MaxInterDCNTileRepeaters,
+ double Dppclk,
+ double Dispclk,
+ double DCFClkDeepSleep,
+ double PixelClock,
+ unsigned int HTotal,
+ unsigned int VBlank,
+ unsigned int DynamicMetadataTransmittedBytes,
+ unsigned int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int InterlaceEnable,
+ bool ProgressiveToInterlaceUnitInOPP,
+ double *TSetup,
+ double *Tdmbf,
+ double *Tdmec,
+ double *Tdmsks,
+ unsigned int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix);
+
+double dml32_CalculateTWait(
+ unsigned int PrefetchMode,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ bool DRRDisplay,
+ double DRAMClockChangeLatency,
+ double FCLKChangeLatency,
+ double UrgentLatency,
+ double SREnterPlusExitTime);
+
+double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
+ const int VoltageLevel,
+ const bool HostVMEnable,
+ const double DCFCLK,
+ const double FabricClock,
+ const double DRAMSpeed);
+
+double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
+ const int VoltageLevel,
+ const double DCFCLK,
+ const double FabricClock,
+ const double DRAMSpeed);
+
+double dml32_CalculateExtraLatency(
+ unsigned int RoundTripPingLatencyCycles,
+ unsigned int ReorderingBytes,
+ double DCFCLK,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int TotalNumberOfDCCActiveDPP,
+ unsigned int MetaChunkSize,
+ double ReturnBW,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int NumberOfDPP[],
+ unsigned int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels);
+
+bool dml32_CalculatePrefetchSchedule(
+ struct vba_vars_st *v,
+ unsigned int k,
+ double HostVMInefficiencyFactor,
+ DmlPipe *myPipe,
+ unsigned int DSCDelay,
+ unsigned int DPP_RECOUT_WIDTH,
+ unsigned int VStartup,
+ unsigned int MaxVStartup,
+ double UrgentLatency,
+ double UrgentExtraLatency,
+ double TCalc,
+ unsigned int PDEAndMetaPTEBytesFrame,
+ unsigned int MetaRowByte,
+ unsigned int PixelPTEBytesPerRow,
+ double PrefetchSourceLinesY,
+ unsigned int SwathWidthY,
+ unsigned int VInitPreFillY,
+ unsigned int MaxNumSwathY,
+ double PrefetchSourceLinesC,
+ unsigned int SwathWidthC,
+ unsigned int VInitPreFillC,
+ unsigned int MaxNumSwathC,
+ unsigned int swath_width_luma_ub,
+ unsigned int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double TWait,
+ double TPreReq,
+ /* Output */
+ double *DSTXAfterScaler,
+ double *DSTYAfterScaler,
+ double *DestinationLinesForPrefetch,
+ double *PrefetchBandwidth,
+ double *DestinationLinesToRequestVMInVBlank,
+ double *DestinationLinesToRequestRowInVBlank,
+ double *VRatioPrefetchY,
+ double *VRatioPrefetchC,
+ double *RequiredPrefetchPixDataBWLuma,
+ double *RequiredPrefetchPixDataBWChroma,
+ bool *NotEnoughTimeForDynamicMetadata,
+ double *Tno_bw,
+ double *prefetch_vmrow_bw,
+ double *Tdmdl_vm,
+ double *Tdmdl,
+ double *TSetup,
+ unsigned int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix);
+
+void dml32_CalculateFlipSchedule(
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool GPUVMEnable,
+ double HostVMMinPageSize,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+ double DPTEBytesPerRow,
+ double BandwidthAvailableForImmediateFlip,
+ unsigned int TotImmediateFlipBytes,
+ enum source_format_class SourcePixelFormat,
+ double LineTime,
+ double VRatio,
+ double VRatioChroma,
+ double Tno_bw,
+ bool DCCEnable,
+ unsigned int dpte_row_height,
+ unsigned int meta_row_height,
+ unsigned int dpte_row_height_chroma,
+ unsigned int meta_row_height_chroma,
+ bool use_one_row_for_frame_flip,
+
+ /* Output */
+ double *DestinationLinesToRequestVMInImmediateFlip,
+ double *DestinationLinesToRequestRowInImmediateFlip,
+ double *final_flip_bw,
+ bool *ImmediateFlipSupportedForPipe);
+
+void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ struct vba_vars_st *v,
+ unsigned int PrefetchMode,
+ double DCFCLK,
+ double ReturnBW,
+ SOCParametersList mmSOCParameters,
+ double SOCCLK,
+ double DCFClkDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerSurface[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+ double DSTXAfterScaler[],
+ double DSTYAfterScaler[],
+ bool UnboundedRequestEnabled,
+ unsigned int CompressedBufferSizeInkByte,
+
+ /* Output */
+ enum clock_change_support *DRAMClockChangeSupport,
+ double MaxActiveDRAMClockChangeLatencySupported[],
+ unsigned int SubViewportLinesNeededInMALL[],
+ enum dm_fclock_change_support *FCLKChangeSupport,
+ double *MinActiveFCLKChangeLatencySupported,
+ bool *USRRetrainingSupport,
+ double ActiveDRAMClockChangeLatencyMargin[]);
+
+double dml32_CalculateWriteBackDISPCLK(
+ enum source_format_class WritebackPixelFormat,
+ double PixelClock,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackHTaps,
+ unsigned int WritebackVTaps,
+ unsigned int WritebackSourceWidth,
+ unsigned int WritebackDestinationWidth,
+ unsigned int HTotal,
+ unsigned int WritebackLineBufferSize,
+ double DISPCLKDPPCLKVCOSpeed);
+
+void dml32_CalculateMinAndMaxPrefetchMode(
+ enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
+ unsigned int *MinPrefetchMode,
+ unsigned int *MaxPrefetchMode);
+
+void dml32_CalculatePixelDeliveryTimes(
+ unsigned int NumberOfActiveSurfaces,
+ double VRatio[],
+ double VRatioChroma[],
+ double VRatioPrefetchY[],
+ double VRatioPrefetchC[],
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ unsigned int DPPPerSurface[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double Dppclk[],
+ unsigned int BytePerPixelC[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int NumberOfCursors[],
+ unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
+ unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int BlockHeight256BytesC[],
+
+ /* Output */
+ double DisplayPipeLineDeliveryTimeLuma[],
+ double DisplayPipeLineDeliveryTimeChroma[],
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeLuma[],
+ double DisplayPipeRequestDeliveryTimeChroma[],
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ double CursorRequestDeliveryTime[],
+ double CursorRequestDeliveryTimePrefetch[]);
+
+void dml32_CalculateMetaAndPTETimes(
+ bool use_one_row_for_frame[],
+ unsigned int NumberOfActiveSurfaces,
+ bool GPUVMEnable,
+ unsigned int MetaChunkSize,
+ unsigned int MinMetaChunkSizeBytes,
+ unsigned int HTotal[],
+ double VRatio[],
+ double VRatioChroma[],
+ double DestinationLinesToRequestRowInVBlank[],
+ double DestinationLinesToRequestRowInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int dpte_row_height[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PTERequestSizeC[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+
+ /* Output */
+ double DST_Y_PER_PTE_ROW_NOM_L[],
+ double DST_Y_PER_PTE_ROW_NOM_C[],
+ double DST_Y_PER_META_ROW_NOM_L[],
+ double DST_Y_PER_META_ROW_NOM_C[],
+ double TimePerMetaChunkNominal[],
+ double TimePerChromaMetaChunkNominal[],
+ double TimePerMetaChunkVBlank[],
+ double TimePerChromaMetaChunkVBlank[],
+ double TimePerMetaChunkFlip[],
+ double TimePerChromaMetaChunkFlip[],
+ double time_per_pte_group_nom_luma[],
+ double time_per_pte_group_vblank_luma[],
+ double time_per_pte_group_flip_luma[],
+ double time_per_pte_group_nom_chroma[],
+ double time_per_pte_group_vblank_chroma[],
+ double time_per_pte_group_flip_chroma[]);
+
+void dml32_CalculateVMGroupAndRequestTimes(
+ unsigned int NumberOfActiveSurfaces,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int HTotal[],
+ unsigned int BytePerPixelC[],
+ double DestinationLinesToRequestVMInVBlank[],
+ double DestinationLinesToRequestVMInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+ unsigned int vm_group_bytes[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ unsigned int meta_pte_bytes_per_frame_ub_l[],
+ unsigned int meta_pte_bytes_per_frame_ub_c[],
+
+ /* Output */
+ double TimePerVMGroupVBlank[],
+ double TimePerVMGroupFlip[],
+ double TimePerVMRequestVBlank[],
+ double TimePerVMRequestFlip[]);
+
+void dml32_CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ unsigned int nomDETInKByte,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dm_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum dm_rotation_angle SourceRotation,
+ /* Output */
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma);
+
+void dml32_CalculateStutterEfficiency(
+ unsigned int CompressedBufferSizeInkByte,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool UnboundedRequestEnabled,
+ unsigned int MetaFIFOSizeInKEntries,
+ unsigned int ZeroSizeBufferEntries,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int ROBBufferSizeInKByte,
+ double TotalDataReadBandwidth,
+ double DCFCLK,
+ double ReturnBW,
+ unsigned int CompbufReservedSpace64B,
+ unsigned int CompbufReservedSpaceZs,
+ double SRExitTime,
+ double SRExitZ8Time,
+ bool SynchronizeTimingsFinal,
+ unsigned int BlendingAndTiming[],
+ double StutterEnterPlusExitWatermark,
+ double Z8StutterEnterPlusExitWatermark,
+ bool ProgressiveToInterlaceUnitInOPP,
+ bool Interlace[],
+ double MinTTUVBlank[],
+ unsigned int DPPPerSurface[],
+ unsigned int DETBufferSizeY[],
+ unsigned int BytePerPixelY[],
+ double BytePerPixelDETY[],
+ double SwathWidthY[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double NetDCCRateLuma[],
+ double NetDCCRateChroma[],
+ double DCCFractionOfZeroSizeRequestsLuma[],
+ double DCCFractionOfZeroSizeRequestsChroma[],
+ unsigned int HTotal[],
+ unsigned int VTotal[],
+ double PixelClock[],
+ double VRatio[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesC[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int DCCYMaxUncompressedBlock[],
+ unsigned int DCCCMaxUncompressedBlock[],
+ unsigned int VActive[],
+ bool DCCEnable[],
+ bool WritebackEnable[],
+ double ReadBandwidthSurfaceLuma[],
+ double ReadBandwidthSurfaceChroma[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+
+ /* Output */
+ double *StutterEfficiencyNotIncludingVBlank,
+ double *StutterEfficiency,
+ unsigned int *NumberOfStutterBurstsPerFrame,
+ double *Z8StutterEfficiencyNotIncludingVBlank,
+ double *Z8StutterEfficiency,
+ unsigned int *Z8NumberOfStutterBurstsPerFrame,
+ double *StutterPeriod,
+ bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+
+void dml32_CalculateMaxDETAndMinCompressedBufferSize(
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int ROBBufferSizeInKByte,
+ unsigned int MaxNumDPP,
+ bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
+ unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
+
+ /* Output */
+ unsigned int *MaxTotalDETInKByte,
+ unsigned int *nomDETInKByte,
+ unsigned int *MinCompressedBufferSizeInKByte);
+
+bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[]);
+
+void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+
+ /* output */
+ double *PrefetchBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *PrefetchBandwidthSupport);
+
+double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double cursor_bw_pre[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[]);
+
+void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ enum immediate_flip_requirement ImmediateFlipRequirement[],
+ double final_flip_bw[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+
+ /* output */
+ double *TotalBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *ImmediateFlipBandwidthSupport);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c
new file mode 100644
index 000000000000..395ae8761980
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c
@@ -0,0 +1,615 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "../display_mode_lib.h"
+#include "../display_mode_vba.h"
+#include "../dml_inline_defs.h"
+#include "display_rq_dlg_calc_32.h"
+
+static bool is_dual_plane(enum source_format_class source_format)
+{
+ bool ret_val = 0;
+
+ if ((source_format == dm_420_12) || (source_format == dm_420_8) || (source_format == dm_420_10)
+ || (source_format == dm_rgbe_alpha))
+ ret_val = 1;
+
+ return ret_val;
+}
+
+void dml32_rq_dlg_get_rq_reg(display_rq_regs_st *rq_regs,
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx)
+{
+ const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
+ bool dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
+ double stored_swath_l_bytes;
+ double stored_swath_c_bytes;
+ bool is_phantom_pipe;
+ uint32_t pixel_chunk_bytes = 0;
+ uint32_t min_pixel_chunk_bytes = 0;
+ uint32_t meta_chunk_bytes = 0;
+ uint32_t min_meta_chunk_bytes = 0;
+ uint32_t dpte_group_bytes = 0;
+ uint32_t mpte_group_bytes = 0;
+
+ uint32_t p1_pixel_chunk_bytes = 0;
+ uint32_t p1_min_pixel_chunk_bytes = 0;
+ uint32_t p1_meta_chunk_bytes = 0;
+ uint32_t p1_min_meta_chunk_bytes = 0;
+ uint32_t p1_dpte_group_bytes = 0;
+ uint32_t p1_mpte_group_bytes = 0;
+
+ unsigned int detile_buf_size_in_bytes;
+ unsigned int detile_buf_plane1_addr;
+ unsigned int pte_row_height_linear;
+
+ memset(rq_regs, 0, sizeof(*rq_regs));
+
+ dml_print("DML_DLG::%s: Calculation for pipe[%d] start, num_pipes=%d\n", __func__, pipe_idx, num_pipes);
+
+ pixel_chunk_bytes = get_pixel_chunk_size_in_kbyte(mode_lib, e2e_pipe_param, num_pipes) * 1024; // From VBA
+ min_pixel_chunk_bytes = get_min_pixel_chunk_size_in_byte(mode_lib, e2e_pipe_param, num_pipes); // From VBA
+
+ if (pixel_chunk_bytes == 64 * 1024)
+ min_pixel_chunk_bytes = 0;
+
+ meta_chunk_bytes = get_meta_chunk_size_in_kbyte(mode_lib, e2e_pipe_param, num_pipes) * 1024; // From VBA
+ min_meta_chunk_bytes = get_min_meta_chunk_size_in_byte(mode_lib, e2e_pipe_param, num_pipes); // From VBA
+
+ dpte_group_bytes = get_dpte_group_size_in_bytes(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ mpte_group_bytes = get_vm_group_size_in_bytes(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ p1_pixel_chunk_bytes = pixel_chunk_bytes;
+ p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes;
+ p1_meta_chunk_bytes = meta_chunk_bytes;
+ p1_min_meta_chunk_bytes = min_meta_chunk_bytes;
+ p1_dpte_group_bytes = dpte_group_bytes;
+ p1_mpte_group_bytes = mpte_group_bytes;
+
+ if ((enum source_format_class) src->source_format == dm_rgbe_alpha)
+ p1_pixel_chunk_bytes = get_alpha_pixel_chunk_size_in_kbyte(mode_lib, e2e_pipe_param, num_pipes) * 1024;
+
+ rq_regs->rq_regs_l.chunk_size = dml_log2(pixel_chunk_bytes) - 10;
+ rq_regs->rq_regs_c.chunk_size = dml_log2(p1_pixel_chunk_bytes) - 10;
+
+ if (min_pixel_chunk_bytes == 0)
+ rq_regs->rq_regs_l.min_chunk_size = 0;
+ else
+ rq_regs->rq_regs_l.min_chunk_size = dml_log2(min_pixel_chunk_bytes) - 8 + 1;
+
+ if (p1_min_pixel_chunk_bytes == 0)
+ rq_regs->rq_regs_c.min_chunk_size = 0;
+ else
+ rq_regs->rq_regs_c.min_chunk_size = dml_log2(p1_min_pixel_chunk_bytes) - 8 + 1;
+
+ rq_regs->rq_regs_l.meta_chunk_size = dml_log2(meta_chunk_bytes) - 10;
+ rq_regs->rq_regs_c.meta_chunk_size = dml_log2(p1_meta_chunk_bytes) - 10;
+
+ if (min_meta_chunk_bytes == 0)
+ rq_regs->rq_regs_l.min_meta_chunk_size = 0;
+ else
+ rq_regs->rq_regs_l.min_meta_chunk_size = dml_log2(min_meta_chunk_bytes) - 6 + 1;
+
+ if (min_meta_chunk_bytes == 0)
+ rq_regs->rq_regs_c.min_meta_chunk_size = 0;
+ else
+ rq_regs->rq_regs_c.min_meta_chunk_size = dml_log2(p1_min_meta_chunk_bytes) - 6 + 1;
+
+ rq_regs->rq_regs_l.dpte_group_size = dml_log2(dpte_group_bytes) - 6;
+ rq_regs->rq_regs_l.mpte_group_size = dml_log2(mpte_group_bytes) - 6;
+ rq_regs->rq_regs_c.dpte_group_size = dml_log2(p1_dpte_group_bytes) - 6;
+ rq_regs->rq_regs_c.mpte_group_size = dml_log2(p1_mpte_group_bytes) - 6;
+
+ detile_buf_size_in_bytes = get_det_buffer_size_kbytes(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * 1024;
+ detile_buf_plane1_addr = 0;
+ pte_row_height_linear = get_dpte_row_height_linear_l(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx);
+
+ if (src->sw_mode == dm_sw_linear)
+ ASSERT(pte_row_height_linear >= 8);
+
+ rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(pte_row_height_linear), 1) - 3;
+
+ if (dual_plane) {
+ unsigned int p1_pte_row_height_linear = get_dpte_row_height_linear_c(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx);
+ ;
+ if (src->sw_mode == dm_sw_linear)
+ ASSERT(p1_pte_row_height_linear >= 8);
+
+ rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(p1_pte_row_height_linear), 1) - 3;
+ }
+
+ rq_regs->rq_regs_l.swath_height = dml_log2(get_swath_height_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx));
+ rq_regs->rq_regs_c.swath_height = dml_log2(get_swath_height_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx));
+
+ // FIXME: take the max between luma, chroma chunk size?
+ // okay for now, as we are setting pixel_chunk_bytes to 8kb anyways
+ if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb
+ rq_regs->drq_expansion_mode = 0;
+ } else {
+ rq_regs->drq_expansion_mode = 2;
+ }
+ rq_regs->prq_expansion_mode = 1;
+ rq_regs->mrq_expansion_mode = 1;
+ rq_regs->crq_expansion_mode = 1;
+
+ stored_swath_l_bytes = get_det_stored_buffer_size_l_bytes(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx);
+ stored_swath_c_bytes = get_det_stored_buffer_size_c_bytes(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx);
+ is_phantom_pipe = get_is_phantom_pipe(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+
+ // Note: detile_buf_plane1_addr is in unit of 1KB
+ if (dual_plane) {
+ if (is_phantom_pipe) {
+ detile_buf_plane1_addr = ((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma
+ } else {
+ if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) {
+ detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n",
+ __func__, detile_buf_plane1_addr);
+#endif
+ } else {
+ detile_buf_plane1_addr =
+ dml_round_to_multiple(
+ (unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0),
+ 1024, 0) / 1024.0; // 2/3 to luma
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n",
+ __func__, detile_buf_plane1_addr);
+#endif
+ }
+ }
+ }
+ rq_regs->plane1_base_address = detile_buf_plane1_addr;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe);
+ dml_print("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes);
+ dml_print("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes);
+ dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes);
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr);
+ dml_print("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address);
+#endif
+ print__rq_regs_st(mode_lib, rq_regs);
+ dml_print("DML_DLG::%s: Calculation for pipe[%d] done, num_pipes=%d\n", __func__, pipe_idx, num_pipes);
+}
+
+void dml32_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx)
+{
+ const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
+ const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest;
+ const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg;
+ double refcyc_per_req_delivery_pre_cur0 = 0.;
+ double refcyc_per_req_delivery_cur0 = 0.;
+ double refcyc_per_req_delivery_pre_c = 0.;
+ double refcyc_per_req_delivery_c = 0.;
+ double refcyc_per_req_delivery_pre_l;
+ double refcyc_per_req_delivery_l;
+ double refcyc_per_line_delivery_pre_c = 0.;
+ double refcyc_per_line_delivery_c = 0.;
+ double refcyc_per_line_delivery_pre_l;
+ double refcyc_per_line_delivery_l;
+ double min_ttu_vblank;
+ double vratio_pre_l;
+ double vratio_pre_c;
+ unsigned int min_dst_y_next_start;
+ unsigned int htotal = dst->htotal;
+ unsigned int hblank_end = dst->hblank_end;
+ unsigned int vblank_end = dst->vblank_end;
+ bool interlaced = dst->interlaced;
+ double pclk_freq_in_mhz = dst->pixel_rate_mhz;
+ unsigned int vready_after_vcount0;
+ double refclk_freq_in_mhz = clks->refclk_mhz;
+ double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz;
+ bool dual_plane = 0;
+ unsigned int pipe_index_in_combine[DC__NUM_PIPES__MAX];
+ unsigned int dst_x_after_scaler;
+ unsigned int dst_y_after_scaler;
+ double dst_y_prefetch;
+ double dst_y_per_vm_vblank;
+ double dst_y_per_row_vblank;
+ double dst_y_per_vm_flip;
+ double dst_y_per_row_flip;
+ double max_dst_y_per_vm_vblank = 32.0;
+ double max_dst_y_per_row_vblank = 16.0;
+ double dst_y_per_pte_row_nom_l;
+ double dst_y_per_pte_row_nom_c;
+ double dst_y_per_meta_row_nom_l;
+ double dst_y_per_meta_row_nom_c;
+ double refcyc_per_pte_group_nom_l;
+ double refcyc_per_pte_group_nom_c;
+ double refcyc_per_pte_group_vblank_l;
+ double refcyc_per_pte_group_vblank_c;
+ double refcyc_per_pte_group_flip_l;
+ double refcyc_per_pte_group_flip_c;
+ double refcyc_per_meta_chunk_nom_l;
+ double refcyc_per_meta_chunk_nom_c;
+ double refcyc_per_meta_chunk_vblank_l;
+ double refcyc_per_meta_chunk_vblank_c;
+ double refcyc_per_meta_chunk_flip_l;
+ double refcyc_per_meta_chunk_flip_c;
+
+ memset(dlg_regs, 0, sizeof(*dlg_regs));
+ memset(ttu_regs, 0, sizeof(*ttu_regs));
+ dml_print("DML_DLG::%s: Calculation for pipe[%d] starts, num_pipes=%d\n", __func__, pipe_idx, num_pipes);
+ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq);
+ dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced);
+ ASSERT(ref_freq_to_pix_freq < 4.0);
+
+ dlg_regs->ref_freq_to_pix_freq = (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19));
+ dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal * dml_pow(2, 8));
+ dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits
+
+ min_ttu_vblank = get_min_ttu_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ min_dst_y_next_start = get_min_dst_y_next_start(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+
+ dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank);
+ dml_print("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, min_dst_y_next_start);
+ dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq);
+
+ dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
+
+ vready_after_vcount0 = get_vready_at_or_after_vsync(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx); // From VBA
+ dlg_regs->vready_after_vcount0 = vready_after_vcount0;
+
+ dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, dlg_regs->vready_after_vcount0);
+
+ dst_x_after_scaler = dml_ceil(get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx), 1);
+ dst_y_after_scaler = dml_ceil(get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx), 1);
+
+ // do some adjustment on the dst_after scaler to account for odm combine mode
+ dml_print("DML_DLG: %s: input dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler);
+ dml_print("DML_DLG: %s: input dst_y_after_scaler = %d\n", __func__, dst_y_after_scaler);
+
+ // need to figure out which side of odm combine we're in
+ if (dst->odm_combine == dm_odm_combine_mode_2to1 || dst->odm_combine == dm_odm_combine_mode_4to1) {
+ // figure out which pipes go together
+ bool visited[DC__NUM_PIPES__MAX];
+ unsigned int i, j, k;
+
+ for (k = 0; k < num_pipes; ++k) {
+ visited[k] = false;
+ pipe_index_in_combine[k] = 0;
+ }
+
+ for (i = 0; i < num_pipes; i++) {
+ if (e2e_pipe_param[i].pipe.src.is_hsplit && !visited[i]) {
+
+ unsigned int grp = e2e_pipe_param[i].pipe.src.hsplit_grp;
+ unsigned int grp_idx = 0;
+
+ for (j = i; j < num_pipes; j++) {
+ if (e2e_pipe_param[j].pipe.src.hsplit_grp == grp
+ && e2e_pipe_param[j].pipe.src.is_hsplit && !visited[j]) {
+ pipe_index_in_combine[j] = grp_idx;
+ dml_print("DML_DLG: %s: pipe[%d] is in grp %d idx %d\n",
+ __func__, j, grp, grp_idx);
+ grp_idx++;
+ visited[j] = true;
+ }
+ }
+ }
+ }
+ }
+
+ if (dst->odm_combine == dm_odm_combine_mode_disabled) {
+ // FIXME how about ODM split??
+ dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end * ref_freq_to_pix_freq);
+ } else {
+ if (dst->odm_combine == dm_odm_combine_mode_2to1 || dst->odm_combine == dm_odm_combine_mode_4to1) {
+ // TODO: We should really check that 4to1 is supported before setting it to 4
+ unsigned int odm_combine_factor = (dst->odm_combine == dm_odm_combine_mode_2to1 ? 2 : 4);
+ unsigned int odm_pipe_index = pipe_index_in_combine[pipe_idx];
+
+ dlg_regs->refcyc_h_blank_end = (unsigned int) (((double) hblank_end
+ + odm_pipe_index * (double) dst->hactive / odm_combine_factor) * ref_freq_to_pix_freq);
+ }
+ }
+ ASSERT(dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13));
+
+ dml_print("DML_DLG: %s: htotal= %d\n", __func__, htotal);
+ dml_print("DML_DLG: %s: dst_x_after_scaler[%d]= %d\n", __func__, pipe_idx, dst_x_after_scaler);
+ dml_print("DML_DLG: %s: dst_y_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_y_after_scaler);
+
+ dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ // From VBA
+ dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ // From VBA
+ dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ // magic!
+ if (htotal <= 75) {
+ max_dst_y_per_vm_vblank = 100.0;
+ max_dst_y_per_row_vblank = 100.0;
+ }
+
+ dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch);
+ dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip);
+ dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank);
+
+ ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank);
+ ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank);
+ ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank));
+
+ vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ dml_print("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, vratio_pre_l);
+ dml_print("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, vratio_pre_c);
+
+ // Active
+ refcyc_per_line_delivery_pre_l = get_refcyc_per_line_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_line_delivery_l = get_refcyc_per_line_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, refcyc_per_line_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, refcyc_per_line_delivery_l);
+
+ if (dual_plane) {
+ refcyc_per_line_delivery_pre_c = get_refcyc_per_line_delivery_pre_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_line_delivery_c = get_refcyc_per_line_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n",
+ __func__, refcyc_per_line_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n",
+ __func__, refcyc_per_line_delivery_c);
+ }
+
+ if (src->dynamic_metadata_enable && src->gpuvm)
+ dlg_regs->refcyc_per_vm_dmdata = get_refcyc_per_vm_dmdata_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dlg_regs->dmdata_dl_delta = get_dmdata_dl_delta_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx)
+ * refclk_freq_in_mhz; // From VBA
+
+ refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, refcyc_per_req_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, refcyc_per_req_delivery_l);
+
+ if (dual_plane) {
+ refcyc_per_req_delivery_pre_c = get_refcyc_per_req_delivery_pre_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_req_delivery_c = get_refcyc_per_req_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n",
+ __func__, refcyc_per_req_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, refcyc_per_req_delivery_c);
+ }
+
+ // TTU - Cursor
+ ASSERT(src->num_cursors <= 1);
+ if (src->num_cursors > 0) {
+ refcyc_per_req_delivery_pre_cur0 = get_refcyc_per_cursor_req_delivery_pre_in_us(mode_lib,
+ e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_req_delivery_cur0 = get_refcyc_per_cursor_req_delivery_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f\n",
+ __func__, refcyc_per_req_delivery_pre_cur0);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f\n",
+ __func__, refcyc_per_req_delivery_cur0);
+ }
+
+ // Assign to register structures
+ dlg_regs->min_dst_y_next_start = min_dst_y_next_start * dml_pow(2, 2);
+ ASSERT(dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18));
+
+ dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line
+ dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk
+ dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2));
+ dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2));
+ dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2));
+ dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2));
+ dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2));
+
+ dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19));
+ dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19));
+
+ dml_print("DML_DLG: %s: dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, dlg_regs->dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, dlg_regs->dst_y_per_row_vblank);
+ dml_print("DML_DLG: %s: dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, dlg_regs->dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, dlg_regs->dst_y_per_row_flip);
+
+ dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA
+ dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA
+
+ // From VBA
+ dst_y_per_pte_row_nom_l = get_dst_y_per_pte_row_nom_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ // From VBA
+ dst_y_per_pte_row_nom_c = get_dst_y_per_pte_row_nom_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ // From VBA
+ dst_y_per_meta_row_nom_l = get_dst_y_per_meta_row_nom_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ // From VBA
+ dst_y_per_meta_row_nom_c = get_dst_y_per_meta_row_nom_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+
+ refcyc_per_pte_group_nom_l = get_refcyc_per_pte_group_nom_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_nom_c = get_refcyc_per_pte_group_nom_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_vblank_l = get_refcyc_per_pte_group_vblank_l_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_vblank_c = get_refcyc_per_pte_group_vblank_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_flip_l = get_refcyc_per_pte_group_flip_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_flip_c = get_refcyc_per_pte_group_flip_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ refcyc_per_meta_chunk_nom_l = get_refcyc_per_meta_chunk_nom_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_nom_c = get_refcyc_per_meta_chunk_nom_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_vblank_l = get_refcyc_per_meta_chunk_vblank_l_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_vblank_c = get_refcyc_per_meta_chunk_vblank_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_flip_l = get_refcyc_per_meta_chunk_flip_l_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_flip_c = get_refcyc_per_meta_chunk_flip_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dlg_regs->dst_y_per_pte_row_nom_l = dst_y_per_pte_row_nom_l * dml_pow(2, 2);
+ dlg_regs->dst_y_per_pte_row_nom_c = dst_y_per_pte_row_nom_c * dml_pow(2, 2);
+ dlg_regs->dst_y_per_meta_row_nom_l = dst_y_per_meta_row_nom_l * dml_pow(2, 2);
+ dlg_regs->dst_y_per_meta_row_nom_c = dst_y_per_meta_row_nom_c * dml_pow(2, 2);
+ dlg_regs->refcyc_per_pte_group_nom_l = refcyc_per_pte_group_nom_l;
+ dlg_regs->refcyc_per_pte_group_nom_c = refcyc_per_pte_group_nom_c;
+ dlg_regs->refcyc_per_pte_group_vblank_l = refcyc_per_pte_group_vblank_l;
+ dlg_regs->refcyc_per_pte_group_vblank_c = refcyc_per_pte_group_vblank_c;
+ dlg_regs->refcyc_per_pte_group_flip_l = refcyc_per_pte_group_flip_l;
+ dlg_regs->refcyc_per_pte_group_flip_c = refcyc_per_pte_group_flip_c;
+ dlg_regs->refcyc_per_meta_chunk_nom_l = refcyc_per_meta_chunk_nom_l;
+ dlg_regs->refcyc_per_meta_chunk_nom_c = refcyc_per_meta_chunk_nom_c;
+ dlg_regs->refcyc_per_meta_chunk_vblank_l = refcyc_per_meta_chunk_vblank_l;
+ dlg_regs->refcyc_per_meta_chunk_vblank_c = refcyc_per_meta_chunk_vblank_c;
+ dlg_regs->refcyc_per_meta_chunk_flip_l = refcyc_per_meta_chunk_flip_l;
+ dlg_regs->refcyc_per_meta_chunk_flip_c = refcyc_per_meta_chunk_flip_c;
+ dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, 1);
+ dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, 1);
+ dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, 1);
+ dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, 1);
+
+ dlg_regs->chunk_hdl_adjust_cur0 = 3;
+ dlg_regs->dst_y_offset_cur0 = 0;
+ dlg_regs->chunk_hdl_adjust_cur1 = 3;
+ dlg_regs->dst_y_offset_cur1 = 0;
+
+ dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
+
+ ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_pre_cur0 =
+ (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_pre_cur1 = 0;
+ ttu_regs->refcyc_per_req_delivery_cur1 = 0;
+ ttu_regs->qos_level_low_wm = 0;
+
+ ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal * ref_freq_to_pix_freq);
+
+ ttu_regs->qos_level_flip = 14;
+ ttu_regs->qos_level_fixed_l = 8;
+ ttu_regs->qos_level_fixed_c = 8;
+ ttu_regs->qos_level_fixed_cur0 = 8;
+ ttu_regs->qos_ramp_disable_l = 0;
+ ttu_regs->qos_ramp_disable_c = 0;
+ ttu_regs->qos_ramp_disable_cur0 = 0;
+ ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz;
+
+ // CHECK for HW registers' range, assert or clamp
+ ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13));
+ ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
+ ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13));
+ ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
+ if (dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1;
+
+ if (dlg_regs->refcyc_per_vm_group_flip >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1;
+
+ if (dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1;
+
+ if (dlg_regs->refcyc_per_vm_req_flip >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1;
+
+ ASSERT(dlg_regs->dst_y_after_scaler < (unsigned int) 8);
+ ASSERT(dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17));
+ if (dual_plane) {
+ if (dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) {
+ // FIXME what so special about chroma, can we just assert?
+ dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u > register max U15.2 %u\n",
+ __func__, dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)dml_pow(2, 17) - 1);
+ }
+ }
+ ASSERT(dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17));
+ ASSERT(dlg_regs->dst_y_per_meta_row_nom_c < (unsigned int)dml_pow(2, 17));
+
+ if (dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1;
+ if (dual_plane) {
+ if (dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1;
+ }
+ ASSERT(dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13));
+ if (dual_plane) {
+ ASSERT(dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)dml_pow(2, 13));
+ }
+
+ if (dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1;
+ if (dual_plane) {
+ if (dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1;
+ }
+ ASSERT(dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_meta_chunk_vblank_c < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13));
+ ASSERT(ttu_regs->qos_level_low_wm < dml_pow(2, 14));
+ ASSERT(ttu_regs->qos_level_high_wm < dml_pow(2, 14));
+ ASSERT(ttu_regs->min_ttu_vblank < dml_pow(2, 24));
+
+ print__ttu_regs_st(mode_lib, ttu_regs);
+ print__dlg_regs_st(mode_lib, dlg_regs);
+ dml_print("DML_DLG::%s: Calculation for pipe[%d] done, num_pipes=%d\n", __func__, pipe_idx, num_pipes);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h
new file mode 100644
index 000000000000..ebee365293cd
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML32_DISPLAY_RQ_DLG_CALC_H__
+#define __DML32_DISPLAY_RQ_DLG_CALC_H__
+
+#include "../display_rq_dlg_helpers.h"
+
+struct display_mode_lib;
+
+/*
+* Function: dml_rq_dlg_get_rq_reg
+* Main entry point for test to get the register values out of this DML class.
+* This function calls <get_rq_param> and <extract_rq_regs> functions to calculate
+* and then populate the rq_regs struct
+* Input:
+* pipe_param - pipe source configuration (e.g. vp, pitch, scaling, dest, etc.)
+* Output:
+* rq_regs - struct that holds all the RQ registers field value.
+* See also: <display_rq_regs_st>
+*/
+void dml32_rq_dlg_get_rq_reg(display_rq_regs_st *rq_regs,
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx);
+
+/*
+* Function: dml_rq_dlg_get_dlg_reg
+* Calculate and return DLG and TTU register struct given the system setting
+* Output:
+* dlg_regs - output DLG register struct
+* ttu_regs - output DLG TTU register struct
+* Input:
+* e2e_pipe_param - "compacted" array of e2e pipe param struct
+* num_pipes - num of active "pipe" or "route"
+* pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg
+* cstate - 0: when calculate min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered.
+* Added for legacy or unrealistic timing tests.
+*/
+void dml32_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
new file mode 100644
index 000000000000..432b4ecd01a7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "clk_mgr.h"
+#include "resource.h"
+#include "dcn321_fpu.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn321/dcn321_resource.h"
+#include "dml/dcn32/display_mode_vba_util_32.h"
+
+#define DCN3_2_DEFAULT_DET_SIZE 256
+
+struct _vcs_dpi_ip_params_st dcn3_21_ip = {
+ .gpuvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_enable = 0,
+ .rob_buffer_size_kbytes = 128,
+ .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE,
+ .config_return_buffer_size_in_kbytes = 1280,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 22,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .alpha_pixel_chunk_size_kbytes = 4,
+ .min_pixel_chunk_size_bytes = 1024,
+ .dcc_meta_buffer_size_bytes = 6272,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 12,
+ .maximum_pixels_per_line_per_dsc_unit = 6016,
+ .dsc422_native_support = true,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 57,
+ .line_buffer_size_bits = 1171920,
+ .max_line_buffer_lines = 32,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 47,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 28,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 125,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+ .max_num_dp2p0_outputs = 2,
+ .max_num_dp2p0_streams = 4,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 1564.0,
+ .fabricclk_mhz = 400.0,
+ .dispclk_mhz = 2150.0,
+ .dppclk_mhz = 2150.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .phyclk_d32_mhz = 625.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 716.667,
+ .dram_speed_mts = 1600.0,
+ .dtbclk_mhz = 1564.0,
+ },
+ },
+ .num_states = 1,
+ .sr_exit_time_us = 19.95,
+ .sr_enter_plus_exit_time_us = 24.36,
+ .sr_exit_z8_time_us = 285.0,
+ .sr_enter_plus_exit_z8_time_us = 320,
+ .writeback_latency_us = 12.0,
+ .round_trip_ping_latency_dcfclk_cycles = 263,
+ .urgent_latency_pixel_data_only_us = 9.35,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 9.35,
+ .urgent_latency_vm_data_only_us = 9.35,
+ .fclk_change_latency_us = 20,
+ .usr_retraining_latency_us = 2,
+ .smn_latency_us = 2,
+ .mall_allocated_for_dcn_mbytes = 64,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 100.0,
+ .pct_ideal_fabric_bw_after_urgent = 67.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented
+ .pct_ideal_dram_bw_after_urgent_strobe = 67.0,
+ .max_avg_sdp_bw_use_normal_percent = 80.0,
+ .max_avg_fabric_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_strobe_percent = 50.0,
+ .max_avg_dram_bw_use_normal_percent = 15.0,
+ .num_chans = 8,
+ .dram_channel_width_bytes = 2,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .dram_clock_change_latency_us = 400,
+ .dispclk_dppclk_vco_speed_mhz = 4300.0,
+ .do_urgent_latency_adjustment = true,
+ .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+};
+
+static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry)
+{
+ if (entry->dcfclk_mhz > 0) {
+ float bw_on_sdp = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100);
+
+ entry->fabricclk_mhz = bw_on_sdp / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100));
+ entry->dram_speed_mts = bw_on_sdp / (dcn3_21_soc.num_chans *
+ dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
+ } else if (entry->fabricclk_mhz > 0) {
+ float bw_on_fabric = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100);
+
+ entry->dcfclk_mhz = bw_on_fabric / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100));
+ entry->dram_speed_mts = bw_on_fabric / (dcn3_21_soc.num_chans *
+ dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
+ } else if (entry->dram_speed_mts > 0) {
+ float bw_on_dram = entry->dram_speed_mts * dcn3_21_soc.num_chans *
+ dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+
+ entry->fabricclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100));
+ entry->dcfclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100));
+ }
+}
+
+static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry)
+{
+ float memory_bw_kbytes_sec;
+ float fabric_bw_kbytes_sec;
+ float sdp_bw_kbytes_sec;
+ float limiting_bw_kbytes_sec;
+
+ memory_bw_kbytes_sec = entry->dram_speed_mts * dcn3_21_soc.num_chans *
+ dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+
+ fabric_bw_kbytes_sec = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100);
+
+ sdp_bw_kbytes_sec = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100);
+
+ limiting_bw_kbytes_sec = memory_bw_kbytes_sec;
+
+ if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec)
+ limiting_bw_kbytes_sec = fabric_bw_kbytes_sec;
+
+ if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec)
+ limiting_bw_kbytes_sec = sdp_bw_kbytes_sec;
+
+ return limiting_bw_kbytes_sec;
+}
+
+void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
+ unsigned int *num_entries,
+ struct _vcs_dpi_voltage_scaling_st *entry)
+{
+ int i = 0;
+ int index = 0;
+ float net_bw_of_new_state = 0;
+
+ dc_assert_fp_enabled();
+
+ get_optimal_ntuple(entry);
+
+ if (*num_entries == 0) {
+ table[0] = *entry;
+ (*num_entries)++;
+ } else {
+ net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry);
+ while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) {
+ index++;
+ if (index >= *num_entries)
+ break;
+ }
+
+ for (i = *num_entries; i > index; i--)
+ table[i] = table[i - 1];
+
+ table[index] = *entry;
+ (*num_entries)++;
+ }
+}
+
+static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
+ unsigned int index)
+{
+ int i;
+
+ if (*num_entries == 0)
+ return;
+
+ for (i = index; i < *num_entries - 1; i++) {
+ table[i] = table[i + 1];
+ }
+ memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st));
+}
+
+static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
+ struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+ int i, j;
+ struct _vcs_dpi_voltage_scaling_st entry = {0};
+
+ unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0,
+ max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0;
+
+ unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299;
+
+ static const unsigned int num_dcfclk_stas = 5;
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564};
+
+ unsigned int num_uclk_dpms = 0;
+ unsigned int num_fclk_dpms = 0;
+ unsigned int num_dcfclk_dpms = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz)
+ max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+ if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz)
+ max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz)
+ max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+
+ if (bw_params->clk_table.entries[i].memclk_mhz > 0)
+ num_uclk_dpms++;
+ if (bw_params->clk_table.entries[i].fclk_mhz > 0)
+ num_fclk_dpms++;
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > 0)
+ num_dcfclk_dpms++;
+ }
+
+ if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz)
+ return -1;
+
+ if (max_dppclk_mhz == 0)
+ max_dppclk_mhz = max_dispclk_mhz;
+
+ if (max_fclk_mhz == 0)
+ max_fclk_mhz = max_dcfclk_mhz * dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / dcn3_21_soc.pct_ideal_fabric_bw_after_urgent;
+
+ if (max_phyclk_mhz == 0)
+ max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz;
+
+ *num_entries = 0;
+ entry.dispclk_mhz = max_dispclk_mhz;
+ entry.dscclk_mhz = max_dispclk_mhz / 3;
+ entry.dppclk_mhz = max_dppclk_mhz;
+ entry.dtbclk_mhz = max_dtbclk_mhz;
+ entry.phyclk_mhz = max_phyclk_mhz;
+ entry.phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz;
+ entry.phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz;
+
+ // Insert all the DCFCLK STAs
+ for (i = 0; i < num_dcfclk_stas; i++) {
+ entry.dcfclk_mhz = dcfclk_sta_targets[i];
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = 0;
+
+ dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+ }
+
+ // Insert the max DCFCLK
+ entry.dcfclk_mhz = max_dcfclk_mhz;
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = 0;
+
+ dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+
+ // Insert the UCLK DPMS
+ for (i = 0; i < num_uclk_dpms; i++) {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16;
+
+ dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+ }
+
+ // If FCLK is coarse grained, insert individual DPMs.
+ if (num_fclk_dpms > 2) {
+ for (i = 0; i < num_fclk_dpms; i++) {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+ entry.dram_speed_mts = 0;
+
+ dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+ }
+ }
+ // If FCLK fine grained, only insert max
+ else {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = max_fclk_mhz;
+ entry.dram_speed_mts = 0;
+
+ dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+ }
+
+ // At this point, the table contains all "points of interest" based on
+ // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock
+ // ratios (by derate, are exact).
+
+ // Remove states that require higher clocks than are supported
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].dcfclk_mhz > max_dcfclk_mhz ||
+ table[i].fabricclk_mhz > max_fclk_mhz ||
+ table[i].dram_speed_mts > max_uclk_mhz * 16)
+ remove_entry_from_table_at_index(table, num_entries, i);
+ }
+
+ // At this point, the table only contains supported points of interest
+ // it could be used as is, but some states may be redundant due to
+ // coarse grained nature of some clocks, so we want to round up to
+ // coarse grained DPMs and remove duplicates.
+
+ // Round up UCLKs
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ for (j = 0; j < num_uclk_dpms; j++) {
+ if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) {
+ table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ }
+ }
+ }
+
+ // If FCLK is coarse grained, round up to next DPMs
+ if (num_fclk_dpms > 2) {
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ for (j = 0; j < num_fclk_dpms; j++) {
+ if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) {
+ table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz;
+ break;
+ }
+ }
+ }
+ }
+ // Otherwise, round up to minimum.
+ else {
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].fabricclk_mhz < min_fclk_mhz) {
+ table[i].fabricclk_mhz = min_fclk_mhz;
+ break;
+ }
+ }
+ }
+
+ // Round DCFCLKs up to minimum
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].dcfclk_mhz < min_dcfclk_mhz) {
+ table[i].dcfclk_mhz = min_dcfclk_mhz;
+ break;
+ }
+ }
+
+ // Remove duplicate states, note duplicate states are always neighbouring since table is sorted.
+ i = 0;
+ while (i < *num_entries - 1) {
+ if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz &&
+ table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz &&
+ table[i].dram_speed_mts == table[i + 1].dram_speed_mts)
+ remove_entry_from_table_at_index(table, num_entries, i + 1);
+ else
+ i++;
+ }
+
+ // Fix up the state indicies
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ table[i].state = i;
+ }
+
+ return 0;
+}
+
+static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+ unsigned int *optimal_dcfclk,
+ unsigned int *optimal_fclk)
+{
+ double bw_from_dram, bw_from_dram1, bw_from_dram2;
+
+ bw_from_dram1 = uclk_mts * dcn3_21_soc.num_chans *
+ dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_dram_bw_use_normal_percent / 100);
+ bw_from_dram2 = uclk_mts * dcn3_21_soc.num_chans *
+ dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100);
+
+ bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
+
+ if (optimal_fclk)
+ *optimal_fclk = bw_from_dram /
+ (dcn3_21_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100));
+
+ if (optimal_dcfclk)
+ *optimal_dcfclk = bw_from_dram /
+ (dcn3_21_soc.return_bus_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100));
+}
+
+/** dcn321_update_bw_bounding_box
+ * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from spreadsheet
+ * with actual values as per dGPU SKU:
+ * -with passed few options from dc->config
+ * -with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to get it from PM FW)
+ * -with passed latency values (passed in ns units) in dc-> bb override for debugging purposes
+ * -with passed latencies from VBIOS (in 100_ns units) if available for certain dGPU SKU
+ * -with number of DRAM channels from VBIOS (which differ for certain dGPU SKU of the same ASIC)
+ * -clocks levels with passed clk_table entries from Clk Mgr as reported by PM FW for different
+ * clocks (which might differ for certain dGPU SKU of the same ASIC)
+ */
+void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ dc_assert_fp_enabled();
+ if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
+ /* Overrides from dc->config options */
+ dcn3_21_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk;
+
+ /* Override from passed dc->bb_overrides if available*/
+ if ((int)(dcn3_21_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
+ && dc->bb_overrides.sr_exit_time_ns) {
+ dcn3_21_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_21_soc.sr_enter_plus_exit_time_us * 1000)
+ != dc->bb_overrides.sr_enter_plus_exit_time_ns
+ && dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+ dcn3_21_soc.sr_enter_plus_exit_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_21_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns
+ && dc->bb_overrides.urgent_latency_ns) {
+ dcn3_21_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ dcn3_21_soc.urgent_latency_pixel_data_only_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_21_soc.dram_clock_change_latency_us * 1000)
+ != dc->bb_overrides.dram_clock_change_latency_ns
+ && dc->bb_overrides.dram_clock_change_latency_ns) {
+ dcn3_21_soc.dram_clock_change_latency_us =
+ dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_21_soc.fclk_change_latency_us * 1000)
+ != dc->bb_overrides.fclk_clock_change_latency_ns
+ && dc->bb_overrides.fclk_clock_change_latency_ns) {
+ dcn3_21_soc.fclk_change_latency_us =
+ dc->bb_overrides.fclk_clock_change_latency_ns / 1000;
+ }
+
+ if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000)
+ != dc->bb_overrides.dummy_clock_change_latency_ns
+ && dc->bb_overrides.dummy_clock_change_latency_ns) {
+ dcn3_21_soc.dummy_pstate_latency_us =
+ dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0;
+ }
+
+ /* Override from VBIOS if VBIOS bb_info available */
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info) {
+ struct bp_soc_bb_info bb_info = {0};
+
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
+ if (bb_info.dram_clock_change_latency_100ns > 0)
+ dcn3_21_soc.dram_clock_change_latency_us =
+ bb_info.dram_clock_change_latency_100ns * 10;
+
+ if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+ dcn3_21_soc.sr_enter_plus_exit_time_us =
+ bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+ if (bb_info.dram_sr_exit_latency_100ns > 0)
+ dcn3_21_soc.sr_exit_time_us =
+ bb_info.dram_sr_exit_latency_100ns * 10;
+ }
+ }
+
+ /* Override from VBIOS for num_chan */
+ if (dc->ctx->dc_bios->vram_info.num_chans)
+ dcn3_21_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
+
+ if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+ dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+ }
+
+ /* DML DSC delay factor workaround */
+ dcn3_21_ip.dsc_delay_factor_wa = dc->debug.dsc_delay_factor_wa_x1000 / 1000.0;
+
+ dcn3_21_ip.min_prefetch_in_strobe_us = dc->debug.min_prefetch_in_strobe_ns / 1000.0;
+
+ /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */
+ dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+ /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */
+ if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) {
+ if (dc->debug.use_legacy_soc_bb_mechanism) {
+ unsigned int i = 0, j = 0, num_states = 0;
+
+ unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
+ unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
+
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {615, 906, 1324, 1564};
+ unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0;
+ unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ }
+ if (!max_dcfclk_mhz)
+ max_dcfclk_mhz = dcn3_21_soc.clock_limits[0].dcfclk_mhz;
+ if (!max_dispclk_mhz)
+ max_dispclk_mhz = dcn3_21_soc.clock_limits[0].dispclk_mhz;
+ if (!max_dppclk_mhz)
+ max_dppclk_mhz = dcn3_21_soc.clock_limits[0].dppclk_mhz;
+ if (!max_phyclk_mhz)
+ max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz;
+
+ if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array
+ dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
+ num_dcfclk_sta_targets++;
+ } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
+ dcfclk_sta_targets[i] = max_dcfclk_mhz;
+ break;
+ }
+ }
+ // Update size of array since we "removed" duplicates
+ num_dcfclk_sta_targets = i + 1;
+ }
+
+ num_uclk_states = bw_params->clk_table.num_entries;
+
+ // Calculate optimal dcfclk for each uclk
+ for (i = 0; i < num_uclk_states; i++) {
+ dcn321_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
+ &optimal_dcfclk_for_uclk[i], NULL);
+ if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) {
+ optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
+ }
+ }
+
+ // Calculate optimal uclk for each dcfclk sta target
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ for (j = 0; j < num_uclk_states; j++) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+ optimal_uclk_for_dcfclk_sta_targets[i] =
+ bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ }
+ }
+ }
+
+ i = 0;
+ j = 0;
+ // create the final dcfclk and uclk table
+ while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ } else {
+ if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ } else {
+ j = num_uclk_states;
+ }
+ }
+ }
+
+ while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ }
+
+ while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
+ optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ }
+
+ dcn3_21_soc.num_states = num_states;
+ for (i = 0; i < dcn3_21_soc.num_states; i++) {
+ dcn3_21_soc.clock_limits[i].state = i;
+ dcn3_21_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+ dcn3_21_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+
+ /* Fill all states with max values of all these clocks */
+ dcn3_21_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
+ dcn3_21_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
+ dcn3_21_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
+ dcn3_21_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3;
+
+ /* Populate from bw_params for DTBCLK, SOCCLK */
+ if (i > 0) {
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz) {
+ dcn3_21_soc.clock_limits[i].dtbclk_mhz = dcn3_21_soc.clock_limits[i-1].dtbclk_mhz;
+ } else {
+ dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+ } else if (bw_params->clk_table.entries[i].dtbclk_mhz) {
+ dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+
+ if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+ dcn3_21_soc.clock_limits[i].socclk_mhz = dcn3_21_soc.clock_limits[i-1].socclk_mhz;
+ else
+ dcn3_21_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
+
+ if (!dram_speed_mts[i] && i > 0)
+ dcn3_21_soc.clock_limits[i].dram_speed_mts = dcn3_21_soc.clock_limits[i-1].dram_speed_mts;
+ else
+ dcn3_21_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+ /* These clocks cannot come from bw_params, always fill from dcn3_21_soc[0] */
+ /* PHYCLK_D18, PHYCLK_D32 */
+ dcn3_21_soc.clock_limits[i].phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz;
+ dcn3_21_soc.clock_limits[i].phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz;
+ }
+ } else {
+ build_synthetic_soc_states(bw_params, dcn3_21_soc.clock_limits, &dcn3_21_soc.num_states);
+ }
+
+ /* Re-init DML with updated bb */
+ dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32);
+ if (dc->current_state)
+ dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32);
+ }
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h
new file mode 100644
index 000000000000..e8fad9b4be69
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN32_FPU_H__
+#define __DCN32_FPU_H__
+
+#include "dml/display_mode_vba.h"
+
+void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
+ unsigned int *num_entries,
+ struct _vcs_dpi_voltage_scaling_st *entry);
+
+void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
index edb9f7567d6d..f394b3f3922a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
@@ -26,7 +26,11 @@
#define __DISPLAY_MODE_ENUMS_H__
enum output_encoder_class {
- dm_dp = 0, dm_hdmi = 1, dm_wb = 2, dm_edp
+ dm_dp = 0,
+ dm_hdmi = 1,
+ dm_wb = 2,
+ dm_edp = 3,
+ dm_dp2p0 = 5,
};
enum output_format_class {
dm_444 = 0, dm_420 = 1, dm_n422, dm_s422
@@ -105,6 +109,10 @@ enum clock_change_support {
dm_dram_clock_change_uninitialized = 0,
dm_dram_clock_change_vactive,
dm_dram_clock_change_vblank,
+ dm_dram_clock_change_vactive_w_mall_full_frame,
+ dm_dram_clock_change_vactive_w_mall_sub_vp,
+ dm_dram_clock_change_vblank_w_mall_full_frame,
+ dm_dram_clock_change_vblank_w_mall_sub_vp,
dm_dram_clock_change_unsupported
};
@@ -169,6 +177,9 @@ enum odm_combine_mode {
dm_odm_combine_mode_disabled,
dm_odm_combine_mode_2to1,
dm_odm_combine_mode_4to1,
+ dm_odm_split_mode_1to2,
+ dm_odm_mode_mso_1to2,
+ dm_odm_mode_mso_1to4
};
enum odm_combine_policy {
@@ -176,11 +187,15 @@ enum odm_combine_policy {
dm_odm_combine_policy_none,
dm_odm_combine_policy_2to1,
dm_odm_combine_policy_4to1,
+ dm_odm_split_policy_1to2,
+ dm_odm_mso_policy_1to2,
+ dm_odm_mso_policy_1to4,
};
enum immediate_flip_requirement {
dm_immediate_flip_not_required,
dm_immediate_flip_required,
+ dm_immediate_flip_opportunistic,
};
enum unbounded_requesting_policy {
@@ -189,4 +204,75 @@ enum unbounded_requesting_policy {
dm_unbounded_requesting_disable
};
+enum dm_rotation_angle {
+ dm_rotation_0,
+ dm_rotation_90,
+ dm_rotation_180,
+ dm_rotation_270,
+ dm_rotation_0m,
+ dm_rotation_90m,
+ dm_rotation_180m,
+ dm_rotation_270m,
+};
+
+enum dm_use_mall_for_pstate_change_mode {
+ dm_use_mall_pstate_change_disable,
+ dm_use_mall_pstate_change_full_frame,
+ dm_use_mall_pstate_change_sub_viewport,
+ dm_use_mall_pstate_change_phantom_pipe
+};
+
+enum dm_use_mall_for_static_screen_mode {
+ dm_use_mall_static_screen_disable,
+ dm_use_mall_static_screen_optimize,
+ dm_use_mall_static_screen_enable,
+};
+
+enum dm_output_link_dp_rate {
+ dm_dp_rate_na,
+ dm_dp_rate_hbr,
+ dm_dp_rate_hbr2,
+ dm_dp_rate_hbr3,
+ dm_dp_rate_uhbr10,
+ dm_dp_rate_uhbr13p5,
+ dm_dp_rate_uhbr20,
+};
+
+enum dm_fclock_change_support {
+ dm_fclock_change_vactive,
+ dm_fclock_change_vblank,
+ dm_fclock_change_unsupported,
+};
+
+enum dm_prefetch_modes {
+ dm_prefetch_support_uclk_fclk_and_stutter_if_possible,
+ dm_prefetch_support_uclk_fclk_and_stutter,
+ dm_prefetch_support_fclk_and_stutter,
+ dm_prefetch_support_stutter,
+ dm_prefetch_support_none,
+};
+enum dm_output_type {
+ dm_output_type_unknown,
+ dm_output_type_dp,
+ dm_output_type_edp,
+ dm_output_type_dp2p0,
+ dm_output_type_hdmi,
+ dm_output_type_hdmifrl,
+};
+
+enum dm_output_rate {
+ dm_output_rate_unknown,
+ dm_output_rate_dp_rate_hbr,
+ dm_output_rate_dp_rate_hbr2,
+ dm_output_rate_dp_rate_hbr3,
+ dm_output_rate_dp_rate_uhbr10,
+ dm_output_rate_dp_rate_uhbr13p5,
+ dm_output_rate_dp_rate_uhbr20,
+ dm_output_rate_hdmi_rate_3x3,
+ dm_output_rate_hdmi_rate_6x3,
+ dm_output_rate_hdmi_rate_6x4,
+ dm_output_rate_hdmi_rate_8x4,
+ dm_output_rate_hdmi_rate_10x4,
+ dm_output_rate_hdmi_rate_12x4,
+};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
index 30db51fbd8cd..4125d3d111d1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
@@ -35,6 +35,10 @@
#include "dcn30/display_rq_dlg_calc_30.h"
#include "dcn31/display_mode_vba_31.h"
#include "dcn31/display_rq_dlg_calc_31.h"
+#include "dcn314/display_mode_vba_314.h"
+#include "dcn314/display_rq_dlg_calc_314.h"
+#include "dcn32/display_mode_vba_32.h"
+#include "dcn32/display_rq_dlg_calc_32.h"
#include "dml_logger.h"
const struct dml_funcs dml20_funcs = {
@@ -72,6 +76,20 @@ const struct dml_funcs dml31_funcs = {
.rq_dlg_get_rq_reg = dml31_rq_dlg_get_rq_reg
};
+const struct dml_funcs dml314_funcs = {
+ .validate = dml314_ModeSupportAndSystemConfigurationFull,
+ .recalculate = dml314_recalculate,
+ .rq_dlg_get_dlg_reg = dml314_rq_dlg_get_dlg_reg,
+ .rq_dlg_get_rq_reg = dml314_rq_dlg_get_rq_reg
+};
+
+const struct dml_funcs dml32_funcs = {
+ .validate = dml32_ModeSupportAndSystemConfigurationFull,
+ .recalculate = dml32_recalculate,
+ .rq_dlg_get_dlg_reg_v2 = dml32_rq_dlg_get_dlg_reg,
+ .rq_dlg_get_rq_reg_v2 = dml32_rq_dlg_get_rq_reg
+};
+
void dml_init_instance(struct display_mode_lib *lib,
const struct _vcs_dpi_soc_bounding_box_st *soc_bb,
const struct _vcs_dpi_ip_params_st *ip_params,
@@ -96,8 +114,15 @@ void dml_init_instance(struct display_mode_lib *lib,
break;
case DML_PROJECT_DCN31:
case DML_PROJECT_DCN31_FPGA:
+ case DML_PROJECT_DCN315:
lib->funcs = dml31_funcs;
break;
+ case DML_PROJECT_DCN314:
+ lib->funcs = dml314_funcs;
+ break;
+ case DML_PROJECT_DCN32:
+ lib->funcs = dml32_funcs;
+ break;
default:
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
index d76251fd1566..3d643d50c3eb 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
@@ -40,7 +40,10 @@ enum dml_project {
DML_PROJECT_DCN21,
DML_PROJECT_DCN30,
DML_PROJECT_DCN31,
+ DML_PROJECT_DCN315,
DML_PROJECT_DCN31_FPGA,
+ DML_PROJECT_DCN314,
+ DML_PROJECT_DCN32,
};
struct display_mode_lib;
@@ -62,6 +65,20 @@ struct dml_funcs {
struct display_mode_lib *mode_lib,
display_rq_regs_st *rq_regs,
const display_pipe_params_st *pipe_param);
+ // DLG interfaces have different function parameters in DCN32.
+ // Create new function pointers to address the changes
+ void (*rq_dlg_get_dlg_reg_v2)(
+ struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx);
+ void (*rq_dlg_get_rq_reg_v2)(display_rq_regs_st *rq_regs,
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx);
void (*recalculate)(struct display_mode_lib *mode_lib);
void (*validate)(struct display_mode_lib *mode_lib);
};
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index d46a2733024c..64d602e6412f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -26,6 +26,16 @@
#include "dc_features.h"
#include "display_mode_enums.h"
+/**
+ * DOC: overview
+ *
+ * Most of the DML code is automatically generated and tested via hardware
+ * description language. Usually, we use the reference _vcs_dpi in the code
+ * where VCS means "Verilog Compiled Simulator" and DPI stands for "Direct
+ * Programmer Interface". In other words, those structs can be used to
+ * interface with Verilog with other languages such as C.
+ */
+
#ifndef __DISPLAY_MODE_STRUCTS_H__
#define __DISPLAY_MODE_STRUCTS_H__
@@ -54,12 +64,102 @@ typedef struct _vcs_dpi_display_rq_regs_st display_rq_regs_st;
typedef struct _vcs_dpi_display_dlg_sys_params_st display_dlg_sys_params_st;
typedef struct _vcs_dpi_display_arb_params_st display_arb_params_st;
+typedef struct {
+ double UrgentWatermark;
+ double WritebackUrgentWatermark;
+ double DRAMClockChangeWatermark;
+ double FCLKChangeWatermark;
+ double WritebackDRAMClockChangeWatermark;
+ double WritebackFCLKChangeWatermark;
+ double StutterExitWatermark;
+ double StutterEnterPlusExitWatermark;
+ double Z8StutterExitWatermark;
+ double Z8StutterEnterPlusExitWatermark;
+ double USRRetrainingWatermark;
+} Watermarks;
+
+typedef struct {
+ double UrgentLatency;
+ double ExtraLatency;
+ double WritebackLatency;
+ double DRAMClockChangeLatency;
+ double FCLKChangeLatency;
+ double SRExitTime;
+ double SREnterPlusExitTime;
+ double SRExitZ8Time;
+ double SREnterPlusExitZ8Time;
+ double USRRetrainingLatencyPlusSMNLatency;
+} Latencies;
+
+typedef struct {
+ double Dppclk;
+ double Dispclk;
+ double PixelClock;
+ double DCFClkDeepSleep;
+ unsigned int DPPPerSurface;
+ bool ScalerEnabled;
+ enum dm_rotation_angle SourceRotation;
+ unsigned int ViewportHeight;
+ unsigned int ViewportHeightChroma;
+ unsigned int BlockWidth256BytesY;
+ unsigned int BlockHeight256BytesY;
+ unsigned int BlockWidth256BytesC;
+ unsigned int BlockHeight256BytesC;
+ unsigned int BlockWidthY;
+ unsigned int BlockHeightY;
+ unsigned int BlockWidthC;
+ unsigned int BlockHeightC;
+ unsigned int InterlaceEnable;
+ unsigned int NumberOfCursors;
+ unsigned int VBlank;
+ unsigned int HTotal;
+ unsigned int HActive;
+ bool DCCEnable;
+ enum odm_combine_mode ODMMode;
+ enum source_format_class SourcePixelFormat;
+ enum dm_swizzle_mode SurfaceTiling;
+ unsigned int BytePerPixelY;
+ unsigned int BytePerPixelC;
+ bool ProgressiveToInterlaceUnitInOPP;
+ double VRatio;
+ double VRatioChroma;
+ unsigned int VTaps;
+ unsigned int VTapsChroma;
+ unsigned int PitchY;
+ unsigned int DCCMetaPitchY;
+ unsigned int PitchC;
+ unsigned int DCCMetaPitchC;
+ bool ViewportStationary;
+ unsigned int ViewportXStart;
+ unsigned int ViewportYStart;
+ unsigned int ViewportXStartC;
+ unsigned int ViewportYStartC;
+ bool FORCE_ONE_ROW_FOR_FRAME;
+ unsigned int SwathHeightY;
+ unsigned int SwathHeightC;
+} DmlPipe;
+
+typedef struct {
+ double UrgentLatency;
+ double ExtraLatency;
+ double WritebackLatency;
+ double DRAMClockChangeLatency;
+ double FCLKChangeLatency;
+ double SRExitTime;
+ double SREnterPlusExitTime;
+ double SRExitZ8Time;
+ double SREnterPlusExitZ8Time;
+ double USRRetrainingLatency;
+ double SMNLatency;
+} SOCParametersList;
+
struct _vcs_dpi_voltage_scaling_st {
int state;
double dscclk_mhz;
double dcfclk_mhz;
double socclk_mhz;
double phyclk_d18_mhz;
+ double phyclk_d32_mhz;
double dram_speed_mts;
double fabricclk_mhz;
double dispclk_mhz;
@@ -69,8 +169,20 @@ struct _vcs_dpi_voltage_scaling_st {
double dtbclk_mhz;
};
+/**
+ * _vcs_dpi_soc_bounding_box_st: SOC definitions
+ *
+ * This struct maintains the SOC Bounding Box information for the ASIC; it
+ * defines things such as clock, voltage, performance, etc. Usually, we load
+ * these values from VBIOS; if something goes wrong, we use some hard-coded
+ * values, which will enable the ASIC to light up with limitations.
+ */
struct _vcs_dpi_soc_bounding_box_st {
struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
+ /**
+ * @num_states: It represents the total of Display Power Management
+ * (DPM) supported by the specific ASIC.
+ */
unsigned int num_states;
double sr_exit_time_us;
double sr_enter_plus_exit_time_us;
@@ -80,6 +192,16 @@ struct _vcs_dpi_soc_bounding_box_st {
double urgent_latency_pixel_data_only_us;
double urgent_latency_pixel_mixed_with_vm_data_us;
double urgent_latency_vm_data_only_us;
+ double usr_retraining_latency_us;
+ double smn_latency_us;
+ double fclk_change_latency_us;
+ double mall_allocated_for_dcn_mbytes;
+ double pct_ideal_fabric_bw_after_urgent;
+ double pct_ideal_dram_bw_after_urgent_strobe;
+ double max_avg_fabric_bw_use_normal_percent;
+ double max_avg_dram_bw_use_normal_strobe_percent;
+ enum dm_prefetch_modes allow_for_pstate_or_stutter_in_vblank_final;
+ bool dram_clock_change_requirement_final;
double writeback_latency_us;
double ideal_dram_bw_after_urgent_percent;
double pct_ideal_dram_sdp_bw_after_urgent_pixel_only; // PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
@@ -126,6 +248,14 @@ struct _vcs_dpi_soc_bounding_box_st {
enum self_refresh_affinity allow_dram_self_refresh_or_dram_clock_change_in_vblank;
};
+/**
+ * @_vcs_dpi_ip_params_st: IP configuraion for DCN blocks
+ *
+ * In this struct you can find the DCN configuration associated to the specific
+ * ASIC. For example, here we can save how many DPPs the ASIC is using and it
+ * is available.
+ *
+ */
struct _vcs_dpi_ip_params_st {
bool use_min_dcfclk;
bool clamp_min_dcfclk;
@@ -141,12 +271,16 @@ struct _vcs_dpi_ip_params_st {
unsigned int odm_capable;
unsigned int rob_buffer_size_kbytes;
unsigned int det_buffer_size_kbytes;
+ unsigned int min_comp_buffer_size_kbytes;
unsigned int dpte_buffer_size_in_pte_reqs_luma;
unsigned int dpte_buffer_size_in_pte_reqs_chroma;
unsigned int pde_proc_buffer_size_64k_reqs;
unsigned int dpp_output_buffer_pixels;
unsigned int opp_output_buffer_lines;
unsigned int pixel_chunk_size_kbytes;
+ unsigned int alpha_pixel_chunk_size_kbytes;
+ unsigned int min_pixel_chunk_size_bytes;
+ unsigned int dcc_meta_buffer_size_bytes;
unsigned char pte_enable;
unsigned int pte_chunk_size_kbytes;
unsigned int meta_chunk_size_kbytes;
@@ -167,12 +301,16 @@ struct _vcs_dpi_ip_params_st {
double writeback_min_hscl_ratio;
double writeback_min_vscl_ratio;
unsigned int maximum_dsc_bits_per_component;
+ unsigned int maximum_pixels_per_line_per_dsc_unit;
unsigned int writeback_max_hscl_taps;
unsigned int writeback_max_vscl_taps;
unsigned int writeback_line_buffer_luma_buffer_size;
unsigned int writeback_line_buffer_chroma_buffer_size;
unsigned int max_page_table_levels;
+ /**
+ * @max_num_dpp: Maximum number of DPP supported in the target ASIC.
+ */
unsigned int max_num_dpp;
unsigned int max_num_otg;
unsigned int cursor_chunk_size;
@@ -223,6 +361,13 @@ struct _vcs_dpi_ip_params_st {
unsigned int can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one;
unsigned int bug_forcing_LC_req_same_size_fixed;
unsigned int number_of_cursors;
+ unsigned int max_num_dp2p0_outputs;
+ unsigned int max_num_dp2p0_streams;
+ unsigned int VBlankNomDefaultUS;
+
+ /* DM workarounds */
+ double dsc_delay_factor_wa; // TODO: Remove after implementing root cause fix
+ double min_prefetch_in_strobe_us;
};
struct _vcs_dpi_display_xfc_params_st {
@@ -249,6 +394,8 @@ struct _vcs_dpi_display_pipe_source_params_st {
bool hostvm_levels_force_en;
unsigned int hostvm_levels_force;
int source_scan;
+ int source_rotation; // new in dml32
+ unsigned int det_size_override; // use to populate DETSizeOverride in vba struct
int sw_mode;
int macro_tile_size;
unsigned int surface_width_y;
@@ -263,6 +410,15 @@ struct _vcs_dpi_display_pipe_source_params_st {
unsigned int viewport_height_c;
unsigned int viewport_width_max;
unsigned int viewport_height_max;
+ unsigned int viewport_x_y;
+ unsigned int viewport_x_c;
+ bool viewport_stationary;
+ unsigned int dcc_rate_luma;
+ unsigned int gpuvm_min_page_size_kbytes;
+ unsigned int use_mall_for_pstate_change;
+ unsigned int use_mall_for_static_screen;
+ bool force_one_row_for_frame;
+ bool pte_buffer_mode;
unsigned int data_pitch;
unsigned int data_pitch_c;
unsigned int meta_pitch;
@@ -295,10 +451,17 @@ struct writeback_st {
int wb_vtaps_luma;
int wb_htaps_chroma;
int wb_vtaps_chroma;
+ unsigned int wb_htaps;
+ unsigned int wb_vtaps;
double wb_hratio;
double wb_vratio;
};
+struct display_audio_params_st {
+ unsigned int audio_sample_rate_khz;
+ int audio_sample_layout;
+};
+
struct _vcs_dpi_display_output_params_st {
int dp_lanes;
double output_bpp;
@@ -312,6 +475,11 @@ struct _vcs_dpi_display_output_params_st {
int dsc_slices;
int max_audio_sample_rate;
struct writeback_st wb;
+ struct display_audio_params_st audio;
+ unsigned int output_bpc;
+ int dp_rate;
+ unsigned int dp_multistream_id;
+ bool dp_multistream_en;
};
struct _vcs_dpi_scaler_ratio_depth_st {
@@ -346,6 +514,7 @@ struct _vcs_dpi_display_pipe_dest_params_st {
unsigned int htotal;
unsigned int vtotal;
unsigned int vfront_porch;
+ unsigned int vblank_nom;
unsigned int vactive;
unsigned int hactive;
unsigned int vstartup_start;
@@ -360,6 +529,10 @@ struct _vcs_dpi_display_pipe_dest_params_st {
unsigned char use_maximum_vstartup;
unsigned int vtotal_max;
unsigned int vtotal_min;
+ unsigned int refresh_rate;
+ bool synchronize_timings;
+ unsigned int odm_combine_policy;
+ bool drr_display;
};
struct _vcs_dpi_display_pipe_params_st {
@@ -445,6 +618,8 @@ struct _vcs_dpi_display_dlg_regs_st {
unsigned int refcyc_h_blank_end;
unsigned int dlg_vblank_end;
unsigned int min_dst_y_next_start;
+ unsigned int optimized_min_dst_y_next_start;
+ unsigned int optimized_min_dst_y_next_start_us;
unsigned int refcyc_per_htotal;
unsigned int refcyc_x_after_scaler;
unsigned int dst_y_after_scaler;
@@ -546,7 +721,6 @@ struct _vcs_dpi_display_dlg_sys_params_st {
double t_sr_wm_us;
double t_extra_us;
double mem_trip_us;
- double t_srx_delay_us;
double deepsleep_dcfclk_mhz;
double total_flip_bw;
unsigned int total_flip_bytes;
@@ -556,6 +730,9 @@ struct _vcs_dpi_display_arb_params_st {
int max_req_outstanding;
int min_req_outstanding;
int sat_level_us;
+ int hvm_min_req_outstand_commit_threshold;
+ int hvm_max_qos_commit_threshold;
+ int compbuf_reserved_space_kbytes;
};
#endif /*__DISPLAY_MODE_STRUCTS_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 0fad15020c74..8e6585dab20e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -47,6 +47,7 @@ static void recalculate_params(
unsigned int num_pipes);
static unsigned int CursorBppEnumToBits(enum cursor_bpp ebpp);
+static void cache_debug_params(struct display_mode_lib *mode_lib);
unsigned int dml_get_voltage_level(
struct display_mode_lib *mode_lib,
@@ -73,6 +74,7 @@ unsigned int dml_get_voltage_level(
PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
}
mode_lib->funcs.validate(mode_lib);
+ cache_debug_params(mode_lib);
return mode_lib->vba.VoltageLevel;
}
@@ -109,6 +111,22 @@ dml_get_attr_func(tcalc, mode_lib->vba.TCalc);
dml_get_attr_func(fraction_of_urgent_bandwidth, mode_lib->vba.FractionOfUrgentBandwidth);
dml_get_attr_func(fraction_of_urgent_bandwidth_imm_flip, mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip);
+
+dml_get_attr_func(cstate_max_cap_mode, mode_lib->vba.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+dml_get_attr_func(comp_buffer_size_kbytes, mode_lib->vba.CompressedBufferSizeInkByte);
+dml_get_attr_func(pixel_chunk_size_in_kbyte, mode_lib->vba.PixelChunkSizeInKByte);
+dml_get_attr_func(alpha_pixel_chunk_size_in_kbyte, mode_lib->vba.AlphaPixelChunkSizeInKByte);
+dml_get_attr_func(meta_chunk_size_in_kbyte, mode_lib->vba.MetaChunkSize);
+dml_get_attr_func(min_pixel_chunk_size_in_byte, mode_lib->vba.MinPixelChunkSizeBytes);
+dml_get_attr_func(min_meta_chunk_size_in_byte, mode_lib->vba.MinMetaChunkSizeBytes);
+dml_get_attr_func(fclk_watermark, mode_lib->vba.Watermark.FCLKChangeWatermark);
+dml_get_attr_func(usr_retraining_watermark, mode_lib->vba.Watermark.USRRetrainingWatermark);
+
+dml_get_attr_func(comp_buffer_reserved_space_kbytes, mode_lib->vba.CompBufReservedSpaceKBytes);
+dml_get_attr_func(comp_buffer_reserved_space_64bytes, mode_lib->vba.CompBufReservedSpace64B);
+dml_get_attr_func(comp_buffer_reserved_space_zs, mode_lib->vba.CompBufReservedSpaceZs);
+dml_get_attr_func(unbounded_request_enabled, mode_lib->vba.UnboundedRequestEnabled);
+
#define dml_get_pipe_attr_func(attr, var) double get_##attr(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, unsigned int num_pipes, unsigned int which_pipe) \
{\
unsigned int which_plane; \
@@ -163,6 +181,27 @@ dml_get_pipe_attr_func(vupdate_width, mode_lib->vba.VUpdateWidthPix);
dml_get_pipe_attr_func(vready_offset, mode_lib->vba.VReadyOffsetPix);
dml_get_pipe_attr_func(vready_at_or_after_vsync, mode_lib->vba.VREADY_AT_OR_AFTER_VSYNC);
dml_get_pipe_attr_func(min_dst_y_next_start, mode_lib->vba.MIN_DST_Y_NEXT_START);
+dml_get_pipe_attr_func(dst_y_per_pte_row_nom_l, mode_lib->vba.DST_Y_PER_PTE_ROW_NOM_L);
+dml_get_pipe_attr_func(dst_y_per_pte_row_nom_c, mode_lib->vba.DST_Y_PER_PTE_ROW_NOM_C);
+dml_get_pipe_attr_func(dst_y_per_meta_row_nom_l, mode_lib->vba.DST_Y_PER_META_ROW_NOM_L);
+dml_get_pipe_attr_func(dst_y_per_meta_row_nom_c, mode_lib->vba.DST_Y_PER_META_ROW_NOM_C);
+dml_get_pipe_attr_func(refcyc_per_pte_group_nom_l_in_us, mode_lib->vba.time_per_pte_group_nom_luma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_nom_c_in_us, mode_lib->vba.time_per_pte_group_nom_chroma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_vblank_l_in_us, mode_lib->vba.time_per_pte_group_vblank_luma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_vblank_c_in_us, mode_lib->vba.time_per_pte_group_vblank_chroma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_flip_l_in_us, mode_lib->vba.time_per_pte_group_flip_luma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_flip_c_in_us, mode_lib->vba.time_per_pte_group_flip_chroma);
+dml_get_pipe_attr_func(vstartup_calculated, mode_lib->vba.VStartup);
+dml_get_pipe_attr_func(dpte_row_height_linear_c, mode_lib->vba.dpte_row_height_linear_chroma);
+dml_get_pipe_attr_func(swath_height_l, mode_lib->vba.SwathHeightY);
+dml_get_pipe_attr_func(swath_height_c, mode_lib->vba.SwathHeightC);
+dml_get_pipe_attr_func(det_stored_buffer_size_l_bytes, mode_lib->vba.DETBufferSizeY);
+dml_get_pipe_attr_func(det_stored_buffer_size_c_bytes, mode_lib->vba.DETBufferSizeC);
+dml_get_pipe_attr_func(dpte_group_size_in_bytes, mode_lib->vba.dpte_group_bytes);
+dml_get_pipe_attr_func(vm_group_size_in_bytes, mode_lib->vba.vm_group_bytes);
+dml_get_pipe_attr_func(dpte_row_height_linear_l, mode_lib->vba.dpte_row_height_linear);
+dml_get_pipe_attr_func(pte_buffer_mode, mode_lib->vba.PTE_BUFFER_MODE);
+dml_get_pipe_attr_func(subviewport_lines_needed_in_mall, mode_lib->vba.SubViewportLinesNeededInMALL);
double get_total_immediate_flip_bytes(
struct display_mode_lib *mode_lib,
@@ -200,6 +239,67 @@ double get_total_prefetch_bw(
return total_prefetch_bw;
}
+unsigned int get_total_surface_size_in_mall_bytes(
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes)
+{
+ unsigned int k;
+ unsigned int size = 0.0;
+ recalculate_params(mode_lib, pipes, num_pipes);
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ size += mode_lib->vba.SurfaceSizeInMALL[k];
+ return size;
+}
+
+static unsigned int get_pipe_idx(struct display_mode_lib *mode_lib, unsigned int plane_idx)
+{
+ int pipe_idx = -1;
+ int i;
+
+ ASSERT(plane_idx < DC__NUM_DPP__MAX);
+
+ for (i = 0; i < DC__NUM_DPP__MAX ; i++) {
+ if (plane_idx == mode_lib->vba.pipe_plane[i]) {
+ pipe_idx = i;
+ break;
+ }
+ }
+ ASSERT(pipe_idx >= 0);
+
+ return pipe_idx;
+}
+
+
+double get_det_buffer_size_kbytes(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes, unsigned int pipe_idx)
+{
+ unsigned int plane_idx;
+ double det_buf_size_kbytes;
+
+ recalculate_params(mode_lib, pipes, num_pipes);
+ plane_idx = mode_lib->vba.pipe_plane[pipe_idx];
+
+ dml_print("DML::%s: num_pipes=%d pipe_idx=%d plane_idx=%0d\n", __func__, num_pipes, pipe_idx, plane_idx);
+ det_buf_size_kbytes = mode_lib->vba.DETBufferSizeInKByte[plane_idx]; // per hubp DET buffer size
+
+ dml_print("DML::%s: det_buf_size_kbytes=%3.2f\n", __func__, det_buf_size_kbytes);
+
+ return det_buf_size_kbytes;
+}
+
+bool get_is_phantom_pipe(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes, unsigned int pipe_idx)
+{
+ unsigned int plane_idx;
+
+ recalculate_params(mode_lib, pipes, num_pipes);
+ plane_idx = mode_lib->vba.pipe_plane[pipe_idx];
+ dml_print("DML::%s: num_pipes=%d pipe_idx=%d UseMALLForPStateChange=%0d\n", __func__, num_pipes, pipe_idx,
+ mode_lib->vba.UsesMALLForPStateChange[plane_idx]);
+ return (mode_lib->vba.UsesMALLForPStateChange[plane_idx] == dm_use_mall_pstate_change_phantom_pipe);
+}
+
static void fetch_socbb_params(struct display_mode_lib *mode_lib)
{
soc_bounding_box_st *soc = &mode_lib->vba.soc;
@@ -239,6 +339,21 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib)
soc->max_avg_sdp_bw_use_normal_percent;
mode_lib->vba.SRExitZ8Time = soc->sr_exit_z8_time_us;
mode_lib->vba.SREnterPlusExitZ8Time = soc->sr_enter_plus_exit_z8_time_us;
+ mode_lib->vba.FCLKChangeLatency = soc->fclk_change_latency_us;
+ mode_lib->vba.USRRetrainingLatency = soc->usr_retraining_latency_us;
+ mode_lib->vba.SMNLatency = soc->smn_latency_us;
+ mode_lib->vba.MALLAllocatedForDCNFinal = soc->mall_allocated_for_dcn_mbytes;
+
+ mode_lib->vba.PercentOfIdealDRAMBWReceivedAfterUrgLatencySTROBE = soc->pct_ideal_dram_bw_after_urgent_strobe;
+ mode_lib->vba.MaxAveragePercentOfIdealFabricBWDisplayCanUseInNormalSystemOperation =
+ soc->max_avg_fabric_bw_use_normal_percent;
+ mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE =
+ soc->max_avg_dram_bw_use_normal_strobe_percent;
+
+ mode_lib->vba.DRAMClockChangeRequirementFinal = soc->dram_clock_change_requirement_final;
+ mode_lib->vba.FCLKChangeRequirementFinal = 1;
+ mode_lib->vba.USRRetrainingRequiredFinal = 1;
+ mode_lib->vba.AllowForPStateChangeOrStutterInVBlankFinal = soc->allow_for_pstate_or_stutter_in_vblank_final;
mode_lib->vba.DRAMClockChangeLatency = soc->dram_clock_change_latency_us;
mode_lib->vba.DummyPStateCheck = soc->dram_clock_change_latency_us == soc->dummy_pstate_latency_us;
mode_lib->vba.DRAMClockChangeSupportsVActive = !soc->disable_dram_clock_change_vactive_support ||
@@ -281,6 +396,7 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib)
mode_lib->vba.SOCCLKPerState[i] = soc->clock_limits[i].socclk_mhz;
mode_lib->vba.PHYCLKPerState[i] = soc->clock_limits[i].phyclk_mhz;
mode_lib->vba.PHYCLKD18PerState[i] = soc->clock_limits[i].phyclk_d18_mhz;
+ mode_lib->vba.PHYCLKD32PerState[i] = soc->clock_limits[i].phyclk_d32_mhz;
mode_lib->vba.MaxDppclk[i] = soc->clock_limits[i].dppclk_mhz;
mode_lib->vba.MaxDSCCLK[i] = soc->clock_limits[i].dscclk_mhz;
mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mts;
@@ -323,6 +439,18 @@ static void fetch_ip_params(struct display_mode_lib *mode_lib)
mode_lib->vba.COMPBUF_RESERVED_SPACE_ZS = ip->compbuf_reserved_space_zs;
mode_lib->vba.MaximumDSCBitsPerComponent = ip->maximum_dsc_bits_per_component;
mode_lib->vba.DSC422NativeSupport = ip->dsc422_native_support;
+ /* In DCN3.2, nomDETInKByte should be initialized correctly. */
+ mode_lib->vba.nomDETInKByte = ip->det_buffer_size_kbytes;
+ mode_lib->vba.CompbufReservedSpace64B = ip->compbuf_reserved_space_64b;
+ mode_lib->vba.CompbufReservedSpaceZs = ip->compbuf_reserved_space_zs;
+ mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal = ip->compressed_buffer_segment_size_in_kbytes;
+ mode_lib->vba.LineBufferSizeFinal = ip->line_buffer_size_bits;
+ mode_lib->vba.AlphaPixelChunkSizeInKByte = ip->alpha_pixel_chunk_size_kbytes; // not ysed
+ mode_lib->vba.MinPixelChunkSizeBytes = ip->min_pixel_chunk_size_bytes; // not used
+ mode_lib->vba.MaximumPixelsPerLinePerDSCUnit = ip->maximum_pixels_per_line_per_dsc_unit;
+ mode_lib->vba.MaxNumDP2p0Outputs = ip->max_num_dp2p0_outputs;
+ mode_lib->vba.MaxNumDP2p0Streams = ip->max_num_dp2p0_streams;
+ mode_lib->vba.DCCMetaBufferSizeBytes = ip->dcc_meta_buffer_size_bytes;
mode_lib->vba.PixelChunkSizeInKByte = ip->pixel_chunk_size_kbytes;
mode_lib->vba.MetaChunkSize = ip->meta_chunk_size_kbytes;
@@ -397,6 +525,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
visited[k] = false;
mode_lib->vba.NumberOfActivePlanes = 0;
+ mode_lib->vba.NumberOfActiveSurfaces = 0;
mode_lib->vba.ImmediateFlipSupport = false;
for (j = 0; j < mode_lib->vba.cache_num_pipes; ++j) {
display_pipe_source_params_st *src = &pipes[j].pipe.src;
@@ -427,6 +556,22 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
src->viewport_y_y;
mode_lib->vba.ViewportYStartC[mode_lib->vba.NumberOfActivePlanes] =
src->viewport_y_c;
+ mode_lib->vba.SourceRotation[mode_lib->vba.NumberOfActiveSurfaces] = src->source_rotation;
+ mode_lib->vba.ViewportXStartY[mode_lib->vba.NumberOfActiveSurfaces] = src->viewport_x_y;
+ mode_lib->vba.ViewportXStartC[mode_lib->vba.NumberOfActiveSurfaces] = src->viewport_x_c;
+ // TODO: Assign correct value to viewport_stationary
+ mode_lib->vba.ViewportStationary[mode_lib->vba.NumberOfActivePlanes] =
+ src->viewport_stationary;
+ mode_lib->vba.UsesMALLForPStateChange[mode_lib->vba.NumberOfActivePlanes] = src->use_mall_for_pstate_change;
+ mode_lib->vba.UseMALLForStaticScreen[mode_lib->vba.NumberOfActivePlanes] = src->use_mall_for_static_screen;
+ mode_lib->vba.GPUVMMinPageSizeKBytes[mode_lib->vba.NumberOfActivePlanes] = src->gpuvm_min_page_size_kbytes;
+ mode_lib->vba.RefreshRate[mode_lib->vba.NumberOfActivePlanes] = dst->refresh_rate; //todo remove this
+ mode_lib->vba.OutputLinkDPRate[mode_lib->vba.NumberOfActivePlanes] = dout->dp_rate;
+ mode_lib->vba.ODMUse[mode_lib->vba.NumberOfActivePlanes] = dst->odm_combine_policy;
+ mode_lib->vba.DETSizeOverride[mode_lib->vba.NumberOfActivePlanes] = src->det_size_override;
+ //TODO: Need to assign correct values to dp_multistream vars
+ mode_lib->vba.OutputMultistreamEn[mode_lib->vba.NumberOfActiveSurfaces] = dout->dp_multistream_en;
+ mode_lib->vba.OutputMultistreamId[mode_lib->vba.NumberOfActiveSurfaces] = dout->dp_multistream_id;
mode_lib->vba.PitchY[mode_lib->vba.NumberOfActivePlanes] = src->data_pitch;
mode_lib->vba.SurfaceWidthY[mode_lib->vba.NumberOfActivePlanes] = src->surface_width_y;
mode_lib->vba.SurfaceHeightY[mode_lib->vba.NumberOfActivePlanes] = src->surface_height_y;
@@ -452,6 +597,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
mode_lib->vba.HTotal[mode_lib->vba.NumberOfActivePlanes] = dst->htotal;
mode_lib->vba.VTotal[mode_lib->vba.NumberOfActivePlanes] = dst->vtotal;
mode_lib->vba.VFrontPorch[mode_lib->vba.NumberOfActivePlanes] = dst->vfront_porch;
+ mode_lib->vba.VBlankNom[mode_lib->vba.NumberOfActivePlanes] = dst->vblank_nom;
mode_lib->vba.DCCFractionOfZeroSizeRequestsLuma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_fraction_of_zs_req_luma;
mode_lib->vba.DCCFractionOfZeroSizeRequestsChroma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_fraction_of_zs_req_chroma;
mode_lib->vba.DCCEnable[mode_lib->vba.NumberOfActivePlanes] =
@@ -479,7 +625,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
mode_lib->vba.skip_dio_check[mode_lib->vba.NumberOfActivePlanes] =
dout->is_virtual;
- if (!dout->dsc_enable)
+ if (dout->dsc_enable)
mode_lib->vba.ForcedOutputLinkBPP[mode_lib->vba.NumberOfActivePlanes] = dout->output_bpp;
else
mode_lib->vba.ForcedOutputLinkBPP[mode_lib->vba.NumberOfActivePlanes] = 0.0;
@@ -553,6 +699,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
mode_lib->vba.PixelClock[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz;
mode_lib->vba.PixelClockBackEnd[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz;
mode_lib->vba.DPPCLK[mode_lib->vba.NumberOfActivePlanes] = clks->dppclk_mhz;
+ mode_lib->vba.DRRDisplay[mode_lib->vba.NumberOfActiveSurfaces] = dst->drr_display;
if (ip->is_line_buffer_bpp_fixed)
mode_lib->vba.LBBitPerPixel[mode_lib->vba.NumberOfActivePlanes] =
ip->line_buffer_fixed_bpp;
@@ -675,6 +822,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
}
mode_lib->vba.NumberOfActivePlanes++;
+ mode_lib->vba.NumberOfActiveSurfaces++;
}
// handle overlays through BlendingAndTiming
@@ -700,6 +848,11 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
}
}
+ mode_lib->vba.SynchronizeTimingsFinal = pipes[0].pipe.dest.synchronize_timings;
+ mode_lib->vba.DCCProgrammingAssumesScanDirectionUnknownFinal = false;
+
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = 0;
+
mode_lib->vba.UseUnboundedRequesting = dm_unbounded_requesting;
for (k = 0; k < mode_lib->vba.cache_num_pipes; ++k) {
if (pipes[k].pipe.src.unbounded_req_mode == 0)
@@ -743,6 +896,54 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
mode_lib->vba.GPUVMEnable = mode_lib->vba.GPUVMEnable && !!ip->gpuvm_enable;
mode_lib->vba.HostVMEnable = mode_lib->vba.HostVMEnable && !!ip->hostvm_enable;
+
+ for (k = 0; k < mode_lib->vba.cache_num_pipes; ++k) {
+ mode_lib->vba.ForceOneRowForFrame[k] = pipes[k].pipe.src.force_one_row_for_frame;
+ mode_lib->vba.PteBufferMode[k] = pipes[k].pipe.src.pte_buffer_mode;
+
+ if (mode_lib->vba.PteBufferMode[k] == 0 && mode_lib->vba.GPUVMEnable) {
+ if (mode_lib->vba.ForceOneRowForFrame[k] ||
+ (mode_lib->vba.GPUVMMinPageSizeKBytes[k] > 64*1024) ||
+ (mode_lib->vba.UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_disable) ||
+ (mode_lib->vba.UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable)) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ERROR: Invalid PteBufferMode=%d for plane %0d!\n",
+ __func__, mode_lib->vba.PteBufferMode[k], k);
+ dml_print("DML::%s: - ForceOneRowForFrame = %d\n",
+ __func__, mode_lib->vba.ForceOneRowForFrame[k]);
+ dml_print("DML::%s: - GPUVMMinPageSizeKBytes = %d\n",
+ __func__, mode_lib->vba.GPUVMMinPageSizeKBytes[k]);
+ dml_print("DML::%s: - UseMALLForPStateChange = %d\n",
+ __func__, (int) mode_lib->vba.UsesMALLForPStateChange[k]);
+ dml_print("DML::%s: - UseMALLForStaticScreen = %d\n",
+ __func__, (int) mode_lib->vba.UseMALLForStaticScreen[k]);
+#endif
+ ASSERT(0);
+ }
+ }
+ }
+}
+
+/**
+ * ********************************************************************************************
+ * cache_debug_params: Cache any params that needed to be maintained from the initial validation
+ * for debug purposes.
+ *
+ * The DML getters can modify some of the VBA params that we are interested in (for example when
+ * calculating with dummy p-state latency), so cache any params here that we want for debugging
+ *
+ * @param [in] mode_lib: mode_lib input/output of validate call
+ *
+ * @return: void
+ *
+ * ********************************************************************************************
+ */
+static void cache_debug_params(struct display_mode_lib *mode_lib)
+{
+ int k = 0;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++)
+ mode_lib->vba.CachedActiveDRAMClockChangeLatencyMargin[k] = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
}
// in wm mode we pull the parameters needed from the display_e2e_pipe_params_st structs
@@ -768,7 +969,7 @@ static void recalculate_params(
}
}
-bool Calculate256BBlockSizes(
+void Calculate256BBlockSizes(
enum source_format_class SourcePixelFormat,
enum dm_swizzle_mode SurfaceTiling,
unsigned int BytePerPixelY,
@@ -806,7 +1007,6 @@ bool Calculate256BBlockSizes(
*BlockWidth256BytesY = 256 / BytePerPixelY / *BlockHeight256BytesY;
*BlockWidth256BytesC = 256 / BytePerPixelC / *BlockHeight256BytesC;
}
- return true;
}
bool CalculateMinAndMaxPrefetchMode(
@@ -872,6 +1072,7 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib)
soc_bounding_box_st *soc = &mode_lib->vba.soc;
unsigned int k;
unsigned int total_pipes = 0;
+ unsigned int pipe_idx = 0;
mode_lib->vba.VoltageLevel = mode_lib->vba.cache_pipes[0].clks_cfg.voltage;
mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb];
@@ -893,6 +1094,11 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib)
// Total Available Pipes Support Check
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
total_pipes += mode_lib->vba.DPPPerPlane[k];
+ pipe_idx = get_pipe_idx(mode_lib, k);
+ if (mode_lib->vba.cache_pipes[pipe_idx].clks_cfg.dppclk_mhz > 0.0)
+ mode_lib->vba.DPPCLK[k] = mode_lib->vba.cache_pipes[pipe_idx].clks_cfg.dppclk_mhz;
+ else
+ mode_lib->vba.DPPCLK[k] = soc->clock_limits[mode_lib->vba.VoltageLevel].dppclk_mhz;
}
ASSERT(total_pipes <= DC__NUM_DPP__MAX);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 90e87961fe3e..630f3395e90a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -58,6 +58,19 @@ dml_get_attr_decl(return_bw);
dml_get_attr_decl(tcalc);
dml_get_attr_decl(fraction_of_urgent_bandwidth);
dml_get_attr_decl(fraction_of_urgent_bandwidth_imm_flip);
+dml_get_attr_decl(cstate_max_cap_mode);
+dml_get_attr_decl(comp_buffer_size_kbytes);
+dml_get_attr_decl(pixel_chunk_size_in_kbyte);
+dml_get_attr_decl(alpha_pixel_chunk_size_in_kbyte);
+dml_get_attr_decl(meta_chunk_size_in_kbyte);
+dml_get_attr_decl(min_pixel_chunk_size_in_byte);
+dml_get_attr_decl(min_meta_chunk_size_in_byte);
+dml_get_attr_decl(fclk_watermark);
+dml_get_attr_decl(usr_retraining_watermark);
+dml_get_attr_decl(comp_buffer_reserved_space_kbytes);
+dml_get_attr_decl(comp_buffer_reserved_space_64bytes);
+dml_get_attr_decl(comp_buffer_reserved_space_zs);
+dml_get_attr_decl(unbounded_request_enabled);
#define dml_get_pipe_attr_decl(attr) double get_##attr(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, unsigned int num_pipes, unsigned int which_pipe)
@@ -75,6 +88,26 @@ dml_get_pipe_attr_decl(dst_y_per_row_vblank);
dml_get_pipe_attr_decl(dst_y_prefetch);
dml_get_pipe_attr_decl(dst_y_per_vm_flip);
dml_get_pipe_attr_decl(dst_y_per_row_flip);
+dml_get_pipe_attr_decl(dst_y_per_pte_row_nom_l);
+dml_get_pipe_attr_decl(dst_y_per_pte_row_nom_c);
+dml_get_pipe_attr_decl(dst_y_per_meta_row_nom_l);
+dml_get_pipe_attr_decl(dst_y_per_meta_row_nom_c);
+dml_get_pipe_attr_decl(dpte_row_height_linear_c);
+dml_get_pipe_attr_decl(swath_height_l);
+dml_get_pipe_attr_decl(swath_height_c);
+dml_get_pipe_attr_decl(det_stored_buffer_size_l_bytes);
+dml_get_pipe_attr_decl(det_stored_buffer_size_c_bytes);
+dml_get_pipe_attr_decl(dpte_group_size_in_bytes);
+dml_get_pipe_attr_decl(vm_group_size_in_bytes);
+dml_get_pipe_attr_decl(det_buffer_size_kbytes);
+dml_get_pipe_attr_decl(dpte_row_height_linear_l);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_nom_l_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_nom_c_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_vblank_l_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_vblank_c_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_flip_l_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_flip_c_in_us);
+dml_get_pipe_attr_decl(pte_buffer_mode);
dml_get_pipe_attr_decl(refcyc_per_vm_group_vblank);
dml_get_pipe_attr_decl(refcyc_per_vm_group_flip);
dml_get_pipe_attr_decl(refcyc_per_vm_req_vblank);
@@ -108,6 +141,8 @@ dml_get_pipe_attr_decl(vupdate_width);
dml_get_pipe_attr_decl(vready_offset);
dml_get_pipe_attr_decl(vready_at_or_after_vsync);
dml_get_pipe_attr_decl(min_dst_y_next_start);
+dml_get_pipe_attr_decl(vstartup_calculated);
+dml_get_pipe_attr_decl(subviewport_lines_needed_in_mall);
double get_total_immediate_flip_bytes(
struct display_mode_lib *mode_lib,
@@ -126,9 +161,18 @@ unsigned int dml_get_voltage_level(
const display_e2e_pipe_params_st *pipes,
unsigned int num_pipes);
+unsigned int get_total_surface_size_in_mall_bytes(
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes);
+
+bool get_is_phantom_pipe(struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes,
+ unsigned int pipe_idx);
void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct display_mode_lib *mode_lib);
-bool Calculate256BBlockSizes(
+void Calculate256BBlockSizes(
enum source_format_class SourcePixelFormat,
enum dm_swizzle_mode SurfaceTiling,
unsigned int BytePerPixelY,
@@ -138,6 +182,79 @@ bool Calculate256BBlockSizes(
unsigned int *BlockWidth256BytesY,
unsigned int *BlockWidth256BytesC);
+struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation {
+ unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX];
+ double dummy_single_array[2][DC__NUM_DPP__MAX];
+ unsigned int dummy_long_array[2][DC__NUM_DPP__MAX];
+ double dummy_double_array[2][DC__NUM_DPP__MAX];
+ bool dummy_boolean_array[DC__NUM_DPP__MAX];
+ bool dummy_boolean;
+ bool dummy_boolean2;
+ enum output_encoder_class dummy_output_encoder_array[DC__NUM_DPP__MAX];
+ DmlPipe SurfaceParameters[DC__NUM_DPP__MAX];
+ bool dummy_boolean_array2[2][DC__NUM_DPP__MAX];
+ unsigned int ReorderBytes;
+ unsigned int VMDataOnlyReturnBW;
+ double HostVMInefficiencyFactor;
+ DmlPipe myPipe;
+ SOCParametersList mmSOCParameters;
+ double dummy_unit_vector[DC__NUM_DPP__MAX];
+ double dummy_single[2];
+ enum clock_change_support dummy_dramchange_support;
+ enum dm_fclock_change_support dummy_fclkchange_support;
+ bool dummy_USRRetrainingSupport;
+};
+
+struct dml32_ModeSupportAndSystemConfigurationFull {
+ unsigned int dummy_integer_array[22][DC__NUM_DPP__MAX];
+ double dummy_double_array[2][DC__NUM_DPP__MAX];
+ DmlPipe SurfParameters[DC__NUM_DPP__MAX];
+ double dummy_single[5];
+ double dummy_single2[5];
+ SOCParametersList mSOCParameters;
+ unsigned int MaximumSwathWidthSupportLuma;
+ unsigned int MaximumSwathWidthSupportChroma;
+ double DSTYAfterScaler[DC__NUM_DPP__MAX];
+ double DSTXAfterScaler[DC__NUM_DPP__MAX];
+ double MaxTotalVActiveRDBandwidth;
+ bool dummy_boolean_array[2][DC__NUM_DPP__MAX];
+ enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX];
+ DmlPipe myPipe;
+ unsigned int dummy_integer[4];
+ unsigned int TotalNumberOfActiveOTG;
+ unsigned int TotalNumberOfActiveHDMIFRL;
+ unsigned int TotalNumberOfActiveDP2p0;
+ unsigned int TotalNumberOfActiveDP2p0Outputs;
+ unsigned int TotalDSCUnitsRequired;
+ unsigned int ReorderingBytes;
+ unsigned int TotalSlots;
+ unsigned int NumberOfDPPDSC;
+ unsigned int NumberOfDPPNoDSC;
+ unsigned int NextPrefetchModeState;
+ bool MPCCombineMethodAsNeededForPStateChangeAndVoltage;
+ bool MPCCombineMethodAsPossible;
+ bool FullFrameMALLPStateMethod;
+ bool SubViewportMALLPStateMethod;
+ bool PhantomPipeMALLPStateMethod;
+ bool NoChroma;
+ bool TotalAvailablePipesSupportNoDSC;
+ bool TotalAvailablePipesSupportDSC;
+ enum odm_combine_mode ODMModeNoDSC;
+ enum odm_combine_mode ODMModeDSC;
+ double RequiredDISPCLKPerSurfaceNoDSC;
+ double RequiredDISPCLKPerSurfaceDSC;
+ double BWOfNonCombinedSurfaceOfMaximumBandwidth;
+ double VMDataOnlyReturnBWPerState;
+ double HostVMInefficiencyFactor;
+ bool dummy_boolean[2];
+};
+
+struct dummy_vars {
+ struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation;
+ struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull;
+};
+
struct vba_vars_st {
ip_params_st ip;
soc_bounding_box_st soc;
@@ -169,6 +286,7 @@ struct vba_vars_st {
double NextMaxVStartup;
double VBlankTime;
double SmallestVBlank;
+ enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal; // Mode Support only
double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
double EffectiveDETPlusLBLinesLuma;
double EffectiveDETPlusLBLinesChroma;
@@ -194,7 +312,9 @@ struct vba_vars_st {
unsigned int ActiveDPPs;
unsigned int LBLatencyHidingSourceLinesY;
unsigned int LBLatencyHidingSourceLinesC;
+ double ActiveDRAMClockChangeLatencyMarginPerState[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];// DML doesn't save active margin per state
double ActiveDRAMClockChangeLatencyMargin[DC__NUM_DPP__MAX];
+ double CachedActiveDRAMClockChangeLatencyMargin[DC__NUM_DPP__MAX]; // Cache in dml_get_voltage_level for debug purposes only
double MinActiveDRAMClockChangeMargin;
double InitFillLevel;
double FinalFillMargin;
@@ -211,6 +331,13 @@ struct vba_vars_st {
double UrgentLatencyPixelMixedWithVMData;
double UrgentLatencyVMDataOnly;
double UrgentLatency; // max of the above three
+ double USRRetrainingLatency;
+ double SMNLatency;
+ double FCLKChangeLatency;
+ unsigned int MALLAllocatedForDCNFinal;
+ double MaxAveragePercentOfIdealFabricBWDisplayCanUseInNormalSystemOperation;
+ double MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE;
+ double PercentOfIdealDRAMBWReceivedAfterUrgLatencySTROBE;
double WritebackLatency;
double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly; // Mode Support
double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData; // Mode Support
@@ -283,6 +410,14 @@ struct vba_vars_st {
double DPPCLKDelayCNVCCursor;
double DISPCLKDelaySubtotal;
bool ProgressiveToInterlaceUnitInOPP;
+ unsigned int CompressedBufferSegmentSizeInkByteFinal;
+ unsigned int CompbufReservedSpace64B;
+ unsigned int CompbufReservedSpaceZs;
+ unsigned int LineBufferSizeFinal;
+ unsigned int MaximumPixelsPerLinePerDSCUnit;
+ unsigned int AlphaPixelChunkSizeInKByte;
+ double MinPixelChunkSizeBytes;
+ unsigned int DCCMetaBufferSizeBytes;
// Pipe/Plane Parameters
int VoltageLevel;
double FabricClock;
@@ -290,6 +425,23 @@ struct vba_vars_st {
double DISPCLK;
double SOCCLK;
double DCFCLK;
+ unsigned int MaxTotalDETInKByte;
+ unsigned int MinCompressedBufferSizeInKByte;
+ unsigned int NumberOfActiveSurfaces;
+ bool ViewportStationary[DC__NUM_DPP__MAX];
+ unsigned int RefreshRate[DC__NUM_DPP__MAX];
+ double OutputBPP[DC__NUM_DPP__MAX];
+ unsigned int GPUVMMinPageSizeKBytes[DC__NUM_DPP__MAX];
+ bool SynchronizeTimingsFinal;
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ bool ForceOneRowForFrame[DC__NUM_DPP__MAX];
+ unsigned int ViewportXStartY[DC__NUM_DPP__MAX];
+ unsigned int ViewportXStartC[DC__NUM_DPP__MAX];
+ enum dm_rotation_angle SourceRotation[DC__NUM_DPP__MAX];
+ bool DRRDisplay[DC__NUM_DPP__MAX];
+ bool PteBufferMode[DC__NUM_DPP__MAX];
+ enum dm_output_type OutputType[DC__NUM_DPP__MAX];
+ enum dm_output_rate OutputRate[DC__NUM_DPP__MAX];
unsigned int NumberOfActivePlanes;
unsigned int NumberOfDSCSlices[DC__NUM_DPP__MAX];
@@ -354,6 +506,8 @@ struct vba_vars_st {
unsigned int CursorBPP[DC__NUM_DPP__MAX][DC__NUM_CURSOR__MAX];
bool XFCEnabled[DC__NUM_DPP__MAX];
bool ScalerEnabled[DC__NUM_DPP__MAX];
+ unsigned int VBlankNom[DC__NUM_DPP__MAX];
+ bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment;
// Intermediates/Informational
bool ImmediateFlipSupport;
@@ -391,6 +545,16 @@ struct vba_vars_st {
double StutterEfficiencyNotIncludingVBlank;
double NonUrgentLatencyTolerance;
double MinActiveDRAMClockChangeLatencySupported;
+ double Z8StutterEfficiencyBestCase;
+ unsigned int Z8NumberOfStutterBurstsPerFrameBestCase;
+ double Z8StutterEfficiencyNotIncludingVBlankBestCase;
+ double StutterPeriodBestCase;
+ Watermarks Watermark;
+ bool DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
+ unsigned int CompBufReservedSpaceKBytes;
+ unsigned int CompBufReservedSpace64B;
+ unsigned int CompBufReservedSpaceZs;
+ bool CompBufReservedSpaceNeedAdjustment;
// These are the clocks calcuated by the library but they are not actually
// used explicitly. They are fetched by tests and then possibly used. The
@@ -398,6 +562,10 @@ struct vba_vars_st {
double DISPCLK_calculated;
double DPPCLK_calculated[DC__NUM_DPP__MAX];
+ bool ImmediateFlipSupportedSurface[DC__NUM_DPP__MAX];
+
+ bool Use_One_Row_For_Frame[DC__NUM_DPP__MAX];
+ bool Use_One_Row_For_Frame_Flip[DC__NUM_DPP__MAX];
unsigned int VUpdateOffsetPix[DC__NUM_DPP__MAX];
double VUpdateWidthPix[DC__NUM_DPP__MAX];
double VReadyOffsetPix[DC__NUM_DPP__MAX];
@@ -428,6 +596,7 @@ struct vba_vars_st {
double DRAMSpeedPerState[DC__VOLTAGE_STATES];
double MaxDispclk[DC__VOLTAGE_STATES];
int VoltageOverrideLevel;
+ double PHYCLKD32PerState[DC__VOLTAGE_STATES];
/*outputs*/
bool ScaleRatioAndTapsSupport;
@@ -451,6 +620,51 @@ struct vba_vars_st {
bool PitchSupport;
enum dm_validation_status ValidationStatus[DC__VOLTAGE_STATES];
+ /* Mode Support Reason */
+ bool P2IWith420;
+ bool DSCOnlyIfNecessaryWithBPP;
+ bool DSC422NativeNotSupported;
+ bool LinkRateDoesNotMatchDPVersion;
+ bool LinkRateForMultistreamNotIndicated;
+ bool BPPForMultistreamNotIndicated;
+ bool MultistreamWithHDMIOreDP;
+ bool MSOOrODMSplitWithNonDPLink;
+ bool NotEnoughLanesForMSO;
+ bool ViewportExceedsSurface;
+
+ bool ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified;
+ bool ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
+ bool InvalidCombinationOfMALLUseForPStateAndStaticScreen;
+ bool InvalidCombinationOfMALLUseForPState;
+
+ enum dm_output_link_dp_rate OutputLinkDPRate[DC__NUM_DPP__MAX];
+ double PrefetchLinesYThisState[DC__NUM_DPP__MAX];
+ double PrefetchLinesCThisState[DC__NUM_DPP__MAX];
+ double meta_row_bandwidth_this_state[DC__NUM_DPP__MAX];
+ double dpte_row_bandwidth_this_state[DC__NUM_DPP__MAX];
+ double DPTEBytesPerRowThisState[DC__NUM_DPP__MAX];
+ double PDEAndMetaPTEBytesPerFrameThisState[DC__NUM_DPP__MAX];
+ double MetaRowBytesThisState[DC__NUM_DPP__MAX];
+ bool use_one_row_for_frame[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ bool use_one_row_for_frame_flip[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ bool use_one_row_for_frame_this_state[DC__NUM_DPP__MAX];
+ bool use_one_row_for_frame_flip_this_state[DC__NUM_DPP__MAX];
+
+ unsigned int OutputTypeAndRatePerState[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX];
+ double RequiredDISPCLKPerSurface[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ unsigned int MacroTileHeightY[DC__NUM_DPP__MAX];
+ unsigned int MacroTileHeightC[DC__NUM_DPP__MAX];
+ unsigned int MacroTileWidthY[DC__NUM_DPP__MAX];
+ unsigned int MacroTileWidthC[DC__NUM_DPP__MAX];
+ bool ImmediateFlipRequiredFinal;
+ bool DCCProgrammingAssumesScanDirectionUnknownFinal;
+ bool EnoughWritebackUnits;
+ bool ODMCombine2To1SupportCheckOK[DC__VOLTAGE_STATES];
+ bool NumberOfDP2p0Support;
+ unsigned int MaxNumDP2p0Streams;
+ unsigned int MaxNumDP2p0Outputs;
+ enum dm_output_type OutputTypePerState[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX];
+ enum dm_output_rate OutputRatePerState[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX];
double WritebackLineBufferLumaBufferSize;
double WritebackLineBufferChromaBufferSize;
double WritebackMinHSCLRatio;
@@ -544,6 +758,8 @@ struct vba_vars_st {
bool DTBCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES];
double UrgentRoundTripAndOutOfOrderLatencyPerState[DC__VOLTAGE_STATES];
bool ROBSupport[DC__VOLTAGE_STATES][2];
+ //based on rev 99: Dim DCCMetaBufferSizeSupport(NumberOfStates, 1) As Boolean
+ bool DCCMetaBufferSizeSupport[DC__VOLTAGE_STATES][2];
bool PTEBufferSizeNotExceeded[DC__VOLTAGE_STATES][2];
bool TotalVerticalActiveBandwidthSupport[DC__VOLTAGE_STATES][2];
double MaxTotalVerticalActiveAvailableBandwidth[DC__VOLTAGE_STATES][2];
@@ -585,8 +801,6 @@ struct vba_vars_st {
double PSCL_FACTOR[DC__NUM_DPP__MAX];
double PSCL_FACTOR_CHROMA[DC__NUM_DPP__MAX];
double MaximumVStartup[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
- unsigned int MacroTileWidthY[DC__NUM_DPP__MAX];
- unsigned int MacroTileWidthC[DC__NUM_DPP__MAX];
double AlignedDCCMetaPitch[DC__NUM_DPP__MAX];
double AlignedYPitch[DC__NUM_DPP__MAX];
double AlignedCPitch[DC__NUM_DPP__MAX];
@@ -644,8 +858,7 @@ struct vba_vars_st {
double dummy7[DC__NUM_DPP__MAX];
double dummy8[DC__NUM_DPP__MAX];
double dummy13[DC__NUM_DPP__MAX];
- unsigned int dummyinteger1ms[DC__NUM_DPP__MAX];
- double dummyinteger2ms[DC__NUM_DPP__MAX];
+ double dummy_double_array[2][DC__NUM_DPP__MAX];
unsigned int dummyinteger3[DC__NUM_DPP__MAX];
unsigned int dummyinteger4[DC__NUM_DPP__MAX];
unsigned int dummyinteger5;
@@ -655,14 +868,8 @@ struct vba_vars_st {
unsigned int dummyinteger9;
unsigned int dummyinteger10;
unsigned int dummyinteger11;
- unsigned int dummyinteger12;
- unsigned int dummyinteger30;
- unsigned int dummyinteger31;
- unsigned int dummyinteger32;
- unsigned int dummyintegerarr1[DC__NUM_DPP__MAX];
- unsigned int dummyintegerarr2[DC__NUM_DPP__MAX];
- unsigned int dummyintegerarr3[DC__NUM_DPP__MAX];
- unsigned int dummyintegerarr4[DC__NUM_DPP__MAX];
+ unsigned int dummy_integer_array[8][DC__NUM_DPP__MAX];
+
bool dummysinglestring;
bool SingleDPPViewportSizeSupportPerPlane[DC__NUM_DPP__MAX];
double PlaneRequiredDISPCLKWithODMCombine2To1;
@@ -802,7 +1009,6 @@ struct vba_vars_st {
double TimePerChromaMetaChunkFlip[DC__NUM_DPP__MAX];
unsigned int DCCCMaxUncompressedBlock[DC__NUM_DPP__MAX];
unsigned int DCCCMaxCompressedBlock[DC__NUM_DPP__MAX];
- unsigned int DCCCIndependent64ByteBlock[DC__NUM_DPP__MAX];
double VStartupMargin;
bool NotEnoughTimeForDynamicMetadata[DC__NUM_DPP__MAX];
@@ -893,8 +1099,8 @@ struct vba_vars_st {
double meta_row_bandwidth[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
double DETBufferSizeYAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
double DETBufferSizeCAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
- int swath_width_luma_ub_all_states[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
- int swath_width_chroma_ub_all_states[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ unsigned int swath_width_luma_ub_all_states[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ unsigned int swath_width_chroma_ub_all_states[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
bool NotUrgentLatencyHiding[DC__VOLTAGE_STATES][2];
unsigned int SwathHeightYAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
unsigned int SwathHeightCAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
@@ -907,8 +1113,6 @@ struct vba_vars_st {
double WritebackDelayTime[DC__NUM_DPP__MAX];
unsigned int DCCYIndependentBlock[DC__NUM_DPP__MAX];
unsigned int DCCCIndependentBlock[DC__NUM_DPP__MAX];
- unsigned int dummyinteger15;
- unsigned int dummyinteger16;
unsigned int dummyinteger17;
unsigned int dummyinteger18;
unsigned int dummyinteger19;
@@ -969,6 +1173,58 @@ struct vba_vars_st {
int Z8NumberOfStutterBurstsPerFrame;
unsigned int MaximumDSCBitsPerComponent;
unsigned int NotEnoughUrgentLatencyHidingA[DC__VOLTAGE_STATES][2];
+ double ReadBandwidthSurfaceLuma[DC__NUM_DPP__MAX];
+ double ReadBandwidthSurfaceChroma[DC__NUM_DPP__MAX];
+ double SurfaceRequiredDISPCLKWithoutODMCombine;
+ double SurfaceRequiredDISPCLK;
+ double MinActiveFCLKChangeLatencySupported;
+ int MinVoltageLevel;
+ int MaxVoltageLevel;
+ unsigned int TotalNumberOfSingleDPPSurfaces[DC__VOLTAGE_STATES][2];
+ unsigned int CompressedBufferSizeInkByteAllStates[DC__VOLTAGE_STATES][2];
+ unsigned int DETBufferSizeInKByteAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ unsigned int DETBufferSizeInKByteThisState[DC__NUM_DPP__MAX];
+ unsigned int SurfaceSizeInMALL[DC__NUM_DPP__MAX];
+ bool ExceededMALLSize;
+ bool PTE_BUFFER_MODE[DC__NUM_DPP__MAX];
+ unsigned int BIGK_FRAGMENT_SIZE[DC__NUM_DPP__MAX];
+ unsigned int CompressedBufferSizeInkByteThisState;
+ enum dm_fclock_change_support FCLKChangeSupport[DC__VOLTAGE_STATES][2];
+ bool USRRetrainingSupport[DC__VOLTAGE_STATES][2];
+ enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX];
+ bool UnboundedRequestEnabledAllStates[DC__VOLTAGE_STATES][2];
+ bool SingleDPPViewportSizeSupportPerSurface[DC__NUM_DPP__MAX];
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[DC__NUM_DPP__MAX];
+ bool UnboundedRequestEnabledThisState;
+ bool DRAMClockChangeRequirementFinal;
+ bool FCLKChangeRequirementFinal;
+ bool USRRetrainingRequiredFinal;
+ unsigned int DETSizeOverride[DC__NUM_DPP__MAX];
+ unsigned int nomDETInKByte;
+ enum mpc_combine_affinity MPCCombineUse[DC__NUM_DPP__MAX];
+ bool MPCCombineMethodIncompatible;
+ unsigned int RequiredSlots[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX];
+ bool ExceededMultistreamSlots[DC__VOLTAGE_STATES];
+ enum odm_combine_policy ODMUse[DC__NUM_DPP__MAX];
+ unsigned int OutputMultistreamId[DC__NUM_DPP__MAX];
+ bool OutputMultistreamEn[DC__NUM_DPP__MAX];
+ bool UsesMALLForStaticScreen[DC__NUM_DPP__MAX];
+ double MaxActiveDRAMClockChangeLatencySupported[DC__NUM_DPP__MAX];
+ double WritebackAllowFCLKChangeEndPosition[DC__NUM_DPP__MAX];
+ bool PTEBufferSizeNotExceededPerState[DC__NUM_DPP__MAX]; // new in DML32
+ bool DCCMetaBufferSizeNotExceededPerState[DC__NUM_DPP__MAX]; // new in DML32
+ bool NotEnoughDSCSlices[DC__VOLTAGE_STATES];
+ bool PixelsPerLinePerDSCUnitSupport[DC__VOLTAGE_STATES];
+ bool DCCMetaBufferSizeNotExceeded[DC__VOLTAGE_STATES][2];
+ unsigned int dpte_row_height_linear[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_linear_chroma[DC__NUM_DPP__MAX];
+ unsigned int BlockHeightY[DC__NUM_DPP__MAX];
+ unsigned int BlockHeightC[DC__NUM_DPP__MAX];
+ unsigned int BlockWidthY[DC__NUM_DPP__MAX];
+ unsigned int BlockWidthC[DC__NUM_DPP__MAX];
+ unsigned int SubViewportLinesNeededInMALL[DC__NUM_DPP__MAX];
+ bool VActiveBandwithSupport[DC__VOLTAGE_STATES][2];
+ struct dummy_vars dummy_vars;
};
bool CalculateMinAndMaxPrefetchMode(
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
index 71ea503cb32f..412e75eb4704 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
@@ -142,9 +142,6 @@ void print__dlg_sys_params_st(struct display_mode_lib *mode_lib, const struct _v
dml_print("DML_RQ_DLG_CALC: t_sr_wm_us = %3.2f\n", dlg_sys_param->t_sr_wm_us);
dml_print("DML_RQ_DLG_CALC: t_extra_us = %3.2f\n", dlg_sys_param->t_extra_us);
dml_print(
- "DML_RQ_DLG_CALC: t_srx_delay_us = %3.2f\n",
- dlg_sys_param->t_srx_delay_us);
- dml_print(
"DML_RQ_DLG_CALC: deepsleep_dcfclk_mhz = %3.2f\n",
dlg_sys_param->deepsleep_dcfclk_mhz);
dml_print(
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
index 59dc2c5b58dd..3df559c591f8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
@@ -1331,10 +1331,6 @@ void dml1_rq_dlg_get_dlg_params(
if (dual_plane)
DTRACE("DLG: %s: swath_height_c = %d", __func__, swath_height_c);
- DTRACE(
- "DLG: %s: t_srx_delay_us = %3.2f",
- __func__,
- (double) dlg_sys_param->t_srx_delay_us);
DTRACE("DLG: %s: line_time_in_us = %3.2f", __func__, (double) line_time_in_us);
DTRACE("DLG: %s: vupdate_offset = %d", __func__, vupdate_offset);
DTRACE("DLG: %s: vupdate_width = %d", __func__, vupdate_width);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
index 479d7d83220c..072bd0539605 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
@@ -76,14 +76,9 @@ static inline double dml_floor(double a, double granularity)
static inline double dml_round(double a)
{
- double round_pt = 0.5;
- double ceil = dml_ceil(a, 1);
- double floor = dml_floor(a, 1);
+ const double round_pt = 0.5;
- if (a - floor >= round_pt)
- return ceil;
- else
- return floor;
+ return dml_floor(a + round_pt, 1);
}
/* float
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c
deleted file mode 100644
index 789f7562cdc7..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c
+++ /dev/null
@@ -1,1889 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "dml_wrapper.h"
-#include "resource.h"
-#include "core_types.h"
-#include "dsc.h"
-#include "clk_mgr.h"
-
-#ifndef DC_LOGGER_INIT
-#define DC_LOGGER_INIT
-#undef DC_LOG_WARNING
-#define DC_LOG_WARNING
-#endif
-
-#define DML_WRAPPER_TRANSLATION_
-#include "dml_wrapper_translation.c"
-#undef DML_WRAPPER_TRANSLATION_
-
-static bool is_dual_plane(enum surface_pixel_format format)
-{
- return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
-}
-
-static void build_clamping_params(struct dc_stream_state *stream)
-{
- stream->clamping.clamping_level = CLAMPING_FULL_RANGE;
- stream->clamping.c_depth = stream->timing.display_color_depth;
- stream->clamping.pixel_encoding = stream->timing.pixel_encoding;
-}
-
-static void get_pixel_clock_parameters(
- const struct pipe_ctx *pipe_ctx,
- struct pixel_clk_params *pixel_clk_params)
-{
- const struct dc_stream_state *stream = pipe_ctx->stream;
-
- /*TODO: is this halved for YCbCr 420? in that case we might want to move
- * the pixel clock normalization for hdmi up to here instead of doing it
- * in pll_adjust_pix_clk
- */
- pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz;
- pixel_clk_params->encoder_object_id = stream->link->link_enc->id;
- pixel_clk_params->signal_type = pipe_ctx->stream->signal;
- pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1;
- /* TODO: un-hardcode*/
- pixel_clk_params->requested_sym_clk = LINK_RATE_LOW *
- LINK_RATE_REF_FREQ_IN_KHZ;
- pixel_clk_params->flags.ENABLE_SS = 0;
- pixel_clk_params->color_depth =
- stream->timing.display_color_depth;
- pixel_clk_params->flags.DISPLAY_BLANKED = 1;
- pixel_clk_params->flags.SUPPORT_YCBCR420 = (stream->timing.pixel_encoding ==
- PIXEL_ENCODING_YCBCR420);
- pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding;
- if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) {
- pixel_clk_params->color_depth = COLOR_DEPTH_888;
- }
- if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) {
- pixel_clk_params->requested_pix_clk_100hz = pixel_clk_params->requested_pix_clk_100hz / 2;
- }
- if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
- pixel_clk_params->requested_pix_clk_100hz *= 2;
-
-}
-
-static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx)
-{
- get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params);
-
- if (pipe_ctx->clock_source)
- pipe_ctx->clock_source->funcs->get_pix_clk_dividers(
- pipe_ctx->clock_source,
- &pipe_ctx->stream_res.pix_clk_params,
- &pipe_ctx->pll_settings);
-
- pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding;
-
- resource_build_bit_depth_reduction_params(pipe_ctx->stream,
- &pipe_ctx->stream->bit_depth_params);
- build_clamping_params(pipe_ctx->stream);
-
- return DC_OK;
-}
-
-static void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
- struct bit_depth_reduction_params *fmt_bit_depth)
-{
- enum dc_dither_option option = stream->dither_option;
- enum dc_pixel_encoding pixel_encoding =
- stream->timing.pixel_encoding;
-
- memset(fmt_bit_depth, 0, sizeof(*fmt_bit_depth));
-
- if (option == DITHER_OPTION_DEFAULT) {
- switch (stream->timing.display_color_depth) {
- case COLOR_DEPTH_666:
- option = DITHER_OPTION_SPATIAL6;
- break;
- case COLOR_DEPTH_888:
- option = DITHER_OPTION_SPATIAL8;
- break;
- case COLOR_DEPTH_101010:
- option = DITHER_OPTION_SPATIAL10;
- break;
- default:
- option = DITHER_OPTION_DISABLE;
- }
- }
-
- if (option == DITHER_OPTION_DISABLE)
- return;
-
- if (option == DITHER_OPTION_TRUN6) {
- fmt_bit_depth->flags.TRUNCATE_ENABLED = 1;
- fmt_bit_depth->flags.TRUNCATE_DEPTH = 0;
- } else if (option == DITHER_OPTION_TRUN8 ||
- option == DITHER_OPTION_TRUN8_SPATIAL6 ||
- option == DITHER_OPTION_TRUN8_FM6) {
- fmt_bit_depth->flags.TRUNCATE_ENABLED = 1;
- fmt_bit_depth->flags.TRUNCATE_DEPTH = 1;
- } else if (option == DITHER_OPTION_TRUN10 ||
- option == DITHER_OPTION_TRUN10_SPATIAL6 ||
- option == DITHER_OPTION_TRUN10_SPATIAL8 ||
- option == DITHER_OPTION_TRUN10_FM8 ||
- option == DITHER_OPTION_TRUN10_FM6 ||
- option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) {
- fmt_bit_depth->flags.TRUNCATE_ENABLED = 1;
- fmt_bit_depth->flags.TRUNCATE_DEPTH = 2;
- }
-
- /* special case - Formatter can only reduce by 4 bits at most.
- * When reducing from 12 to 6 bits,
- * HW recommends we use trunc with round mode
- * (if we did nothing, trunc to 10 bits would be used)
- * note that any 12->10 bit reduction is ignored prior to DCE8,
- * as the input was 10 bits.
- */
- if (option == DITHER_OPTION_SPATIAL6_FRAME_RANDOM ||
- option == DITHER_OPTION_SPATIAL6 ||
- option == DITHER_OPTION_FM6) {
- fmt_bit_depth->flags.TRUNCATE_ENABLED = 1;
- fmt_bit_depth->flags.TRUNCATE_DEPTH = 2;
- fmt_bit_depth->flags.TRUNCATE_MODE = 1;
- }
-
- /* spatial dither
- * note that spatial modes 1-3 are never used
- */
- if (option == DITHER_OPTION_SPATIAL6_FRAME_RANDOM ||
- option == DITHER_OPTION_SPATIAL6 ||
- option == DITHER_OPTION_TRUN10_SPATIAL6 ||
- option == DITHER_OPTION_TRUN8_SPATIAL6) {
- fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1;
- fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 0;
- fmt_bit_depth->flags.HIGHPASS_RANDOM = 1;
- fmt_bit_depth->flags.RGB_RANDOM =
- (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0;
- } else if (option == DITHER_OPTION_SPATIAL8_FRAME_RANDOM ||
- option == DITHER_OPTION_SPATIAL8 ||
- option == DITHER_OPTION_SPATIAL8_FM6 ||
- option == DITHER_OPTION_TRUN10_SPATIAL8 ||
- option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) {
- fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1;
- fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 1;
- fmt_bit_depth->flags.HIGHPASS_RANDOM = 1;
- fmt_bit_depth->flags.RGB_RANDOM =
- (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0;
- } else if (option == DITHER_OPTION_SPATIAL10_FRAME_RANDOM ||
- option == DITHER_OPTION_SPATIAL10 ||
- option == DITHER_OPTION_SPATIAL10_FM8 ||
- option == DITHER_OPTION_SPATIAL10_FM6) {
- fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1;
- fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 2;
- fmt_bit_depth->flags.HIGHPASS_RANDOM = 1;
- fmt_bit_depth->flags.RGB_RANDOM =
- (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0;
- }
-
- if (option == DITHER_OPTION_SPATIAL6 ||
- option == DITHER_OPTION_SPATIAL8 ||
- option == DITHER_OPTION_SPATIAL10) {
- fmt_bit_depth->flags.FRAME_RANDOM = 0;
- } else {
- fmt_bit_depth->flags.FRAME_RANDOM = 1;
- }
-
- //////////////////////
- //// temporal dither
- //////////////////////
- if (option == DITHER_OPTION_FM6 ||
- option == DITHER_OPTION_SPATIAL8_FM6 ||
- option == DITHER_OPTION_SPATIAL10_FM6 ||
- option == DITHER_OPTION_TRUN10_FM6 ||
- option == DITHER_OPTION_TRUN8_FM6 ||
- option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) {
- fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1;
- fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 0;
- } else if (option == DITHER_OPTION_FM8 ||
- option == DITHER_OPTION_SPATIAL10_FM8 ||
- option == DITHER_OPTION_TRUN10_FM8) {
- fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1;
- fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 1;
- } else if (option == DITHER_OPTION_FM10) {
- fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1;
- fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 2;
- }
-
- fmt_bit_depth->pixel_encoding = pixel_encoding;
-}
-
-bool dml_validate_dsc(struct dc *dc, struct dc_state *new_ctx)
-{
- int i;
-
- /* Validate DSC config, dsc count validation is already done */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &new_ctx->res_ctx.pipe_ctx[i];
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dsc_config dsc_cfg;
- struct pipe_ctx *odm_pipe;
- int opp_cnt = 1;
-
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
- opp_cnt++;
-
- /* Only need to validate top pipe */
- if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe || !stream || !stream->timing.flags.DSC)
- continue;
-
- dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left
- + stream->timing.h_border_right) / opp_cnt;
- dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top
- + stream->timing.v_border_bottom;
- dsc_cfg.pixel_encoding = stream->timing.pixel_encoding;
- dsc_cfg.color_depth = stream->timing.display_color_depth;
- dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? true : false;
- dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg;
- dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt;
-
- if (pipe_ctx->stream_res.dsc && !pipe_ctx->stream_res.dsc->funcs->dsc_validate_stream(pipe_ctx->stream_res.dsc, &dsc_cfg))
- return false;
- }
- return true;
-}
-
-enum dc_status dml_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream)
-{
- enum dc_status status = DC_OK;
- struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream);
-
- if (!pipe_ctx)
- return DC_ERROR_UNEXPECTED;
-
-
- status = build_pipe_hw_param(pipe_ctx);
-
- return status;
-}
-
-void dml_acquire_dsc(const struct dc *dc,
- struct resource_context *res_ctx,
- struct display_stream_compressor **dsc,
- int pipe_idx)
-{
- int i;
- const struct resource_pool *pool = dc->res_pool;
- struct display_stream_compressor *dsc_old = dc->current_state->res_ctx.pipe_ctx[pipe_idx].stream_res.dsc;
-
- ASSERT(*dsc == NULL); /* If this ASSERT fails, dsc was not released properly */
- *dsc = NULL;
-
- /* Always do 1-to-1 mapping when number of DSCs is same as number of pipes */
- if (pool->res_cap->num_dsc == pool->res_cap->num_opp) {
- *dsc = pool->dscs[pipe_idx];
- res_ctx->is_dsc_acquired[pipe_idx] = true;
- return;
- }
-
- /* Return old DSC to avoid the need for redo it */
- if (dsc_old && !res_ctx->is_dsc_acquired[dsc_old->inst]) {
- *dsc = dsc_old;
- res_ctx->is_dsc_acquired[dsc_old->inst] = true;
- return ;
- }
-
- /* Find first free DSC */
- for (i = 0; i < pool->res_cap->num_dsc; i++)
- if (!res_ctx->is_dsc_acquired[i]) {
- *dsc = pool->dscs[i];
- res_ctx->is_dsc_acquired[i] = true;
- break;
- }
-}
-
-static bool dml_split_stream_for_mpc_or_odm(
- const struct dc *dc,
- struct resource_context *res_ctx,
- struct pipe_ctx *pri_pipe,
- struct pipe_ctx *sec_pipe,
- bool odm)
-{
- int pipe_idx = sec_pipe->pipe_idx;
- const struct resource_pool *pool = dc->res_pool;
-
- *sec_pipe = *pri_pipe;
-
- sec_pipe->pipe_idx = pipe_idx;
- sec_pipe->plane_res.mi = pool->mis[pipe_idx];
- sec_pipe->plane_res.hubp = pool->hubps[pipe_idx];
- sec_pipe->plane_res.ipp = pool->ipps[pipe_idx];
- sec_pipe->plane_res.xfm = pool->transforms[pipe_idx];
- sec_pipe->plane_res.dpp = pool->dpps[pipe_idx];
- sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst;
- sec_pipe->stream_res.dsc = NULL;
- if (odm) {
- if (pri_pipe->next_odm_pipe) {
- ASSERT(pri_pipe->next_odm_pipe != sec_pipe);
- sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe;
- sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe;
- }
- if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) {
- pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe;
- sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe;
- }
- if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) {
- pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe;
- sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe;
- }
- pri_pipe->next_odm_pipe = sec_pipe;
- sec_pipe->prev_odm_pipe = pri_pipe;
- ASSERT(sec_pipe->top_pipe == NULL);
-
- if (!sec_pipe->top_pipe)
- sec_pipe->stream_res.opp = pool->opps[pipe_idx];
- else
- sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp;
- if (sec_pipe->stream->timing.flags.DSC == 1) {
- dml_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx);
- ASSERT(sec_pipe->stream_res.dsc);
- if (sec_pipe->stream_res.dsc == NULL)
- return false;
- }
- } else {
- if (pri_pipe->bottom_pipe) {
- ASSERT(pri_pipe->bottom_pipe != sec_pipe);
- sec_pipe->bottom_pipe = pri_pipe->bottom_pipe;
- sec_pipe->bottom_pipe->top_pipe = sec_pipe;
- }
- pri_pipe->bottom_pipe = sec_pipe;
- sec_pipe->top_pipe = pri_pipe;
-
- ASSERT(pri_pipe->plane_state);
- }
-
- return true;
-}
-
-static struct pipe_ctx *dml_find_split_pipe(
- struct dc *dc,
- struct dc_state *context,
- int old_index)
-{
- struct pipe_ctx *pipe = NULL;
- int i;
-
- if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) {
- pipe = &context->res_ctx.pipe_ctx[old_index];
- pipe->pipe_idx = old_index;
- }
-
- if (!pipe)
- for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) {
- if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL
- && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) {
- if (context->res_ctx.pipe_ctx[i].stream == NULL) {
- pipe = &context->res_ctx.pipe_ctx[i];
- pipe->pipe_idx = i;
- break;
- }
- }
- }
-
- /*
- * May need to fix pipes getting tossed from 1 opp to another on flip
- * Add for debugging transient underflow during topology updates:
- * ASSERT(pipe);
- */
- if (!pipe)
- for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) {
- if (context->res_ctx.pipe_ctx[i].stream == NULL) {
- pipe = &context->res_ctx.pipe_ctx[i];
- pipe->pipe_idx = i;
- break;
- }
- }
-
- return pipe;
-}
-
-static void dml_release_dsc(struct resource_context *res_ctx,
- const struct resource_pool *pool,
- struct display_stream_compressor **dsc)
-{
- int i;
-
- for (i = 0; i < pool->res_cap->num_dsc; i++)
- if (pool->dscs[i] == *dsc) {
- res_ctx->is_dsc_acquired[i] = false;
- *dsc = NULL;
- break;
- }
-}
-
-static int dml_get_num_mpc_splits(struct pipe_ctx *pipe)
-{
- int mpc_split_count = 0;
- struct pipe_ctx *other_pipe = pipe->bottom_pipe;
-
- while (other_pipe && other_pipe->plane_state == pipe->plane_state) {
- mpc_split_count++;
- other_pipe = other_pipe->bottom_pipe;
- }
- other_pipe = pipe->top_pipe;
- while (other_pipe && other_pipe->plane_state == pipe->plane_state) {
- mpc_split_count++;
- other_pipe = other_pipe->top_pipe;
- }
-
- return mpc_split_count;
-}
-
-static bool dml_enough_pipes_for_subvp(struct dc *dc,
- struct dc_state *context)
-{
- int i = 0;
- int num_pipes = 0;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (pipe->stream && pipe->plane_state)
- num_pipes++;
- }
-
- // Sub-VP only possible if the number of "real" pipes is
- // less than or equal to half the number of available pipes
- if (num_pipes * 2 > dc->res_pool->pipe_count)
- return false;
-
- return true;
-}
-
-static int dml_validate_apply_pipe_split_flags(
- struct dc *dc,
- struct dc_state *context,
- int vlevel,
- int *split,
- bool *merge)
-{
- int i, pipe_idx, vlevel_split;
- int plane_count = 0;
- bool force_split = false;
- bool avoid_split = dc->debug.pipe_split_policy == MPC_SPLIT_AVOID;
- struct vba_vars_st *v = &context->bw_ctx.dml.vba;
- int max_mpc_comb = v->maxMpcComb;
-
- if (context->stream_count > 1) {
- if (dc->debug.pipe_split_policy == MPC_SPLIT_AVOID_MULT_DISP)
- avoid_split = true;
- } else if (dc->debug.force_single_disp_pipe_split)
- force_split = true;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- /**
- * Workaround for avoiding pipe-split in cases where we'd split
- * planes that are too small, resulting in splits that aren't
- * valid for the scaler.
- */
- if (pipe->plane_state &&
- (pipe->plane_state->dst_rect.width <= 16 ||
- pipe->plane_state->dst_rect.height <= 16 ||
- pipe->plane_state->src_rect.width <= 16 ||
- pipe->plane_state->src_rect.height <= 16))
- avoid_split = true;
-
- /* TODO: fix dc bugs and remove this split threshold thing */
- if (pipe->stream && !pipe->prev_odm_pipe &&
- (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state))
- ++plane_count;
- }
- if (plane_count > dc->res_pool->pipe_count / 2)
- avoid_split = true;
-
- /* W/A: Mode timing with borders may not work well with pipe split, avoid for this corner case */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- struct dc_crtc_timing timing;
-
- if (!pipe->stream)
- continue;
- else {
- timing = pipe->stream->timing;
- if (timing.h_border_left + timing.h_border_right
- + timing.v_border_top + timing.v_border_bottom > 0) {
- avoid_split = true;
- break;
- }
- }
- }
-
- /* Avoid split loop looks for lowest voltage level that allows most unsplit pipes possible */
- if (avoid_split) {
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++)
- if (v->NoOfDPP[vlevel][0][pipe_idx] == 1 &&
- v->ModeSupport[vlevel][0])
- break;
- /* Impossible to not split this pipe */
- if (vlevel > context->bw_ctx.dml.soc.num_states)
- vlevel = vlevel_split;
- else
- max_mpc_comb = 0;
- pipe_idx++;
- }
- v->maxMpcComb = max_mpc_comb;
- }
-
- /* Split loop sets which pipe should be split based on dml outputs and dc flags */
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- int pipe_plane = v->pipe_plane[pipe_idx];
- bool split4mpc = context->stream_count == 1 && plane_count == 1
- && dc->config.enable_4to1MPC && dc->res_pool->pipe_count >= 4;
-
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- if (split4mpc || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 4)
- split[i] = 4;
- else if (force_split || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 2)
- split[i] = 2;
-
- if ((pipe->stream->view_format ==
- VIEW_3D_FORMAT_SIDE_BY_SIDE ||
- pipe->stream->view_format ==
- VIEW_3D_FORMAT_TOP_AND_BOTTOM) &&
- (pipe->stream->timing.timing_3d_format ==
- TIMING_3D_FORMAT_TOP_AND_BOTTOM ||
- pipe->stream->timing.timing_3d_format ==
- TIMING_3D_FORMAT_SIDE_BY_SIDE))
- split[i] = 2;
- if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) {
- split[i] = 2;
- v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_2to1;
- }
- if (dc->debug.force_odm_combine_4to1 & (1 << pipe->stream_res.tg->inst)) {
- split[i] = 4;
- v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_4to1;
- }
- /*420 format workaround*/
- if (pipe->stream->timing.h_addressable > 7680 &&
- pipe->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) {
- split[i] = 4;
- }
-
- v->ODMCombineEnabled[pipe_plane] =
- v->ODMCombineEnablePerState[vlevel][pipe_plane];
-
- if (v->ODMCombineEnabled[pipe_plane] == dm_odm_combine_mode_disabled) {
- if (dml_get_num_mpc_splits(pipe) == 1) {
- /*If need split for mpc but 2 way split already*/
- if (split[i] == 4)
- split[i] = 2; /* 2 -> 4 MPC */
- else if (split[i] == 2)
- split[i] = 0; /* 2 -> 2 MPC */
- else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state)
- merge[i] = true; /* 2 -> 1 MPC */
- } else if (dml_get_num_mpc_splits(pipe) == 3) {
- /*If need split for mpc but 4 way split already*/
- if (split[i] == 2 && ((pipe->top_pipe && !pipe->top_pipe->top_pipe)
- || !pipe->bottom_pipe)) {
- merge[i] = true; /* 4 -> 2 MPC */
- } else if (split[i] == 0 && pipe->top_pipe &&
- pipe->top_pipe->plane_state == pipe->plane_state)
- merge[i] = true; /* 4 -> 1 MPC */
- split[i] = 0;
- } else if (dml_get_num_mpc_splits(pipe)) {
- /* ODM -> MPC transition */
- if (pipe->prev_odm_pipe) {
- split[i] = 0;
- merge[i] = true;
- }
- }
- } else {
- if (dml_get_num_mpc_splits(pipe) == 1) {
- /*If need split for odm but 2 way split already*/
- if (split[i] == 4)
- split[i] = 2; /* 2 -> 4 ODM */
- else if (split[i] == 2)
- split[i] = 0; /* 2 -> 2 ODM */
- else if (pipe->prev_odm_pipe) {
- ASSERT(0); /* NOT expected yet */
- merge[i] = true; /* exit ODM */
- }
- } else if (dml_get_num_mpc_splits(pipe) == 3) {
- /*If need split for odm but 4 way split already*/
- if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe)
- || !pipe->next_odm_pipe)) {
- ASSERT(0); /* NOT expected yet */
- merge[i] = true; /* 4 -> 2 ODM */
- } else if (split[i] == 0 && pipe->prev_odm_pipe) {
- ASSERT(0); /* NOT expected yet */
- merge[i] = true; /* exit ODM */
- }
- split[i] = 0;
- } else if (dml_get_num_mpc_splits(pipe)) {
- /* MPC -> ODM transition */
- ASSERT(0); /* NOT expected yet */
- if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) {
- split[i] = 0;
- merge[i] = true;
- }
- }
- }
-
- /* Adjust dppclk when split is forced, do not bother with dispclk */
- if (split[i] != 0 && v->NoOfDPP[vlevel][max_mpc_comb][pipe_idx] == 1)
- v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] /= 2;
- pipe_idx++;
- }
-
- return vlevel;
-}
-
-static void dml_set_phantom_stream_timing(struct dc *dc,
- struct dc_state *context,
- struct pipe_ctx *ref_pipe,
- struct dc_stream_state *phantom_stream)
-{
- // phantom_vactive = blackout (latency + margin) + fw_processing_delays + pstate allow width
- uint32_t phantom_vactive_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us + 60 +
- dc->caps.subvp_fw_processing_delay_us +
- dc->caps.subvp_pstate_allow_width_us;
- uint32_t phantom_vactive = ((double)phantom_vactive_us/1000000) *
- (ref_pipe->stream->timing.pix_clk_100hz * 100) /
- (double)ref_pipe->stream->timing.h_total;
- uint32_t phantom_bp = ref_pipe->pipe_dlg_param.vstartup_start;
-
- phantom_stream->dst.y = 0;
- phantom_stream->dst.height = phantom_vactive;
- phantom_stream->src.y = 0;
- phantom_stream->src.height = phantom_vactive;
-
- phantom_stream->timing.v_addressable = phantom_vactive;
- phantom_stream->timing.v_front_porch = 1;
- phantom_stream->timing.v_total = phantom_stream->timing.v_addressable +
- phantom_stream->timing.v_front_porch +
- phantom_stream->timing.v_sync_width +
- phantom_bp;
-}
-
-static struct dc_stream_state *dml_enable_phantom_stream(struct dc *dc,
- struct dc_state *context,
- struct pipe_ctx *ref_pipe)
-{
- struct dc_stream_state *phantom_stream = NULL;
-
- phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink);
- phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
- phantom_stream->dpms_off = true;
- phantom_stream->mall_stream_config.type = SUBVP_PHANTOM;
- phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream;
- ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN;
- ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream;
-
- /* stream has limited viewport and small timing */
- memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing));
- memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src));
- memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst));
- dml_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream);
-
- dc_add_stream_to_ctx(dc, context, phantom_stream);
- dc->hwss.apply_ctx_to_hw(dc, context);
- return phantom_stream;
-}
-
-static void dml_enable_phantom_plane(struct dc *dc,
- struct dc_state *context,
- struct dc_stream_state *phantom_stream,
- struct pipe_ctx *main_pipe)
-{
- struct dc_plane_state *phantom_plane = NULL;
- struct dc_plane_state *prev_phantom_plane = NULL;
- struct pipe_ctx *curr_pipe = main_pipe;
-
- while (curr_pipe) {
- if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state)
- phantom_plane = prev_phantom_plane;
- else
- phantom_plane = dc_create_plane_state(dc);
-
- memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address));
- memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality,
- sizeof(phantom_plane->scaling_quality));
- memcpy(&phantom_plane->src_rect, &curr_pipe->plane_state->src_rect, sizeof(phantom_plane->src_rect));
- memcpy(&phantom_plane->dst_rect, &curr_pipe->plane_state->dst_rect, sizeof(phantom_plane->dst_rect));
- memcpy(&phantom_plane->clip_rect, &curr_pipe->plane_state->clip_rect, sizeof(phantom_plane->clip_rect));
- memcpy(&phantom_plane->plane_size, &curr_pipe->plane_state->plane_size,
- sizeof(phantom_plane->plane_size));
- memcpy(&phantom_plane->tiling_info, &curr_pipe->plane_state->tiling_info,
- sizeof(phantom_plane->tiling_info));
- memcpy(&phantom_plane->dcc, &curr_pipe->plane_state->dcc, sizeof(phantom_plane->dcc));
- /* Currently compat_level is undefined in dc_state
- * phantom_plane->compat_level = curr_pipe->plane_state->compat_level;
- */
- phantom_plane->format = curr_pipe->plane_state->format;
- phantom_plane->rotation = curr_pipe->plane_state->rotation;
- phantom_plane->visible = curr_pipe->plane_state->visible;
-
- /* Shadow pipe has small viewport. */
- phantom_plane->clip_rect.y = 0;
- phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable;
-
- dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context);
-
- curr_pipe = curr_pipe->bottom_pipe;
- prev_phantom_plane = phantom_plane;
- }
-}
-
-static void dml_add_phantom_pipes(struct dc *dc, struct dc_state *context)
-{
- int i = 0;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- struct dc_stream_state *ref_stream = pipe->stream;
- // Only construct phantom stream for top pipes that have plane enabled
- if (!pipe->top_pipe && pipe->plane_state && pipe->stream &&
- pipe->stream->mall_stream_config.type == SUBVP_NONE) {
- struct dc_stream_state *phantom_stream = NULL;
-
- phantom_stream = dml_enable_phantom_stream(dc, context, pipe);
- dml_enable_phantom_plane(dc, context, phantom_stream, pipe);
- }
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (pipe->plane_state && pipe->stream &&
- pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
- pipe->stream->use_dynamic_meta = false;
- pipe->plane_state->flip_immediate = false;
- if (!resource_build_scaling_params(pipe)) {
- // Log / remove phantom pipes since failed to build scaling params
- }
- }
- }
-}
-
-static void dml_remove_phantom_pipes(struct dc *dc, struct dc_state *context)
-{
- int i;
- bool removed_pipe = false;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- // build scaling params for phantom pipes
- if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
- dc_rem_all_planes_for_stream(dc, pipe->stream, context);
- dc_remove_stream_from_ctx(dc, context, pipe->stream);
- removed_pipe = true;
- }
-
- // Clear all phantom stream info
- if (pipe->stream) {
- pipe->stream->mall_stream_config.type = SUBVP_NONE;
- pipe->stream->mall_stream_config.paired_stream = NULL;
- }
- }
- if (removed_pipe)
- dc->hwss.apply_ctx_to_hw(dc, context);
-}
-
-/*
- * If the input state contains no upstream planes for a particular pipe (i.e. only timing)
- * we need to populate some "conservative" plane information as DML cannot handle "no planes"
- */
-static void populate_default_plane_from_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_pipe_params_st *pipe)
-{
- pipe->src.is_hsplit = pipe->dest.odm_combine != dm_odm_combine_mode_disabled;
- pipe->src.source_scan = dm_horz;
- pipe->src.sw_mode = dm_sw_4kb_s;
- pipe->src.macro_tile_size = dm_64k_tile;
- pipe->src.viewport_width = timing->h_addressable;
- if (pipe->src.viewport_width > 1920)
- pipe->src.viewport_width = 1920;
- pipe->src.viewport_height = timing->v_addressable;
- if (pipe->src.viewport_height > 1080)
- pipe->src.viewport_height = 1080;
- pipe->src.surface_height_y = pipe->src.viewport_height;
- pipe->src.surface_width_y = pipe->src.viewport_width;
- pipe->src.surface_height_c = pipe->src.viewport_height;
- pipe->src.surface_width_c = pipe->src.viewport_width;
- pipe->src.data_pitch = ((pipe->src.viewport_width + 255) / 256) * 256;
- pipe->src.source_format = dm_444_32;
- pipe->dest.recout_width = pipe->src.viewport_width;
- pipe->dest.recout_height = pipe->src.viewport_height;
- pipe->dest.full_recout_width = pipe->dest.recout_width;
- pipe->dest.full_recout_height = pipe->dest.recout_height;
- pipe->scale_ratio_depth.lb_depth = dm_lb_16;
- pipe->scale_ratio_depth.hscl_ratio = 1.0;
- pipe->scale_ratio_depth.vscl_ratio = 1.0;
- pipe->scale_ratio_depth.scl_enable = 0;
- pipe->scale_taps.htaps = 1;
- pipe->scale_taps.vtaps = 1;
- pipe->dest.vtotal_min = timing->v_total;
- pipe->dest.vtotal_max = timing->v_total;
-
- if (pipe->dest.odm_combine == dm_odm_combine_mode_2to1) {
- pipe->src.viewport_width /= 2;
- pipe->dest.recout_width /= 2;
- } else if (pipe->dest.odm_combine == dm_odm_combine_mode_4to1) {
- pipe->src.viewport_width /= 4;
- pipe->dest.recout_width /= 4;
- }
-
- pipe->src.dcc = false;
- pipe->src.dcc_rate = 1;
-}
-
-/*
- * If the pipe is not blending (i.e. pipe_ctx->top pipe == null) then its
- * hsplit group is equal to its own pipe ID
- * Otherwise, all pipes part of the same blending tree have the same hsplit group
- * ID as the top most pipe
- *
- * If the pipe ctx is ODM combined, then similar logic follows
- */
-static void populate_hsplit_group_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe)
-{
- e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx;
-
- if (dc_pipe_ctx->top_pipe && dc_pipe_ctx->top_pipe->plane_state
- == dc_pipe_ctx->plane_state) {
- struct pipe_ctx *first_pipe = dc_pipe_ctx->top_pipe;
- int split_idx = 0;
-
- while (first_pipe->top_pipe && first_pipe->top_pipe->plane_state
- == dc_pipe_ctx->plane_state) {
- first_pipe = first_pipe->top_pipe;
- split_idx++;
- }
-
- /* Treat 4to1 mpc combine as an mpo of 2 2-to-1 combines */
- if (split_idx == 0)
- e2e_pipe->pipe.src.hsplit_grp = first_pipe->pipe_idx;
- else if (split_idx == 1)
- e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx;
- else if (split_idx == 2)
- e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->top_pipe->pipe_idx;
-
- } else if (dc_pipe_ctx->prev_odm_pipe) {
- struct pipe_ctx *first_pipe = dc_pipe_ctx->prev_odm_pipe;
-
- while (first_pipe->prev_odm_pipe)
- first_pipe = first_pipe->prev_odm_pipe;
- e2e_pipe->pipe.src.hsplit_grp = first_pipe->pipe_idx;
- }
-}
-
-static void populate_dml_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe, int always_scale)
-{
- const struct dc_plane_state *pln = dc_pipe_ctx->plane_state;
- const struct scaler_data *scl = &dc_pipe_ctx->plane_res.scl_data;
-
- e2e_pipe->pipe.src.immediate_flip = pln->flip_immediate;
- e2e_pipe->pipe.src.is_hsplit = (dc_pipe_ctx->bottom_pipe && dc_pipe_ctx->bottom_pipe->plane_state == pln)
- || (dc_pipe_ctx->top_pipe && dc_pipe_ctx->top_pipe->plane_state == pln)
- || e2e_pipe->pipe.dest.odm_combine != dm_odm_combine_mode_disabled;
-
- /* stereo is not split */
- if (pln->stereo_format == PLANE_STEREO_FORMAT_SIDE_BY_SIDE ||
- pln->stereo_format == PLANE_STEREO_FORMAT_TOP_AND_BOTTOM) {
- e2e_pipe->pipe.src.is_hsplit = false;
- e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx;
- }
-
- e2e_pipe->pipe.src.source_scan = pln->rotation == ROTATION_ANGLE_90
- || pln->rotation == ROTATION_ANGLE_270 ? dm_vert : dm_horz;
- e2e_pipe->pipe.src.viewport_y_y = scl->viewport.y;
- e2e_pipe->pipe.src.viewport_y_c = scl->viewport_c.y;
- e2e_pipe->pipe.src.viewport_width = scl->viewport.width;
- e2e_pipe->pipe.src.viewport_width_c = scl->viewport_c.width;
- e2e_pipe->pipe.src.viewport_height = scl->viewport.height;
- e2e_pipe->pipe.src.viewport_height_c = scl->viewport_c.height;
- e2e_pipe->pipe.src.viewport_width_max = pln->src_rect.width;
- e2e_pipe->pipe.src.viewport_height_max = pln->src_rect.height;
- e2e_pipe->pipe.src.surface_width_y = pln->plane_size.surface_size.width;
- e2e_pipe->pipe.src.surface_height_y = pln->plane_size.surface_size.height;
- e2e_pipe->pipe.src.surface_width_c = pln->plane_size.chroma_size.width;
- e2e_pipe->pipe.src.surface_height_c = pln->plane_size.chroma_size.height;
-
- if (pln->format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA
- || pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) {
- e2e_pipe->pipe.src.data_pitch = pln->plane_size.surface_pitch;
- e2e_pipe->pipe.src.data_pitch_c = pln->plane_size.chroma_pitch;
- e2e_pipe->pipe.src.meta_pitch = pln->dcc.meta_pitch;
- e2e_pipe->pipe.src.meta_pitch_c = pln->dcc.meta_pitch_c;
- } else {
- e2e_pipe->pipe.src.data_pitch = pln->plane_size.surface_pitch;
- e2e_pipe->pipe.src.meta_pitch = pln->dcc.meta_pitch;
- }
- e2e_pipe->pipe.src.dcc = pln->dcc.enable;
- e2e_pipe->pipe.src.dcc_rate = 1;
- e2e_pipe->pipe.dest.recout_width = scl->recout.width;
- e2e_pipe->pipe.dest.recout_height = scl->recout.height;
- e2e_pipe->pipe.dest.full_recout_height = scl->recout.height;
- e2e_pipe->pipe.dest.full_recout_width = scl->recout.width;
- if (e2e_pipe->pipe.dest.odm_combine == dm_odm_combine_mode_2to1)
- e2e_pipe->pipe.dest.full_recout_width *= 2;
- else if (e2e_pipe->pipe.dest.odm_combine == dm_odm_combine_mode_4to1)
- e2e_pipe->pipe.dest.full_recout_width *= 4;
- else {
- struct pipe_ctx *split_pipe = dc_pipe_ctx->bottom_pipe;
-
- while (split_pipe && split_pipe->plane_state == pln) {
- e2e_pipe->pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width;
- split_pipe = split_pipe->bottom_pipe;
- }
- split_pipe = dc_pipe_ctx->top_pipe;
- while (split_pipe && split_pipe->plane_state == pln) {
- e2e_pipe->pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width;
- split_pipe = split_pipe->top_pipe;
- }
- }
-
- e2e_pipe->pipe.scale_ratio_depth.lb_depth = dm_lb_16;
- e2e_pipe->pipe.scale_ratio_depth.hscl_ratio = (double) scl->ratios.horz.value / (1ULL<<32);
- e2e_pipe->pipe.scale_ratio_depth.hscl_ratio_c = (double) scl->ratios.horz_c.value / (1ULL<<32);
- e2e_pipe->pipe.scale_ratio_depth.vscl_ratio = (double) scl->ratios.vert.value / (1ULL<<32);
- e2e_pipe->pipe.scale_ratio_depth.vscl_ratio_c = (double) scl->ratios.vert_c.value / (1ULL<<32);
- e2e_pipe->pipe.scale_ratio_depth.scl_enable =
- scl->ratios.vert.value != dc_fixpt_one.value
- || scl->ratios.horz.value != dc_fixpt_one.value
- || scl->ratios.vert_c.value != dc_fixpt_one.value
- || scl->ratios.horz_c.value != dc_fixpt_one.value /*Lb only or Full scl*/
- || always_scale; /*support always scale*/
- e2e_pipe->pipe.scale_taps.htaps = scl->taps.h_taps;
- e2e_pipe->pipe.scale_taps.htaps_c = scl->taps.h_taps_c;
- e2e_pipe->pipe.scale_taps.vtaps = scl->taps.v_taps;
- e2e_pipe->pipe.scale_taps.vtaps_c = scl->taps.v_taps_c;
-
- /* Currently compat_level is not defined. Commenting it until further resolution
- * if (pln->compat_level == DC_LEGACY_TILING_ADDR_GEN_TWO) {
- swizzle_to_dml_params(pln->tiling_info.gfx9.swizzle,
- &e2e_pipe->pipe.src.sw_mode);
- e2e_pipe->pipe.src.macro_tile_size =
- swizzle_mode_to_macro_tile_size(pln->tiling_info.gfx9.swizzle);
- } else {
- gfx10array_mode_to_dml_params(pln->tiling_info.gfx10compatible.array_mode,
- pln->compat_level,
- &e2e_pipe->pipe.src.sw_mode);
- e2e_pipe->pipe.src.macro_tile_size = dm_4k_tile;
- }*/
-
- e2e_pipe->pipe.src.source_format = dc_source_format_to_dml_source_format(pln->format);
-}
-
-static void populate_dml_cursor_parameters_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe)
-{
- /*
- * For graphic plane, cursor number is 1, nv12 is 0
- * bw calculations due to cursor on/off
- */
- if (dc_pipe_ctx->plane_state &&
- (dc_pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE ||
- dc_pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM))
- e2e_pipe->pipe.src.num_cursors = 0;
- else
- e2e_pipe->pipe.src.num_cursors = 1;
-
- e2e_pipe->pipe.src.cur0_src_width = 256;
- e2e_pipe->pipe.src.cur0_bpp = dm_cur_32bit;
-}
-
-static int populate_dml_pipes_from_context_base(
- struct dc *dc,
- struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- bool fast_validate)
-{
- int pipe_cnt, i;
- bool synchronized_vblank = true;
- struct resource_context *res_ctx = &context->res_ctx;
-
- for (i = 0, pipe_cnt = -1; i < dc->res_pool->pipe_count; i++) {
- if (!res_ctx->pipe_ctx[i].stream)
- continue;
-
- if (pipe_cnt < 0) {
- pipe_cnt = i;
- continue;
- }
-
- if (res_ctx->pipe_ctx[pipe_cnt].stream == res_ctx->pipe_ctx[i].stream)
- continue;
-
- if (dc->debug.disable_timing_sync ||
- (!resource_are_streams_timing_synchronizable(
- res_ctx->pipe_ctx[pipe_cnt].stream,
- res_ctx->pipe_ctx[i].stream) &&
- !resource_are_vblanks_synchronizable(
- res_ctx->pipe_ctx[pipe_cnt].stream,
- res_ctx->pipe_ctx[i].stream))) {
- synchronized_vblank = false;
- break;
- }
- }
-
- for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
- struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing;
-
- struct audio_check aud_check = {0};
- if (!res_ctx->pipe_ctx[i].stream)
- continue;
-
- /* todo:
- pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0;
- pipes[pipe_cnt].pipe.src.dcc = 0;
- pipes[pipe_cnt].pipe.src.vm = 0;*/
-
- pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
-
- pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC;
- /* todo: rotation?*/
- pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h;
- if (res_ctx->pipe_ctx[i].stream->use_dynamic_meta) {
- pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = true;
- /* 1/2 vblank */
- pipes[pipe_cnt].pipe.src.dynamic_metadata_lines_before_active =
- (timing->v_total - timing->v_addressable
- - timing->v_border_top - timing->v_border_bottom) / 2;
- /* 36 bytes dp, 32 hdmi */
- pipes[pipe_cnt].pipe.src.dynamic_metadata_xmit_bytes =
- dc_is_dp_signal(res_ctx->pipe_ctx[i].stream->signal) ? 36 : 32;
- }
- pipes[pipe_cnt].pipe.dest.synchronized_vblank_all_planes = synchronized_vblank;
-
- dc_timing_to_dml_timing(timing, &pipes[pipe_cnt].pipe.dest);
- pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min;
- pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max;
-
- pipes[pipe_cnt].pipe.dest.otg_inst = res_ctx->pipe_ctx[i].stream_res.tg->inst;
-
- pipes[pipe_cnt].pipe.dest.odm_combine = get_dml_odm_combine(&res_ctx->pipe_ctx[i]);
-
- populate_hsplit_group_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt]);
-
- pipes[pipe_cnt].dout.dp_lanes = 4;
- pipes[pipe_cnt].dout.is_virtual = 0;
- pipes[pipe_cnt].dout.output_type = get_dml_output_type(res_ctx->pipe_ctx[i].stream->signal);
- if (pipes[pipe_cnt].dout.output_type < 0) {
- pipes[pipe_cnt].dout.output_type = dm_dp;
- pipes[pipe_cnt].dout.is_virtual = 1;
- }
-
- populate_color_depth_and_encoding_from_timing(&res_ctx->pipe_ctx[i].stream->timing, &pipes[pipe_cnt].dout);
-
- if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC)
- pipes[pipe_cnt].dout.output_bpp = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.bits_per_pixel / 16.0;
-
- /* todo: default max for now, until there is logic reflecting this in dc*/
- pipes[pipe_cnt].dout.dsc_input_bpc = 12;
- /*fill up the audio sample rate (unit in kHz)*/
- get_audio_check(&res_ctx->pipe_ctx[i].stream->audio_info, &aud_check);
- pipes[pipe_cnt].dout.max_audio_sample_rate = aud_check.max_audiosample_rate / 1000;
-
- populate_dml_cursor_parameters_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt]);
-
- if (!res_ctx->pipe_ctx[i].plane_state) {
- populate_default_plane_from_timing(timing, &pipes[pipe_cnt].pipe);
- } else {
- populate_dml_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt], dc->debug.always_scale);
- }
-
- pipe_cnt++;
- }
-
- /* populate writeback information */
- if (dc->res_pool)
- dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, pipes);
-
- return pipe_cnt;
-}
-
-static int dml_populate_dml_pipes_from_context(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- bool fast_validate)
-{
- int i, pipe_cnt;
- struct resource_context *res_ctx = &context->res_ctx;
- struct pipe_ctx *pipe;
-
- populate_dml_pipes_from_context_base(dc, context, pipes, fast_validate);
-
- for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
- struct dc_crtc_timing *timing;
-
- if (!res_ctx->pipe_ctx[i].stream)
- continue;
- pipe = &res_ctx->pipe_ctx[i];
- timing = &pipe->stream->timing;
-
- pipes[pipe_cnt].pipe.src.gpuvm = true;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
- pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
-
- pipes[pipe_cnt].dout.dsc_input_bpc = 0;
- if (pipes[pipe_cnt].dout.dsc_enable) {
- switch (timing->display_color_depth) {
- case COLOR_DEPTH_888:
- pipes[pipe_cnt].dout.dsc_input_bpc = 8;
- break;
- case COLOR_DEPTH_101010:
- pipes[pipe_cnt].dout.dsc_input_bpc = 10;
- break;
- case COLOR_DEPTH_121212:
- pipes[pipe_cnt].dout.dsc_input_bpc = 12;
- break;
- default:
- ASSERT(0);
- break;
- }
- }
- pipe_cnt++;
- }
- dc->config.enable_4to1MPC = false;
- if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
- if (is_dual_plane(pipe->plane_state->format)
- && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) {
- dc->config.enable_4to1MPC = true;
- } else if (!is_dual_plane(pipe->plane_state->format)) {
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
- pipes[0].pipe.src.unbounded_req_mode = true;
- }
- }
-
- return pipe_cnt;
-}
-
-static void dml_full_validate_bw_helper(struct dc *dc,
- struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int *vlevel,
- int *split,
- bool *merge,
- int *pipe_cnt)
-{
- struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
-
- /*
- * DML favors voltage over p-state, but we're more interested in
- * supporting p-state over voltage. We can't support p-state in
- * prefetch mode > 0 so try capping the prefetch mode to start.
- */
- context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank =
- dm_allow_self_refresh_and_mclk_switch;
- *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
- /* This may adjust vlevel and maxMpcComb */
- if (*vlevel < context->bw_ctx.dml.soc.num_states)
- *vlevel = dml_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
-
- /* Conditions for setting up phantom pipes for SubVP:
- * 1. Not force disable SubVP
- * 2. Full update (i.e. !fast_validate)
- * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?)
- * 4. Display configuration passes validation
- * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch)
- */
- if (!dc->debug.force_disable_subvp &&
- dml_enough_pipes_for_subvp(dc, context) &&
- *vlevel < context->bw_ctx.dml.soc.num_states &&
- (vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported ||
- dc->debug.force_subvp_mclk_switch)) {
-
- dml_add_phantom_pipes(dc, context);
-
- /* Create input to DML based on new context which includes phantom pipes
- * TODO: Input to DML should mark which pipes are phantom
- */
- *pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, false);
- *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
- if (*vlevel < context->bw_ctx.dml.soc.num_states) {
- memset(split, 0, MAX_PIPES * sizeof(*split));
- memset(merge, 0, MAX_PIPES * sizeof(*merge));
- *vlevel = dml_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
- }
-
- // If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
- // remove phantom pipes and repopulate dml pipes
- if (*vlevel == context->bw_ctx.dml.soc.num_states ||
- vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) {
- dml_remove_phantom_pipes(dc, context);
- *pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, false);
- }
- }
-}
-
-static void dcn20_adjust_adaptive_sync_v_startup(
- const struct dc_crtc_timing *dc_crtc_timing, int *vstartup_start)
-{
- struct dc_crtc_timing patched_crtc_timing;
- uint32_t asic_blank_end = 0;
- uint32_t asic_blank_start = 0;
- uint32_t newVstartup = 0;
-
- patched_crtc_timing = *dc_crtc_timing;
-
- if (patched_crtc_timing.flags.INTERLACE == 1) {
- if (patched_crtc_timing.v_front_porch < 2)
- patched_crtc_timing.v_front_porch = 2;
- } else {
- if (patched_crtc_timing.v_front_porch < 1)
- patched_crtc_timing.v_front_porch = 1;
- }
-
- /* blank_start = frame end - front porch */
- asic_blank_start = patched_crtc_timing.v_total -
- patched_crtc_timing.v_front_porch;
-
- /* blank_end = blank_start - active */
- asic_blank_end = asic_blank_start -
- patched_crtc_timing.v_border_bottom -
- patched_crtc_timing.v_addressable -
- patched_crtc_timing.v_border_top;
-
- newVstartup = asic_blank_end + (patched_crtc_timing.v_total - asic_blank_start);
-
- *vstartup_start = ((newVstartup > *vstartup_start) ? newVstartup : *vstartup_start);
-}
-
-static bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx)
-{
- return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
- pipe_ctx->link_res.hpo_dp_link_enc &&
- dc_is_dp_signal(pipe_ctx->stream->signal));
-}
-
-static bool is_dtbclk_required(struct dc *dc, struct dc_state *context)
-{
- int i;
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-#if defined (CONFIG_DRM_AMD_DC_DP2_0)
- if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i]))
- return true;
-#endif
- }
- return false;
-}
-
-static void dml_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
-{
- if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {
- context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
- context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
- context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
- }
-}
-
-static bool dml_internal_validate(
- struct dc *dc,
- struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int *pipe_cnt_out,
- int *vlevel_out,
- bool fast_validate)
-{
- bool out = false;
- bool repopulate_pipes = false;
- int split[MAX_PIPES] = { 0 };
- bool merge[MAX_PIPES] = { false };
- bool newly_split[MAX_PIPES] = { false };
- int pipe_cnt, i, pipe_idx, vlevel;
- struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
-
- ASSERT(pipes);
- if (!pipes)
- return false;
-
- // For each full update, remove all existing phantom pipes first
- dml_remove_phantom_pipes(dc, context);
-
- dml_update_soc_for_wm_a(dc, context);
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (pipe->plane_state) {
- // On initial pass through DML, we intend to use MALL for SS on all
- // (non-PSR) surfaces with none using MALL for P-State
- // 'mall_plane_config': is not a member of 'dc_plane_state' - commenting it out till mall_plane_config gets supported in dc_plant_state
- //if (pipe->stream && pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED)
- // pipe->plane_state->mall_plane_config.use_mall_for_ss = true;
- }
- }
- pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
-
- if (!pipe_cnt) {
- out = true;
- goto validate_out;
- }
-
- dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt);
-
- if (!fast_validate) {
- dml_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt);
- }
-
- if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states ||
- vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) {
- /*
- * If mode is unsupported or there's still no p-state support then
- * fall back to favoring voltage.
- *
- * We don't actually support prefetch mode 2, so require that we
- * at least support prefetch mode 1.
- */
- context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank =
- dm_allow_self_refresh;
-
- vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
- if (vlevel < context->bw_ctx.dml.soc.num_states) {
- memset(split, 0, sizeof(split));
- memset(merge, 0, sizeof(merge));
- vlevel = dml_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
- }
- }
-
- dml_log_mode_support_params(&context->bw_ctx.dml);
-
- if (vlevel == context->bw_ctx.dml.soc.num_states)
- goto validate_fail;
-
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *mpo_pipe = pipe->bottom_pipe;
-
- if (!pipe->stream)
- continue;
-
- /* We only support full screen mpo with ODM */
- if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled
- && pipe->plane_state && mpo_pipe
- && memcmp(&mpo_pipe->plane_res.scl_data.recout,
- &pipe->plane_res.scl_data.recout,
- sizeof(struct rect)) != 0) {
- ASSERT(mpo_pipe->plane_state != pipe->plane_state);
- goto validate_fail;
- }
- pipe_idx++;
- }
-
- /* merge pipes if necessary */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- /*skip pipes that don't need merging*/
- if (!merge[i])
- continue;
-
- /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */
- if (pipe->prev_odm_pipe) {
- /*split off odm pipe*/
- pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe;
- if (pipe->next_odm_pipe)
- pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe;
-
- pipe->bottom_pipe = NULL;
- pipe->next_odm_pipe = NULL;
- pipe->plane_state = NULL;
- pipe->stream = NULL;
- pipe->top_pipe = NULL;
- pipe->prev_odm_pipe = NULL;
- if (pipe->stream_res.dsc)
- dml_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc);
- memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
- memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
- repopulate_pipes = true;
- } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) {
- struct pipe_ctx *top_pipe = pipe->top_pipe;
- struct pipe_ctx *bottom_pipe = pipe->bottom_pipe;
-
- top_pipe->bottom_pipe = bottom_pipe;
- if (bottom_pipe)
- bottom_pipe->top_pipe = top_pipe;
-
- pipe->top_pipe = NULL;
- pipe->bottom_pipe = NULL;
- pipe->plane_state = NULL;
- pipe->stream = NULL;
- memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
- memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
- repopulate_pipes = true;
- } else
- ASSERT(0); /* Should never try to merge master pipe */
-
- }
-
- for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
- struct pipe_ctx *hsplit_pipe = NULL;
- bool odm;
- int old_index = -1;
-
- if (!pipe->stream || newly_split[i])
- continue;
-
- pipe_idx++;
- odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled;
-
- if (!pipe->plane_state && !odm)
- continue;
-
- if (split[i]) {
- if (odm) {
- if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe)
- old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
- else if (old_pipe->next_odm_pipe)
- old_index = old_pipe->next_odm_pipe->pipe_idx;
- } else {
- if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
- old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx;
- else if (old_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
- old_index = old_pipe->bottom_pipe->pipe_idx;
- }
- hsplit_pipe = dml_find_split_pipe(dc, context, old_index);
- ASSERT(hsplit_pipe);
- if (!hsplit_pipe)
- goto validate_fail;
-
- if (!dml_split_stream_for_mpc_or_odm(
- dc, &context->res_ctx,
- pipe, hsplit_pipe, odm))
- goto validate_fail;
-
- newly_split[hsplit_pipe->pipe_idx] = true;
- repopulate_pipes = true;
- }
- if (split[i] == 4) {
- struct pipe_ctx *pipe_4to1;
-
- if (odm && old_pipe->next_odm_pipe)
- old_index = old_pipe->next_odm_pipe->pipe_idx;
- else if (!odm && old_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
- old_index = old_pipe->bottom_pipe->pipe_idx;
- else
- old_index = -1;
- pipe_4to1 = dml_find_split_pipe(dc, context, old_index);
- ASSERT(pipe_4to1);
- if (!pipe_4to1)
- goto validate_fail;
- if (!dml_split_stream_for_mpc_or_odm(
- dc, &context->res_ctx,
- pipe, pipe_4to1, odm))
- goto validate_fail;
- newly_split[pipe_4to1->pipe_idx] = true;
-
- if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe
- && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe)
- old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
- else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->bottom_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
- old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx;
- else
- old_index = -1;
- pipe_4to1 = dml_find_split_pipe(dc, context, old_index);
- ASSERT(pipe_4to1);
- if (!pipe_4to1)
- goto validate_fail;
- if (!dml_split_stream_for_mpc_or_odm(
- dc, &context->res_ctx,
- hsplit_pipe, pipe_4to1, odm))
- goto validate_fail;
- newly_split[pipe_4to1->pipe_idx] = true;
- }
- if (odm)
- dml_build_mapped_resource(dc, context, pipe->stream);
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (pipe->plane_state) {
- if (!resource_build_scaling_params(pipe))
- goto validate_fail;
- }
- }
-
- /* Actual dsc count per stream dsc validation*/
- if (!dml_validate_dsc(dc, context)) {
- vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE;
- goto validate_fail;
- }
-
- if (repopulate_pipes)
- pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
- *vlevel_out = vlevel;
- *pipe_cnt_out = pipe_cnt;
-
- out = true;
- goto validate_out;
-
-validate_fail:
- out = false;
-
-validate_out:
- return out;
-}
-
-static void dml_calculate_dlg_params(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt,
- int vlevel)
-{
- int i, pipe_idx;
- int plane_count;
-
- /* Writeback MCIF_WB arbitration parameters */
- if (dc->res_pool)
- dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt);
-
- context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000;
- context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000;
- context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000;
- context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16;
- context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000;
- context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000;
- context->bw_ctx.bw.dcn.clk.p_state_change_support =
- context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
- != dm_dram_clock_change_unsupported;
-
- context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
- /* 'z9_support': is not a member of 'dc_clocks' - Commenting out till we have this support in dc_clocks
- * context->bw_ctx.bw.dcn.clk.z9_support = (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) ?
- DCN_Z9_SUPPORT_ALLOW : DCN_Z9_SUPPORT_DISALLOW;
- */
- plane_count = 0;
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (context->res_ctx.pipe_ctx[i].plane_state)
- plane_count++;
- }
-
- /* Commented out as per above error for now.
- if (plane_count == 0)
- context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW;
- */
- context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
- /* TODO : Uncomment the below line and make changes
- * as per DML nomenclature once it is available.
- * context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = context->bw_ctx.dml.vba.fclk_pstate_support;
- */
-
- if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
- context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz;
-
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
- pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
- pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
- pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
- pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
- if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
- // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
- context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
- context->res_ctx.pipe_ctx[i].unbounded_req = false;
- } else {
- context->res_ctx.pipe_ctx[i].det_buffer_size_kb = context->bw_ctx.dml.ip.det_buffer_size_kbytes;
- context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode;
- }
-
- if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
- context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
- context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz =
- pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
- context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
- pipe_idx++;
- }
- /*save a original dppclock copy*/
- context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
- context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
- context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz * 1000;
- context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz * 1000;
- context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes
- - context->bw_ctx.dml.ip.det_buffer_size_kbytes * pipe_idx;
-
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- bool cstate_en = context->bw_ctx.dml.vba.PrefetchMode[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != 2;
-
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg(&context->bw_ctx.dml,
- &context->res_ctx.pipe_ctx[i].dlg_regs,
- &context->res_ctx.pipe_ctx[i].ttu_regs,
- pipes,
- pipe_cnt,
- pipe_idx,
- cstate_en,
- context->bw_ctx.bw.dcn.clk.p_state_change_support,
- false, false, true);
-
- context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg(&context->bw_ctx.dml,
- &context->res_ctx.pipe_ctx[i].rq_regs,
- &pipes[pipe_idx].pipe);
- pipe_idx++;
- }
-}
-
-static void dml_calculate_wm_and_dlg(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt,
- int vlevel)
-{
- int i, pipe_idx, vlevel_temp = 0;
-
- double dcfclk = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz;
- double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
- unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
- bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] !=
- dm_dram_clock_change_unsupported;
-
- /* Set B:
- * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present,
- * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark
- * calculations to cover bootup clocks.
- * DCFCLK: soc.clock_limits[2] when available
- * UCLK: soc.clock_limits[2] when available
- */
- if (context->bw_ctx.dml.soc.num_states > 2) {
- vlevel_temp = 2;
- dcfclk = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz;
- } else
- dcfclk = 615; //DCFCLK Vmin_lv
-
- pipes[0].clks_cfg.voltage = vlevel_temp;
- pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
- pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz;
-
- if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
- context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
- context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
- context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
- }
- context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- //context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_wm_fclk_pstate(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- //context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-
- /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns / 4;
- context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns / 8;
-
- /* Set D:
- * All clocks min.
- * DCFCLK: Min, as reported by PM FW when available
- * UCLK : Min, as reported by PM FW when available
- * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr)
- */
-
- if (context->bw_ctx.dml.soc.num_states > 2) {
- vlevel_temp = 0;
- dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz;
- } else
- dcfclk = 615; //DCFCLK Vmin_lv
-
- pipes[0].clks_cfg.voltage = vlevel_temp;
- pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
- pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz;
-
- if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
- context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
- context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
- context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
- }
- context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- //context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_wm_fclk_pstate(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- //context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-
- /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns / 4;
- context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns / 8;
-
- /* Set C, for Dummy P-State:
- * All clocks min.
- * DCFCLK: Min, as reported by PM FW, when available
- * UCLK : Min, as reported by PM FW, when available
- * pstate latency as per UCLK state dummy pstate latency
- */
- if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
- unsigned int min_dram_speed_mts_margin = 160;
-
- if ((!pstate_en))
- min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16;
-
- /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */
- for (i = 3; i > 0; i--)
- if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts)
- break;
-
- context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us;
- context->bw_ctx.dml.soc.dummy_pstate_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us;
- context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
- context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
- }
- context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- //context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_wm_fclk_pstate(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- //context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
-
- /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns / 4;
- context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns / 8;
-
- if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) {
- /* The only difference between A and C is p-state latency, if p-state is not supported
- * with full p-state latency we want to calculate DLG based on dummy p-state latency,
- * Set A p-state watermark set to 0 previously, when p-state unsupported, for now keep as previous implementation.
- */
- context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0;
- } else {
- /* Set A:
- * All clocks min.
- * DCFCLK: Min, as reported by PM FW, when available
- * UCLK: Min, as reported by PM FW, when available
- */
- dml_update_soc_for_wm_a(dc, context);
- context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- }
-
- pipes[0].clks_cfg.voltage = vlevel;
- pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation;
- pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
-
- for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
-
- pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
- pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
-
- if (dc->config.forced_clocks) {
- pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
- pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
- }
- if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
- pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
- if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
- pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
-
- pipe_idx++;
- }
-
- context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
-
- dml_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
-
- if (!pstate_en)
- /* Restore full p-state latency */
- context->bw_ctx.dml.soc.dram_clock_change_latency_us =
- dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
-}
-
-bool dml_validate(struct dc *dc,
- struct dc_state *context,
- bool fast_validate)
-{
- bool out = false;
-
- BW_VAL_TRACE_SETUP();
-
- int vlevel = 0;
- int pipe_cnt = 0;
- display_e2e_pipe_params_st *pipes = context->bw_ctx.dml.dml_pipe_state;
- DC_LOGGER_INIT(dc->ctx->logger);
-
- BW_VAL_TRACE_COUNT();
-
- out = dml_internal_validate(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
-
- if (pipe_cnt == 0)
- goto validate_out;
-
- if (!out)
- goto validate_fail;
-
- BW_VAL_TRACE_END_VOLTAGE_LEVEL();
-
- if (fast_validate) {
- BW_VAL_TRACE_SKIP(fast);
- goto validate_out;
- }
-
- dml_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel);
-
- BW_VAL_TRACE_END_WATERMARKS();
-
- goto validate_out;
-
-validate_fail:
- DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n",
- dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states]));
-
- BW_VAL_TRACE_SKIP(fail);
- out = false;
-
-validate_out:
- BW_VAL_TRACE_FINISH();
-
- return out;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c
deleted file mode 100644
index 4ec5310a2962..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c
+++ /dev/null
@@ -1,284 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifdef DML_WRAPPER_TRANSLATION_
-
-static void gfx10array_mode_to_dml_params(
- enum array_mode_values array_mode,
- enum legacy_tiling_compat_level compat_level,
- unsigned int *sw_mode)
-{
- switch (array_mode) {
- case DC_ARRAY_LINEAR_ALLIGNED:
- case DC_ARRAY_LINEAR_GENERAL:
- *sw_mode = dm_sw_linear;
- break;
- case DC_ARRAY_2D_TILED_THIN1:
-// DC_LEGACY_TILING_ADDR_GEN_ZERO - undefined as per current code hence removed
-#if 0
- if (compat_level == DC_LEGACY_TILING_ADDR_GEN_ZERO)
- *sw_mode = dm_sw_gfx7_2d_thin_l_vp;
- else
- *sw_mode = dm_sw_gfx7_2d_thin_gl;
-#endif
- break;
- default:
- ASSERT(0); /* Not supported */
- break;
- }
-}
-
-static void swizzle_to_dml_params(
- enum swizzle_mode_values swizzle,
- unsigned int *sw_mode)
-{
- switch (swizzle) {
- case DC_SW_LINEAR:
- *sw_mode = dm_sw_linear;
- break;
- case DC_SW_4KB_S:
- *sw_mode = dm_sw_4kb_s;
- break;
- case DC_SW_4KB_S_X:
- *sw_mode = dm_sw_4kb_s_x;
- break;
- case DC_SW_4KB_D:
- *sw_mode = dm_sw_4kb_d;
- break;
- case DC_SW_4KB_D_X:
- *sw_mode = dm_sw_4kb_d_x;
- break;
- case DC_SW_64KB_S:
- *sw_mode = dm_sw_64kb_s;
- break;
- case DC_SW_64KB_S_X:
- *sw_mode = dm_sw_64kb_s_x;
- break;
- case DC_SW_64KB_S_T:
- *sw_mode = dm_sw_64kb_s_t;
- break;
- case DC_SW_64KB_D:
- *sw_mode = dm_sw_64kb_d;
- break;
- case DC_SW_64KB_D_X:
- *sw_mode = dm_sw_64kb_d_x;
- break;
- case DC_SW_64KB_D_T:
- *sw_mode = dm_sw_64kb_d_t;
- break;
- case DC_SW_64KB_R_X:
- *sw_mode = dm_sw_64kb_r_x;
- break;
- case DC_SW_VAR_S:
- *sw_mode = dm_sw_var_s;
- break;
- case DC_SW_VAR_S_X:
- *sw_mode = dm_sw_var_s_x;
- break;
- case DC_SW_VAR_D:
- *sw_mode = dm_sw_var_d;
- break;
- case DC_SW_VAR_D_X:
- *sw_mode = dm_sw_var_d_x;
- break;
-
- default:
- ASSERT(0); /* Not supported */
- break;
- }
-}
-
-static void dc_timing_to_dml_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_pipe_dest_params_st *dest)
-{
- dest->hblank_start = timing->h_total - timing->h_front_porch;
- dest->hblank_end = dest->hblank_start
- - timing->h_addressable
- - timing->h_border_left
- - timing->h_border_right;
- dest->vblank_start = timing->v_total - timing->v_front_porch;
- dest->vblank_end = dest->vblank_start
- - timing->v_addressable
- - timing->v_border_top
- - timing->v_border_bottom;
- dest->htotal = timing->h_total;
- dest->vtotal = timing->v_total;
- dest->hactive = timing->h_addressable;
- dest->vactive = timing->v_addressable;
- dest->interlaced = timing->flags.INTERLACE;
- dest->pixel_rate_mhz = timing->pix_clk_100hz/10000.0;
- if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
- dest->pixel_rate_mhz *= 2;
-}
-
-static enum odm_combine_mode get_dml_odm_combine(const struct pipe_ctx *pipe)
-{
- int odm_split_count = 0;
- enum odm_combine_mode combine_mode = dm_odm_combine_mode_disabled;
- struct pipe_ctx *next_pipe = pipe->next_odm_pipe;
-
- // Traverse pipe tree to determine odm split count
- while (next_pipe) {
- odm_split_count++;
- next_pipe = next_pipe->next_odm_pipe;
- }
- pipe = pipe->prev_odm_pipe;
- while (pipe) {
- odm_split_count++;
- pipe = pipe->prev_odm_pipe;
- }
-
- // Translate split to DML odm combine factor
- switch (odm_split_count) {
- case 1:
- combine_mode = dm_odm_combine_mode_2to1;
- break;
- case 3:
- combine_mode = dm_odm_combine_mode_4to1;
- break;
- default:
- combine_mode = dm_odm_combine_mode_disabled;
- }
-
- return combine_mode;
-}
-
-static int get_dml_output_type(enum signal_type dc_signal)
-{
- int dml_output_type = -1;
-
- switch (dc_signal) {
- case SIGNAL_TYPE_DISPLAY_PORT_MST:
- case SIGNAL_TYPE_DISPLAY_PORT:
- dml_output_type = dm_dp;
- break;
- case SIGNAL_TYPE_EDP:
- dml_output_type = dm_edp;
- break;
- case SIGNAL_TYPE_HDMI_TYPE_A:
- case SIGNAL_TYPE_DVI_SINGLE_LINK:
- case SIGNAL_TYPE_DVI_DUAL_LINK:
- dml_output_type = dm_hdmi;
- break;
- default:
- break;
- }
-
- return dml_output_type;
-}
-
-static void populate_color_depth_and_encoding_from_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_output_params_st *dout)
-{
- int output_bpc = 0;
-
- switch (timing->display_color_depth) {
- case COLOR_DEPTH_666:
- output_bpc = 6;
- break;
- case COLOR_DEPTH_888:
- output_bpc = 8;
- break;
- case COLOR_DEPTH_101010:
- output_bpc = 10;
- break;
- case COLOR_DEPTH_121212:
- output_bpc = 12;
- break;
- case COLOR_DEPTH_141414:
- output_bpc = 14;
- break;
- case COLOR_DEPTH_161616:
- output_bpc = 16;
- break;
- case COLOR_DEPTH_999:
- output_bpc = 9;
- break;
- case COLOR_DEPTH_111111:
- output_bpc = 11;
- break;
- default:
- output_bpc = 8;
- break;
- }
-
- switch (timing->pixel_encoding) {
- case PIXEL_ENCODING_RGB:
- case PIXEL_ENCODING_YCBCR444:
- dout->output_format = dm_444;
- dout->output_bpp = output_bpc * 3;
- break;
- case PIXEL_ENCODING_YCBCR420:
- dout->output_format = dm_420;
- dout->output_bpp = (output_bpc * 3.0) / 2;
- break;
- case PIXEL_ENCODING_YCBCR422:
- if (timing->flags.DSC && !timing->dsc_cfg.ycbcr422_simple)
- dout->output_format = dm_n422;
- else
- dout->output_format = dm_s422;
- dout->output_bpp = output_bpc * 2;
- break;
- default:
- dout->output_format = dm_444;
- dout->output_bpp = output_bpc * 3;
- }
-}
-
-static enum source_format_class dc_source_format_to_dml_source_format(enum surface_pixel_format dc_format)
-{
- enum source_format_class dml_format = dm_444_32;
-
- switch (dc_format) {
- case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
- case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
- dml_format = dm_420_8;
- break;
- case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
- case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
- dml_format = dm_420_10;
- break;
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
- case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
- dml_format = dm_444_64;
- break;
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
- case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
- dml_format = dm_444_16;
- break;
- case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS:
- dml_format = dm_444_8;
- break;
- case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
- dml_format = dm_rgbe_alpha;
- break;
- default:
- dml_format = dm_444_32;
- break;
- }
-
- return dml_format;
-}
-
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c
index ec636d06e18c..ef75eb7d5adc 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c
@@ -68,7 +68,7 @@ static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
int sel = table_hash(mode, bpc, max_min);
int table_size = 0;
int index;
- const struct qp_entry *table = 0L;
+ const struct qp_entry *table = NULL;
// alias enum
enum { min = DAL_MM_MIN, max = DAL_MM_MAX };
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h
index cad244c023cd..d7cd8cc24758 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h
@@ -27,7 +27,7 @@
#define __RC_CALC_FPU_H__
#include "os_types.h"
-#include <drm/drm_dsc.h>
+#include <drm/display/drm_dsc.h>
#define QP_SET_SIZE 15