diff options
author | 2025-01-09 05:35:04 +0000 | |
---|---|---|
committer | 2025-01-24 09:53:12 -0500 | |
commit | a8d42cd228ec41ad99c50a270db82f0dd9127a28 (patch) | |
tree | a28d89d759c6473ace13d9e44432f1f98255b0fe | |
parent | drm/amdkfd: Clear MODE.VSKIP in gfx9 trap handler (diff) | |
download | wireguard-linux-a8d42cd228ec41ad99c50a270db82f0dd9127a28.tar.xz wireguard-linux-a8d42cd228ec41ad99c50a270db82f0dd9127a28.zip |
drm/amd/display: mark static functions noinline_for_stack
When compiling allmodconfig (CONFIG_WERROR=y) with clang-19, see the
following errors:
.../display/dc/dml2/display_mode_core.c:6268:13: warning: stack frame size (3128) exceeds limit (3072) in 'dml_prefetch_check' [-Wframe-larger-than]
.../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c:7236:13: warning: stack frame size (3256) exceeds limit (3072) in 'dml_core_mode_support' [-Wframe-larger-than]
Mark static functions called by dml_prefetch_check() and
dml_core_mode_support() noinline_for_stack to avoid them become huge
functions and thus exceed the frame size limit.
A way to reproduce:
$ git checkout next-20250107
$ mkdir build_dir
$ export PATH=/tmp/llvm-19.1.6-x86_64/bin:$PATH
$ make LLVM=1 O=build_dir allmodconfig
$ make LLVM=1 O=build_dir drivers/gpu/drm/ -j
The way how it chose static functions to mark:
[0] Unset CONFIG_WERROR in build_dir/.config.
To get display_mode_core.o without errors.
[1] Get a function list called by dml_prefetch_check().
$ sed -n '6268,6711p' drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c \
| sed -n -r 's/.*\W(\w+)\(.*/\1/p' | sort -u >/tmp/syms
[2] Get the non-inline function list.
Objdump won't show the symbols if they are inline functions.
$ make LLVM=1 O=build_dir drivers/gpu/drm/ -j
$ objdump -d build_dir/.../display_mode_core.o | \
./scripts/checkstack.pl x86_64 0 | \
grep -f /tmp/syms | cut -d' ' -f2- >/tmp/orig
[3] Get the full function list.
Append "-fno-inline" to `CFLAGS_.../display_mode_core.o` in
drivers/gpu/drm/amd/display/dc/dml2/Makefile.
$ make LLVM=1 O=build_dir drivers/gpu/drm/ -j
$ objdump -d build_dir/.../display_mode_core.o | \
./scripts/checkstack.pl x86_64 0 | \
grep -f /tmp/syms | cut -d' ' -f2- >/tmp/noinline
[4] Get the inline function list.
If a symbol only in /tmp/noinline but not in /tmp/orig, it is a good
candidate to mark noinline.
$ diff /tmp/orig /tmp/noinline
Chosen functions and their stack sizes:
CalculateBandwidthAvailableForImmediateFlip [display_mode_core.o]:144
CalculateExtraLatency [display_mode_core.o]:176
CalculateTWait [display_mode_core.o]:64
CalculateVActiveBandwithSupport [display_mode_core.o]:112
set_calculate_prefetch_schedule_params [display_mode_core.o]:48
CheckGlobalPrefetchAdmissibility [dml2_core_dcn4_calcs.o]:544
calculate_bandwidth_available [dml2_core_dcn4_calcs.o]:320
calculate_vactive_det_fill_latency [dml2_core_dcn4_calcs.o]:272
CalculateDCFCLKDeepSleep [dml2_core_dcn4_calcs.o]:208
CalculateODMMode [dml2_core_dcn4_calcs.o]:208
CalculateOutputLink [dml2_core_dcn4_calcs.o]:176
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 12 |
2 files changed, 12 insertions, 12 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 35bc917631ae..84a2de9a76d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -1736,7 +1736,7 @@ static void CalculateBytePerPixelAndBlockSizes( #endif } // CalculateBytePerPixelAndBlockSizes -static dml_float_t CalculateTWait( +static noinline_for_stack dml_float_t CalculateTWait( dml_uint_t PrefetchMode, enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal, @@ -4458,7 +4458,7 @@ static void CalculateSwathWidth( } } // CalculateSwathWidth -static dml_float_t CalculateExtraLatency( +static noinline_for_stack dml_float_t CalculateExtraLatency( dml_uint_t RoundTripPingLatencyCycles, dml_uint_t ReorderingBytes, dml_float_t DCFCLK, @@ -5915,7 +5915,7 @@ static dml_uint_t DSCDelayRequirement( return DSCDelayRequirement_val; } -static dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces, +static noinline_for_stack dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces, dml_float_t ReturnBW, dml_bool_t NotUrgentLatencyHiding[], dml_float_t ReadBandwidthLuma[], @@ -6019,7 +6019,7 @@ static void CalculatePrefetchBandwithSupport( #endif } -static dml_float_t CalculateBandwidthAvailableForImmediateFlip( +static noinline_for_stack dml_float_t CalculateBandwidthAvailableForImmediateFlip( dml_uint_t NumberOfActiveSurfaces, dml_float_t ReturnBW, dml_float_t ReadBandwidthLuma[], @@ -6213,7 +6213,7 @@ static dml_uint_t CalculateMaxVStartup( return max_vstartup_lines; } -static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib, +static noinline_for_stack void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib, struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params, dml_uint_t j, dml_uint_t k) @@ -6265,7 +6265,7 @@ static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *m CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; } -static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) +static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mode_lib) { struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals; struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index c4dbf27abaf8..8ed49a9df378 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -2778,7 +2778,7 @@ static double dml_get_return_bandwidth_available( return return_bw_mbps; } -static void calculate_bandwidth_available( +static noinline_for_stack void calculate_bandwidth_available( double avg_bandwidth_available_min[dml2_core_internal_soc_state_max], double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM @@ -3625,7 +3625,7 @@ static void CalculateDCFCLKDeepSleepTdlut( dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); } -static void CalculateDCFCLKDeepSleep( +static noinline_for_stack void CalculateDCFCLKDeepSleep( const struct dml2_display_cfg *display_cfg, unsigned int NumberOfActiveSurfaces, unsigned int BytePerPixelY[], @@ -4142,7 +4142,7 @@ static bool ValidateODMMode(enum dml2_odm_mode ODMMode, return true; } -static void CalculateODMMode( +static noinline_for_stack void CalculateODMMode( unsigned int MaximumPixelsPerLinePerDSCUnit, unsigned int HActive, enum dml2_output_format_class OutFormat, @@ -4239,7 +4239,7 @@ static void CalculateODMMode( #endif } -static void CalculateOutputLink( +static noinline_for_stack void CalculateOutputLink( struct dml2_core_internal_scratch *s, double PHYCLK, double PHYCLKD18, @@ -5999,7 +5999,7 @@ static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned in } // a global check against the aggregate effect of the per plane prefetch schedule -static bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch, +static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p) { struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals; @@ -7012,7 +7012,7 @@ static void calculate_bytes_to_fetch_required_to_hide_latency( } } -static void calculate_vactive_det_fill_latency( +static noinline_for_stack void calculate_vactive_det_fill_latency( const struct dml2_display_cfg *display_cfg, unsigned int num_active_planes, unsigned int bytes_required_l[], |