 60 files changed, 1501 insertions(+), 765 deletions(-)
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 1201b8ab4702..07c71a29963d 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -21,6 +21,13 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
 		$(src)/xe_wa_oob.rules
 	$(call cmd,wa_oob)
 
+generated_device_oob := $(obj)/generated/xe_device_wa_oob.c $(obj)/generated/xe_device_wa_oob.h
+quiet_cmd_device_wa_oob = GEN     $(notdir $(generated_device_oob))
+      cmd_device_wa_oob = mkdir -p $(@D); $^ $(generated_device_oob)
+$(obj)/generated/%_device_wa_oob.c $(obj)/generated/%_device_wa_oob.h: $(obj)/xe_gen_wa_oob \
+		$(src)/xe_device_wa_oob.rules
+	$(call cmd,device_wa_oob)
+
 # Please keep these build lists sorted!
 
 # core driver code
@@ -156,7 +163,8 @@ xe-$(CONFIG_PCI_IOV) += \
 	xe_lmtt_2l.o \
 	xe_lmtt_ml.o \
 	xe_pci_sriov.o \
-	xe_sriov_pf.o
+	xe_sriov_pf.o \
+	xe_sriov_pf_service.o
 
 # include helpers for tests even when XE is built-in
 ifdef CONFIG_DRM_XE_KUNIT_TEST
@@ -341,4 +349,4 @@ $(obj)/%.hdrtest: $(src)/%.h FORCE
 	$(call if_changed_dep,hdrtest)
 
 uses_generated_oob := $(addprefix $(obj)/, $(xe-y))
-$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h
+$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h $(obj)/generated/xe_device_wa_oob.h
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 994af591a2e8..1b101edb838b 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -12,9 +12,13 @@
 #define CTX_RING_START			(0x08 + 1)
 #define CTX_RING_CTL			(0x0a + 1)
 #define CTX_BB_PER_CTX_PTR		(0x12 + 1)
+#define CTX_CS_INDIRECT_CTX		(0x14 + 1)
+#define CTX_CS_INDIRECT_CTX_OFFSET	(0x16 + 1)
 #define CTX_TIMESTAMP			(0x22 + 1)
 #define CTX_TIMESTAMP_UDW		(0x24 + 1)
 #define CTX_INDIRECT_RING_STATE		(0x26 + 1)
+#define CTX_ACC_CTR_THOLD		(0x2a + 1)
+#define CTX_ASID			(0x2e + 1)
 #define CTX_PDP0_UDW			(0x30 + 1)
 #define CTX_PDP0_LDW			(0x32 + 1)
 
@@ -36,4 +40,7 @@
 #define INDIRECT_CTX_RING_START_UDW	(0x08 + 1)
 #define INDIRECT_CTX_RING_CTL		(0x0a + 1)
 
+#define CTX_INDIRECT_CTX_OFFSET_MASK	REG_GENMASK(15, 6)
+#define CTX_INDIRECT_CTX_OFFSET_DEFAULT	REG_FIELD_PREP(CTX_INDIRECT_CTX_OFFSET_MASK, 0xd)
+
 #endif
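The new CTX_INDIRECT_CTX_OFFSET_DEFAULT above packs the default value 0xd into the bits-15:6 field of the indirect-context-offset dword. A minimal userspace sketch of that mask arithmetic, where GENMASK_U32 and FIELD_PREP_U32 are simplified stand-ins for the kernel's REG_GENMASK/REG_FIELD_PREP (which add register typing on top of the same bit math):

#include <assert.h>
#include <stdint.h>

#define GENMASK_U32(h, l)	((~0u << (l)) & (~0u >> (31 - (h))))
#define FIELD_PREP_U32(mask, v)	(((v) << __builtin_ctz(mask)) & (mask))

int main(void)
{
	uint32_t mask = GENMASK_U32(15, 6);		/* 0x0000ffc0 */
	uint32_t def = FIELD_PREP_U32(mask, 0xd);	/* 0xd << 6 */

	assert(mask == 0xffc0);
	assert(def == 0x340);
	return 0;
}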
diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c
deleted file mode 100644
index b683585db852..000000000000
--- a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c
+++ /dev/null
@@ -1,232 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 AND MIT
-/*
- * Copyright © 2024 Intel Corporation
- */
-
-#include <kunit/test.h>
-
-#include "xe_device.h"
-#include "xe_kunit_helpers.h"
-#include "xe_pci_test.h"
-
-static int pf_service_test_init(struct kunit *test)
-{
-	struct xe_pci_fake_data fake = {
-		.sriov_mode = XE_SRIOV_MODE_PF,
-		.platform = XE_TIGERLAKE, /* some random platform */
-		.subplatform = XE_SUBPLATFORM_NONE,
-	};
-	struct xe_device *xe;
-	struct xe_gt *gt;
-
-	test->priv = &fake;
-	xe_kunit_helper_xe_device_test_init(test);
-
-	xe = test->priv;
-	KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0);
-
-	gt = xe_device_get_gt(xe, 0);
-	pf_init_versions(gt);
-
-	/*
-	 * sanity check:
-	 * - all supported platforms VF/PF ABI versions must be defined
-	 * - base version can't be newer than latest
-	 */
-	KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.base.major);
-	KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.latest.major);
-	KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.major,
-			gt->sriov.pf.service.version.latest.major);
-	if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major)
-		KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.minor,
-				gt->sriov.pf.service.version.latest.minor);
-
-	test->priv = gt;
-	return 0;
-}
-
-static void pf_negotiate_any(struct kunit *test)
-{
-	struct xe_gt *gt = test->priv;
-	u32 major, minor;
-
-	KUNIT_ASSERT_EQ(test, 0,
-			pf_negotiate_version(gt, VF2PF_HANDSHAKE_MAJOR_ANY,
-					     VF2PF_HANDSHAKE_MINOR_ANY,
-					     &major, &minor));
-	KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
-	KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor);
-}
-
-static void pf_negotiate_base_match(struct kunit *test)
-{
-	struct xe_gt *gt = test->priv;
-	u32 major, minor;
-
-	KUNIT_ASSERT_EQ(test, 0,
-			pf_negotiate_version(gt,
-					     gt->sriov.pf.service.version.base.major,
-					     gt->sriov.pf.service.version.base.minor,
-					     &major, &minor));
-	KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major);
-	KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.base.minor);
-}
-
-static void pf_negotiate_base_newer(struct kunit *test)
-{
-	struct xe_gt *gt = test->priv;
-	u32 major, minor;
-
-	KUNIT_ASSERT_EQ(test, 0,
-			pf_negotiate_version(gt,
-					     gt->sriov.pf.service.version.base.major,
-					     gt->sriov.pf.service.version.base.minor + 1,
-					     &major, &minor));
-	KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major);
-	KUNIT_ASSERT_GE(test, minor, gt->sriov.pf.service.version.base.minor);
-	if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major)
-		KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor);
-	else
-		KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n");
-}
-
-static void pf_negotiate_base_next(struct kunit *test)
-{
-	struct xe_gt *gt = test->priv;
-	u32 major, minor;
-
-	KUNIT_ASSERT_EQ(test, 0,
-			pf_negotiate_version(gt,
-					     gt->sriov.pf.service.version.base.major + 1, 0,
-					     &major, &minor));
-	KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major);
-	KUNIT_ASSERT_LE(test, major, gt->sriov.pf.service.version.latest.major);
-	if (major == gt->sriov.pf.service.version.latest.major)
-		KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor);
-	else
-		KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n");
-}
-
-static void pf_negotiate_base_older(struct kunit *test)
-{
-	struct xe_gt *gt = test->priv;
-	u32 major, minor;
-
-	if (!gt->sriov.pf.service.version.base.minor)
-		kunit_skip(test, "no older minor\n");
-
-	KUNIT_ASSERT_NE(test, 0,
-			pf_negotiate_version(gt,
-					     gt->sriov.pf.service.version.base.major,
-					     gt->sriov.pf.service.version.base.minor - 1,
-					     &major, &minor));
-}
-
-static void pf_negotiate_base_prev(struct kunit *test)
-{
-	struct xe_gt *gt = test->priv;
-	u32 major, minor;
-
-	KUNIT_ASSERT_NE(test, 0,
-			pf_negotiate_version(gt,
-					     gt->sriov.pf.service.version.base.major - 1, 1,
-					     &major, &minor));
-}
-
-static void pf_negotiate_latest_match(struct kunit *test)
-{
-	struct xe_gt *gt = test->priv;
-	u32 major, minor;
-
-	KUNIT_ASSERT_EQ(test, 0,
-			pf_negotiate_version(gt,
-					     gt->sriov.pf.service.version.latest.major,
-					     gt->sriov.pf.service.version.latest.minor,
-					     &major, &minor));
-	KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
-	KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor);
-}
-
-static void pf_negotiate_latest_newer(struct kunit *test)
-{
-	struct xe_gt *gt = test->priv;
-	u32 major, minor;
-
-	KUNIT_ASSERT_EQ(test, 0,
-			pf_negotiate_version(gt,
-					     gt->sriov.pf.service.version.latest.major,
-					     gt->sriov.pf.service.version.latest.minor + 1,
-					     &major, &minor));
-	KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
-	KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor);
-}
-
-static void pf_negotiate_latest_next(struct kunit *test)
-{
-	struct xe_gt *gt = test->priv;
-	u32 major, minor;
-
-	KUNIT_ASSERT_EQ(test, 0,
-			pf_negotiate_version(gt,
-					     gt->sriov.pf.service.version.latest.major + 1, 0,
-					     &major, &minor));
-	KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
-	KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor);
-}
-
-static void pf_negotiate_latest_older(struct kunit *test)
-{
-	struct xe_gt *gt = test->priv;
-	u32 major, minor;
-
-	if (!gt->sriov.pf.service.version.latest.minor)
-		kunit_skip(test, "no older minor\n");
-
-	KUNIT_ASSERT_EQ(test, 0,
-			pf_negotiate_version(gt,
-					     gt->sriov.pf.service.version.latest.major,
-					     gt->sriov.pf.service.version.latest.minor - 1,
-					     &major, &minor));
-	KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
-	KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor - 1);
-}
-
-static void pf_negotiate_latest_prev(struct kunit *test)
-{
-	struct xe_gt *gt = test->priv;
-	u32 major, minor;
-
-	if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major)
-		kunit_skip(test, "no prev major");
-
-	KUNIT_ASSERT_EQ(test, 0,
-			pf_negotiate_version(gt,
-					     gt->sriov.pf.service.version.latest.major - 1,
-					     gt->sriov.pf.service.version.base.minor + 1,
-					     &major, &minor));
-	KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major - 1);
-	KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major);
-}
-
-static struct kunit_case pf_service_test_cases[] = {
-	KUNIT_CASE(pf_negotiate_any),
-	KUNIT_CASE(pf_negotiate_base_match),
-	KUNIT_CASE(pf_negotiate_base_newer),
-	KUNIT_CASE(pf_negotiate_base_next),
-	KUNIT_CASE(pf_negotiate_base_older),
-	KUNIT_CASE(pf_negotiate_base_prev),
-	KUNIT_CASE(pf_negotiate_latest_match),
-	KUNIT_CASE(pf_negotiate_latest_newer),
-	KUNIT_CASE(pf_negotiate_latest_next),
-	KUNIT_CASE(pf_negotiate_latest_older),
-	KUNIT_CASE(pf_negotiate_latest_prev),
-	{}
-};
-
-static struct kunit_suite pf_service_suite = {
-	.name = "pf_service",
-	.test_cases = pf_service_test_cases,
-	.init = pf_service_test_init,
-};
-
-kunit_test_suite(pf_service_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c
new file mode 100644
index 000000000000..ba95e29b597d
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2024-2025 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_device.h"
+#include "xe_kunit_helpers.h"
+#include "xe_pci_test.h"
+
+static int pf_service_test_init(struct kunit *test)
+{
+	struct xe_pci_fake_data fake = {
+		.sriov_mode = XE_SRIOV_MODE_PF,
+		.platform = XE_TIGERLAKE, /* some random platform */
+		.subplatform = XE_SUBPLATFORM_NONE,
+	};
+	struct xe_device *xe;
+
+	test->priv = &fake;
+	xe_kunit_helper_xe_device_test_init(test);
+
+	xe = test->priv;
+	KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0);
+
+	xe_sriov_pf_service_init(xe);
+	/*
+	 * sanity check:
+	 * - all supported platforms VF/PF ABI versions must be defined
+	 * - base version can't be newer than latest
+	 */
+	KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.base.major);
+	KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.latest.major);
+	KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.major,
+			xe->sriov.pf.service.version.latest.major);
+	if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major)
+		KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.minor,
+				xe->sriov.pf.service.version.latest.minor);
+	return 0;
+}
+
+static void pf_negotiate_any(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	u32 major, minor;
+
+	KUNIT_ASSERT_EQ(test, 0,
+			pf_negotiate_version(xe, VF2PF_HANDSHAKE_MAJOR_ANY,
+					     VF2PF_HANDSHAKE_MINOR_ANY,
+					     &major, &minor));
+	KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major);
+	KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor);
+}
+
+static void pf_negotiate_base_match(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	u32 major, minor;
+
+	KUNIT_ASSERT_EQ(test, 0,
+			pf_negotiate_version(xe,
+					     xe->sriov.pf.service.version.base.major,
+					     xe->sriov.pf.service.version.base.minor,
+					     &major, &minor));
+	KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major);
+	KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.base.minor);
+}
+
+static void pf_negotiate_base_newer(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	u32 major, minor;
+
+	KUNIT_ASSERT_EQ(test, 0,
+			pf_negotiate_version(xe,
+					     xe->sriov.pf.service.version.base.major,
+					     xe->sriov.pf.service.version.base.minor + 1,
+					     &major, &minor));
+	KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major);
+	KUNIT_ASSERT_GE(test, minor, xe->sriov.pf.service.version.base.minor);
+	if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major)
+		KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor);
+	else
+		KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n");
+}
+
+static void pf_negotiate_base_next(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	u32 major, minor;
+
+	KUNIT_ASSERT_EQ(test, 0,
+			pf_negotiate_version(xe,
+					     xe->sriov.pf.service.version.base.major + 1, 0,
+					     &major, &minor));
+	KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major);
+	KUNIT_ASSERT_LE(test, major, xe->sriov.pf.service.version.latest.major);
+	if (major == xe->sriov.pf.service.version.latest.major)
+		KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor);
+	else
+		KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n");
+}
+
+static void pf_negotiate_base_older(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	u32 major, minor;
+
+	if (!xe->sriov.pf.service.version.base.minor)
+		kunit_skip(test, "no older minor\n");
+
+	KUNIT_ASSERT_NE(test, 0,
+			pf_negotiate_version(xe,
+					     xe->sriov.pf.service.version.base.major,
+					     xe->sriov.pf.service.version.base.minor - 1,
+					     &major, &minor));
+}
+
+static void pf_negotiate_base_prev(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	u32 major, minor;
+
+	KUNIT_ASSERT_NE(test, 0,
+			pf_negotiate_version(xe,
+					     xe->sriov.pf.service.version.base.major - 1, 1,
+					     &major, &minor));
+}
+
+static void pf_negotiate_latest_match(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	u32 major, minor;
+
+	KUNIT_ASSERT_EQ(test, 0,
+			pf_negotiate_version(xe,
+					     xe->sriov.pf.service.version.latest.major,
+					     xe->sriov.pf.service.version.latest.minor,
+					     &major, &minor));
+	KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major);
+	KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor);
+}
+
+static void pf_negotiate_latest_newer(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	u32 major, minor;
+
+	KUNIT_ASSERT_EQ(test, 0,
+			pf_negotiate_version(xe,
+					     xe->sriov.pf.service.version.latest.major,
+					     xe->sriov.pf.service.version.latest.minor + 1,
+					     &major, &minor));
+	KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major);
+	KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor);
+}
+
+static void pf_negotiate_latest_next(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	u32 major, minor;
+
+	KUNIT_ASSERT_EQ(test, 0,
+			pf_negotiate_version(xe,
+					     xe->sriov.pf.service.version.latest.major + 1, 0,
+					     &major, &minor));
+	KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major);
+	KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor);
+}
+
+static void pf_negotiate_latest_older(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	u32 major, minor;
+
+	if (!xe->sriov.pf.service.version.latest.minor)
+		kunit_skip(test, "no older minor\n");
+
+	KUNIT_ASSERT_EQ(test, 0,
+			pf_negotiate_version(xe,
+					     xe->sriov.pf.service.version.latest.major,
+					     xe->sriov.pf.service.version.latest.minor - 1,
+					     &major, &minor));
+	KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major);
+	KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor - 1);
+}
+
+static void pf_negotiate_latest_prev(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	u32 major, minor;
+
+	if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major)
+		kunit_skip(test, "no prev major");
+
+	KUNIT_ASSERT_EQ(test, 0,
+			pf_negotiate_version(xe,
+					     xe->sriov.pf.service.version.latest.major - 1,
+					     xe->sriov.pf.service.version.base.minor + 1,
+					     &major, &minor));
+	KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major - 1);
+	KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major);
+}
+
+static struct kunit_case pf_service_test_cases[] = {
+	KUNIT_CASE(pf_negotiate_any),
+	KUNIT_CASE(pf_negotiate_base_match),
+	KUNIT_CASE(pf_negotiate_base_newer),
+	KUNIT_CASE(pf_negotiate_base_next),
+	KUNIT_CASE(pf_negotiate_base_older),
+	KUNIT_CASE(pf_negotiate_base_prev),
+	KUNIT_CASE(pf_negotiate_latest_match),
+	KUNIT_CASE(pf_negotiate_latest_newer),
+	KUNIT_CASE(pf_negotiate_latest_next),
+	KUNIT_CASE(pf_negotiate_latest_older),
+	KUNIT_CASE(pf_negotiate_latest_prev),
+	{}
+};
+
+static struct kunit_suite pf_service_suite = {
+	.name = "pf_service",
+	.test_cases = pf_service_test_cases,
+	.init = pf_service_test_init,
+};
+
+kunit_test_suite(pf_service_suite);
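The KUnit cases above pin down the negotiation rules of pf_negotiate_version(): "any" maps to latest, a too-new request clamps to latest, a too-old request is rejected, a previous major is unsupported for now, and a matching major negotiates the common minor. A self-contained userspace sketch of that decision table (the base/latest values in main() are assumptions for illustration; the real function reads them from xe->sriov.pf.service.version):

#include <assert.h>
#include <stdint.h>

#define MAJOR_ANY 0xffffu	/* stand-in for VF2PF_HANDSHAKE_MAJOR_ANY */
#define MINOR_ANY 0xffffu	/* stand-in for VF2PF_HANDSHAKE_MINOR_ANY */

struct version { uint32_t major, minor; };

/* Mirrors the rules exercised by the pf_negotiate_* test cases. */
static int negotiate(struct version base, struct version latest,
		     uint32_t wanted_major, uint32_t wanted_minor,
		     struct version *out)
{
	if (wanted_major == MAJOR_ANY && wanted_minor == MINOR_ANY) {
		*out = latest;			/* VF doesn't care: latest */
		return 0;
	}
	if (wanted_major > latest.major) {
		*out = latest;			/* newer than ours: latest */
		return 0;
	}
	if (wanted_major < base.major ||
	    (wanted_major == base.major && wanted_minor < base.minor))
		return -1;			/* older than base: reject */
	if (wanted_major < latest.major)
		return -2;			/* multi-version: unsupported */
	out->major = wanted_major;		/* same major: common minor */
	out->minor = wanted_minor < latest.minor ? wanted_minor : latest.minor;
	return 0;
}

int main(void)
{
	struct version base = {1, 0}, latest = {1, 9}, v;	/* assumed ABI */

	assert(!negotiate(base, latest, MAJOR_ANY, MINOR_ANY, &v) && v.minor == 9);
	assert(!negotiate(base, latest, 1, 42, &v) && v.minor == 9);	/* clamp */
	assert(negotiate(base, latest, 0, 9, &v) < 0);			/* reject */
	return 0;
}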
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index 9570672fce33..5ce0e26822f2 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -19,7 +19,7 @@ static int bb_prefetch(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 
-	if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt))
+	if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt))
 		/*
 		 * RCS and CCS require 1K, although other engines would be
 		 * okay with 512.
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
index fafacd73dcc3..b5cc65506696 100644
--- a/drivers/gpu/drm/xe/xe_bb.h
+++ b/drivers/gpu/drm/xe/xe_bb.h
@@ -14,7 +14,7 @@ struct xe_gt;
 struct xe_exec_queue;
 struct xe_sched_job;
 
-struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm);
+struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm);
 struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
 				      struct xe_bb *bb);
 struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 4e0355d0f406..18f27da47a36 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -2174,21 +2174,6 @@ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
 }
 
-struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
-				     const void *data, size_t size,
-				     enum ttm_bo_type type, u32 flags)
-{
-	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
-						ALIGN(size, PAGE_SIZE),
-						type, flags);
-
-	if (IS_ERR(bo))
-		return bo;
-
-	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
-
-	return bo;
-}
-
 static void __xe_bo_unpin_map_no_vm(void *arg)
 {
 	xe_bo_unpin_map_no_vm(arg);
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 8559901e4088..02e8cde4c6b2 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -118,9 +118,6 @@ struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
 					      size_t size, u64 offset,
 					      enum ttm_bo_type type, u32 flags,
 					      u64 alignment);
-struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
-				     const void *data, size_t size,
-				     enum ttm_bo_type type, u32 flags);
 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
 					   size_t size, u32 flags);
 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index d83cd6ed3fa8..26e9d146ccbf 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -20,7 +20,9 @@
 #include "xe_pm.h"
 #include "xe_pxp_debugfs.h"
 #include "xe_sriov.h"
+#include "xe_sriov_pf.h"
 #include "xe_step.h"
+#include "xe_wa.h"
 
 #ifdef CONFIG_DRM_XE_DEBUG
 #include "xe_bo_evict.h"
@@ -82,9 +84,28 @@ static int sriov_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int workarounds(struct xe_device *xe, struct drm_printer *p)
+{
+	xe_pm_runtime_get(xe);
+	xe_wa_device_dump(xe, p);
+	xe_pm_runtime_put(xe);
+
+	return 0;
+}
+
+static int workaround_info(struct seq_file *m, void *data)
+{
+	struct xe_device *xe = node_to_xe(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	workarounds(xe, &p);
+
+	return 0;
+}
+
 static const struct drm_info_list debugfs_list[] = {
 	{"info", info, 0},
 	{ .name = "sriov_info", .show = sriov_info, },
+	{ .name = "workarounds", .show = workaround_info, },
 };
 
 static int forcewake_open(struct inode *inode, struct file *file)
@@ -273,4 +294,7 @@ void xe_debugfs_register(struct xe_device *xe)
 	xe_pxp_debugfs_register(xe->pxp);
 
 	fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure);
+
+	if (IS_SRIOV_PF(xe))
+		xe_sriov_pf_debugfs_register(xe, root);
 }
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 6db09cfc8eb8..6dc84e4ed281 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -68,6 +68,7 @@
 #include "xe_wait_user_fence.h"
 #include "xe_wa.h"
 
+#include <generated/xe_device_wa_oob.h>
 #include <generated/xe_wa_oob.h>
 
 static int xe_file_open(struct drm_device *dev, struct drm_file *file)
@@ -700,6 +701,9 @@ int xe_device_probe_early(struct xe_device *xe)
 {
 	int err;
 
+	xe_wa_device_init(xe);
+	xe_wa_process_device_oob(xe);
+
 	err = xe_mmio_probe_early(xe);
 	if (err)
 		return err;
@@ -861,6 +865,10 @@ int xe_device_probe(struct xe_device *xe)
 			return err;
 	}
 
+	if (xe->tiles->media_gt &&
+	    XE_WA(xe->tiles->media_gt, 15015404425_disable))
+		XE_DEVICE_WA_DISABLE(xe, 15015404425);
+
 	xe_nvm_init(xe);
 
 	err = xe_heci_gsc_init(xe);
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index f0eb8150f185..bc802e066a7d 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -131,6 +131,10 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe)
 	for ((id__) = 0; (id__) < (xe__)->info.tile_count * (xe__)->info.max_gt_per_tile; (id__)++) \
 		for_each_if((gt__) = xe_device_get_gt((xe__), (id__)))
 
+#define for_each_gt_on_tile(gt__, tile__, id__) \
+	for_each_gt((gt__), (tile__)->xe, (id__)) \
+		for_each_if((gt__)->tile == (tile__))
+
 static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
 {
 	return &gt->pm.fw;
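for_each_gt_on_tile() composes the existing for_each_gt() iterator with for_each_if() to skip GTs belonging to other tiles. A toy userspace sketch of the same macro-composition pattern over a plain array (the struct names and the array-based for_each_gt here are illustrative, not the driver's):

#include <stdio.h>

/* for_each_if(): skip iterations that fail the condition without
 * breaking the loop; the if/else shape is dangling-else safe. */
#define for_each_if(cond) if (!(cond)) {} else

struct tile { int id; };
struct gt { int id; struct tile *tile; };

#define for_each_gt(gt, gts, n, i) \
	for ((i) = 0; (i) < (n) && ((gt) = &(gts)[i]); (i)++)

#define for_each_gt_on_tile(gt, gts, n, i, t) \
	for_each_gt(gt, gts, n, i) \
		for_each_if((gt)->tile == (t))

int main(void)
{
	struct tile t0 = {0}, t1 = {1};
	struct gt gts[] = { {0, &t0}, {1, &t0}, {2, &t1} };
	struct gt *gt;
	int i;

	for_each_gt_on_tile(gt, gts, 3, i, &t1)
		printf("gt%d is on tile%d\n", gt->id, gt->tile->id); /* gt2 only */
	return 0;
}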
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index d15b2793629e..d4d2c6854790 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -21,7 +21,9 @@
 #include "xe_platform_types.h"
 #include "xe_pmu_types.h"
 #include "xe_pt_types.h"
+#include "xe_sriov_pf_types.h"
 #include "xe_sriov_types.h"
+#include "xe_sriov_vf_types.h"
 #include "xe_step_types.h"
 #include "xe_survivability_mode_types.h"
 #include "xe_ttm_vram_mgr_types.h"
@@ -363,6 +365,19 @@ struct xe_device {
 		u8 skip_pcode:1;
 	} info;
 
+	/** @wa_active: keep track of active workarounds */
+	struct {
+		/** @wa_active.oob: bitmap with active OOB workarounds */
+		unsigned long *oob;
+
+		/**
+		 * @wa_active.oob_initialized: Mark oob as initialized to help
+		 * detecting misuse of XE_DEVICE_WA() - it can only be called
+		 * on initialization after Device OOB WAs have been processed.
+		 */
+		bool oob_initialized;
+	} wa_active;
+
 	/** @survivability: survivability information for device */
 	struct xe_survivability survivability;
@@ -409,10 +424,12 @@ struct xe_device {
 		/** @sriov.__mode: SR-IOV mode (Don't access directly!) */
 		enum xe_sriov_mode __mode;
 
-		/** @sriov.pf: PF specific data */
-		struct xe_device_pf pf;
-		/** @sriov.vf: VF specific data */
-		struct xe_device_vf vf;
+		union {
+			/** @sriov.pf: PF specific data */
+			struct xe_device_pf pf;
+			/** @sriov.vf: VF specific data */
+			struct xe_device_vf vf;
+		};
 
 		/** @sriov.wq: workqueue used by the virtualization workers */
 		struct workqueue_struct *wq;
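Folding the pf and vf members into an anonymous union is safe because __mode makes them mutually exclusive: a device is either a PF or a VF, never both, so the two structs can share storage. The same tagged-union shape in a standalone sketch (the member fields are illustrative, not the driver's):

#include <assert.h>
#include <string.h>

enum sriov_mode { MODE_NONE, MODE_PF, MODE_VF };

struct dev_sriov {
	enum sriov_mode mode;	/* tag: decides which union member is live */
	union {
		struct { int total_vfs; } pf;	/* valid iff mode == MODE_PF */
		struct { int pf_major; } vf;	/* valid iff mode == MODE_VF */
	};			/* pf and vf now overlap in memory */
};

int main(void)
{
	struct dev_sriov s;

	memset(&s, 0, sizeof(s));
	s.mode = MODE_PF;
	s.pf.total_vfs = 7;	/* reading s.vf here would be a bug */
	assert(s.mode == MODE_PF && s.pf.total_vfs == 7);
	return 0;
}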
diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules
new file mode 100644
index 000000000000..3a0c4ccc4224
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules
@@ -0,0 +1,2 @@
+15015404425	PLATFORM(LUNARLAKE)
+		PLATFORM(PANTHERLAKE)
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 8a5cba22b586..c59a9b330697 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -64,7 +64,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
 {
 	int i, j;
 
-	if (!xe_gt_is_media_type(gt))
+	if (xe_gt_is_main_type(gt))
 		init_domain(fw, XE_FW_DOMAIN_ID_RENDER,
 			    FORCEWAKE_RENDER,
 			    FORCEWAKE_ACK_RENDER);
diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
index ed9183599e31..6581cb0f0e59 100644
--- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c
+++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
@@ -18,8 +18,8 @@
 	" *\n" \
 	" * This file was generated from rules: %s\n" \
 	" */\n" \
-	"#ifndef _GENERATED_XE_WA_OOB_\n" \
-	"#define _GENERATED_XE_WA_OOB_\n" \
+	"#ifndef _GENERATED_%s_\n" \
+	"#define _GENERATED_%s_\n" \
 	"\n" \
 	"enum {\n"
 
@@ -52,7 +52,7 @@ static char *strip(char *line, size_t linelen)
 }
 
 #define MAX_LINE_LEN 4096
-static int parse(FILE *input, FILE *csource, FILE *cheader)
+static int parse(FILE *input, FILE *csource, FILE *cheader, char *prefix)
 {
 	char line[MAX_LINE_LEN + 1];
 	char *name, *prev_name = NULL, *rules;
@@ -96,7 +96,7 @@ static int parse(FILE *input, FILE *csource, FILE *cheader)
 		}
 
 		if (name) {
-			fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx);
+			fprintf(cheader, "\t%s_%s = %u,\n", prefix, name, idx);
 
 			/* Close previous entry before starting a new one */
 			if (idx)
@@ -118,7 +118,33 @@ static int parse(FILE *input, FILE *csource, FILE *cheader)
 	if (idx)
 		fprintf(csource, ") },\n");
 
-	fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx);
+	fprintf(cheader, "\t_%s_COUNT = %u\n", prefix, idx);
+
+	return 0;
+}
+
+static int fn_to_prefix(const char *fn, char *prefix, size_t size)
+{
+	size_t len;
+
+	fn = basename(fn);
+	len = strlen(fn);
+
+	if (len > size - 1)
+		return -ENAMETOOLONG;
+
+	memcpy(prefix, fn, len + 1);
+
+	for (char *p = prefix; *p; p++) {
+		switch (*p) {
+		case '.':
+			*p = '\0';
+			return 0;
+		default:
+			*p = toupper(*p);
+			break;
+		}
+	}
 
 	return 0;
 }
@@ -141,6 +167,7 @@ int main(int argc, const char *argv[])
 		[ARGS_CHEADER] = { .fn = argv[3], .mode = "w" },
 	};
 	int ret = 1;
	char prefix[128];
 
 	if (argc < 3) {
 		fprintf(stderr, "ERROR: wrong arguments\n");
@@ -148,6 +175,9 @@ int main(int argc, const char *argv[])
 		return 1;
 	}
 
+	if (fn_to_prefix(args[ARGS_CHEADER].fn, prefix, sizeof(prefix)) < 0)
+		return 1;
+
 	for (int i = 0; i < _ARGS_COUNT; i++) {
 		args[i].f = fopen(args[i].fn, args[i].mode);
 		if (!args[i].f) {
@@ -157,9 +187,10 @@ int main(int argc, const char *argv[])
 		}
 	}
 
-	fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn);
+	fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn, prefix, prefix);
+
 	ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f,
-		    args[ARGS_CHEADER].f);
+		    args[ARGS_CHEADER].f, prefix);
 
 	if (!ret)
 		fprintf(args[ARGS_CHEADER].f, FOOTER);
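fn_to_prefix() lets one generator serve both rules files: it derives the macro prefix from the output header's basename, so xe_wa_oob.h yields XE_WA_OOB and xe_device_wa_oob.h yields XE_DEVICE_WA_OOB. A trimmed userspace copy of that loop with a usage check (note the copy to a local buffer: POSIX basename() may modify its argument, while the tool above passes the name through a const pointer):

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <libgen.h>
#include <string.h>

/* Upper-case the basename, stopping at the first '.'. */
static int fn_to_prefix(const char *fn, char *prefix, size_t size)
{
	char buf[256];
	size_t len;

	strncpy(buf, fn, sizeof(buf) - 1);
	buf[sizeof(buf) - 1] = '\0';
	fn = basename(buf);
	len = strlen(fn);
	if (len > size - 1)
		return -ENAMETOOLONG;
	memcpy(prefix, fn, len + 1);
	for (char *p = prefix; *p; p++) {
		if (*p == '.') {
			*p = '\0';
			return 0;
		}
		*p = toupper((unsigned char)*p);
	}
	return 0;
}

int main(void)
{
	char prefix[128];

	assert(fn_to_prefix("generated/xe_device_wa_oob.h", prefix, sizeof(prefix)) == 0);
	assert(strcmp(prefix, "XE_DEVICE_WA_OOB") == 0);
	return 0;
}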
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index d0519cd6704a..464282a89eef 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -23,6 +23,7 @@
 #include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_pm.h"
+#include "xe_tile.h"
 
 /*
  * GSC proxy:
@@ -483,7 +484,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc)
 	}
 
 	/* no multi-tile devices with this feature yet */
-	if (tile->id > 0) {
+	if (!xe_tile_is_root(tile)) {
 		xe_gt_err(gt, "unexpected GSC proxy init on tile %u\n", tile->id);
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index d397df056e4c..c8eda36546d3 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -112,7 +112,7 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
 	if (!fw_ref)
 		return;
 
-	if (!xe_gt_is_media_type(gt)) {
+	if (xe_gt_is_main_type(gt)) {
 		reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
 		reg |= CG_DIS_CNTLBUS;
 		xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
@@ -146,30 +146,23 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
 
 static void gt_reset_worker(struct work_struct *w);
 
-static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
+static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb,
+			 long timeout_jiffies)
 {
 	struct xe_sched_job *job;
-	struct xe_bb *bb;
 	struct dma_fence *fence;
 	long timeout;
 
-	bb = xe_bb_new(gt, 4, false);
-	if (IS_ERR(bb))
-		return PTR_ERR(bb);
-
 	job = xe_bb_create_job(q, bb);
-	if (IS_ERR(job)) {
-		xe_bb_free(bb, NULL);
+	if (IS_ERR(job))
 		return PTR_ERR(job);
-	}
 
 	xe_sched_job_arm(job);
 	fence = dma_fence_get(&job->drm.s_fence->finished);
 	xe_sched_job_push(job);
 
-	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	timeout = dma_fence_wait_timeout(fence, false, timeout_jiffies);
 	dma_fence_put(fence);
-	xe_bb_free(bb, NULL);
 	if (timeout < 0)
 		return timeout;
 	else if (!timeout)
@@ -178,27 +171,30 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
 	return 0;
 }
 
+static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
+{
+	struct xe_bb *bb;
+	int ret;
+
+	bb = xe_bb_new(gt, 4, false);
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	ret = emit_job_sync(q, bb, HZ);
+	xe_bb_free(bb, NULL);
+
+	return ret;
+}
+
 static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 {
 	struct xe_reg_sr *sr = &q->hwe->reg_lrc;
 	struct xe_reg_sr_entry *entry;
+	int count_rmw = 0, count = 0, ret;
 	unsigned long idx;
-	struct xe_sched_job *job;
 	struct xe_bb *bb;
-	struct dma_fence *fence;
-	long timeout;
-	int count_rmw = 0;
-	int count = 0;
-
-	if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
-		/* Big enough to emit all of the context's 3DSTATE */
-		bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false);
-	else
-		/* Just pick a large BB size */
-		bb = xe_bb_new(gt, SZ_4K, false);
-
-	if (IS_ERR(bb))
-		return PTR_ERR(bb);
+	size_t bb_len = 0;
+	u32 *cs;
 
 	/* count RMW registers as those will be handled separately */
 	xa_for_each(&sr->xa, idx, entry) {
@@ -208,13 +204,34 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 			++count_rmw;
 	}
 
-	if (count || count_rmw)
-		xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
+	if (count)
+		bb_len += count * 2 + 1;
+
+	if (count_rmw)
+		bb_len += count_rmw * 20 + 7;
+
+	if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
+		/*
+		 * Big enough to emit all of the context's 3DSTATE via
+		 * xe_lrc_emit_hwe_state_instructions()
+		 */
+		bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32);
+
+	xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len);
+
+	bb = xe_bb_new(gt, bb_len, false);
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	cs = bb->cs;
 
 	if (count) {
-		/* emit single LRI with all non RMW regs */
+		/*
+		 * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per
+		 * reg + 1
+		 */
 
-		bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
+		*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
 
 		xa_for_each(&sr->xa, idx, entry) {
 			struct xe_reg reg = entry->reg;
@@ -229,79 +246,68 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 
 			val |= entry->set_bits;
 
-			bb->cs[bb->len++] = reg.addr;
-			bb->cs[bb->len++] = val;
+			*cs++ = reg.addr;
+			*cs++ = val;
 
 			xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
 		}
 	}
 
 	if (count_rmw) {
-		/* emit MI_MATH for each RMW reg */
+		/* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */
 
 		xa_for_each(&sr->xa, idx, entry) {
 			if (entry->reg.masked || entry->clr_bits == ~0)
 				continue;
 
-			bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
-			bb->cs[bb->len++] = entry->reg.addr;
-			bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
-
-			bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
-					    MI_LRI_LRM_CS_MMIO;
-			bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr;
-			bb->cs[bb->len++] = entry->clr_bits;
-			bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr;
-			bb->cs[bb->len++] = entry->set_bits;
-
-			bb->cs[bb->len++] = MI_MATH(8);
-			bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);
-			bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1);
-			bb->cs[bb->len++] = CS_ALU_INSTR_AND;
-			bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);
-			bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);
-			bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2);
-			bb->cs[bb->len++] = CS_ALU_INSTR_OR;
-			bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);
-
-			bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO;
-			bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
-			bb->cs[bb->len++] = entry->reg.addr;
+			*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
+			*cs++ = entry->reg.addr;
+			*cs++ = CS_GPR_REG(0, 0).addr;
+
+			*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
+				MI_LRI_LRM_CS_MMIO;
+			*cs++ = CS_GPR_REG(0, 1).addr;
+			*cs++ = entry->clr_bits;
+			*cs++ = CS_GPR_REG(0, 2).addr;
+			*cs++ = entry->set_bits;
+
+			*cs++ = MI_MATH(8);
+			*cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0);
+			*cs++ = CS_ALU_INSTR_LOADINV(SRCB, REG1);
+			*cs++ = CS_ALU_INSTR_AND;
+			*cs++ = CS_ALU_INSTR_STORE(REG0, ACCU);
+			*cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0);
+			*cs++ = CS_ALU_INSTR_LOAD(SRCB, REG2);
+			*cs++ = CS_ALU_INSTR_OR;
+			*cs++ = CS_ALU_INSTR_STORE(REG0, ACCU);
+
+			*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO;
+			*cs++ = CS_GPR_REG(0, 0).addr;
+			*cs++ = entry->reg.addr;
 
 			xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n",
 				  entry->reg.addr, entry->clr_bits, entry->set_bits);
 		}
 
 		/* reset used GPR */
-		bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | MI_LRI_LRM_CS_MMIO;
-		bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
-		bb->cs[bb->len++] = 0;
-		bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr;
-		bb->cs[bb->len++] = 0;
-		bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr;
-		bb->cs[bb->len++] = 0;
+		*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) |
+			MI_LRI_LRM_CS_MMIO;
+		*cs++ = CS_GPR_REG(0, 0).addr;
+		*cs++ = 0;
+		*cs++ = CS_GPR_REG(0, 1).addr;
+		*cs++ = 0;
+		*cs++ = CS_GPR_REG(0, 2).addr;
+		*cs++ = 0;
 	}
 
-	xe_lrc_emit_hwe_state_instructions(q, bb);
+	cs = xe_lrc_emit_hwe_state_instructions(q, cs);
 
-	job = xe_bb_create_job(q, bb);
-	if (IS_ERR(job)) {
-		xe_bb_free(bb, NULL);
-		return PTR_ERR(job);
-	}
+	bb->len = cs - bb->cs;
 
-	xe_sched_job_arm(job);
-	fence = dma_fence_get(&job->drm.s_fence->finished);
-	xe_sched_job_push(job);
+	ret = emit_job_sync(q, bb, HZ);
 
-	timeout = dma_fence_wait_timeout(fence, false, HZ);
-	dma_fence_put(fence);
 	xe_bb_free(bb, NULL);
-	if (timeout < 0)
-		return timeout;
-	else if (!timeout)
-		return -ETIME;
 
-	return 0;
+	return ret;
 }
 
 int xe_gt_record_default_lrcs(struct xe_gt *gt)
@@ -363,14 +369,6 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 			goto put_nop_q;
 		}
 
-		/* Reload golden LRC to record the effect of any indirect W/A */
-		err = emit_nop_job(gt, q);
-		if (err) {
-			xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n",
-				  hwe->name, ERR_PTR(err), q->guc->id);
-			goto put_nop_q;
-		}
-
 		xe_map_memcpy_from(xe, default_lrc, &q->lrc[0]->bo->vmap,
 				   xe_lrc_pphwsp_offset(q->lrc[0]),
@@ -470,7 +468,7 @@ static int gt_init_with_gt_forcewake(struct xe_gt *gt)
 	xe_gt_mcr_init(gt);
 	xe_gt_enable_host_l2_vram(gt);
 
-	if (!xe_gt_is_media_type(gt)) {
+	if (xe_gt_is_main_type(gt)) {
 		err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
 		if (err)
 			goto err_force_wake;
@@ -547,7 +545,7 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt)
 	if (err)
 		goto err_force_wake;
 
-	if (!xe_gt_is_media_type(gt)) {
+	if (xe_gt_is_main_type(gt)) {
 		/*
 		 * USM has its only SA pool to non-block behind user operations
 		 */
@@ -563,7 +561,7 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt)
 		}
 	}
 
-	if (!xe_gt_is_media_type(gt)) {
+	if (xe_gt_is_main_type(gt)) {
 		struct xe_tile *tile = gt_to_tile(gt);
 
 		tile->migrate = xe_migrate_init(tile);
@@ -583,7 +581,7 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt)
 		xe_gt_apply_ccs_mode(gt);
 	}
 
-	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
+	if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt))
 		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
 
 	if (IS_SRIOV_PF(gt_to_xe(gt))) {
@@ -628,15 +626,15 @@ int xe_gt_init(struct xe_gt *gt)
 	if (err)
 		return err;
 
-	err = xe_gt_pagefault_init(gt);
+	err = xe_gt_sysfs_init(gt);
 	if (err)
 		return err;
 
-	err = xe_gt_sysfs_init(gt);
+	err = gt_init_with_gt_forcewake(gt);
 	if (err)
 		return err;
 
-	err = gt_init_with_gt_forcewake(gt);
+	err = xe_gt_pagefault_init(gt);
 	if (err)
 		return err;
 
@@ -780,7 +778,7 @@ static int do_gt_restart(struct xe_gt *gt)
 	if (err)
 		return err;
 
-	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
+	if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt))
 		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
 
 	if (IS_SRIOV_PF(gt_to_xe(gt)))
@@ -835,6 +833,9 @@ static int gt_reset(struct xe_gt *gt)
 		goto err_out;
 	}
 
+	if (IS_SRIOV_PF(gt_to_xe(gt)))
+		xe_gt_sriov_pf_stop_prepare(gt);
+
 	xe_uc_gucrc_disable(&gt->uc);
 	xe_uc_stop_prepare(&gt->uc);
 	xe_gt_pagefault_reset(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 8128ddfdd788..e9ccab8aedbe 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -106,6 +106,11 @@ static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt)
 	       xe_device_uc_enabled(gt_to_xe(gt));
 }
 
+static inline bool xe_gt_is_main_type(struct xe_gt *gt)
+{
+	return gt->info.type == XE_GT_TYPE_MAIN;
+}
+
 static inline bool xe_gt_is_media_type(struct xe_gt *gt)
 {
 	return gt->info.type == XE_GT_TYPE_MEDIA;
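The reworked emit_wa_job() above sizes the batch exactly instead of guessing: each plain register costs 2 dwords inside a single LRI (plus the 1-dword LRI header), and each RMW register costs 20 dwords of LRR/LRI/MI_MATH plus a shared 7-dword GPR-reset tail, as stated in the new comments. A small sketch of just that accounting (the MI_* encodings themselves are omitted; the counts are taken from the comments above):

#include <assert.h>
#include <stddef.h>

/* Batch-buffer length, in dwords, for the LRC WA job built above. */
static size_t wa_bb_dwords(size_t count, size_t count_rmw, size_t lrc_dwords)
{
	size_t len = 0;

	if (count)
		len += count * 2 + 1;		/* LRI header + addr/value pairs */
	if (count_rmw)
		len += count_rmw * 20 + 7;	/* MI_MATH blocks + GPR reset tail */
	len += lrc_dwords;			/* room for 3DSTATE on render */
	return len;
}

int main(void)
{
	assert(wa_bb_dwords(3, 0, 0) == 7);	/* 1 + 3 * 2 */
	assert(wa_bb_dwords(0, 2, 0) == 47);	/* 2 * 20 + 7 */
	return 0;
}

Writing through a running cs pointer and computing bb->len = cs - bb->cs at the end also removes the repeated bb->cs[bb->len++] bookkeeping, so a mismatch between the reserved size and the emitted dwords becomes easy to spot in the debug print.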
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index c11206410a4d..ffb210216aa9 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -121,7 +121,7 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
 	if (vcs_mask || vecs_mask)
 		gtidle->powergate_enable = MEDIA_POWERGATE_ENABLE;
 
-	if (!xe_gt_is_media_type(gt))
+	if (xe_gt_is_main_type(gt))
 		gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE;
 
 	if (xe->info.platform != XE_DG1) {
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index c08efca6420e..35489fa81825 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -172,6 +172,25 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid)
 	pf_clear_vf_scratch_regs(gt, vfid);
 }
 
+static void pf_cancel_restart(struct xe_gt *gt)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+	if (cancel_work_sync(&gt->sriov.pf.workers.restart))
+		xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n");
+}
+
+/**
+ * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support.
+ * @gt: the &xe_gt
+ *
+ * This function can only be called on the PF.
+ */
+void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
+{
+	pf_cancel_restart(gt);
+}
+
 static void pf_restart(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
index f474509411c0..e2b2ff8132dc 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -13,6 +13,7 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
 int xe_gt_sriov_pf_init(struct xe_gt *gt);
 void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
 void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid);
+void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt);
 void xe_gt_sriov_pf_restart(struct xe_gt *gt);
 #else
 static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
@@ -29,6 +30,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
 {
 }
 
+static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
+{
+}
+
 static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt)
 {
 }
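The xe_gt_sriov_pf_config.c hunks that follow split the GGTT KLV encoding into a low-level encode_ggtt(), which packs each 64-bit value as a tag plus lower/upper 32-bit halves into the GuC's dword stream, and a thin encode_config_ggtt() wrapper for allocated regions. A standalone sketch of that packing and its round-trip (the tag constants here are illustrative stand-ins for PREP_GUC_KLV_TAG(), not the real KLV keys):

#include <assert.h>
#include <stdint.h>

#define TAG_GGTT_START 0x01	/* illustrative tag */
#define TAG_GGTT_SIZE  0x02	/* illustrative tag */

static uint32_t lower_32(uint64_t v) { return (uint32_t)v; }
static uint32_t upper_32(uint64_t v) { return (uint32_t)(v >> 32); }

/* Encode start/size as tag + lo + hi triplets; returns dwords written. */
static unsigned int encode_ggtt(uint32_t *cfg, uint64_t start, uint64_t size)
{
	unsigned int n = 0;

	cfg[n++] = TAG_GGTT_START;
	cfg[n++] = lower_32(start);
	cfg[n++] = upper_32(start);
	cfg[n++] = TAG_GGTT_SIZE;
	cfg[n++] = lower_32(size);
	cfg[n++] = upper_32(size);
	return n;
}

int main(void)
{
	uint32_t cfg[6];
	uint64_t start = 0x1234abcd0000ull;

	assert(encode_ggtt(cfg, start, 1ull << 32) == 6);
	assert(((uint64_t)cfg[2] << 32 | cfg[1]) == start);	/* round-trip */
	return 0;
}

Separating the raw encoder from the config-struct wrapper is what lets pf_push_full_vf_config() below emit a "fake" PF GGTT entry covering the whole usable range, even though the PF config never owns a real GGTT node.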
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index d0cf1d80be07..494909f74eb2 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -238,26 +238,35 @@ static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned i
 }
 
 /* Return: number of configuration dwords written */
-static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details)
+static u32 encode_ggtt(u32 *cfg, u64 start, u64 size, bool details)
 {
 	u32 n = 0;
 
-	if (xe_ggtt_node_allocated(config->ggtt_region)) {
-		if (details) {
-			cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START);
-			cfg[n++] = lower_32_bits(config->ggtt_region->base.start);
-			cfg[n++] = upper_32_bits(config->ggtt_region->base.start);
-		}
-
-		cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE);
-		cfg[n++] = lower_32_bits(config->ggtt_region->base.size);
-		cfg[n++] = upper_32_bits(config->ggtt_region->base.size);
+	if (details) {
+		cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START);
+		cfg[n++] = lower_32_bits(start);
+		cfg[n++] = upper_32_bits(start);
 	}
 
+	cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE);
+	cfg[n++] = lower_32_bits(size);
+	cfg[n++] = upper_32_bits(size);
+
 	return n;
 }
 
 /* Return: number of configuration dwords written */
+static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details)
+{
+	struct xe_ggtt_node *node = config->ggtt_region;
+
+	if (!xe_ggtt_node_allocated(node))
+		return 0;
+
+	return encode_ggtt(cfg, node->base.start, node->base.size, details);
+}
+
+/* Return: number of configuration dwords written */
 static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details)
 {
 	u32 n = 0;
@@ -332,6 +341,17 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid)
 	}
 	xe_gt_assert(gt, num_dwords <= max_cfg_dwords);
 
+	if (vfid == PFID) {
+		u64 ggtt_start = xe_wopcm_size(gt_to_xe(gt));
+		u64 ggtt_size = gt_to_tile(gt)->mem.ggtt->size - ggtt_start;
+
+		/* plain PF config data will never include a real GGTT region */
+		xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config, true));
+
+		/* fake PF GGTT config covers full GGTT range except reserved WOPCM */
+		num_dwords += encode_ggtt(cfg + num_dwords, ggtt_start, ggtt_size, true);
+	}
+
 	num_klvs = xe_guc_klv_count(cfg, num_dwords);
 	err = pf_push_vf_buf_klvs(gt, vfid, num_klvs, buf, num_dwords);
 
@@ -376,7 +396,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt)
 {
 	u64 spare;
 
-	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, xe_gt_is_main_type(gt));
 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
 	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
 
@@ -388,7 +408,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt)
 
 static int pf_set_spare_ggtt(struct xe_gt *gt, u64 size)
 {
-	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, xe_gt_is_main_type(gt));
 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
 	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
 
@@ -443,7 +463,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size)
 	int err;
 
 	xe_gt_assert(gt, vfid);
-	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, xe_gt_is_main_type(gt));
 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
 
 	size = round_up(size, alignment);
@@ -492,7 +512,7 @@ static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid)
 	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
 	struct xe_ggtt_node *node = config->ggtt_region;
 
-	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, xe_gt_is_main_type(gt));
 	return xe_ggtt_node_allocated(node) ? node->base.size : 0;
 }
 
@@ -560,7 +580,7 @@ int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size
 {
 	int err;
 
-	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, xe_gt_is_main_type(gt));
 
 	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
 	if (vfid)
@@ -622,7 +642,7 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid,
 	int err = 0;
 
 	xe_gt_assert(gt, vfid);
-	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, xe_gt_is_main_type(gt));
 
 	if (!num_vfs)
 		return 0;
@@ -693,7 +713,7 @@ int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid,
 
 	xe_gt_assert(gt, vfid);
 	xe_gt_assert(gt, num_vfs);
-	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, xe_gt_is_main_type(gt));
 
 	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
 	fair = pf_estimate_fair_ggtt(gt, num_vfs);
@@ -1327,7 +1347,17 @@ static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 si
 
 static void pf_force_lmtt_invalidate(struct xe_device *xe)
 {
-	/* TODO */
+	struct xe_lmtt *lmtt;
+	struct xe_tile *tile;
+	unsigned int tid;
+
+	xe_assert(xe, xe_device_has_lmtt(xe));
+	xe_assert(xe, IS_SRIOV_PF(xe));
+
+	for_each_tile(tile, xe, tid) {
+		lmtt = &tile->sriov.pf.lmtt;
+		xe_lmtt_invalidate_hw(lmtt);
+	}
 }
 
 static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid)
@@ -1406,7 +1436,7 @@ fail:
 static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config)
 {
 	xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt)));
-	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, xe_gt_is_main_type(gt));
 	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
 
 	if (config->lmem_obj) {
@@ -1425,7 +1455,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
 
 	xe_gt_assert(gt, vfid);
 	xe_gt_assert(gt, IS_DGFX(xe));
-	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, xe_gt_is_main_type(gt));
 
 	size = round_up(size, pf_get_lmem_alignment(gt));
@@ -1552,7 +1582,7 @@ int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid,
 	int err = 0;
 
 	xe_gt_assert(gt, vfid);
-	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, xe_gt_is_main_type(gt));
 
 	if (!num_vfs)
 		return 0;
@@ -1629,7 +1659,7 @@ int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid,
 
 	xe_gt_assert(gt, vfid);
 	xe_gt_assert(gt, num_vfs);
-	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, xe_gt_is_main_type(gt));
 
 	if (!xe_device_has_lmtt(gt_to_xe(gt)))
 		return 0;
@@ -1663,7 +1693,7 @@ int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid,
 	xe_gt_assert(gt, vfid);
 	xe_gt_assert(gt, num_vfs);
 
-	if (!xe_gt_is_media_type(gt)) {
+	if (xe_gt_is_main_type(gt)) {
 		err = xe_gt_sriov_pf_config_set_fair_ggtt(gt, vfid, num_vfs);
 		result = result ?: err;
 		err = xe_gt_sriov_pf_config_set_fair_lmem(gt, vfid, num_vfs);
@@ -1991,7 +2021,7 @@ static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid)
 	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
 	struct xe_device *xe = gt_to_xe(gt);
 
-	if (!xe_gt_is_media_type(gt)) {
+	if (xe_gt_is_main_type(gt)) {
 		pf_release_vf_config_ggtt(gt, config);
 		if (IS_DGFX(xe)) {
 			pf_release_vf_config_lmem(gt, config);
@@ -2082,7 +2112,7 @@ static int pf_sanitize_vf_resources(struct xe_gt *gt, u32 vfid, long timeout)
 	 * Only GGTT and LMEM requires to be cleared by the PF.
	 * GuC doorbell IDs and context IDs do not need any clearing.
 	 */
-	if (!xe_gt_is_media_type(gt)) {
+	if (xe_gt_is_main_type(gt)) {
 		pf_sanitize_ggtt(config->ggtt_region, vfid);
 		if (IS_DGFX(xe))
 			err = pf_sanitize_lmem(tile, config->lmem_obj, timeout);
@@ -2149,7 +2179,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid)
 {
 	struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt;
 	struct xe_device *xe = gt_to_xe(gt);
-	bool is_primary = !xe_gt_is_media_type(gt);
+	bool is_primary = xe_gt_is_main_type(gt);
 	bool valid_ggtt, valid_ctxs, valid_dbs;
 	bool valid_any, valid_all;
 
@@ -2366,6 +2396,35 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
 	return err;
 }
 
+static void pf_prepare_self_config(struct xe_gt *gt)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, PFID);
+
+	/*
+	 * We want PF to be allowed to use all of context ID, doorbells IDs
+	 * and whole usable GGTT area. While we can store ctxs/dbs numbers
+	 * directly in the config structure, can't do the same with the GGTT
+	 * configuration, so let it be prepared on demand while pushing KLVs.
+	 */
+	config->num_ctxs = GUC_ID_MAX;
+	config->num_dbs = GUC_NUM_DOORBELLS;
+}
+
+static int pf_push_self_config(struct xe_gt *gt)
+{
+	int err;
+
+	err = pf_push_full_vf_config(gt, PFID);
+	if (err) {
+		xe_gt_sriov_err(gt, "Failed to push self configuration (%pe)\n",
+				ERR_PTR(err));
+		return err;
+	}
+
+	xe_gt_sriov_dbg_verbose(gt, "self configuration completed\n");
+	return 0;
+}
+
 static void fini_config(void *arg)
 {
 	struct xe_gt *gt = arg;
@@ -2389,9 +2448,18 @@ static void fini_config(void *arg)
 int xe_gt_sriov_pf_config_init(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
+	int err;
 
 	xe_gt_assert(gt, IS_SRIOV_PF(xe));
 
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	pf_prepare_self_config(gt);
+	err = pf_push_self_config(gt);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	if (err)
+		return err;
+
 	return devm_add_action_or_reset(xe->drm.dev, fini_config, gt);
 }
 
@@ -2409,6 +2477,10 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt)
 	unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
 	unsigned int fail = 0, skip = 0;
 
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	pf_push_self_config(gt);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
 	for (n = 1; n <= total_vfs; n++) {
 		if (xe_gt_sriov_pf_config_is_empty(gt, n))
 			skip++;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
index 1f50aec3a059..4f7fff892bc0 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
@@ -15,10 +15,11 @@
 #include "xe_gt_sriov_pf_helpers.h"
 #include "xe_gt_sriov_pf_migration.h"
 #include "xe_gt_sriov_pf_monitor.h"
-#include "xe_gt_sriov_pf_service.h"
 #include "xe_gt_sriov_printk.h"
 #include "xe_guc_ct.h"
 #include "xe_sriov.h"
+#include "xe_sriov_pf_service.h"
+#include "xe_tile.h"
 
 static const char *control_cmd_to_string(u32 cmd)
 {
@@ -1064,7 +1065,9 @@ static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
 	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
 		return false;
 
-	xe_gt_sriov_pf_service_reset(gt, vfid);
+	if (xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt))
+		xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid);
+
 	xe_gt_sriov_pf_monitor_flr(gt, vfid);
 
 	pf_enter_vf_flr_reset_mmio(gt, vfid);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
index 13970d5a2867..bf679b21f485 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
@@ -78,11 +78,6 @@ static const struct drm_info_list pf_info[] = {
 		.data = xe_gt_sriov_pf_service_print_runtime,
 	},
 	{
-		"negotiated_versions",
-		.show = xe_gt_debugfs_simple_show,
-		.data = xe_gt_sriov_pf_service_print_version,
-	},
-	{
 		"adverse_events",
 		.show = xe_gt_debugfs_simple_show,
 		.data = xe_gt_sriov_pf_monitor_print_events,
@@ -305,7 +300,7 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne
 	xe_gt_assert(gt, gt == extract_gt(parent));
 	xe_gt_assert(gt, vfid == extract_vfid(parent));
 
-	if (!xe_gt_is_media_type(gt)) {
+	if (xe_gt_is_main_type(gt)) {
 		debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare",
 					   0644, parent, parent, &ggtt_fops);
 		if (xe_device_has_lmtt(gt_to_xe(gt)))
@@ -554,7 +549,7 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root)
 	pfdentry->d_inode->i_private = gt;
 
 	drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor);
-	if (!xe_gt_is_media_type(gt)) {
+	if (xe_gt_is_main_type(gt)) {
 		drm_debugfs_create_files(pf_ggtt_info,
 					 ARRAY_SIZE(pf_ggtt_info),
 					 pfdentry, minor);
*/ -static int pf_negotiate_version(struct xe_gt *gt, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - struct xe_gt_sriov_pf_service_version base = gt->sriov.pf.service.version.base; - struct xe_gt_sriov_pf_service_version latest = gt->sriov.pf.service.version.latest; - - xe_gt_assert(gt, base.major); - xe_gt_assert(gt, base.major <= latest.major); - xe_gt_assert(gt, (base.major < latest.major) || (base.minor <= latest.minor)); - - /* VF doesn't care - return our latest */ - if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && - wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants newer than our - return our latest */ - if (wanted_major > latest.major) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants older than min required - reject */ - if (wanted_major < base.major || - (wanted_major == base.major && wanted_minor < base.minor)) { - return -EPERM; - } - - /* previous major - return wanted, as we should still support it */ - if (wanted_major < latest.major) { - /* XXX: we are not prepared for multi-versions yet */ - xe_gt_assert(gt, base.major == latest.major); - return -ENOPKG; - } - - /* same major - return common minor */ - *major = wanted_major; - *minor = min_t(u32, latest.minor, wanted_minor); - return 0; -} - -static void pf_connect(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - xe_gt_assert(gt, major || minor); - - gt->sriov.pf.vfs[vfid].version.major = major; - gt->sriov.pf.vfs[vfid].version.minor = minor; -} - -static void pf_disconnect(struct xe_gt *gt, u32 vfid) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - - gt->sriov.pf.vfs[vfid].version.major = 0; - gt->sriov.pf.vfs[vfid].version.minor = 0; -} - -static bool pf_is_negotiated(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - - return major == gt->sriov.pf.vfs[vfid].version.major && - minor <= gt->sriov.pf.vfs[vfid].version.minor; -} +#include "xe_sriov_pf_service.h" static const struct xe_reg tgl_runtime_regs[] = { RPM_CONFIG0, /* _MMIO(0x0d00) */ @@ -285,8 +201,6 @@ int xe_gt_sriov_pf_service_init(struct xe_gt *gt) { int err; - pf_init_versions(gt); - err = pf_alloc_runtime_info(gt); if (unlikely(err)) goto failed; @@ -311,47 +225,6 @@ void xe_gt_sriov_pf_service_update(struct xe_gt *gt) pf_prepare_runtime_info(gt); } -/** - * xe_gt_sriov_pf_service_reset - Reset a connection with the VF. - * @gt: the &xe_gt - * @vfid: the VF identifier - * - * Reset a VF driver negotiated VF/PF ABI version. - * After that point, the VF driver will have to perform new version handshake - * to continue use of the PF services again. - * - * This function can only be called on PF. - */ -void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid) -{ - pf_disconnect(gt, vfid); -} - -/* Return: 0 on success or a negative error code on failure. 
*/ -static int pf_process_handshake(struct xe_gt *gt, u32 vfid, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - int err; - - xe_gt_sriov_dbg_verbose(gt, "VF%u wants ABI version %u.%u\n", - vfid, wanted_major, wanted_minor); - - err = pf_negotiate_version(gt, wanted_major, wanted_minor, major, minor); - - if (err < 0) { - xe_gt_sriov_notice(gt, "VF%u failed to negotiate ABI %u.%u (%pe)\n", - vfid, wanted_major, wanted_minor, ERR_PTR(err)); - pf_disconnect(gt, vfid); - } else { - xe_gt_sriov_dbg(gt, "VF%u negotiated ABI version %u.%u\n", - vfid, *major, *minor); - pf_connect(gt, vfid, *major, *minor); - } - - return 0; -} - /* Return: length of the response message or a negative error code on failure. */ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, const u32 *request, u32 len, u32 *response, u32 size) @@ -371,7 +244,8 @@ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, wanted_major = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, request[1]); wanted_minor = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, request[1]); - err = pf_process_handshake(gt, origin, wanted_major, wanted_minor, &major, &minor); + err = xe_sriov_pf_service_handshake_vf(gt_to_xe(gt), origin, wanted_major, wanted_minor, + &major, &minor); if (err < 0) return err; @@ -430,8 +304,10 @@ static int pf_process_runtime_query_msg(struct xe_gt *gt, u32 origin, u32 remaining = 0; int ret; - if (!pf_is_negotiated(gt, origin, 1, 0)) + /* this action is available from ABI 1.0 */ + if (!xe_sriov_pf_service_is_negotiated(gt_to_xe(gt), origin, 1, 0)) return -EACCES; + if (unlikely(msg_len > VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) return -EMSGSIZE; if (unlikely(msg_len < VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) @@ -528,33 +404,3 @@ int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p return 0; } - -/** - * xe_gt_sriov_pf_service_print_version - Print ABI versions negotiated with VFs. - * @gt: the &xe_gt - * @p: the &drm_printer - * - * This function is for PF use only. 
- */
-int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p)
-{
-	struct xe_device *xe = gt_to_xe(gt);
-	unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe);
-	struct xe_gt_sriov_pf_service_version *version;
-
-	xe_gt_assert(gt, IS_SRIOV_PF(xe));
-
-	for (n = 1; n <= total_vfs; n++) {
-		version = &gt->sriov.pf.vfs[n].version;
-		if (!version->major && !version->minor)
-			continue;
-
-		drm_printf(p, "VF%u:\t%u.%u\n", n, version->major, version->minor);
-	}
-
-	return 0;
-}
-
-#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
-#include "tests/xe_gt_sriov_pf_service_test.c"
-#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h
index 56aaadf0360d..10b02c9b651c 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h
@@ -14,9 +14,7 @@ struct xe_gt;
 
 int xe_gt_sriov_pf_service_init(struct xe_gt *gt);
 void xe_gt_sriov_pf_service_update(struct xe_gt *gt);
-void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid);
 
-int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p);
 int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p);
 
 #ifdef CONFIG_PCI_IOV
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 93cd26dca070..b282838d59e6 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -552,7 +552,7 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt)
 	if (unlikely(err))
 		return err;
 
-	if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) {
+	if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) {
 		err = vf_get_lmem_info(gt);
 		if (unlikely(err))
 			return err;
@@ -649,7 +649,7 @@ s64 xe_gt_sriov_vf_ggtt_shift(struct xe_gt *gt)
 	struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
 
 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
-	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, xe_gt_is_main_type(gt));
 
 	return config->ggtt_shift;
 }
@@ -686,21 +686,22 @@ static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor)
 	return 0;
 }
 
-static void vf_connect_pf(struct xe_gt *gt, u16 major, u16 minor)
+static void vf_connect_pf(struct xe_device *xe, u16 major, u16 minor)
 {
-	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+	xe_assert(xe, IS_SRIOV_VF(xe));
 
-	gt->sriov.vf.pf_version.major = major;
-	gt->sriov.vf.pf_version.minor = minor;
+	xe->sriov.vf.pf_version.major = major;
+	xe->sriov.vf.pf_version.minor = minor;
 }
 
-static void vf_disconnect_pf(struct xe_gt *gt)
+static void vf_disconnect_pf(struct xe_device *xe)
 {
-	vf_connect_pf(gt, 0, 0);
+	vf_connect_pf(xe, 0, 0);
 }
 
 static int vf_handshake_with_pf(struct xe_gt *gt)
 {
+	struct xe_device *xe = gt_to_xe(gt);
 	u32 major_wanted = GUC_RELAY_VERSION_LATEST_MAJOR;
 	u32 minor_wanted = GUC_RELAY_VERSION_LATEST_MINOR;
 	u32 major = major_wanted, minor = minor_wanted;
@@ -716,13 +717,13 @@ static int vf_handshake_with_pf(struct xe_gt *gt)
 	}
 
 	xe_gt_sriov_dbg(gt, "using VF/PF ABI %u.%u\n", major, minor);
-	vf_connect_pf(gt, major, minor);
+	vf_connect_pf(xe, major, minor);
 	return 0;
 
 failed:
 	xe_gt_sriov_err(gt, "Unable to confirm VF/PF ABI version %u.%u (%pe)\n",
 			major, minor, ERR_PTR(err));
-	vf_disconnect_pf(gt);
+	vf_disconnect_pf(xe);
 	return err;
 }
 
@@ -775,10 +776,12 @@ void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt)
 
 static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor)
 {
-	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+	struct xe_device *xe = gt_to_xe(gt);
 
-	return major
== gt->sriov.vf.pf_version.major &&
-	       minor <= gt->sriov.vf.pf_version.minor;
+	xe_gt_assert(gt, IS_SRIOV_VF(xe));
+
+	return major == xe->sriov.vf.pf_version.major &&
+	       minor <= xe->sriov.vf.pf_version.minor;
 }
 
 static int vf_prepare_runtime_info(struct xe_gt *gt, unsigned int num_regs)
@@ -1036,7 +1039,7 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p)
 
 	drm_printf(p, "GGTT shift on last restore:\t%lld\n", config->ggtt_shift);
 
-	if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) {
+	if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) {
 		string_get_size(config->lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf));
 		drm_printf(p, "LMEM size:\t%llu (%s)\n", config->lmem_size, buf);
 	}
@@ -1072,9 +1075,10 @@ void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p)
 */
 void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p)
 {
+	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_uc_fw_version *guc_version = &gt->sriov.vf.guc_version;
 	struct xe_uc_fw_version *wanted = &gt->sriov.vf.wanted_guc_version;
-	struct xe_gt_sriov_vf_relay_version *pf_version = &gt->sriov.vf.pf_version;
+	struct xe_sriov_vf_relay_version *pf_version = &xe->sriov.vf.pf_version;
 	struct xe_uc_fw_version ver;
 
 	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
index ef041679e9d4..298dedf4b009 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
@@ -10,16 +10,6 @@
 #include "xe_uc_fw_types.h"
 
 /**
- * struct xe_gt_sriov_vf_relay_version - PF ABI version details.
- */
-struct xe_gt_sriov_vf_relay_version {
-	/** @major: major version. */
-	u16 major;
-	/** @minor: minor version. */
-	u16 minor;
-};
-
-/**
 * struct xe_gt_sriov_vf_selfconfig - VF configuration data.
 */
 struct xe_gt_sriov_vf_selfconfig {
@@ -66,8 +56,6 @@ struct xe_gt_sriov_vf {
 	struct xe_uc_fw_version guc_version;
 	/** @self_config: resource configurations. */
 	struct xe_gt_sriov_vf_selfconfig self_config;
-	/** @pf_version: negotiated VF/PF ABI version. */
-	struct xe_gt_sriov_vf_relay_version pf_version;
 	/** @runtime: runtime data retrieved from the PF. */
 	struct xe_gt_sriov_vf_runtime runtime;
 };
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 6088df8e159c..086c12ee3d9d 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -330,6 +330,40 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
 	return 0;
 }
 
+static int send_tlb_invalidation_all(struct xe_gt *gt,
+				     struct xe_gt_tlb_invalidation_fence *fence)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_TLB_INVALIDATION_ALL,
+		0,  /* seqno, replaced in send_tlb_invalidation */
+		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL),
+	};
+
+	return send_tlb_invalidation(&gt->uc.guc, fence, action, ARRAY_SIZE(action));
+}
+
+/**
+ * xe_gt_tlb_invalidation_all - Invalidate all TLBs across PF and all VFs.
+ * @gt: the &xe_gt structure
+ * @fence: the &xe_gt_tlb_invalidation_fence to be signaled on completion
+ *
+ * Send a request to invalidate all TLBs across PF and all VFs.
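+ *
+ * A minimal usage sketch (error handling elided), matching how
+ * lmtt_invalidate_hw() in xe_lmtt.c below drives this for each GT:
+ *
+ *	struct xe_gt_tlb_invalidation_fence fence;
+ *
+ *	xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
+ *	if (!xe_gt_tlb_invalidation_all(gt, &fence))
+ *		xe_gt_tlb_invalidation_fence_wait(&fence);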
+ *
+ * Return: 0 on success, negative error code on error
+ */
+int xe_gt_tlb_invalidation_all(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence)
+{
+	int err;
+
+	xe_gt_assert(gt, gt == fence->gt);
+
+	err = send_tlb_invalidation_all(gt, fence);
+	if (err)
+		xe_gt_err(gt, "TLB invalidation request failed (%pe)", ERR_PTR(err));
+
+	return err;
+}
+
 /*
  * Ensure that roundup_pow_of_two(length) doesn't overflow.
  * Note that roundup_pow_of_two() operates on unsigned long,
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index 31072dbcad8e..f7f0f2eaf4b5 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -20,6 +20,7 @@ int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt);
 void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
 int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
 void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm);
+int xe_gt_tlb_invalidation_all(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence);
 int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
 				 struct xe_gt_tlb_invalidation_fence *fence,
 				 u64 start, u64 end, u32 asid);
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 305939c69747..8c63e3263643 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -290,11 +290,6 @@ xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
 	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
 }
 
-bool xe_dss_mask_empty(const xe_dss_mask_t mask)
-{
-	return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
-}
-
 /**
 * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
 * @gt: GT to check
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index a72d26ba0653..c8140704ad4c 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -41,8 +41,6 @@ xe_gt_topology_mask_last_dss(const xe_dss_mask_t mask)
 unsigned int
 xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum);
 
-bool xe_dss_mask_empty(const xe_dss_mask_t mask);
-
 bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
 
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index c43e62dc692e..5df5b8c2a3e4 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -23,6 +23,7 @@
 #include "xe_mmio.h"
 #include "xe_pxp.h"
 #include "xe_sriov.h"
+#include "xe_tile.h"
 
 /*
  * Interrupt registers for a unit are always consecutive and ordered
@@ -161,7 +162,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
 	dmask = irqs << 16 | irqs;
 	smask = irqs << 16;
 
-	if (!xe_gt_is_media_type(gt)) {
+	if (xe_gt_is_main_type(gt)) {
 		/* Enable interrupts for each engine class */
 		xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask);
 		if (ccs_mask)
@@ -261,7 +262,7 @@ gt_engine_identity(struct xe_device *xe,
 static void
 gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
 {
-	if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt))
+	if (instance == OTHER_GUC_INSTANCE && xe_gt_is_main_type(gt))
 		return xe_guc_irq_handler(&gt->uc.guc, iir);
 	if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt))
 		return xe_guc_irq_handler(&gt->uc.guc, iir);
@@ -552,7 +553,7 @@ static void xelp_irq_reset(struct xe_tile *tile)
 
 static void dg1_irq_reset(struct xe_tile *tile)
 {
-	if (tile->id == 0)
+	if (xe_tile_is_root(tile))
		dg1_intr_disable(tile_to_xe(tile));
 
 	gt_irq_reset(tile);
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 381f576036d0..a2000307d5bf 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -11,6 +11,7 @@
 
 #include "xe_assert.h"
 #include "xe_bo.h"
+#include "xe_gt_tlb_invalidation.h"
 #include "xe_lmtt.h"
 #include "xe_map.h"
 #include "xe_mmio.h"
@@ -222,6 +223,58 @@ void xe_lmtt_init_hw(struct xe_lmtt *lmtt)
 	lmtt_setup_dir_ptr(lmtt);
 }
 
+static int lmtt_invalidate_hw(struct xe_lmtt *lmtt)
+{
+	struct xe_gt_tlb_invalidation_fence fences[XE_MAX_GT_PER_TILE];
+	struct xe_gt_tlb_invalidation_fence *fence = fences;
+	struct xe_tile *tile = lmtt_to_tile(lmtt);
+	struct xe_gt *gt;
+	int result = 0;
+	int err;
+	u8 id;
+
+	for_each_gt_on_tile(gt, tile, id) {
+		xe_gt_tlb_invalidation_fence_init(gt, fence, true);
+		err = xe_gt_tlb_invalidation_all(gt, fence);
+		result = result ?: err;
+		fence++;
+	}
+
+	lmtt_debug(lmtt, "num_fences=%d err=%d\n", (int)(fence - fences), result);
+
+	/*
+	 * It is fine to wait for all fences, even for those covering an
+	 * invalidation request that failed, as such a fence should already
+	 * be marked as signaled.
+	 */
+	fence = fences;
+	for_each_gt_on_tile(gt, tile, id)
+		xe_gt_tlb_invalidation_fence_wait(fence++);
+
+	return result;
+}
+
+/**
+ * xe_lmtt_invalidate_hw - Invalidate LMTT hardware.
+ * @lmtt: the &xe_lmtt to invalidate
+ *
+ * Send requests to all GuCs on this tile to invalidate all TLBs.
+ *
+ * This function should be called only when running as a PF driver.
+ */
+void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt)
+{
+	struct xe_device *xe = lmtt_to_xe(lmtt);
+	int err;
+
+	lmtt_assert(lmtt, IS_SRIOV_PF(xe));
+
+	err = lmtt_invalidate_hw(lmtt);
+	if (err)
+		xe_sriov_warn(xe, "LMTT%u invalidation failed (%pe)",
+			      lmtt_to_tile(lmtt)->id, ERR_PTR(err));
+}
+
 static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt,
 			   u64 pte, unsigned int idx)
 {
@@ -276,6 +329,7 @@ static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid)
 		return;
 
 	lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid);
+	lmtt_invalidate_hw(lmtt);
 
 	lmtt_assert(lmtt, pd->level > 0);
 	lmtt_assert(lmtt, pt->level == pd->level - 1);
diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h
index cb10ef994db6..75a234fbf367 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.h
+++ b/drivers/gpu/drm/xe/xe_lmtt.h
@@ -15,6 +15,7 @@ struct xe_lmtt_ops;
 #ifdef CONFIG_PCI_IOV
 int xe_lmtt_init(struct xe_lmtt *lmtt);
 void xe_lmtt_init_hw(struct xe_lmtt *lmtt);
+void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt);
 int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range);
 int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset);
 void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid);
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index d2ad8fe737eb..6d38411bdeba 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -39,15 +39,46 @@
 #define LRC_ENGINE_INSTANCE	GENMASK_ULL(53, 48)
 
 #define LRC_PPHWSP_SIZE			SZ_4K
+#define LRC_INDIRECT_CTX_BO_SIZE	SZ_4K
 #define LRC_INDIRECT_RING_STATE_SIZE	SZ_4K
 #define LRC_WA_BB_SIZE			SZ_4K
 
+/*
+ * Layout of the LRC and associated data allocated as
+ * lrc->bo:
+ *
+ * Region                       Size
+ * +============================+=================================+ <- __xe_lrc_ring_offset()
+ * | Ring                       | ring_size, see                  |
+ * |                            | xe_lrc_init()                   |
+ * +============================+=================================+ <- 
__xe_lrc_pphwsp_offset() + * | PPHWSP (includes SW state) | 4K | + * +----------------------------+---------------------------------+ <- __xe_lrc_regs_offset() + * | Engine Context Image | n * 4K, see | + * | | xe_gt_lrc_size() | + * +----------------------------+---------------------------------+ <- __xe_lrc_indirect_ring_offset() + * | Indirect Ring State Page | 0 or 4k, see | + * | | XE_LRC_FLAG_INDIRECT_RING_STATE | + * +============================+=================================+ <- __xe_lrc_indirect_ctx_offset() + * | Indirect Context Page | 0 or 4k, see | + * | | XE_LRC_FLAG_INDIRECT_CTX | + * +============================+=================================+ <- __xe_lrc_wa_bb_offset() + * | WA BB Per Ctx | 4k | + * +============================+=================================+ <- xe_bo_size(lrc->bo) + */ + static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) { return gt_to_xe(lrc->fence_ctx.gt); } +static bool +gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) +{ + return false; +} + size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) { struct xe_device *xe = gt_to_xe(gt); @@ -582,8 +613,6 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) if (xe_gt_has_indirect_ring_state(hwe->gt)) regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); - - /* TODO: Timestamp */ } static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) @@ -717,7 +746,18 @@ static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { - return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_RING_STATE_SIZE; + u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - + LRC_INDIRECT_RING_STATE_SIZE; + + if (lrc->flags & XE_LRC_FLAG_INDIRECT_CTX) + offset -= LRC_INDIRECT_CTX_BO_SIZE; + + return offset; +} + +static inline u32 __xe_lrc_indirect_ctx_offset(struct xe_lrc *lrc) +{ + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_CTX_BO_SIZE; } static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) @@ -944,8 +984,10 @@ static void xe_lrc_finish(struct xe_lrc *lrc) * store it in the PPHSWP. 
*/ #define CONTEXT_ACTIVE 1ULL -static ssize_t wa_bb_setup_utilization(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - u32 *batch, size_t max_len) +static ssize_t setup_utilization_wa(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, + size_t max_len) { u32 *cmd = batch; @@ -972,67 +1014,160 @@ static ssize_t wa_bb_setup_utilization(struct xe_lrc *lrc, struct xe_hw_engine * return cmd - batch; } -struct wa_bb_setup { +struct bo_setup { ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *batch, size_t max_size); }; -static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +struct bo_setup_state { + /* Input: */ + struct xe_lrc *lrc; + struct xe_hw_engine *hwe; + size_t max_size; + size_t reserve_dw; + unsigned int offset; + const struct bo_setup *funcs; + unsigned int num_funcs; + + /* State: */ + u32 *buffer; + u32 *ptr; + unsigned int written; +}; + +static int setup_bo(struct bo_setup_state *state) { - const size_t max_size = LRC_WA_BB_SIZE; - static const struct wa_bb_setup funcs[] = { - { .setup = wa_bb_setup_utilization }, - }; ssize_t remain; - u32 *cmd, *buf = NULL; - if (lrc->bo->vmap.is_iomem) { - buf = kmalloc(max_size, GFP_KERNEL); - if (!buf) + if (state->lrc->bo->vmap.is_iomem) { + state->buffer = kmalloc(state->max_size, GFP_KERNEL); + if (!state->buffer) return -ENOMEM; - cmd = buf; + state->ptr = state->buffer; } else { - cmd = lrc->bo->vmap.vaddr + __xe_lrc_wa_bb_offset(lrc); + state->ptr = state->lrc->bo->vmap.vaddr + state->offset; + state->buffer = NULL; } - remain = max_size / sizeof(*cmd); + remain = state->max_size / sizeof(u32); - for (size_t i = 0; i < ARRAY_SIZE(funcs); i++) { - ssize_t len = funcs[i].setup(lrc, hwe, cmd, remain); + for (size_t i = 0; i < state->num_funcs; i++) { + ssize_t len = state->funcs[i].setup(state->lrc, state->hwe, + state->ptr, remain); remain -= len; /* - * There should always be at least 1 additional dword for - * the end marker + * Caller has asked for at least reserve_dw to remain unused. 
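+		 * For example, setup_wa_bb() passes reserve_dw == 1 so that one
+		 * dword is left for the MI_BATCH_BUFFER_END it appends afterwards.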
*/ - if (len < 0 || xe_gt_WARN_ON(lrc->gt, remain < 1)) + if (len < 0 || + xe_gt_WARN_ON(state->lrc->gt, remain < state->reserve_dw)) goto fail; - cmd += len; - } - - *cmd++ = MI_BATCH_BUFFER_END; - - if (buf) { - xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap, - __xe_lrc_wa_bb_offset(lrc), buf, - (cmd - buf) * sizeof(*cmd)); - kfree(buf); + state->ptr += len; + state->written += len; } - xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) + - __xe_lrc_wa_bb_offset(lrc) + 1); - return 0; fail: - kfree(buf); + kfree(state->buffer); return -ENOSPC; } -#define PVC_CTX_ASID (0x2e + 1) -#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) +static void finish_bo(struct bo_setup_state *state) +{ + if (!state->buffer) + return; + + xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap, + state->offset, state->buffer, + state->written * sizeof(u32)); + kfree(state->buffer); +} + +static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + static const struct bo_setup funcs[] = { + { .setup = setup_utilization_wa }, + }; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = LRC_WA_BB_SIZE, + .reserve_dw = 1, + .offset = __xe_lrc_wa_bb_offset(lrc), + .funcs = funcs, + .num_funcs = ARRAY_SIZE(funcs), + }; + int ret; + + ret = setup_bo(&state); + if (ret) + return ret; + + *state.ptr++ = MI_BATCH_BUFFER_END; + state.written++; + + finish_bo(&state); + + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, + xe_bo_ggtt_addr(lrc->bo) + state.offset + 1); + + return 0; +} + +static int +setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + static struct bo_setup rcs_funcs[] = { + }; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = (63 * 64) /* max 63 cachelines */, + .offset = __xe_lrc_indirect_ctx_offset(lrc), + }; + int ret; + + if (!(lrc->flags & XE_LRC_FLAG_INDIRECT_CTX)) + return 0; + + if (hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE) { + state.funcs = rcs_funcs; + state.num_funcs = ARRAY_SIZE(rcs_funcs); + } + + if (xe_gt_WARN_ON(lrc->gt, !state.funcs)) + return 0; + + ret = setup_bo(&state); + if (ret) + return ret; + + /* + * Align to 64B cacheline so there's no garbage at the end for CS to + * execute: size for indirect ctx must be a multiple of 64. + */ + while (state.written & 0xf) { + *state.ptr++ = MI_NOOP; + state.written++; + } + + finish_bo(&state); + + xe_lrc_write_ctx_reg(lrc, + CTX_CS_INDIRECT_CTX, + (xe_bo_ggtt_addr(lrc->bo) + state.offset) | + /* Size in CLs. 
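+			      * (state.written is a dword count: bytes are
+			      * written * sizeof(u32), and the MI_NOOP padding
+			      * above makes that an exact multiple of 64)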
+			      */
+			     (state.written * sizeof(u32) / 64));
+	xe_lrc_write_ctx_reg(lrc,
+			     CTX_CS_INDIRECT_CTX_OFFSET,
+			     CTX_INDIRECT_CTX_OFFSET_DEFAULT);
+
+	return 0;
+}
+
 static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 		       struct xe_vm *vm, u32 ring_size, u16 msix_vec,
@@ -1040,11 +1175,10 @@
 {
 	struct xe_gt *gt = hwe->gt;
 	const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class);
-	const u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE;
+	u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE;
 	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_device *xe = gt_to_xe(gt);
 	struct iosys_map map;
-	void *init_data = NULL;
 	u32 arb_enable;
 	u32 bo_flags;
 	int err;
@@ -1055,6 +1189,12 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	lrc->flags = 0;
 	lrc->ring.size = ring_size;
 	lrc->ring.tail = 0;
+
+	if (gt_engine_needs_indirect_ctx(gt, hwe->class)) {
+		lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX;
+		bo_size += LRC_INDIRECT_CTX_BO_SIZE;
+	}
+
 	if (xe_gt_has_indirect_ring_state(gt))
 		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
 
@@ -1063,10 +1203,6 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	if (vm && vm->xef) /* userspace */
 		bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
 
-	/*
-	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
-	 * via VM bind calls.
-	 */
 	lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size,
 				       ttm_bo_type_kernel,
 				       bo_flags);
@@ -1076,25 +1212,26 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 
 	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
 			     hwe->fence_irq, hwe->name);
 
-	if (!gt->default_lrc[hwe->class]) {
-		init_data = empty_lrc_data(hwe);
-		if (!init_data) {
-			err = -ENOMEM;
-			goto err_lrc_finish;
-		}
-	}
-
 	/*
 	 * Init Per-Process of HW status Page, LRC / context state to known
-	 * values
+	 * values. If a default LRC is already primed for this engine class,
+	 * just copy it; otherwise this is the early submission used to record
+	 * the default LRC, so build a new empty image from scratch.
*/ map = __xe_lrc_pphwsp_map(lrc); - if (!init_data) { + if (gt->default_lrc[hwe->class]) { xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, lrc_size - LRC_PPHWSP_SIZE); } else { + void *init_data = empty_lrc_data(hwe); + + if (!init_data) { + err = -ENOMEM; + goto err_lrc_finish; + } + xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size); kfree(init_data); } @@ -1149,7 +1286,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); + xe_lrc_write_ctx_reg(lrc, CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); @@ -1179,6 +1316,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, if (err) goto err_lrc_finish; + err = setup_indirect_ctx(lrc, hwe); + if (err) + goto err_lrc_finish; + return 0; err_lrc_finish: @@ -1771,7 +1912,7 @@ static const struct instr_state xe_hpg_svg_state[] = { { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, }; -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs) { struct xe_gt *gt = q->hwe->gt; struct xe_device *xe = gt_to_xe(gt); @@ -1806,7 +1947,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b if (!state_table) { xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); - return; + return cs; } for (int i = 0; i < state_table_size; i++) { @@ -1829,12 +1970,14 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b instr == CMD_3DSTATE_DRAWING_RECTANGLE) instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; - bb->cs[bb->len] = instr; + *cs = instr; if (!is_single_dw) - bb->cs[bb->len] |= (num_dw - 2); + *cs |= (num_dw - 2); - bb->len += num_dw; + cs += num_dw; } + + return cs; } struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index eb6e8de8c939..b6c8053c581b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -112,7 +112,7 @@ void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum xe_engine_class); -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb); +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs); struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc); void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 2c7c81079801..e9883706e004 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -29,7 +29,8 @@ struct xe_lrc { struct xe_gt *gt; /** @flags: LRC flags */ -#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 +#define XE_LRC_FLAG_INDIRECT_CTX 0x1 +#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x2 u32 flags; /** @refcount: ref count of this lrc */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 4e2bdf70eb70..ba1cff2e4cda 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1817,8 +1817,8 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, xe_bo_assert_held(bo); /* Use bounce buffer for 
small access and unaligned access */ - if (len & XE_CACHELINE_MASK || - ((uintptr_t)buf | offset) & XE_CACHELINE_MASK) { + if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) || + !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) { int buf_offset = 0; /* @@ -1848,7 +1848,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, (void *)ptr, - sizeof(bounce), 0); + sizeof(bounce), write); if (err) return err; } else { diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 751586d6806a..e4db8d58ea2d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -22,6 +22,9 @@ #include "xe_macros.h" #include "xe_sriov.h" #include "xe_trace.h" +#include "xe_wa.h" + +#include "generated/xe_device_wa_oob.h" static void tiles_fini(void *arg) { @@ -167,7 +170,7 @@ static void mmio_flush_pending_writes(struct xe_mmio *mmio) #define DUMMY_REG_OFFSET 0x130030 int i; - if (mmio->tile->xe->info.platform != XE_LUNARLAKE) + if (!XE_DEVICE_WA(mmio->tile->xe, 15015404425)) return; /* 4 dummy writes */ @@ -180,7 +183,6 @@ u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u8 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readb(mmio->regs + addr); @@ -194,7 +196,6 @@ u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u16 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readw(mmio->regs + addr); @@ -221,7 +222,6 @@ u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u32 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe)) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e332f3142435..107ffe87808c 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -19,31 +19,40 @@ #include "xe_sched_job.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) -#define DEFAULT_GUC_LOG_LEVEL 3 +#define DEFAULT_GUC_LOG_LEVEL 3 #else -#define DEFAULT_GUC_LOG_LEVEL 1 +#define DEFAULT_GUC_LOG_LEVEL 1 #endif +#define DEFAULT_PROBE_DISPLAY true +#define DEFAULT_VRAM_BAR_SIZE 0 +#define DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE +#define DEFAULT_WEDGED_MODE 1 +#define DEFAULT_SVM_NOTIFIER_SIZE 512 + struct xe_modparam xe_modparam = { - .probe_display = true, - .guc_log_level = DEFAULT_GUC_LOG_LEVEL, - .force_probe = CONFIG_DRM_XE_FORCE_PROBE, - .wedged_mode = 1, - .svm_notifier_size = 512, + .probe_display = DEFAULT_PROBE_DISPLAY, + .guc_log_level = DEFAULT_GUC_LOG_LEVEL, + .force_probe = DEFAULT_FORCE_PROBE, + .wedged_mode = DEFAULT_WEDGED_MODE, + .svm_notifier_size = DEFAULT_SVM_NOTIFIER_SIZE, /* the rest are 0 by default */ }; module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600); -MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2"); +MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size in MiB, must be power of 2 " + "[default=" __stringify(DEFAULT_SVM_NOTIFIER_SIZE) "]"); module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); -MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); +MODULE_PARM_DESC(probe_display, 
"Probe display HW, otherwise it's left untouched " + "[default=" __stringify(DEFAULT_PROBE_DISPLAY) "])"); module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600); -MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size (in MiB) - <0=disable-resize, 0=max-needed-size[default], >0=force-size"); +MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size in MiB (<0=disable-resize, 0=max-needed-size, >0=force-size " + "[default=" __stringify(DEFAULT_VRAM_BAR_SIZE) "])"); module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600); MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1=normal, 2..5=verbose-levels " @@ -63,7 +72,8 @@ MODULE_PARM_DESC(gsc_firmware_path, module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, - "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details."); + "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details " + "[default=" DEFAULT_FORCE_PROBE "])"); #ifdef CONFIG_PCI_IOV module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400); @@ -74,7 +84,8 @@ MODULE_PARM_DESC(max_vfs, module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); MODULE_PARM_DESC(wedged_mode, - "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); + "Module's default policy for the wedged mode (0=never, 1=upon-critical-errors, 2=upon-any-hang " + "[default=" __stringify(DEFAULT_WEDGED_MODE) "])"); static int xe_check_nomodeset(void) { diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a3379d39f76d..d991fbd90f20 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -2493,7 +2493,7 @@ int xe_oa_register(struct xe_device *xe) static u32 num_oa_units_per_gt(struct xe_gt *gt) { - if (!xe_gt_is_media_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) + if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) return 1; else if (!IS_DGFX(gt_to_xe(gt))) return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */ @@ -2506,7 +2506,7 @@ static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) < 1270) return XE_OA_UNIT_INVALID; - xe_gt_WARN_ON(hwe->gt, !xe_gt_is_media_type(hwe->gt)); + xe_gt_WARN_ON(hwe->gt, xe_gt_is_main_type(hwe->gt)); if (GRAPHICS_VER(gt_to_xe(hwe->gt)) < 20) return 0; @@ -2589,7 +2589,7 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) for (i = 0; i < num_units; i++) { struct xe_oa_unit *u = >->oa.oa_unit[i]; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { u->regs = __oag_regs(); u->type = DRM_XE_OA_UNIT_TYPE_OAG; } else { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 3614fcf3f088..3c40ef426f0c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -327,6 +327,7 @@ static const struct xe_device_desc bmg_desc = { .has_mbx_power_limits = true, .has_gsc_nvm = 1, .has_heci_cscfi = 1, + .has_sriov = true, .max_gt_per_tile = 2, .needs_scratch = true, }; diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index bc1689db4cd7..7b50c7c1ee21 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -110,13 +110,14 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) return i; } -static int emit_flush_invalidate(u32 *dw, int i) +static int emit_flush_invalidate(u32 addr, u32 val, u32 *dw, int i) { dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | 
MI_FLUSH_DW_OP_STOREDW | - MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX; - dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR; - dw[i++] = 0; + MI_FLUSH_IMM_DW; + + dw[i++] = addr | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; + dw[i++] = val; return i; } @@ -397,23 +398,20 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, static void emit_migration_job_gen12(struct xe_sched_job *job, struct xe_lrc *lrc, u32 seqno) { + u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc); u32 dw[MAX_JOB_SIZE_DW], i = 0; i = emit_copy_timestamp(lrc, dw, i); - i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, dw, i); + i = emit_store_imm_ggtt(saddr, seqno, dw, i); dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */ i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i); - if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) { - /* XXX: Do we need this? Leaving for now. */ - dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(dw, i); - dw[i++] = preparser_disable(false); - } + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(saddr, seqno, dw, i); + dw[i++] = preparser_disable(false); i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i); diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 29e694bb1219..95571b87aa73 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -56,37 +56,61 @@ static bool rule_matches(const struct xe_device *xe, xe->info.subplatform == r->subplatform; break; case XE_RTP_MATCH_GRAPHICS_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 >= r->ver_start && xe->info.graphics_verx100 <= r->ver_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start; break; case XE_RTP_MATCH_GRAPHICS_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.graphics >= r->step_start && xe->info.step.graphics < r->step_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 >= r->ver_start && xe->info.media_verx100 <= r->ver_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.media >= r->step_start && xe->info.step.media < r->step_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start; break; case XE_RTP_MATCH_INTEGRATED: @@ -108,6 +132,9 @@ static bool rule_matches(const struct xe_device *xe, match = hwe->class != r->engine_class; break; case XE_RTP_MATCH_FUNC: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = r->match_func(gt, hwe); break; default: @@ -186,6 +213,11 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx, struct xe_device **xe) { switch (ctx->type) { + case XE_RTP_PROCESS_TYPE_DEVICE: + *hwe = NULL; + 
*gt = NULL;
+		*xe = ctx->xe;
+		break;
 	case XE_RTP_PROCESS_TYPE_GT:
 		*hwe = NULL;
 		*gt = ctx->gt;
@@ -326,21 +358,6 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
 	       hwe->engine_id == __ffs(render_compute_mask);
 }
 
-bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
-					 const struct xe_hw_engine *hwe)
-{
-	unsigned int dss_per_gslice = 4;
-	unsigned int dss;
-
-	if (drm_WARN(&gt_to_xe(gt)->drm, xe_dss_mask_empty(gt->fuse_topo.g_dss_mask),
-		     "Checking gslice for platform without geometry pipeline\n"))
-		return false;
-
-	dss = xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0);
-
-	return dss >= dss_per_gslice;
-}
-
 bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt,
 			       const struct xe_hw_engine *hwe)
 {
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 4fe736a11c42..5ed6c14b9ae3 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -422,7 +422,8 @@ struct xe_reg_sr;
 #define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__),	\
 		struct xe_hw_engine * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \
-		struct xe_gt * :	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT })
+		struct xe_gt * :	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }, \
+		struct xe_device * :	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_DEVICE })
 
 void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
 					       unsigned long *active_entries,
@@ -466,17 +467,6 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
 					  const struct xe_hw_engine *hwe);
 
 /*
- * xe_rtp_match_first_gslice_fused_off - Match when first gslice is fused off
- *
- * @gt: GT structure
- * @hwe: Engine instance
- *
- * Returns: true if first gslice is fused off, false otherwise.
- */ -bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, - const struct xe_hw_engine *hwe); - -/* * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device * * @gt: GT structure diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 1b76b947c706..f4cf30e298cf 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -110,12 +110,14 @@ struct xe_rtp_entry { }; enum xe_rtp_process_type { + XE_RTP_PROCESS_TYPE_DEVICE, XE_RTP_PROCESS_TYPE_GT, XE_RTP_PROCESS_TYPE_ENGINE, }; struct xe_rtp_process_ctx { union { + struct xe_device *xe; struct xe_gt *gt; struct xe_hw_engine *hwe; }; diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 0f721ae17b26..afbdd894bd6e 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -3,6 +3,8 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <linux/debugfs.h> +#include <drm/drm_debugfs.h> #include <drm/drm_managed.h> #include "xe_assert.h" @@ -10,6 +12,8 @@ #include "xe_module.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_service.h" #include "xe_sriov_printk.h" static unsigned int wanted_max_vfs(struct xe_device *xe) @@ -80,9 +84,22 @@ bool xe_sriov_pf_readiness(struct xe_device *xe) */ int xe_sriov_pf_init_early(struct xe_device *xe) { + int err; + xe_assert(xe, IS_SRIOV_PF(xe)); - return drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + xe->sriov.pf.vfs = drmm_kcalloc(&xe->drm, 1 + xe_sriov_pf_get_totalvfs(xe), + sizeof(*xe->sriov.pf.vfs), GFP_KERNEL); + if (!xe->sriov.pf.vfs) + return -ENOMEM; + + err = drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + if (err) + return err; + + xe_sriov_pf_service_init(xe); + + return 0; } /** @@ -102,3 +119,45 @@ void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p) drm_printf(p, "supported: %u\n", xe->sriov.pf.driver_max_vfs); drm_printf(p, "enabled: %u\n", pci_num_vf(pdev)); } + +static int simple_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct xe_device *xe = parent->d_inode->i_private; + void (*print)(struct xe_device *, struct drm_printer *) = node->info_ent->data; + + print(xe, &p); + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + { .name = "vfs", .show = simple_show, .data = xe_sriov_pf_print_vfs_summary }, + { .name = "versions", .show = simple_show, .data = xe_sriov_pf_service_print_versions }, +}; + +/** + * xe_sriov_pf_debugfs_register - Register PF debugfs attributes. + * @xe: the &xe_device + * @root: the root &dentry + * + * Prepare debugfs attributes exposed by the PF. + */ +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ + struct drm_minor *minor = xe->drm.primary; + struct dentry *parent; + + /* + * /sys/kernel/debug/dri/0/ + * ├── pf + * │ ├── ... 
+ */ + parent = debugfs_create_dir("pf", root); + if (IS_ERR(parent)) + return; + parent->d_inode->i_private = xe; + + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h index d1220e70e1c0..c392c3fcf085 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -8,12 +8,14 @@ #include <linux/types.h> +struct dentry; struct drm_printer; struct xe_device; #ifdef CONFIG_PCI_IOV bool xe_sriov_pf_readiness(struct xe_device *xe); int xe_sriov_pf_init_early(struct xe_device *xe); +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root); void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); #else static inline bool xe_sriov_pf_readiness(struct xe_device *xe) @@ -25,6 +27,10 @@ static inline int xe_sriov_pf_init_early(struct xe_device *xe) { return 0; } + +static inline void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ +} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_sriov_pf_service.c new file mode 100644 index 000000000000..eee3b2a1ba41 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#include "abi/guc_relay_actions_abi.h" + +#include "xe_device_types.h" +#include "xe_sriov.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_printk.h" + +#include "xe_sriov_pf_service.h" +#include "xe_sriov_pf_service_types.h" + +/** + * xe_sriov_pf_service_init - Early initialization of the SR-IOV PF service. + * @xe: the &xe_device to initialize + * + * Performs early initialization of the SR-IOV PF service. + * + * This function can only be called on PF. + */ +void xe_sriov_pf_service_init(struct xe_device *xe) +{ + BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); + BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); + + xe_assert(xe, IS_SRIOV_PF(xe)); + + /* base versions may differ between platforms */ + xe->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; + xe->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; + + /* latest version is same for all platforms */ + xe->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; + xe->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; +} + +/* Return: 0 on success or a negative error code on failure. 
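+ *
+ * For illustration, with a hypothetical base of 1.0 and latest of 1.2:
+ * a VF asking for ANY.ANY or 2.0 is given 1.2, asking for 1.1 is given
+ * 1.1, and asking for 0.9 is rejected with -EPERM.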
+ */
+static int pf_negotiate_version(struct xe_device *xe,
+				u32 wanted_major, u32 wanted_minor,
+				u32 *major, u32 *minor)
+{
+	struct xe_sriov_pf_service_version base = xe->sriov.pf.service.version.base;
+	struct xe_sriov_pf_service_version latest = xe->sriov.pf.service.version.latest;
+
+	xe_assert(xe, IS_SRIOV_PF(xe));
+	xe_assert(xe, base.major);
+	xe_assert(xe, base.major <= latest.major);
+	xe_assert(xe, (base.major < latest.major) || (base.minor <= latest.minor));
+
+	/* VF doesn't care - return our latest */
+	if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY &&
+	    wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) {
+		*major = latest.major;
+		*minor = latest.minor;
+		return 0;
+	}
+
+	/* VF wants newer than ours - return our latest */
+	if (wanted_major > latest.major) {
+		*major = latest.major;
+		*minor = latest.minor;
+		return 0;
+	}
+
+	/* VF wants older than min required - reject */
+	if (wanted_major < base.major ||
+	    (wanted_major == base.major && wanted_minor < base.minor)) {
+		return -EPERM;
+	}
+
+	/* previous major - return wanted, as we should still support it */
+	if (wanted_major < latest.major) {
+		/* XXX: we are not prepared for multi-versions yet */
+		xe_assert(xe, base.major == latest.major);
+		return -ENOPKG;
+	}
+
+	/* same major - return common minor */
+	*major = wanted_major;
+	*minor = min_t(u32, latest.minor, wanted_minor);
+	return 0;
+}
+
+static void pf_connect(struct xe_device *xe, u32 vfid, u32 major, u32 minor)
+{
+	xe_sriov_pf_assert_vfid(xe, vfid);
+	xe_assert(xe, major || minor);
+
+	xe->sriov.pf.vfs[vfid].version.major = major;
+	xe->sriov.pf.vfs[vfid].version.minor = minor;
+}
+
+static void pf_disconnect(struct xe_device *xe, u32 vfid)
+{
+	xe_sriov_pf_assert_vfid(xe, vfid);
+
+	xe->sriov.pf.vfs[vfid].version.major = 0;
+	xe->sriov.pf.vfs[vfid].version.minor = 0;
+}
+
+/**
+ * xe_sriov_pf_service_is_negotiated - Check if VF has negotiated given ABI version.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ * @major: the major version to check
+ * @minor: the minor version to check
+ *
+ * Check if the VF driver has already negotiated an ABI version that covers
+ * @major.@minor, i.e. the same major version with an equal or higher minor.
+ *
+ * This function can only be called on PF.
+ *
+ * Returns: true if VF can use given ABI version functionality.
+ */
+bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor)
+{
+	xe_sriov_pf_assert_vfid(xe, vfid);
+
+	return major == xe->sriov.pf.vfs[vfid].version.major &&
+	       minor <= xe->sriov.pf.vfs[vfid].version.minor;
+}
+
+/**
+ * xe_sriov_pf_service_handshake_vf - Confirm a connection with the VF.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ * @wanted_major: the major service version expected by the VF
+ * @wanted_minor: the minor service version expected by the VF
+ * @major: the major service version to be used by the VF
+ * @minor: the minor service version to be used by the VF
+ *
+ * Negotiate a VF/PF ABI version to allow the VF to use the PF services.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */ +int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + int err; + + xe_sriov_dbg_verbose(xe, "VF%u wants ABI version %u.%u\n", + vfid, wanted_major, wanted_minor); + + err = pf_negotiate_version(xe, wanted_major, wanted_minor, major, minor); + + if (err < 0) { + xe_sriov_notice(xe, "VF%u failed to negotiate ABI %u.%u (%pe)\n", + vfid, wanted_major, wanted_minor, ERR_PTR(err)); + pf_disconnect(xe, vfid); + } else { + xe_sriov_dbg(xe, "VF%u negotiated ABI version %u.%u\n", + vfid, *major, *minor); + pf_connect(xe, vfid, *major, *minor); + } + + return err; +} + +/** + * xe_sriov_pf_service_reset_vf - Reset a connection with the VF. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * Reset a VF driver negotiated VF/PF ABI version. + * + * After that point, the VF driver will have to perform new version handshake + * to continue use of the PF services again. + * + * This function can only be called on PF. + */ +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid) +{ + pf_disconnect(xe, vfid); +} + +static void print_pf_version(struct drm_printer *p, const char *name, + const struct xe_sriov_pf_service_version *version) +{ + drm_printf(p, "%s:\t%u.%u\n", name, version->major, version->minor); +} + +/** + * xe_sriov_pf_service_print_versions - Print ABI versions negotiated with VFs. + * @xe: the &xe_device + * @p: the &drm_printer + * + * This function is for PF use only. + */ +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); + struct xe_sriov_pf_service_version *version; + char name[8]; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + print_pf_version(p, "base", &xe->sriov.pf.service.version.base); + print_pf_version(p, "latest", &xe->sriov.pf.service.version.latest); + + for (n = 1; n <= total_vfs; n++) { + version = &xe->sriov.pf.vfs[n].version; + if (!version->major && !version->minor) + continue; + + print_pf_version(p, xe_sriov_function_name(n, name, sizeof(name)), version); + } +} + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_sriov_pf_service_kunit.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_sriov_pf_service.h new file mode 100644 index 000000000000..d38c18f5ed10 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SERVICE_H_ +#define _XE_SRIOV_PF_SERVICE_H_ + +#include <linux/types.h> + +struct drm_printer; +struct xe_device; + +void xe_sriov_pf_service_init(struct xe_device *xe); +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p); + +int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor); +bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor); +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h new file mode 100644 index 000000000000..0835dde358c1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SERVICE_TYPES_H_ +#define _XE_SRIOV_PF_SERVICE_TYPES_H_ + 
+#include <linux/types.h> + +/** + * struct xe_sriov_pf_service_version - VF/PF ABI Version. + * @major: the major version of the VF/PF ABI + * @minor: the minor version of the VF/PF ABI + * + * See `GuC Relay Communication`_. + */ +struct xe_sriov_pf_service_version { + u16 major; + u16 minor; +}; + +/** + * struct xe_sriov_pf_service - Data used by the PF service. + * @version: information about VF/PF ABI versions for current platform. + * @version.base: lowest VF/PF ABI version that could be negotiated with VF. + * @version.latest: latest VF/PF ABI version supported by the PF driver. + */ +struct xe_sriov_pf_service { + struct { + struct xe_sriov_pf_service_version base; + struct xe_sriov_pf_service_version latest; + } version; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h new file mode 100644 index 000000000000..956a88f9f213 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_TYPES_H_ +#define _XE_SRIOV_PF_TYPES_H_ + +#include <linux/mutex.h> +#include <linux/types.h> + +#include "xe_sriov_pf_service_types.h" + +/** + * struct xe_sriov_metadata - per-VF device level metadata + */ +struct xe_sriov_metadata { + /** @version: negotiated VF/PF ABI version */ + struct xe_sriov_pf_service_version version; +}; + +/** + * struct xe_device_pf - Xe PF related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_PF mode. + */ +struct xe_device_pf { + /** @device_total_vfs: Maximum number of VFs supported by the device. */ + u16 device_total_vfs; + + /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ + u16 driver_max_vfs; + + /** @master_lock: protects all VFs configurations across GTs */ + struct mutex master_lock; + + /** @service: device level service data. */ + struct xe_sriov_pf_service service; + + /** @vfs: metadata for all VFs. */ + struct xe_sriov_metadata *vfs; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index ca94382a721e..1a138108d139 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -7,9 +7,6 @@ #define _XE_SRIOV_TYPES_H_ #include <linux/build_bug.h> -#include <linux/mutex.h> -#include <linux/types.h> -#include <linux/workqueue_types.h> /** * VFID - Virtual Function Identifier @@ -40,37 +37,4 @@ enum xe_sriov_mode { }; static_assert(XE_SRIOV_MODE_NONE); -/** - * struct xe_device_pf - Xe PF related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_PF mode. - */ -struct xe_device_pf { - /** @device_total_vfs: Maximum number of VFs supported by the device. */ - u16 device_total_vfs; - - /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ - u16 driver_max_vfs; - - /** @master_lock: protects all VFs configurations across GTs */ - struct mutex master_lock; -}; - -/** - * struct xe_device_vf - Xe Virtual Function related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_VF mode. 
- */ -struct xe_device_vf { - /** @migration: VF Migration state data */ - struct { - /** @migration.worker: VF migration recovery worker */ - struct work_struct worker; - /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ - unsigned long gt_flags; - } migration; -}; - #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h new file mode 100644 index 000000000000..8300416a6226 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_TYPES_H_ +#define _XE_SRIOV_VF_TYPES_H_ + +#include <linux/types.h> +#include <linux/workqueue_types.h> + +/** + * struct xe_sriov_vf_relay_version - PF ABI version details. + */ +struct xe_sriov_vf_relay_version { + /** @major: major version. */ + u16 major; + /** @minor: minor version. */ + u16 minor; +}; + +/** + * struct xe_device_vf - Xe Virtual Function related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_VF mode. + */ +struct xe_device_vf { + /** @pf_version: negotiated VF/PF ABI version. */ + struct xe_sriov_vf_relay_version pf_version; + + /** @migration: VF Migration state data */ + struct { + /** @migration.worker: VF migration recovery worker */ + struct work_struct worker; + /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ + unsigned long gt_flags; + } migration; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index 066a3d0cea79..cc33e8733983 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -27,4 +27,10 @@ static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) return NULL; } #endif + +static inline bool xe_tile_is_root(struct xe_tile *tile) +{ + return tile->id == 0; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index e875ea4658a9..2035604121e6 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2380,7 +2380,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, ctx.read_only = xe_vma_read_only(vma); ctx.devmem_possible = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR); + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); for_each_tile(tile, vm->xe, id) tile_mask |= 0x1 << id; @@ -2887,7 +2887,7 @@ static int check_ufence(struct xe_vma *vma) static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) { - bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR); + bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); int err = 0; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index f51218a7a580..22a98600fd8f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -10,6 +10,7 @@ #include <linux/compiler_types.h> #include <linux/fault-inject.h> +#include <generated/xe_device_wa_oob.h> #include <generated/xe_wa_oob.h> #include "regs/xe_engine_regs.h" @@ -876,9 +877,34 @@ static __maybe_unused const struct xe_rtp_entry oob_was[] = { static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT); +static __maybe_unused const struct xe_rtp_entry device_oob_was[] = { +#include <generated/xe_device_wa_oob.c> + {} +}; + +static_assert(ARRAY_SIZE(device_oob_was) - 1 == _XE_DEVICE_WA_OOB_COUNT); + __diag_pop(); /** + * xe_wa_process_device_oob - process OOB workaround 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index e875ea4658a9..2035604121e6 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2380,7 +2380,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
 
 		ctx.read_only = xe_vma_read_only(vma);
 		ctx.devmem_possible = IS_DGFX(vm->xe) &&
-			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
+			IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
 
 		for_each_tile(tile, vm->xe, id)
 			tile_mask |= 0x1 << id;
@@ -2887,7 +2887,7 @@ static int check_ufence(struct xe_vma *vma)
 
 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
 {
-	bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
+	bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
 	struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
 	int err = 0;
 
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index f51218a7a580..22a98600fd8f 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -10,6 +10,7 @@
 #include <linux/compiler_types.h>
 #include <linux/fault-inject.h>
 
+#include <generated/xe_device_wa_oob.h>
 #include <generated/xe_wa_oob.h>
 
 #include "regs/xe_engine_regs.h"
@@ -876,9 +877,34 @@ static __maybe_unused const struct xe_rtp_entry oob_was[] = {
 
 static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT);
 
+static __maybe_unused const struct xe_rtp_entry device_oob_was[] = {
+#include <generated/xe_device_wa_oob.c>
+	{}
+};
+
+static_assert(ARRAY_SIZE(device_oob_was) - 1 == _XE_DEVICE_WA_OOB_COUNT);
+
 __diag_pop();
 
 /**
+ * xe_wa_process_device_oob - process OOB workaround table
+ * @xe: device instance to process workarounds for
+ *
+ * Process the OOB workaround table for this device, marking in @xe the
+ * workarounds that are active.
+ */
+void xe_wa_process_device_oob(struct xe_device *xe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(xe);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, xe->wa_active.oob,
+						  ARRAY_SIZE(device_oob_was));
+
+	xe->wa_active.oob_initialized = true;
+	xe_rtp_process(&ctx, device_oob_was);
+}
+
+/**
  * xe_wa_process_oob - process OOB workaround table
  * @gt: GT instance to process workarounds for
  *
@@ -947,6 +973,28 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe)
 }
 
 /**
+ * xe_wa_device_init - initialize device with workaround oob bookkeeping
+ * @xe: Xe device instance to initialize
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_wa_device_init(struct xe_device *xe)
+{
+	unsigned long *p;
+
+	p = drmm_kzalloc(&xe->drm,
+			 sizeof(*p) * BITS_TO_LONGS(ARRAY_SIZE(device_oob_was)),
+			 GFP_KERNEL);
+
+	if (!p)
+		return -ENOMEM;
+
+	xe->wa_active.oob = p;
+
+	return 0;
+}
+
+/**
  * xe_wa_init - initialize gt with workaround bookkeeping
  * @gt: GT instance to initialize
 *
@@ -980,6 +1028,16 @@
 }
 ALLOW_ERROR_INJECTION(xe_wa_init, ERRNO); /* See xe_pci_probe() */
 
+void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p)
+{
+	size_t idx;
+
+	drm_printf(p, "Device OOB Workarounds\n");
+	for_each_set_bit(idx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was))
+		if (device_oob_was[idx].name)
+			drm_printf_indent(p, 1, "%s\n", device_oob_was[idx].name);
+}
+
 void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
 {
 	size_t idx;
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
index 52337405b5bc..f3880c65cb8d 100644
--- a/drivers/gpu/drm/xe/xe_wa.h
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -13,17 +13,19 @@ struct xe_gt;
 struct xe_hw_engine;
 struct xe_tile;
 
+int xe_wa_device_init(struct xe_device *xe);
 int xe_wa_init(struct xe_gt *gt);
+void xe_wa_process_device_oob(struct xe_device *xe);
 void xe_wa_process_oob(struct xe_gt *gt);
 void xe_wa_process_gt(struct xe_gt *gt);
 void xe_wa_process_engine(struct xe_hw_engine *hwe);
 void xe_wa_process_lrc(struct xe_hw_engine *hwe);
 void xe_wa_apply_tile_workarounds(struct xe_tile *tile);
+void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p);
 void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
 
 /**
- * XE_WA - Out-of-band workarounds, that don't fit the lifecycle any
- * other more specific type
+ * XE_WA - Out-of-band workarounds, to be queried and called as needed.
  * @gt__: gt instance
  * @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h
  */
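
The device-level flow mirrors the existing per-GT one: allocate the bitmap once at probe, run the RTP rules to latch the active bits, then query from anywhere. A sketch of the expected call order follows; both the probe helper and the rule name "foo" are made up for illustration, and the real wiring is not shown by this diff.

/* Illustrative ordering only; "foo" stands in for a real rule name
 * from xe_device_wa_oob.rules.
 */
static int example_device_wa_setup(struct xe_device *xe)
{
	int err;

	/* allocate the drmm-managed wa_active.oob bitmap */
	err = xe_wa_device_init(xe);
	if (err)
		return err;

	/* evaluate the OOB rules and mark the active workarounds in @xe */
	xe_wa_process_device_oob(xe);

	/* any later call site can now test a workaround cheaply */
	if (XE_DEVICE_WA(xe, foo))
		drm_dbg(&xe->drm, "device WA foo is active\n");

	return 0;
}
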
@@ -32,4 +34,25 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
 	test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob); \
 })
 
+/**
+ * XE_DEVICE_WA - Out-of-band Device workarounds, to be queried and called
+ * as needed.
+ * @xe__: xe_device
+ * @id__: XE_DEVICE_WA_OOB_<id__>, as generated by build system in generated/xe_device_wa_oob.h
+ */
+#define XE_DEVICE_WA(xe__, id__) ({ \
+	xe_assert(xe__, (xe__)->wa_active.oob_initialized); \
+	test_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \
+})
+
+/**
+ * XE_DEVICE_WA_DISABLE - Clear the active bit of an Out-of-band Device
+ * workaround, e.g. once later initialization determines it does not apply.
+ * @xe__: xe_device
+ * @id__: XE_DEVICE_WA_OOB_<id__>, as generated by build system in generated/xe_device_wa_oob.h
+ */
+#define XE_DEVICE_WA_DISABLE(xe__, id__) ({ \
+	xe_assert(xe__, (xe__)->wa_active.oob_initialized); \
+	clear_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \
+})
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index e7ed5d583d68..e990f20eccfe 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -70,3 +70,5 @@ no_media_l3	MEDIA_VERSION(3000)
 # SoC workaround - currently applies to all platforms with the following
 # primary GT GMDID
 14022085890	GRAPHICS_VERSION(2001)
+
+15015404425_disable	PLATFORM(PANTHERLAKE), MEDIA_STEP(B0, FOREVER)
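
Note that 15015404425_disable lands in xe_wa_oob.rules, so it is generated as a GT-level id (XE_WA_OOB_15015404425_disable), not a device-level one. One plausible way such a rule pairs with the new XE_DEVICE_WA_DISABLE() is sketched below, assuming a matching device workaround named 15015404425 exists in xe_device_wa_oob.rules; that pairing is an assumption, not something this diff shows.

/* Assumed pairing: clear the device-level WA when any GT carries the
 * GT-level _disable rule added above; 15015404425 as a device WA id is
 * an assumption, not shown in this diff.
 */
static void example_fixup_device_was(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;

	for_each_gt(gt, xe, id)
		if (XE_WA(gt, 15015404425_disable))
			XE_DEVICE_WA_DISABLE(xe, 15015404425);
}
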
