aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gt/uc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h42
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c18
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c33
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c62
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c157
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h3
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c377
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c75
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c11
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c188
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h18
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c175
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c2
21 files changed, 1032 insertions, 191 deletions
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index ba10bd374cee..fe5d7d261797 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -144,6 +144,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
+ INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
INTEL_GUC_ACTION_LIMIT
};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 31cf9fb48c7e..f9240d4baa69 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -95,6 +95,11 @@ struct intel_guc {
*/
struct ida guc_ids;
/**
+ * @num_guc_ids: Number of guc_ids, selftest feature to be able
+ * to reduce this number while testing.
+ */
+ int num_guc_ids;
+ /**
* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
*/
unsigned long *guc_ids_bitmap;
@@ -138,6 +143,8 @@ struct intel_guc {
u32 ads_regset_size;
/** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
u32 ads_golden_ctxt_size;
+ /** @ads_engine_usage_size: size of engine usage in the ADS */
+ u32 ads_engine_usage_size;
/** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */
struct i915_vma *lrc_desc_pool;
@@ -172,6 +179,41 @@ struct intel_guc {
/** @send_mutex: used to serialize the intel_guc_send actions */
struct mutex send_mutex;
+
+ /**
+ * @timestamp: GT timestamp object that stores a copy of the timestamp
+ * and adjusts it for overflow using a worker.
+ */
+ struct {
+ /**
+ * @lock: Lock protecting the below fields and the engine stats.
+ */
+ spinlock_t lock;
+
+ /**
+ * @gt_stamp: 64 bit extended value of the GT timestamp.
+ */
+ u64 gt_stamp;
+
+ /**
+ * @ping_delay: Period for polling the GT timestamp for
+ * overflow.
+ */
+ unsigned long ping_delay;
+
+ /**
+ * @work: Periodic work to adjust GT timestamp, engine and
+ * context usage for overflows.
+ */
+ struct delayed_work work;
+ } timestamp;
+
+#ifdef CONFIG_DRM_I915_SELFTEST
+ /**
+ * @number_guc_id_stolen: The number of guc_ids that have been stolen
+ */
+ int number_guc_id_stolen;
+#endif
};
static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 621c893a009f..1a1edae67e4e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -26,6 +26,8 @@
* | guc_policies |
* +---------------------------------------+
* | guc_gt_system_info |
+ * +---------------------------------------+
+ * | guc_engine_usage |
* +---------------------------------------+ <== static
* | guc_mmio_reg[countA] (engine 0.0) |
* | guc_mmio_reg[countB] (engine 0.1) |
@@ -47,6 +49,7 @@ struct __guc_ads_blob {
struct guc_ads ads;
struct guc_policies policies;
struct guc_gt_system_info system_info;
+ struct guc_engine_usage engine_usage;
/* From here on, location is dynamic! Refer to above diagram. */
struct guc_mmio_reg regset[0];
} __packed;
@@ -628,3 +631,21 @@ void intel_guc_ads_reset(struct intel_guc *guc)
guc_ads_private_data_reset(guc);
}
+
+u32 intel_guc_engine_usage_offset(struct intel_guc *guc)
+{
+ struct __guc_ads_blob *blob = guc->ads_blob;
+ u32 base = intel_guc_ggtt_offset(guc, guc->ads_vma);
+ u32 offset = base + ptr_offset(blob, engine_usage);
+
+ return offset;
+}
+
+struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine)
+{
+ struct intel_guc *guc = &engine->gt->uc.guc;
+ struct __guc_ads_blob *blob = guc->ads_blob;
+ u8 guc_class = engine_class_to_guc_class(engine->class);
+
+ return &blob->engine_usage.engines[guc_class][ilog2(engine->logical_mask)];
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
index 3d85051d57e4..e74c110facff 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
@@ -6,8 +6,11 @@
#ifndef _INTEL_GUC_ADS_H_
#define _INTEL_GUC_ADS_H_
+#include <linux/types.h>
+
struct intel_guc;
struct drm_printer;
+struct intel_engine_cs;
int intel_guc_ads_create(struct intel_guc *guc);
void intel_guc_ads_destroy(struct intel_guc *guc);
@@ -15,5 +18,7 @@ void intel_guc_ads_init_late(struct intel_guc *guc);
void intel_guc_ads_reset(struct intel_guc *guc);
void intel_guc_ads_print_policy_info(struct intel_guc *guc,
struct drm_printer *p);
+struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine);
+u32 intel_guc_engine_usage_offset(struct intel_guc *guc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index a0cc34be7b56..aa6dd6415202 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -523,6 +523,15 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct)
CT_ERROR(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n",
ktime_ms_delta(ktime_get(), ct->stall_time),
send->status, recv->status);
+ CT_ERROR(ct, "H2G Space: %u (Bytes)\n",
+ atomic_read(&ct->ctbs.send.space) * 4);
+ CT_ERROR(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
+ CT_ERROR(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
+ CT_ERROR(ct, "G2H Space: %u (Bytes)\n",
+ atomic_read(&ct->ctbs.recv.space) * 4);
+ CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
+ CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
+
ct->ctbs.send.broken = true;
}
@@ -582,12 +591,19 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
static int has_room_nb(struct intel_guc_ct *ct, u32 h2g_dw, u32 g2h_dw)
{
+ bool h2g = h2g_has_room(ct, h2g_dw);
+ bool g2h = g2h_has_room(ct, g2h_dw);
+
lockdep_assert_held(&ct->ctbs.send.lock);
- if (unlikely(!h2g_has_room(ct, h2g_dw) || !g2h_has_room(ct, g2h_dw))) {
+ if (unlikely(!h2g || !g2h)) {
if (ct->stall_time == KTIME_MAX)
ct->stall_time = ktime_get();
+ /* Be paranoid and kick G2H tasklet to free credits */
+ if (!g2h)
+ tasklet_hi_schedule(&ct->receive_tasklet);
+
if (unlikely(ct_deadlocked(ct)))
return -EPIPE;
else
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 196424be0998..31420ce1ce6b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -40,9 +40,8 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
}
}
-/* Copy RSA signature from the fw image to HW for verification */
-static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
- struct intel_uncore *uncore)
+static int guc_xfer_rsa_mmio(struct intel_uc_fw *guc_fw,
+ struct intel_uncore *uncore)
{
u32 rsa[UOS_RSA_SCRATCH_COUNT];
size_t copied;
@@ -58,6 +57,27 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
return 0;
}
+static int guc_xfer_rsa_vma(struct intel_uc_fw *guc_fw,
+ struct intel_uncore *uncore)
+{
+ struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw);
+
+ intel_uncore_write(uncore, UOS_RSA_SCRATCH(0),
+ intel_guc_ggtt_offset(guc, guc_fw->rsa_data));
+
+ return 0;
+}
+
+/* Copy RSA signature from the fw image to HW for verification */
+static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
+ struct intel_uncore *uncore)
+{
+ if (guc_fw->rsa_data)
+ return guc_xfer_rsa_vma(guc_fw, uncore);
+ else
+ return guc_xfer_rsa_mmio(guc_fw, uncore);
+}
+
/*
* Read the GuC status register (GUC_STATUS) and store it in the
* specified location; then return a boolean indicating whether
@@ -142,7 +162,10 @@ int intel_guc_fw_upload(struct intel_guc *guc)
/*
* Note that GuC needs the CSS header plus uKernel code to be copied
* by the DMA engine in one operation, whereas the RSA signature is
- * loaded via MMIO.
+ * loaded separately, either by copying it to the UOS_RSA_SCRATCH
+ * register (if key size <= 256) or through a ggtt-pinned vma (if key
+ * size > 256). The RSA size and therefore the way we provide it to the
+ * HW is fixed for each platform and hard-coded in the bootrom.
*/
ret = guc_xfer_rsa(&guc->fw, uncore);
if (ret)
@@ -164,6 +187,6 @@ int intel_guc_fw_upload(struct intel_guc *guc)
return 0;
out:
- intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_FAIL);
+ intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 722933e26347..7072e30e99f4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -294,6 +294,19 @@ struct guc_ads {
u32 reserved[15];
} __packed;
+/* Engine usage stats */
+struct guc_engine_usage_record {
+ u32 current_context_index;
+ u32 last_switch_in_stamp;
+ u32 reserved0;
+ u32 total_runtime;
+ u32 reserved1[4];
+} __packed;
+
+struct guc_engine_usage {
+ struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+} __packed;
+
/* GuC logging structures */
enum guc_log_buffer_type {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index ac1ee1d5ce10..fe6ab7550a14 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -15,9 +15,12 @@
struct intel_guc;
-#ifdef CONFIG_DRM_I915_DEBUG_GUC
+#if defined(CONFIG_DRM_I915_DEBUG_GUC)
#define CRASH_BUFFER_SIZE SZ_2M
#define DEBUG_BUFFER_SIZE SZ_16M
+#elif defined(CONFIG_DRM_I915_DEBUG_GEM)
+#define CRASH_BUFFER_SIZE SZ_1M
+#define DEBUG_BUFFER_SIZE SZ_2M
#else
#define CRASH_BUFFER_SIZE SZ_8K
#define DEBUG_BUFFER_SIZE SZ_64K
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
index 46026c2c1722..ddfbe334689f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
@@ -10,28 +10,80 @@
#include "intel_guc.h"
#include "intel_guc_log.h"
#include "intel_guc_log_debugfs.h"
+#include "intel_uc.h"
+
+static u32 obj_to_guc_log_dump_size(struct drm_i915_gem_object *obj)
+{
+ u32 size;
+
+ if (!obj)
+ return PAGE_SIZE;
+
+ /* "0x%08x 0x%08x 0x%08x 0x%08x\n" => 16 bytes -> 44 chars => x2.75 */
+ size = ((obj->base.size * 11) + 3) / 4;
+
+ /* Add padding for final blank line, any extra header info, etc. */
+ size = PAGE_ALIGN(size + PAGE_SIZE);
+
+ return size;
+}
+
+static u32 guc_log_dump_size(struct intel_guc_log *log)
+{
+ struct intel_guc *guc = log_to_guc(log);
+
+ if (!intel_guc_is_supported(guc))
+ return PAGE_SIZE;
+
+ if (!log->vma)
+ return PAGE_SIZE;
+
+ return obj_to_guc_log_dump_size(log->vma->obj);
+}
static int guc_log_dump_show(struct seq_file *m, void *data)
{
struct drm_printer p = drm_seq_file_printer(m);
+ int ret;
- return intel_guc_log_dump(m->private, &p, false);
+ ret = intel_guc_log_dump(m->private, &p, false);
+
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && seq_has_overflowed(m))
+ pr_warn_once("preallocated size:%zx for %s exceeded\n",
+ m->size, __func__);
+
+ return ret;
+}
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(guc_log_dump, guc_log_dump_size);
+
+static u32 guc_load_err_dump_size(struct intel_guc_log *log)
+{
+ struct intel_guc *guc = log_to_guc(log);
+ struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
+
+ if (!intel_guc_is_supported(guc))
+ return PAGE_SIZE;
+
+ return obj_to_guc_log_dump_size(uc->load_err_log);
}
-DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_log_dump);
static int guc_load_err_log_dump_show(struct seq_file *m, void *data)
{
struct drm_printer p = drm_seq_file_printer(m);
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && seq_has_overflowed(m))
+ pr_warn_once("preallocated size:%zx for %s exceeded\n",
+ m->size, __func__);
+
return intel_guc_log_dump(m->private, &p, true);
}
-DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(guc_load_err_log_dump, guc_load_err_dump_size);
static int guc_log_level_get(void *data, u64 *val)
{
struct intel_guc_log *log = data;
- if (!intel_guc_is_used(log_to_guc(log)))
+ if (!log->vma)
return -ENODEV;
*val = intel_guc_log_get_level(log);
@@ -43,7 +95,7 @@ static int guc_log_level_set(void *data, u64 val)
{
struct intel_guc_log *log = data;
- if (!intel_guc_is_used(log_to_guc(log)))
+ if (!log->vma)
return -ENODEV;
return intel_guc_log_set_level(log, val);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 65a3e7fdb2b2..13b27b8ff74e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -79,29 +79,6 @@ static void slpc_mem_set_disabled(struct slpc_shared_data *data,
slpc_mem_set_param(data, enable_id, 0);
}
-int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
-{
- struct intel_guc *guc = slpc_to_guc(slpc);
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
- u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
- int err;
-
- GEM_BUG_ON(slpc->vma);
-
- err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr);
- if (unlikely(err)) {
- drm_err(&i915->drm,
- "Failed to allocate SLPC struct (err=%pe)\n",
- ERR_PTR(err));
- return err;
- }
-
- slpc->max_freq_softlimit = 0;
- slpc->min_freq_softlimit = 0;
-
- return err;
-}
-
static u32 slpc_get_state(struct intel_guc_slpc *slpc)
{
struct slpc_shared_data *data;
@@ -203,6 +180,86 @@ static int slpc_unset_param(struct intel_guc_slpc *slpc,
return guc_action_slpc_unset_param(guc, id);
}
+static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
+{
+ struct drm_i915_private *i915 = slpc_to_i915(slpc);
+ struct intel_guc *guc = slpc_to_guc(slpc);
+ intel_wakeref_t wakeref;
+ int ret = 0;
+
+ lockdep_assert_held(&slpc->lock);
+
+ if (!intel_guc_is_ready(guc))
+ return -ENODEV;
+
+ /*
+ * This function is a little different as compared to
+ * intel_guc_slpc_set_min_freq(). Softlimit will not be updated
+ * here since this is used to temporarily change min freq,
+ * for example, during a waitboost. Caller is responsible for
+ * checking bounds.
+ */
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+ ret = slpc_set_param(slpc,
+ SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+ freq);
+ if (ret)
+ drm_err(&i915->drm, "Unable to force min freq to %u: %d",
+ freq, ret);
+ }
+
+ return ret;
+}
+
+static void slpc_boost_work(struct work_struct *work)
+{
+ struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work);
+
+ /*
+ * Raise min freq to boost. It's possible that
+ * this is greater than current max. But it will
+ * certainly be limited by RP0. An error setting
+ * the min param is not fatal.
+ */
+ mutex_lock(&slpc->lock);
+ if (atomic_read(&slpc->num_waiters)) {
+ slpc_force_min_freq(slpc, slpc->boost_freq);
+ slpc->num_boosts++;
+ }
+ mutex_unlock(&slpc->lock);
+}
+
+int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
+{
+ struct intel_guc *guc = slpc_to_guc(slpc);
+ struct drm_i915_private *i915 = slpc_to_i915(slpc);
+ u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+ int err;
+
+ GEM_BUG_ON(slpc->vma);
+
+ err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr);
+ if (unlikely(err)) {
+ drm_err(&i915->drm,
+ "Failed to allocate SLPC struct (err=%pe)\n",
+ ERR_PTR(err));
+ return err;
+ }
+
+ slpc->max_freq_softlimit = 0;
+ slpc->min_freq_softlimit = 0;
+
+ slpc->boost_freq = 0;
+ atomic_set(&slpc->num_waiters, 0);
+ slpc->num_boosts = 0;
+
+ mutex_init(&slpc->lock);
+ INIT_WORK(&slpc->boost_work, slpc_boost_work);
+
+ return err;
+}
+
static const char *slpc_global_state_to_string(enum slpc_global_state state)
{
switch (state) {
@@ -393,7 +450,11 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
val > slpc->max_freq_softlimit)
return -EINVAL;
+ /* Need a lock now since waitboost can be modifying min as well */
+ mutex_lock(&slpc->lock);
+
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+
ret = slpc_set_param(slpc,
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
val);
@@ -406,6 +467,8 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
if (!ret)
slpc->min_freq_softlimit = val;
+ mutex_unlock(&slpc->lock);
+
return ret;
}
@@ -522,6 +585,9 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
GT_FREQUENCY_MULTIPLIER;
slpc->min_freq = REG_FIELD_GET(RPN_CAP_MASK, rp_state_cap) *
GT_FREQUENCY_MULTIPLIER;
+
+ if (!slpc->boost_freq)
+ slpc->boost_freq = slpc->rp0_freq;
}
/*
@@ -557,7 +623,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
if (unlikely(ret < 0))
return ret;
- intel_guc_pm_intrmsk_enable(&i915->gt);
+ intel_guc_pm_intrmsk_enable(to_gt(i915));
slpc_get_rp_values(slpc);
@@ -588,6 +654,47 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
return 0;
}
+int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val)
+{
+ int ret = 0;
+
+ if (val < slpc->min_freq || val > slpc->rp0_freq)
+ return -EINVAL;
+
+ mutex_lock(&slpc->lock);
+
+ if (slpc->boost_freq != val) {
+ /* Apply only if there are active waiters */
+ if (atomic_read(&slpc->num_waiters)) {
+ ret = slpc_force_min_freq(slpc, val);
+ if (ret) {
+ ret = -EIO;
+ goto done;
+ }
+ }
+
+ slpc->boost_freq = val;
+ }
+
+done:
+ mutex_unlock(&slpc->lock);
+ return ret;
+}
+
+void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc)
+{
+ /*
+ * Return min back to the softlimit.
+ * This is called during request retire,
+ * so we don't need to fail that if the
+ * set_param fails.
+ */
+ mutex_lock(&slpc->lock);
+ if (atomic_dec_and_test(&slpc->num_waiters))
+ slpc_force_min_freq(slpc, slpc->min_freq_softlimit);
+ mutex_unlock(&slpc->lock);
+}
+
int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p)
{
struct drm_i915_private *i915 = slpc_to_i915(slpc);
@@ -611,6 +718,8 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p
slpc_decode_max_freq(slpc));
drm_printf(p, "\tMin freq: %u MHz\n",
slpc_decode_min_freq(slpc));
+ drm_printf(p, "\twaitboosts: %u\n",
+ slpc->num_boosts);
}
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index e45054d5b9b4..0caa8fee3c04 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -34,9 +34,12 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc);
void intel_guc_slpc_fini(struct intel_guc_slpc *slpc);
int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val);
int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val);
+int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val);
int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val);
int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val);
int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p);
void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
+void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
+void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
index 41d13527666f..bf5b9a563c09 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
@@ -6,6 +6,9 @@
#ifndef _INTEL_GUC_SLPC_TYPES_H_
#define _INTEL_GUC_SLPC_TYPES_H_
+#include <linux/atomic.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
#include <linux/types.h>
#define SLPC_RESET_TIMEOUT_MS 5
@@ -20,10 +23,20 @@ struct intel_guc_slpc {
u32 min_freq;
u32 rp0_freq;
u32 rp1_freq;
+ u32 boost_freq;
/* frequency softlimits */
u32 min_freq_softlimit;
u32 max_freq_softlimit;
+
+ /* Protects set/reset of boost freq
+ * and value of num_waiters
+ */
+ struct mutex lock;
+
+ struct work_struct boost_work;
+ atomic_t num_waiters;
+ u32 num_boosts;
};
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 38b47e73e35d..e7517206af82 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -13,6 +13,7 @@
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_requests.h"
@@ -21,6 +22,7 @@
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"
+#include "intel_guc_ads.h"
#include "intel_guc_submission.h"
#include "i915_drv.h"
@@ -143,7 +145,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
* use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
* multi-lrc.
*/
-#define NUMBER_MULTI_LRC_GUC_ID (GUC_MAX_LRC_DESCRIPTORS / 16)
+#define NUMBER_MULTI_LRC_GUC_ID(guc) \
+ ((guc)->submission_state.num_guc_ids / 16)
/*
* Below is a set of functions which control the GuC scheduling state which
@@ -1038,8 +1041,6 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
spin_unlock(&ce->guc_state.lock);
- GEM_BUG_ON(!do_put && !destroyed);
-
if (pending_enable || destroyed || deregister) {
decr_outstanding_submission_g2h(guc);
if (deregister)
@@ -1077,6 +1078,271 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
xa_unlock_irqrestore(&guc->context_lookup, flags);
}
+/*
+ * GuC stores busyness stats for each engine at context in/out boundaries. A
+ * context 'in' logs execution start time, 'out' adds in -> out delta to total.
+ * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
+ * GuC.
+ *
+ * __i915_pmu_event_read samples engine busyness. When sampling, if context id
+ * is valid (!= ~0) and start is non-zero, the engine is considered to be
+ * active. For an active engine total busyness = total + (now - start), where
+ * 'now' is the time at which the busyness is sampled. For inactive engine,
+ * total busyness = total.
+ *
+ * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
+ *
+ * The start and total values provided by GuC are 32 bits and wrap around in a
+ * few minutes. Since perf pmu provides busyness as 64 bit monotonically
+ * increasing ns values, there is a need for this implementation to account for
+ * overflows and extend the GuC provided values to 64 bits before returning
+ * busyness to the user. In order to do that, a worker runs periodically at
+ * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
+ * 27 seconds for a gt clock frequency of 19.2 MHz).
+ */
+
+#define WRAP_TIME_CLKS U32_MAX
+#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)
+
+static void
+__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
+{
+ u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
+ u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);
+
+ if (new_start == lower_32_bits(*prev_start))
+ return;
+
+ if (new_start < gt_stamp_last &&
+ (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
+ gt_stamp_hi++;
+
+ if (new_start > gt_stamp_last &&
+ (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
+ gt_stamp_hi--;
+
+ *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
+}
+
+static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+{
+ struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
+ struct intel_engine_guc_stats *stats = &engine->stats.guc;
+ struct intel_guc *guc = &engine->gt->uc.guc;
+ u32 last_switch = rec->last_switch_in_stamp;
+ u32 ctx_id = rec->current_context_index;
+ u32 total = rec->total_runtime;
+
+ lockdep_assert_held(&guc->timestamp.lock);
+
+ stats->running = ctx_id != ~0U && last_switch;
+ if (stats->running)
+ __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
+
+ /*
+ * Instead of adjusting the total for overflow, just add the
+ * difference from previous sample stats->total_gt_clks
+ */
+ if (total && total != ~0U) {
+ stats->total_gt_clks += (u32)(total - stats->prev_total);
+ stats->prev_total = total;
+ }
+}
+
+static void guc_update_pm_timestamp(struct intel_guc *guc,
+ struct intel_engine_cs *engine,
+ ktime_t *now)
+{
+ u32 gt_stamp_now, gt_stamp_hi;
+
+ lockdep_assert_held(&guc->timestamp.lock);
+
+ gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
+ gt_stamp_now = intel_uncore_read(engine->uncore,
+ RING_TIMESTAMP(engine->mmio_base));
+ *now = ktime_get();
+
+ if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp))
+ gt_stamp_hi++;
+
+ guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now;
+}
+
+/*
+ * Unlike the execlist mode of submission total and active times are in terms of
+ * gt clocks. The *now parameter is retained to return the cpu time at which the
+ * busyness was sampled.
+ */
+static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
+{
+ struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
+ struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
+ struct intel_gt *gt = engine->gt;
+ struct intel_guc *guc = &gt->uc.guc;
+ u64 total, gt_stamp_saved;
+ unsigned long flags;
+ u32 reset_count;
+ bool in_reset;
+
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+
+ /*
+ * If a reset happened, we risk reading partially updated engine
+ * busyness from GuC, so we just use the driver stored copy of busyness.
+ * Synchronize with gt reset using reset_count and the
+ * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
+ * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
+ * usable by checking the flag afterwards.
+ */
+ reset_count = i915_reset_count(gpu_error);
+ in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);
+
+ *now = ktime_get();
+
+ /*
+ * The active busyness depends on start_gt_clk and gt_stamp.
+ * gt_stamp is updated by i915 only when gt is awake and the
+ * start_gt_clk is derived from GuC state. To get a consistent
+ * view of activity, we query the GuC state only if gt is awake.
+ */
+ if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
+ stats_saved = *stats;
+ gt_stamp_saved = guc->timestamp.gt_stamp;
+ guc_update_engine_gt_clks(engine);
+ guc_update_pm_timestamp(guc, engine, now);
+ intel_gt_pm_put_async(gt);
+ if (i915_reset_count(gpu_error) != reset_count) {
+ *stats = stats_saved;
+ guc->timestamp.gt_stamp = gt_stamp_saved;
+ }
+ }
+
+ total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
+ if (stats->running) {
+ u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
+
+ total += intel_gt_clock_interval_to_ns(gt, clk);
+ }
+
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+
+ return ns_to_ktime(total);
+}
+
+static void __reset_guc_busyness_stats(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned long flags;
+ ktime_t unused;
+
+ cancel_delayed_work_sync(&guc->timestamp.work);
+
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+
+ for_each_engine(engine, gt, id) {
+ guc_update_pm_timestamp(guc, engine, &unused);
+ guc_update_engine_gt_clks(engine);
+ engine->stats.guc.prev_total = 0;
+ }
+
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
+static void __update_guc_busyness_stats(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned long flags;
+ ktime_t unused;
+
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+ for_each_engine(engine, gt, id) {
+ guc_update_pm_timestamp(guc, engine, &unused);
+ guc_update_engine_gt_clks(engine);
+ }
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
+static void guc_timestamp_ping(struct work_struct *wrk)
+{
+ struct intel_guc *guc = container_of(wrk, typeof(*guc),
+ timestamp.work.work);
+ struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
+ struct intel_gt *gt = guc_to_gt(guc);
+ intel_wakeref_t wakeref;
+ int srcu, ret;
+
+ /*
+ * Synchronize with gt reset to make sure the worker does not
+ * corrupt the engine/guc stats.
+ */
+ ret = intel_gt_reset_trylock(gt, &srcu);
+ if (ret)
+ return;
+
+ with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+ __update_guc_busyness_stats(guc);
+
+ intel_gt_reset_unlock(gt, srcu);
+
+ mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
+ guc->timestamp.ping_delay);
+}
+
+static int guc_action_enable_usage_stats(struct intel_guc *guc)
+{
+ u32 offset = intel_guc_engine_usage_offset(guc);
+ u32 action[] = {
+ INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
+ offset,
+ 0,
+ };
+
+ return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+
+static void guc_init_engine_stats(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ intel_wakeref_t wakeref;
+
+ mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
+ guc->timestamp.ping_delay);
+
+ with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
+ int ret = guc_action_enable_usage_stats(guc);
+
+ if (ret)
+ drm_err(&gt->i915->drm,
+ "Failed to enable usage stats: %d!\n", ret);
+ }
+}
+
+void intel_guc_busyness_park(struct intel_gt *gt)
+{
+ struct intel_guc *guc = &gt->uc.guc;
+
+ if (!guc_submission_initialized(guc))
+ return;
+
+ cancel_delayed_work(&guc->timestamp.work);
+ __update_guc_busyness_stats(guc);
+}
+
+void intel_guc_busyness_unpark(struct intel_gt *gt)
+{
+ struct intel_guc *guc = &gt->uc.guc;
+
+ if (!guc_submission_initialized(guc))
+ return;
+
+ mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
+ guc->timestamp.ping_delay);
+}
+
static inline bool
submission_disabled(struct intel_guc *guc)
{
@@ -1138,6 +1404,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
intel_gt_park_heartbeats(guc_to_gt(guc));
disable_submission(guc);
guc->interrupts.disable(guc);
+ __reset_guc_busyness_stats(guc);
/* Flush IRQ handler */
spin_lock_irq(&guc_to_gt(guc)->irq_lock);
@@ -1484,6 +1751,7 @@ static void destroyed_worker_func(struct work_struct *w);
*/
int intel_guc_submission_init(struct intel_guc *guc)
{
+ struct intel_gt *gt = guc_to_gt(guc);
int ret;
if (guc->lrc_desc_pool)
@@ -1508,10 +1776,14 @@ int intel_guc_submission_init(struct intel_guc *guc)
destroyed_worker_func);
guc->submission_state.guc_ids_bitmap =
- bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL);
+ bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
if (!guc->submission_state.guc_ids_bitmap)
return -ENOMEM;
+ spin_lock_init(&guc->timestamp.lock);
+ INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
+ guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
+
return 0;
}
@@ -1598,13 +1870,13 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
if (intel_context_is_parent(ce))
ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
- NUMBER_MULTI_LRC_GUC_ID,
+ NUMBER_MULTI_LRC_GUC_ID(guc),
order_base_2(ce->parallel.number_children
+ 1));
else
ret = ida_simple_get(&guc->submission_state.guc_ids,
- NUMBER_MULTI_LRC_GUC_ID,
- GUC_MAX_LRC_DESCRIPTORS,
+ NUMBER_MULTI_LRC_GUC_ID(guc),
+ guc->submission_state.num_guc_ids,
GFP_KERNEL | __GFP_RETRY_MAYFAIL |
__GFP_NOWARN);
if (unlikely(ret < 0))
@@ -1662,14 +1934,18 @@ static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
GEM_BUG_ON(intel_context_is_parent(cn));
list_del_init(&cn->guc_id.link);
- ce->guc_id = cn->guc_id;
+ ce->guc_id.id = cn->guc_id.id;
- spin_lock(&ce->guc_state.lock);
+ spin_lock(&cn->guc_state.lock);
clr_context_registered(cn);
- spin_unlock(&ce->guc_state.lock);
+ spin_unlock(&cn->guc_state.lock);
set_context_guc_id_invalid(cn);
+#ifdef CONFIG_DRM_I915_SELFTEST
+ guc->number_guc_id_stolen++;
+#endif
+
return 0;
} else {
return -EAGAIN;
@@ -2373,7 +2649,6 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
unsigned long flags;
bool disabled;
- lockdep_assert_held(&guc->submission_state.lock);
GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
@@ -2389,7 +2664,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
}
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
if (unlikely(disabled)) {
- __release_guc_id(guc, ce);
+ release_guc_id(guc, ce);
__guc_context_destroy(ce);
return;
}
@@ -2423,36 +2698,48 @@ static void __guc_context_destroy(struct intel_context *ce)
static void guc_flush_destroyed_contexts(struct intel_guc *guc)
{
- struct intel_context *ce, *cn;
+ struct intel_context *ce;
unsigned long flags;
GEM_BUG_ON(!submission_disabled(guc) &&
guc_submission_initialized(guc));
- spin_lock_irqsave(&guc->submission_state.lock, flags);
- list_for_each_entry_safe(ce, cn,
- &guc->submission_state.destroyed_contexts,
- destroyed_link) {
- list_del_init(&ce->destroyed_link);
- __release_guc_id(guc, ce);
+ while (!list_empty(&guc->submission_state.destroyed_contexts)) {
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
+ struct intel_context,
+ destroyed_link);
+ if (ce)
+ list_del_init(&ce->destroyed_link);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+
+ if (!ce)
+ break;
+
+ release_guc_id(guc, ce);
__guc_context_destroy(ce);
}
- spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}
static void deregister_destroyed_contexts(struct intel_guc *guc)
{
- struct intel_context *ce, *cn;
+ struct intel_context *ce;
unsigned long flags;
- spin_lock_irqsave(&guc->submission_state.lock, flags);
- list_for_each_entry_safe(ce, cn,
- &guc->submission_state.destroyed_contexts,
- destroyed_link) {
- list_del_init(&ce->destroyed_link);
+ while (!list_empty(&guc->submission_state.destroyed_contexts)) {
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
+ struct intel_context,
+ destroyed_link);
+ if (ce)
+ list_del_init(&ce->destroyed_link);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+
+ if (!ce)
+ break;
+
guc_lrc_desc_unpin(ce);
}
- spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}
static void destroyed_worker_func(struct work_struct *w)
@@ -3080,8 +3367,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
ce = intel_engine_create_virtual(siblings, num_siblings,
FORCE_VIRTUAL);
- if (!ce) {
- err = ERR_PTR(-ENOMEM);
+ if (IS_ERR(ce)) {
+ err = ERR_CAST(ce);
goto unwind;
}
@@ -3369,7 +3656,9 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_flush = gen12_emit_flush_xcs;
}
engine->set_default_submission = guc_set_default_submission;
+ engine->busyness = guc_engine_busyness;
+ engine->flags |= I915_ENGINE_SUPPORTS_STATS;
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
engine->flags |= I915_ENGINE_HAS_TIMESLICES;
@@ -3468,6 +3757,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
void intel_guc_submission_enable(struct intel_guc *guc)
{
guc_init_lrc_mapping(guc);
+ guc_init_engine_stats(guc);
}
void intel_guc_submission_disable(struct intel_guc *guc)
@@ -3494,6 +3784,7 @@ static bool __guc_submission_selected(struct intel_guc *guc)
void intel_guc_submission_init_early(struct intel_guc *guc)
{
+ guc->submission_state.num_guc_ids = GUC_MAX_LRC_DESCRIPTORS;
guc->submission_supported = __guc_submission_supported(guc);
guc->submission_selected = __guc_submission_selected(guc);
}
@@ -3695,6 +3986,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len)
{
struct intel_context *ce;
+ unsigned long flags;
int desc_idx;
if (unlikely(len != 1)) {
@@ -3703,11 +3995,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
}
desc_idx = msg[0];
+
+ /*
+ * The context lookup uses the xarray but lookups only require an RCU lock
+ * not the full spinlock. So take the lock explicitly and keep it until the
+ * context has been reference count locked to ensure it can't be destroyed
+ * asynchronously until the reset is done.
+ */
+ xa_lock_irqsave(&guc->context_lookup, flags);
ce = g2h_context_lookup(guc, desc_idx);
+ if (ce)
+ intel_context_get(ce);
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
+
if (unlikely(!ce))
return -EPROTO;
guc_handle_context_reset(guc, ce);
+ intel_context_put(ce);
return 0;
}
@@ -3728,11 +4033,12 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len)
{
struct intel_engine_cs *engine;
+ struct intel_gt *gt = guc_to_gt(guc);
u8 guc_class, instance;
u32 reason;
if (unlikely(len != 3)) {
- drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+ drm_err(&gt->i915->drm, "Invalid length %u", len);
return -EPROTO;
}
@@ -3742,12 +4048,19 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
engine = guc_lookup_engine(guc, guc_class, instance);
if (unlikely(!engine)) {
- drm_err(&guc_to_gt(guc)->i915->drm,
+ drm_err(&gt->i915->drm,
"Invalid engine %d:%d", guc_class, instance);
return -EPROTO;
}
- intel_gt_handle_error(guc_to_gt(guc), engine->mask,
+ /*
+ * This is an unexpected failure of a hardware feature. So, log a real
+ * error message not just the informational that comes with the reset.
+ */
+ drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
+ guc_class, instance, engine->name, reason);
+
+ intel_gt_handle_error(gt, engine->mask,
I915_ERROR_CAPTURE,
"GuC failed to reset %s (reason=0x%08x)\n",
engine->name, reason);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index c7ef44fa0c36..5a95a9f0a8e3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -28,6 +28,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
struct i915_request *hung_rq,
struct drm_printer *m);
+void intel_guc_busyness_park(struct intel_gt *gt);
+void intel_guc_busyness_unpark(struct intel_gt *gt);
bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index ff4b6869b80b..d10b227ac4aa 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -54,65 +54,6 @@ void intel_huc_init_early(struct intel_huc *huc)
}
}
-static int intel_huc_rsa_data_create(struct intel_huc *huc)
-{
- struct intel_gt *gt = huc_to_gt(huc);
- struct intel_guc *guc = &gt->uc.guc;
- struct i915_vma *vma;
- size_t copied;
- void *vaddr;
- int err;
-
- err = i915_inject_probe_error(gt->i915, -ENXIO);
- if (err)
- return err;
-
- /*
- * HuC firmware will sit above GUC_GGTT_TOP and will not map
- * through GTT. Unfortunately, this means GuC cannot perform
- * the HuC auth. as the rsa offset now falls within the GuC
- * inaccessible range. We resort to perma-pinning an additional
- * vma within the accessible range that only contains the rsa
- * signature. The GuC can use this extra pinning to perform
- * the authentication since its GGTT offset will be GuC
- * accessible.
- */
- GEM_BUG_ON(huc->fw.rsa_size > PAGE_SIZE);
- vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
-
- vaddr = i915_gem_object_pin_map_unlocked(vma->obj,
- i915_coherent_map_type(gt->i915,
- vma->obj, true));
- if (IS_ERR(vaddr)) {
- i915_vma_unpin_and_release(&vma, 0);
- err = PTR_ERR(vaddr);
- goto unpin_out;
- }
-
- copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size);
- i915_gem_object_unpin_map(vma->obj);
-
- if (copied < huc->fw.rsa_size) {
- err = -ENOMEM;
- goto unpin_out;
- }
-
- huc->rsa_data = vma;
-
- return 0;
-
-unpin_out:
- i915_vma_unpin_and_release(&vma, 0);
- return err;
-}
-
-static void intel_huc_rsa_data_destroy(struct intel_huc *huc)
-{
- i915_vma_unpin_and_release(&huc->rsa_data, 0);
-}
-
int intel_huc_init(struct intel_huc *huc)
{
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
@@ -122,21 +63,10 @@ int intel_huc_init(struct intel_huc *huc)
if (err)
goto out;
- /*
- * HuC firmware image is outside GuC accessible range.
- * Copy the RSA signature out of the image into
- * a perma-pinned region set aside for it
- */
- err = intel_huc_rsa_data_create(huc);
- if (err)
- goto out_fini;
-
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOADABLE);
return 0;
-out_fini:
- intel_uc_fw_fini(&huc->fw);
out:
i915_probe_error(i915, "failed with %d\n", err);
return err;
@@ -147,7 +77,6 @@ void intel_huc_fini(struct intel_huc *huc)
if (!intel_uc_fw_is_loadable(&huc->fw))
return;
- intel_huc_rsa_data_destroy(huc);
intel_uc_fw_fini(&huc->fw);
}
@@ -177,7 +106,7 @@ int intel_huc_auth(struct intel_huc *huc)
goto fail;
ret = intel_guc_auth_huc(guc,
- intel_guc_ggtt_offset(guc, huc->rsa_data));
+ intel_guc_ggtt_offset(guc, huc->fw.rsa_data));
if (ret) {
DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret);
goto fail;
@@ -199,7 +128,7 @@ int intel_huc_auth(struct intel_huc *huc)
fail:
i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret);
- intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_FAIL);
+ intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index daee43b661d4..ae8c8a6c8cc8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -15,8 +15,6 @@ struct intel_huc {
struct intel_uc_fw fw;
/* HuC-specific additions */
- struct i915_vma *rsa_data;
-
struct {
i915_reg_t reg;
u32 mask;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 2fef3b0bbe95..09ed29df67bc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -8,6 +8,7 @@
#include "intel_guc.h"
#include "intel_guc_ads.h"
#include "intel_guc_submission.h"
+#include "gt/intel_rps.h"
#include "intel_uc.h"
#include "i915_drv.h"
@@ -35,7 +36,7 @@ static void uc_expand_default_options(struct intel_uc *uc)
}
/* Intermediate platforms are HuC authentication only */
- if (IS_ALDERLAKE_S(i915)) {
+ if (IS_ALDERLAKE_S(i915) && !IS_ADLS_RPLS(i915)) {
i915->params.enable_guc = ENABLE_GUC_LOAD_HUC;
return;
}
@@ -462,6 +463,8 @@ static int __uc_init_hw(struct intel_uc *uc)
else
attempts = 1;
+ intel_rps_raise_unslice(&uc_to_gt(uc)->rps);
+
while (attempts--) {
/*
* Always reset the GuC just before (re)loading, so
@@ -499,6 +502,9 @@ static int __uc_init_hw(struct intel_uc *uc)
ret = intel_guc_slpc_enable(&guc->slpc);
if (ret)
goto err_submission;
+ } else {
+ /* Restore GT back to RPn for non-SLPC path */
+ intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
}
drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
@@ -529,6 +535,9 @@ err_submission:
err_log_capture:
__uc_capture_load_err_log(uc);
err_out:
+ /* Return GT back to RPn */
+ intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
+
__uc_sanitize(uc);
if (!ret) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 3aa87be4f2e4..a5af05bde6f2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -48,22 +48,39 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* Note that RKL and ADL-S have the same GuC/HuC device ID's and use the same
* firmware as TGL.
*/
-#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
- fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \
- fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
- fw_def(DG1, 0, guc_def(dg1, 62, 0, 0), huc_def(dg1, 7, 9, 3)) \
- fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
- fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
- fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \
- fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \
- fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0), huc_def(icl, 9, 0, 0)) \
- fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0), huc_def(cml, 4, 0, 0)) \
- fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \
- fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \
- fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0), huc_def(glk, 4, 0, 0)) \
- fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \
- fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0), huc_def(bxt, 2, 0, 0)) \
- fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0), huc_def(skl, 2, 0, 0))
+#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
+ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3)) \
+ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0)) \
+ fw_def(DG1, 0, guc_def(dg1, 62, 0, 0)) \
+ fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0)) \
+ fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0)) \
+ fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0)) \
+ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0)) \
+ fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0)) \
+ fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0)) \
+ fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0)) \
+ fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0)) \
+ fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0)) \
+ fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0)) \
+ fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0)) \
+ fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0))
+
+#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
+ fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \
+ fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \
+ fw_def(DG1, 0, huc_def(dg1, 7, 9, 3)) \
+ fw_def(ROCKETLAKE, 0, huc_def(tgl, 7, 9, 3)) \
+ fw_def(TIGERLAKE, 0, huc_def(tgl, 7, 9, 3)) \
+ fw_def(JASPERLAKE, 0, huc_def(ehl, 9, 0, 0)) \
+ fw_def(ELKHARTLAKE, 0, huc_def(ehl, 9, 0, 0)) \
+ fw_def(ICELAKE, 0, huc_def(icl, 9, 0, 0)) \
+ fw_def(COMETLAKE, 5, huc_def(cml, 4, 0, 0)) \
+ fw_def(COMETLAKE, 0, huc_def(kbl, 4, 0, 0)) \
+ fw_def(COFFEELAKE, 0, huc_def(kbl, 4, 0, 0)) \
+ fw_def(GEMINILAKE, 0, huc_def(glk, 4, 0, 0)) \
+ fw_def(KABYLAKE, 0, huc_def(kbl, 4, 0, 0)) \
+ fw_def(BROXTON, 0, huc_def(bxt, 2, 0, 0)) \
+ fw_def(SKYLAKE, 0, huc_def(skl, 2, 0, 0))
#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
"i915/" \
@@ -79,11 +96,11 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
__MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)
/* All blobs need to be declared via MODULE_FIRMWARE() */
-#define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \
- MODULE_FIRMWARE(guc_); \
- MODULE_FIRMWARE(huc_);
+#define INTEL_UC_MODULE_FW(platform_, revid_, uc_) \
+ MODULE_FIRMWARE(uc_);
-INTEL_UC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH, MAKE_HUC_FW_PATH)
+INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
+INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH)
/* The below structs and macros are used to iterate across the list of blobs */
struct __packed uc_fw_blob {
@@ -106,31 +123,47 @@ struct __packed uc_fw_blob {
struct __packed uc_fw_platform_requirement {
enum intel_platform p;
u8 rev; /* first platform rev using this FW */
- const struct uc_fw_blob blobs[INTEL_UC_FW_NUM_TYPES];
+ const struct uc_fw_blob blob;
};
-#define MAKE_FW_LIST(platform_, revid_, guc_, huc_) \
+#define MAKE_FW_LIST(platform_, revid_, uc_) \
{ \
.p = INTEL_##platform_, \
.rev = revid_, \
- .blobs[INTEL_UC_FW_TYPE_GUC] = guc_, \
- .blobs[INTEL_UC_FW_TYPE_HUC] = huc_, \
+ .blob = uc_, \
},
+struct fw_blobs_by_type {
+ const struct uc_fw_platform_requirement *blobs;
+ u32 count;
+};
+
static void
__uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
{
- static const struct uc_fw_platform_requirement fw_blobs[] = {
- INTEL_UC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, HUC_FW_BLOB)
+ static const struct uc_fw_platform_requirement blobs_guc[] = {
+ INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
+ };
+ static const struct uc_fw_platform_requirement blobs_huc[] = {
+ INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
};
+ static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
+ [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
+ [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
+ };
+ static const struct uc_fw_platform_requirement *fw_blobs;
enum intel_platform p = INTEL_INFO(i915)->platform;
+ u32 fw_count;
u8 rev = INTEL_REVID(i915);
int i;
- for (i = 0; i < ARRAY_SIZE(fw_blobs) && p <= fw_blobs[i].p; i++) {
+ GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
+ fw_blobs = blobs_all[uc_fw->type].blobs;
+ fw_count = blobs_all[uc_fw->type].count;
+
+ for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) {
if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
- const struct uc_fw_blob *blob =
- &fw_blobs[i].blobs[uc_fw->type];
+ const struct uc_fw_blob *blob = &fw_blobs[i].blob;
uc_fw->path = blob->path;
uc_fw->major_ver_wanted = blob->major;
uc_fw->minor_ver_wanted = blob->minor;
@@ -140,7 +173,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
/* make sure the list is ordered as expected */
if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) {
- for (i = 1; i < ARRAY_SIZE(fw_blobs); i++) {
+ for (i = 1; i < fw_count; i++) {
if (fw_blobs[i].p < fw_blobs[i - 1].p)
continue;
@@ -322,13 +355,6 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
/* now RSA */
- if (unlikely(css->key_size_dw != UOS_RSA_SCRATCH_COUNT)) {
- drm_warn(&i915->drm, "%s firmware %s: unexpected key size: %u != %u\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
- css->key_size_dw, UOS_RSA_SCRATCH_COUNT);
- err = -EPROTO;
- goto fail;
- }
uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
/* At least, it should have header, uCode and RSA. Size of all three. */
@@ -540,10 +566,79 @@ fail:
i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
err);
- intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL);
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return err;
}
+static inline bool uc_fw_need_rsa_in_memory(struct intel_uc_fw *uc_fw)
+{
+ /*
+ * The HW reads the GuC RSA from memory if the key size is > 256 bytes,
+ * while it reads it from the 64 RSA registers if it is smaller.
+ * The HuC RSA is always read from memory.
+ */
+ return uc_fw->type == INTEL_UC_FW_TYPE_HUC || uc_fw->rsa_size > 256;
+}
+
+static int uc_fw_rsa_data_create(struct intel_uc_fw *uc_fw)
+{
+ struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
+ struct i915_vma *vma;
+ size_t copied;
+ void *vaddr;
+ int err;
+
+ err = i915_inject_probe_error(gt->i915, -ENXIO);
+ if (err)
+ return err;
+
+ if (!uc_fw_need_rsa_in_memory(uc_fw))
+ return 0;
+
+ /*
+ * uC firmwares will sit above GUC_GGTT_TOP and will not map through
+ * GGTT. Unfortunately, this means that the GuC HW cannot perform the uC
+ * authentication from memory, as the RSA offset now falls within the
+ * GuC inaccessible range. We resort to perma-pinning an additional vma
+ * within the accessible range that only contains the RSA signature.
+ * The GuC HW can use this extra pinning to perform the authentication
+ * since its GGTT offset will be GuC accessible.
+ */
+ GEM_BUG_ON(uc_fw->rsa_size > PAGE_SIZE);
+ vma = intel_guc_allocate_vma(&gt->uc.guc, PAGE_SIZE);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ vaddr = i915_gem_object_pin_map_unlocked(vma->obj,
+ i915_coherent_map_type(gt->i915, vma->obj, true));
+ if (IS_ERR(vaddr)) {
+ i915_vma_unpin_and_release(&vma, 0);
+ err = PTR_ERR(vaddr);
+ goto unpin_out;
+ }
+
+ copied = intel_uc_fw_copy_rsa(uc_fw, vaddr, vma->size);
+ i915_gem_object_unpin_map(vma->obj);
+
+ if (copied < uc_fw->rsa_size) {
+ err = -ENOMEM;
+ goto unpin_out;
+ }
+
+ uc_fw->rsa_data = vma;
+
+ return 0;
+
+unpin_out:
+ i915_vma_unpin_and_release(&vma, 0);
+ return err;
+}
+
+static void uc_fw_rsa_data_destroy(struct intel_uc_fw *uc_fw)
+{
+ i915_vma_unpin_and_release(&uc_fw->rsa_data, 0);
+}
+
int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
{
int err;
@@ -558,14 +653,29 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
if (err) {
DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n",
intel_uc_fw_type_repr(uc_fw->type), err);
- intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL);
+ goto out;
}
+ err = uc_fw_rsa_data_create(uc_fw);
+ if (err) {
+ DRM_DEBUG_DRIVER("%s fw rsa data creation failed, err=%d\n",
+ intel_uc_fw_type_repr(uc_fw->type), err);
+ goto out_unpin;
+ }
+
+ return 0;
+
+out_unpin:
+ i915_gem_object_unpin_pages(uc_fw->obj);
+out:
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL);
return err;
}
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
{
+ uc_fw_rsa_data_destroy(uc_fw);
+
if (i915_gem_object_has_pinned_pages(uc_fw->obj))
i915_gem_object_unpin_pages(uc_fw->obj);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 1e00bf65639e..d9d1dc0b4cbb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -32,11 +32,12 @@ struct intel_gt;
* | | MISSING <--/ | \--> ERROR |
* | fetch | V |
* | | AVAILABLE |
- * +------------+- | -+
+ * +------------+- | \ -+
+ * | | | \--> INIT FAIL |
* | init | V |
* | | /------> LOADABLE <----<-----------\ |
* +------------+- \ / \ \ \ -+
- * | | FAIL <--< \--> TRANSFERRED \ |
+ * | | LOAD FAIL <--< \--> TRANSFERRED \ |
* | upload | \ / \ / |
* | | \---------/ \--> RUNNING |
* +------------+---------------------------------------------------+
@@ -50,8 +51,9 @@ enum intel_uc_fw_status {
INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */
INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */
INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+ INTEL_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */
INTEL_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
- INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */
+ INTEL_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */
INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
INTEL_UC_FIRMWARE_RUNNING /* init/auth done */
};
@@ -84,6 +86,7 @@ struct intel_uc_fw {
* or during a GT reset (mutex guarantees single threaded).
*/
struct i915_vma dummy;
+ struct i915_vma *rsa_data;
/*
* The firmware build process will generate a version header file with major and
@@ -130,10 +133,12 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
return "ERROR";
case INTEL_UC_FIRMWARE_AVAILABLE:
return "AVAILABLE";
+ case INTEL_UC_FIRMWARE_INIT_FAIL:
+ return "INIT FAIL";
case INTEL_UC_FIRMWARE_LOADABLE:
return "LOADABLE";
- case INTEL_UC_FIRMWARE_FAIL:
- return "FAIL";
+ case INTEL_UC_FIRMWARE_LOAD_FAIL:
+ return "LOAD FAIL";
case INTEL_UC_FIRMWARE_TRANSFERRED:
return "TRANSFERRED";
case INTEL_UC_FIRMWARE_RUNNING:
@@ -155,7 +160,8 @@ static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status)
return -ENOENT;
case INTEL_UC_FIRMWARE_ERROR:
return -ENOEXEC;
- case INTEL_UC_FIRMWARE_FAIL:
+ case INTEL_UC_FIRMWARE_INIT_FAIL:
+ case INTEL_UC_FIRMWARE_LOAD_FAIL:
return -EIO;
case INTEL_UC_FIRMWARE_SELECTED:
return -ESTALE;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index fb0e4a7bd8ca..d3327b802b76 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -3,8 +3,21 @@
* Copyright �� 2021 Intel Corporation
*/
+#include "selftests/igt_spinner.h"
#include "selftests/intel_scheduler_helpers.h"
+static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
+{
+ int err = 0;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (spin && !igt_wait_for_spinner(spin, rq))
+ err = -ETIMEDOUT;
+
+ return err;
+}
+
static struct i915_request *nop_user_request(struct intel_context *ce,
struct i915_request *from)
{
@@ -110,12 +123,172 @@ err:
return ret;
}
+/*
+ * intel_guc_steal_guc_ids - Test to exhaust all guc_ids and then steal one
+ *
+ * This test creates a spinner which is used to block all subsequent submissions
+ * until it completes. Next, a loop creates a context and a NOP request each
+ * iteration until the guc_ids are exhausted (request creation returns -EAGAIN).
+ * The spinner is ended, unblocking all requests created in the loop. At this
+ * point all guc_ids are exhausted but are available to steal. Try to create
+ * another request which should successfully steal a guc_id. Wait on last
+ * request to complete, idle GPU, verify a guc_id was stolen via a counter, and
+ * exit the test. Test also artificially reduces the number of guc_ids so the
+ * test runs in a timely manner.
+ */
+static int intel_guc_steal_guc_ids(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_guc *guc = &gt->uc.guc;
+ int ret, sv, context_index = 0;
+ intel_wakeref_t wakeref;
+ struct intel_engine_cs *engine;
+ struct intel_context **ce;
+ struct igt_spinner spin;
+ struct i915_request *spin_rq = NULL, *rq, *last = NULL;
+ int number_guc_id_stolen = guc->number_guc_id_stolen;
+
+ ce = kzalloc(sizeof(*ce) * GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL);
+ if (!ce) {
+ pr_err("Context array allocation failed\n");
+ return -ENOMEM;
+ }
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+ engine = intel_selftest_find_any_engine(gt);
+ sv = guc->submission_state.num_guc_ids;
+ guc->submission_state.num_guc_ids = 4096;
+
+ /* Create spinner to block requests in below loop */
+ ce[context_index] = intel_context_create(engine);
+ if (IS_ERR(ce[context_index])) {
+ ret = PTR_ERR(ce[context_index]);
+ ce[context_index] = NULL;
+ pr_err("Failed to create context: %d\n", ret);
+ goto err_wakeref;
+ }
+ ret = igt_spinner_init(&spin, engine->gt);
+ if (ret) {
+ pr_err("Failed to create spinner: %d\n", ret);
+ goto err_contexts;
+ }
+ spin_rq = igt_spinner_create_request(&spin, ce[context_index],
+ MI_ARB_CHECK);
+ if (IS_ERR(spin_rq)) {
+ ret = PTR_ERR(spin_rq);
+ pr_err("Failed to create spinner request: %d\n", ret);
+ goto err_contexts;
+ }
+ ret = request_add_spin(spin_rq, &spin);
+ if (ret) {
+ pr_err("Failed to add Spinner request: %d\n", ret);
+ goto err_spin_rq;
+ }
+
+ /* Use all guc_ids */
+ while (ret != -EAGAIN) {
+ ce[++context_index] = intel_context_create(engine);
+ if (IS_ERR(ce[context_index])) {
+ ret = PTR_ERR(ce[context_index--]);
+ ce[context_index] = NULL;
+ pr_err("Failed to create context: %d\n", ret);
+ goto err_spin_rq;
+ }
+
+ rq = nop_user_request(ce[context_index], spin_rq);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ rq = NULL;
+ if (ret != -EAGAIN) {
+ pr_err("Failed to create request, %d: %d\n",
+ context_index, ret);
+ goto err_spin_rq;
+ }
+ } else {
+ if (last)
+ i915_request_put(last);
+ last = rq;
+ }
+ }
+
+ /* Release blocked requests */
+ igt_spinner_end(&spin);
+ ret = intel_selftest_wait_for_rq(spin_rq);
+ if (ret) {
+ pr_err("Spin request failed to complete: %d\n", ret);
+ i915_request_put(last);
+ goto err_spin_rq;
+ }
+ i915_request_put(spin_rq);
+ igt_spinner_fini(&spin);
+ spin_rq = NULL;
+
+ /* Wait for last request */
+ ret = i915_request_wait(last, 0, HZ * 30);
+ i915_request_put(last);
+ if (ret < 0) {
+ pr_err("Last request failed to complete: %d\n", ret);
+ goto err_spin_rq;
+ }
+
+ /* Try to steal guc_id */
+ rq = nop_user_request(ce[context_index], NULL);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ pr_err("Failed to steal guc_id, %d: %d\n", context_index, ret);
+ goto err_spin_rq;
+ }
+
+ /* Wait for request with stolen guc_id */
+ ret = i915_request_wait(rq, 0, HZ);
+ i915_request_put(rq);
+ if (ret < 0) {
+ pr_err("Request with stolen guc_id failed to complete: %d\n",
+ ret);
+ goto err_spin_rq;
+ }
+
+ /* Wait for idle */
+ ret = intel_gt_wait_for_idle(gt, HZ * 30);
+ if (ret < 0) {
+ pr_err("GT failed to idle: %d\n", ret);
+ goto err_spin_rq;
+ }
+
+ /* Verify a guc_id was stolen */
+ if (guc->number_guc_id_stolen == number_guc_id_stolen) {
+ pr_err("No guc_id was stolen");
+ ret = -EINVAL;
+ } else {
+ ret = 0;
+ }
+
+err_spin_rq:
+ if (spin_rq) {
+ igt_spinner_end(&spin);
+ intel_selftest_wait_for_rq(spin_rq);
+ i915_request_put(spin_rq);
+ igt_spinner_fini(&spin);
+ intel_gt_wait_for_idle(gt, HZ * 30);
+ }
+err_contexts:
+ for (; context_index >= 0 && ce[context_index]; --context_index)
+ intel_context_put(ce[context_index]);
+err_wakeref:
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ kfree(ce);
+ guc->submission_state.num_guc_ids = sv;
+
+ return ret;
+}
+
int intel_guc_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(intel_guc_scrub_ctbs),
+ SUBTEST(intel_guc_steal_guc_ids),
};
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
if (intel_gt_is_wedged(gt))
return 0;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
index 50953c8e8b53..1297ddbf7f88 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
@@ -167,7 +167,7 @@ int intel_guc_multi_lrc_live_selftests(struct drm_i915_private *i915)
static const struct i915_subtest tests[] = {
SUBTEST(intel_guc_multi_lrc_basic),
};
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
if (intel_gt_is_wedged(gt))
return 0;