aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_perf.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c446
1 files changed, 368 insertions, 78 deletions
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index d2ac51fe4f04..65d7c2e599de 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -200,6 +200,7 @@
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
#include "i915_drv.h"
#include "i915_perf.h"
@@ -217,6 +218,7 @@
#include "oa/i915_oa_cflgt3.h"
#include "oa/i915_oa_cnl.h"
#include "oa/i915_oa_icl.h"
+#include "oa/i915_oa_tgl.h"
/* HW requires this to be a power of two, between 128k and 16M, though driver
* is currently generally designed assuming the largest 16M size is used such
@@ -293,6 +295,7 @@ static u32 i915_perf_stream_paranoid = true;
/* On Gen8+ automatically triggered OA reports include a 'reason' field... */
#define OAREPORT_REASON_MASK 0x3f
+#define OAREPORT_REASON_MASK_EXTENDED 0x7f
#define OAREPORT_REASON_SHIFT 19
#define OAREPORT_REASON_TIMER (1<<0)
#define OAREPORT_REASON_CTX_SWITCH (1<<3)
@@ -338,6 +341,10 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
[I915_OA_FORMAT_C4_B8] = { 7, 64 },
};
+static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
+ [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
+};
+
#define SAMPLE_OA_REPORT (1<<0)
/**
@@ -418,6 +425,14 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
kfree(oa_bo);
}
+static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
+{
+ struct intel_uncore *uncore = stream->uncore;
+
+ return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
+ GEN12_OAG_OATAILPTR_MASK;
+}
+
static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
@@ -538,7 +553,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
aging_tail = hw_tail;
stream->oa_buffer.aging_timestamp = now;
} else {
- DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
+ DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %x\n",
hw_tail);
}
}
@@ -740,7 +755,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* it to userspace...
*/
reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
- OAREPORT_REASON_MASK);
+ (IS_GEN(stream->perf->i915, 12) ?
+ OAREPORT_REASON_MASK_EXTENDED :
+ OAREPORT_REASON_MASK));
if (reason == 0) {
if (__ratelimit(&stream->perf->spurious_report_rs))
DRM_NOTE("Skipping spurious, invalid OA report\n");
@@ -757,7 +774,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* Note: that we don't clear the valid_ctx_bit so userspace can
* understand that the ID has been squashed by the kernel.
*/
- if (!(report32[0] & stream->perf->gen8_valid_ctx_bit))
+ if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
+ INTEL_GEN(stream->perf->i915) <= 11)
ctx_id = report32[2] = INVALID_CTX_ID;
/*
@@ -824,6 +842,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
}
if (start_offset != *offset) {
+ i915_reg_t oaheadptr;
+
+ oaheadptr = IS_GEN(stream->perf->i915, 12) ?
+ GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
+
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
/*
@@ -831,9 +854,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* relative to oa_buf_base so put back here...
*/
head += gtt_offset;
-
- intel_uncore_write(uncore, GEN8_OAHEADPTR,
- head & GEN8_OAHEADPTR_MASK);
+ intel_uncore_write(uncore, oaheadptr,
+ head & GEN12_OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = head;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -869,12 +891,16 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
{
struct intel_uncore *uncore = stream->uncore;
u32 oastatus;
+ i915_reg_t oastatus_reg;
int ret;
if (WARN_ON(!stream->oa_buffer.vaddr))
return -EIO;
- oastatus = intel_uncore_read(uncore, GEN8_OASTATUS);
+ oastatus_reg = IS_GEN(stream->perf->i915, 12) ?
+ GEN12_OAG_OASTATUS : GEN8_OASTATUS;
+
+ oastatus = intel_uncore_read(uncore, oastatus_reg);
/*
* We treat OABUFFER_OVERFLOW as a significant error:
@@ -906,7 +932,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
* Note: .oa_enable() is expected to re-init the oabuffer and
* reset GEN8_OASTATUS for us
*/
- oastatus = intel_uncore_read(uncore, GEN8_OASTATUS);
+ oastatus = intel_uncore_read(uncore, oastatus_reg);
}
if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
@@ -914,7 +940,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
DRM_I915_PERF_RECORD_OA_REPORT_LOST);
if (ret)
return ret;
- intel_uncore_write(uncore, GEN8_OASTATUS,
+ intel_uncore_write(uncore, oastatus_reg,
oastatus & ~GEN8_OASTATUS_REPORT_LOST);
}
@@ -1260,7 +1286,11 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
case 8:
case 9:
case 10:
- if (USES_GUC_SUBMISSION(ce->engine->i915)) {
+ if (intel_engine_in_execlists_submission_mode(ce->engine)) {
+ stream->specific_ctx_id_mask =
+ (1U << GEN8_CTX_ID_WIDTH) - 1;
+ stream->specific_ctx_id = stream->specific_ctx_id_mask;
+ } else {
/*
* When using GuC, the context descriptor we write in
* i915 is read by GuC and rewritten before it's
@@ -1280,10 +1310,6 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
*/
stream->specific_ctx_id_mask =
(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
- } else {
- stream->specific_ctx_id_mask =
- (1U << GEN8_CTX_ID_WIDTH) - 1;
- stream->specific_ctx_id = stream->specific_ctx_id_mask;
}
break;
@@ -1488,6 +1514,63 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
stream->pollin = false;
}
+static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
+{
+ struct intel_uncore *uncore = stream->uncore;
+ u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
+ unsigned long flags;
+
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
+
+ intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
+ intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
+ gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
+ stream->oa_buffer.head = gtt_offset;
+
+ /*
+ * PRM says:
+ *
+ * "This MMIO must be set before the OATAILPTR
+ * register and after the OAHEADPTR register. This is
+ * to enable proper functionality of the overflow
+ * bit."
+ */
+ intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
+ OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
+ intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
+ gtt_offset & GEN12_OAG_OATAILPTR_MASK);
+
+ /* Mark that we need updated tail pointers to read from... */
+ stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
+
+ /*
+ * Reset state used to recognise context switches, affecting which
+ * reports we will forward to userspace while filtering for a single
+ * context.
+ */
+ stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
+
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
+
+ /*
+ * NB: although the OA buffer will initially be allocated
+ * zeroed via shmfs (and so this memset is redundant when
+ * first allocating), we may re-init the OA buffer, either
+ * when re-enabling a stream or in error/reset paths.
+ *
+ * The reason we clear the buffer for each re-init is for the
+ * sanity check in gen8_append_oa_reports() that looks at the
+ * reason field to make sure it's non-zero which relies on
+ * the assumption that new reports are being written to zeroed
+ * memory...
+ */
+ memset(stream->oa_buffer.vaddr, 0,
+ stream->oa_buffer.vma->size);
+
+ stream->pollin = false;
+}
+
static int alloc_oa_buffer(struct i915_perf_stream *stream)
{
struct drm_i915_gem_object *bo;
@@ -1787,7 +1870,7 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
config_length += num_lri_dwords(oa_config->mux_regs_len);
config_length += num_lri_dwords(oa_config->b_counter_regs_len);
config_length += num_lri_dwords(oa_config->flex_regs_len);
- config_length++; /* MI_BATCH_BUFFER_END */
+ config_length += 3; /* MI_BATCH_BUFFER_START */
config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
@@ -1812,7 +1895,12 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
oa_config->flex_regs,
oa_config->flex_regs_len);
- *cs++ = MI_BATCH_BUFFER_END;
+ /* Jump into the active wait. */
+ *cs++ = (INTEL_GEN(stream->perf->i915) < 8 ?
+ MI_BATCH_BUFFER_START :
+ MI_BATCH_BUFFER_START_GEN8);
+ *cs++ = i915_ggtt_offset(stream->noa_wait);
+ *cs++ = 0;
i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
@@ -1990,12 +2078,20 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
u32 *reg_state = ce->lrc_reg_state;
int i;
- reg_state[ctx_oactxctrl + 1] =
- (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
- (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
- GEN8_OA_COUNTER_RESUME;
+ if (IS_GEN(stream->perf->i915, 12)) {
+ u32 format = stream->oa_buffer.format;
+
+ reg_state[ctx_oactxctrl + 1] =
+ (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
+ (stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
+ } else {
+ reg_state[ctx_oactxctrl + 1] =
+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+ GEN8_OA_COUNTER_RESUME;
+ }
- for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
+ for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
reg_state[ctx_flexeu0 + i * 2 + 1] =
oa_config_flex_reg(stream->oa_config, flex_regs[i]);
@@ -2128,6 +2224,36 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
return err;
}
+static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
+{
+ struct i915_request *rq;
+ u32 *cs;
+ int err = 0;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto out;
+ }
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
+ *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
+ enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+out:
+ i915_request_add(rq);
+
+ return err;
+}
+
/*
* Manages updating the per-context aspects of the OA stream
* configuration across all contexts.
@@ -2152,8 +2278,8 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
*
* Note: it's only the RCS/Render context that has any OA state.
*/
-static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
- const struct i915_oa_config *oa_config)
+static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
+ const struct i915_oa_config *oa_config)
{
struct drm_i915_private *i915 = stream->perf->i915;
/* The MMIO offsets for Flex EU registers aren't contiguous */
@@ -2165,11 +2291,9 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
CTX_R_PWR_CLK_STATE,
},
{
- GEN8_OACTXCONTROL,
+ IS_GEN(i915, 12) ?
+ GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
stream->perf->ctx_oactxctrl_offset + 1,
- ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
- (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
- GEN8_OA_COUNTER_RESUME)
},
{ EU_PERF_CNTL0, ctx_flexeuN(0) },
{ EU_PERF_CNTL1, ctx_flexeuN(1) },
@@ -2182,9 +2306,23 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
#undef ctx_flexeuN
struct intel_engine_cs *engine;
struct i915_gem_context *ctx, *cn;
+ size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
int i, err;
- for (i = 2; i < ARRAY_SIZE(regs); i++)
+ if (IS_GEN(i915, 12)) {
+ u32 format = stream->oa_buffer.format;
+
+ regs[1].value =
+ (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
+ (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
+ } else {
+ regs[1].value =
+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+ GEN8_OA_COUNTER_RESUME;
+ }
+
+ for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
lockdep_assert_held(&stream->perf->lock);
@@ -2215,7 +2353,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
spin_unlock(&i915->gem.contexts.lock);
- err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs));
+ err = gen8_configure_context(ctx, regs, array_size);
if (err) {
i915_gem_context_put(ctx);
return err;
@@ -2240,7 +2378,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
- err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs));
+ err = gen8_modify_self(ce, regs, array_size);
if (err)
return err;
}
@@ -2288,19 +2426,69 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
* to make sure all slices/subslices are ON before writing to NOA
* registers.
*/
- ret = gen8_configure_all_contexts(stream, oa_config);
+ ret = lrc_configure_all_contexts(stream, oa_config);
if (ret)
return ret;
return emit_oa_config(stream, oa_config, oa_context(stream));
}
+static int gen12_enable_metric_set(struct i915_perf_stream *stream)
+{
+ struct intel_uncore *uncore = stream->uncore;
+ struct i915_oa_config *oa_config = stream->oa_config;
+ bool periodic = stream->periodic;
+ u32 period_exponent = stream->period_exponent;
+ int ret;
+
+ intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
+ /* Disable clk ratio reports, like previous Gens. */
+ _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
+ GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
+ /*
+ * If the user didn't require OA reports, instruct the
+ * hardware not to emit ctx switch reports.
+ */
+ !(stream->sample_flags & SAMPLE_OA_REPORT) ?
+ _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS) :
+ _MASKED_BIT_DISABLE(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS));
+
+ intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
+ (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
+ GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
+ (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
+ : 0);
+
+ /*
+ * Update all contexts prior writing the mux configurations as we need
+ * to make sure all slices/subslices are ON before writing to NOA
+ * registers.
+ */
+ ret = lrc_configure_all_contexts(stream, oa_config);
+ if (ret)
+ return ret;
+
+ /*
+ * For Gen12, performance counters are context
+ * saved/restored. Only enable it for the context that
+ * requested this.
+ */
+ if (stream->ctx) {
+ ret = gen12_emit_oar_config(stream->pinned_ctx,
+ oa_config != NULL);
+ if (ret)
+ return ret;
+ }
+
+ return emit_oa_config(stream, oa_config, oa_context(stream));
+}
+
static void gen8_disable_metric_set(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
/* Reset all contexts' slices/subslices configurations. */
- gen8_configure_all_contexts(stream, NULL);
+ lrc_configure_all_contexts(stream, NULL);
intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
}
@@ -2310,7 +2498,22 @@ static void gen10_disable_metric_set(struct i915_perf_stream *stream)
struct intel_uncore *uncore = stream->uncore;
/* Reset all contexts' slices/subslices configurations. */
- gen8_configure_all_contexts(stream, NULL);
+ lrc_configure_all_contexts(stream, NULL);
+
+ /* Make sure we disable noa to save power. */
+ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
+}
+
+static void gen12_disable_metric_set(struct i915_perf_stream *stream)
+{
+ struct intel_uncore *uncore = stream->uncore;
+
+ /* Reset all contexts' slices/subslices configurations. */
+ lrc_configure_all_contexts(stream, NULL);
+
+ /* disable the context save/restore or OAR counters */
+ if (stream->ctx)
+ gen12_emit_oar_config(stream->pinned_ctx, false);
/* Make sure we disable noa to save power. */
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2372,6 +2575,25 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
GEN8_OA_COUNTER_ENABLE);
}
+static void gen12_oa_enable(struct i915_perf_stream *stream)
+{
+ struct intel_uncore *uncore = stream->uncore;
+ u32 report_format = stream->oa_buffer.format;
+
+ /*
+ * If we don't want OA reports from the OA buffer, then we don't even
+ * need to program the OAG unit.
+ */
+ if (!(stream->sample_flags & SAMPLE_OA_REPORT))
+ return;
+
+ gen12_init_oa_buffer(stream);
+
+ intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
+ (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
+ GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
+}
+
/**
* i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream
* @stream: An i915 perf stream opened for OA metrics
@@ -2413,6 +2635,18 @@ static void gen8_oa_disable(struct i915_perf_stream *stream)
DRM_ERROR("wait for OA to be disabled timed out\n");
}
+static void gen12_oa_disable(struct i915_perf_stream *stream)
+{
+ struct intel_uncore *uncore = stream->uncore;
+
+ intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
+ if (intel_wait_for_register(uncore,
+ GEN12_OAG_OACONTROL,
+ GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
+ 50))
+ DRM_ERROR("wait for OA to be disabled timed out\n");
+}
+
/**
* i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream
* @stream: An i915 perf stream opened for OA metrics
@@ -2614,8 +2848,7 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
{
struct i915_perf_stream *stream;
- /* perf.exclusive_stream serialised by gen8_configure_all_contexts() */
- lockdep_assert_held(&ce->pin_mutex);
+ /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
if (engine->class != RENDER_CLASS)
return;
@@ -3079,32 +3312,40 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
}
}
- if (props->hold_preemption) {
- if (!props->single_context) {
- DRM_DEBUG("preemption disable with no context\n");
- ret = -EINVAL;
- goto err;
- }
- privileged_op = true;
- }
-
/*
* On Haswell the OA unit supports clock gating off for a specific
* context and in this mode there's no visibility of metrics for the
* rest of the system, which we consider acceptable for a
* non-privileged client.
*
- * For Gen8+ the OA unit no longer supports clock gating off for a
+ * For Gen8->11 the OA unit no longer supports clock gating off for a
* specific context and the kernel can't securely stop the counters
* from updating as system-wide / global values. Even though we can
* filter reports based on the included context ID we can't block
* clients from seeing the raw / global counter values via
* MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
* enable the OA unit by default.
+ *
+ * For Gen12+ we gain a new OAR unit that only monitors the RCS on a
+ * per context basis. So we can relax requirements there if the user
+ * doesn't request global stream access (i.e. query based sampling
+ * using MI_RECORD_PERF_COUNT.
*/
- if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption)
+ if (IS_HASWELL(perf->i915) && specific_ctx)
+ privileged_op = false;
+ else if (IS_GEN(perf->i915, 12) && specific_ctx &&
+ (props->sample_flags & SAMPLE_OA_REPORT) == 0)
privileged_op = false;
+ if (props->hold_preemption) {
+ if (!props->single_context) {
+ DRM_DEBUG("preemption disable with no context\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ privileged_op = true;
+ }
+
/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
* we check a dev.i915.perf_stream_paranoid sysctl option
* to determine if it's ok to access system wide OA counters
@@ -3418,7 +3659,9 @@ void i915_perf_register(struct drm_i915_private *i915)
sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr);
- if (INTEL_GEN(i915) >= 11) {
+ if (IS_TIGERLAKE(i915)) {
+ i915_perf_load_test_config_tgl(i915);
+ } else if (INTEL_GEN(i915) >= 11) {
i915_perf_load_test_config_icl(i915);
} else if (IS_CANNONLAKE(i915)) {
i915_perf_load_test_config_cnl(i915);
@@ -3515,56 +3758,80 @@ static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
return false;
}
+#define ADDR_IN_RANGE(addr, start, end) \
+ ((addr) >= (start) && \
+ (addr) <= (end))
+
+#define REG_IN_RANGE(addr, start, end) \
+ ((addr) >= i915_mmio_reg_offset(start) && \
+ (addr) <= i915_mmio_reg_offset(end))
+
+#define REG_EQUAL(addr, mmio) \
+ ((addr) == i915_mmio_reg_offset(mmio))
+
static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
- return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) &&
- addr <= i915_mmio_reg_offset(OASTARTTRIG8)) ||
- (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) &&
- addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) ||
- (addr >= i915_mmio_reg_offset(OACEC0_0) &&
- addr <= i915_mmio_reg_offset(OACEC7_1));
+ return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) ||
+ REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) ||
+ REG_IN_RANGE(addr, OACEC0_0, OACEC7_1);
}
static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) ||
- (addr >= i915_mmio_reg_offset(MICRO_BP0_0) &&
- addr <= i915_mmio_reg_offset(NOA_WRITE)) ||
- (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) &&
- addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) ||
- (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) &&
- addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI));
+ return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) ||
+ REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) ||
+ REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) ||
+ REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI);
}
static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
return gen7_is_valid_mux_addr(perf, addr) ||
- addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) ||
- (addr >= i915_mmio_reg_offset(RPM_CONFIG0) &&
- addr <= i915_mmio_reg_offset(NOA_CONFIG(8)));
+ REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) ||
+ REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8));
}
static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
return gen8_is_valid_mux_addr(perf, addr) ||
- addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) ||
- (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) &&
- addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI));
+ REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) ||
+ REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI);
}
static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
return gen7_is_valid_mux_addr(perf, addr) ||
- (addr >= 0x25100 && addr <= 0x2FF90) ||
- (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) &&
- addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) ||
- addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0);
+ ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) ||
+ REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) ||
+ REG_EQUAL(addr, HSW_MBVID2_MISR0);
}
static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
return gen7_is_valid_mux_addr(perf, addr) ||
- (addr >= 0x182300 && addr <= 0x1823A4);
+ ADDR_IN_RANGE(addr, 0x182300, 0x1823A4);
+}
+
+static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
+{
+ return REG_IN_RANGE(addr, GEN12_OAG_OASTARTTRIG1, GEN12_OAG_OASTARTTRIG8) ||
+ REG_IN_RANGE(addr, GEN12_OAG_OAREPORTTRIG1, GEN12_OAG_OAREPORTTRIG8) ||
+ REG_IN_RANGE(addr, GEN12_OAG_CEC0_0, GEN12_OAG_CEC7_1) ||
+ REG_IN_RANGE(addr, GEN12_OAG_SCEC0_0, GEN12_OAG_SCEC7_1) ||
+ REG_EQUAL(addr, GEN12_OAA_DBG_REG) ||
+ REG_EQUAL(addr, GEN12_OAG_OA_PESS) ||
+ REG_EQUAL(addr, GEN12_OAG_SPCTR_CNF);
+}
+
+static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
+{
+ return REG_EQUAL(addr, NOA_WRITE) ||
+ REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) ||
+ REG_EQUAL(addr, GDT_CHICKEN_BITS) ||
+ REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) ||
+ REG_EQUAL(addr, RPM_CONFIG0) ||
+ REG_EQUAL(addr, RPM_CONFIG1) ||
+ REG_IN_RANGE(addr, NOA_CONFIG(0), NOA_CONFIG(8));
}
static u32 mask_reg_value(u32 reg, u32 val)
@@ -3573,14 +3840,14 @@ static u32 mask_reg_value(u32 reg, u32 val)
* WaDisableSTUnitPowerOptimization workaround. Make sure the value
* programmed by userspace doesn't change this.
*/
- if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg)
+ if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2))
val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);
/* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function
* indicated by its name and a bunch of selection fields used by OA
* configs.
*/
- if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg)
+ if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT))
val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);
return val;
@@ -3959,14 +4226,11 @@ void i915_perf_init(struct drm_i915_private *i915)
* worth the complexity to maintain now that BDW+ enable
* execlist mode by default.
*/
- perf->oa_formats = gen8_plus_oa_formats;
-
- perf->ops.oa_enable = gen8_oa_enable;
- perf->ops.oa_disable = gen8_oa_disable;
perf->ops.read = gen8_oa_read;
- perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
if (IS_GEN_RANGE(i915, 8, 9)) {
+ perf->oa_formats = gen8_plus_oa_formats;
+
perf->ops.is_valid_b_counter_reg =
gen7_is_valid_b_counter_addr;
perf->ops.is_valid_mux_reg =
@@ -3979,8 +4243,11 @@ void i915_perf_init(struct drm_i915_private *i915)
chv_is_valid_mux_addr;
}
+ perf->ops.oa_enable = gen8_oa_enable;
+ perf->ops.oa_disable = gen8_oa_disable;
perf->ops.enable_metric_set = gen8_enable_metric_set;
perf->ops.disable_metric_set = gen8_disable_metric_set;
+ perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
if (IS_GEN(i915, 8)) {
perf->ctx_oactxctrl_offset = 0x120;
@@ -3994,6 +4261,8 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->gen8_valid_ctx_bit = BIT(16);
}
} else if (IS_GEN_RANGE(i915, 10, 11)) {
+ perf->oa_formats = gen8_plus_oa_formats;
+
perf->ops.is_valid_b_counter_reg =
gen7_is_valid_b_counter_addr;
perf->ops.is_valid_mux_reg =
@@ -4001,8 +4270,11 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->ops.is_valid_flex_reg =
gen8_is_valid_flex_addr;
+ perf->ops.oa_enable = gen8_oa_enable;
+ perf->ops.oa_disable = gen8_oa_disable;
perf->ops.enable_metric_set = gen8_enable_metric_set;
perf->ops.disable_metric_set = gen10_disable_metric_set;
+ perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
if (IS_GEN(i915, 10)) {
perf->ctx_oactxctrl_offset = 0x128;
@@ -4012,6 +4284,24 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->ctx_flexeu0_offset = 0x78e;
}
perf->gen8_valid_ctx_bit = BIT(16);
+ } else if (IS_GEN(i915, 12)) {
+ perf->oa_formats = gen12_oa_formats;
+
+ perf->ops.is_valid_b_counter_reg =
+ gen12_is_valid_b_counter_addr;
+ perf->ops.is_valid_mux_reg =
+ gen12_is_valid_mux_addr;
+ perf->ops.is_valid_flex_reg =
+ gen8_is_valid_flex_addr;
+
+ perf->ops.oa_enable = gen12_oa_enable;
+ perf->ops.oa_disable = gen12_oa_disable;
+ perf->ops.enable_metric_set = gen12_enable_metric_set;
+ perf->ops.disable_metric_set = gen12_disable_metric_set;
+ perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
+
+ perf->ctx_flexeu0_offset = 0;
+ perf->ctx_oactxctrl_offset = 0x144;
}
}