aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_perf.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c152
1 files changed, 99 insertions, 53 deletions
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e27f3b7cf094..0a9c3fcc09b1 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -196,17 +196,23 @@
#include <linux/uuid.h>
#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_internal.h"
#include "gt/intel_engine_pm.h"
+#include "gt/intel_engine_regs.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_execlists_submission.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
+#include "gt/intel_gt_regs.h"
#include "gt/intel_lrc.h"
+#include "gt/intel_lrc_reg.h"
#include "gt/intel_ring.h"
#include "i915_drv.h"
+#include "i915_file_private.h"
#include "i915_perf.h"
+#include "i915_perf_oa_regs.h"
/* HW requires this to be a power of two, between 128k and 16M, though driver
* is currently generally designed assuming the largest 16M size is used such
@@ -1630,8 +1636,8 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
struct drm_i915_gem_object *bo;
struct i915_vma *vma;
const u64 delay_ticks = 0xffffffffffffffff -
- intel_gt_ns_to_clock_interval(stream->perf->i915->ggtt.vm.gt,
- atomic64_read(&stream->perf->noa_programming_delay));
+ intel_gt_ns_to_clock_interval(to_gt(stream->perf->i915),
+ atomic64_read(&stream->perf->noa_programming_delay));
const u32 base = stream->engine->mmio_base;
#define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
u32 *batch, *ts0, *cs, *jump;
@@ -1682,7 +1688,7 @@ retry:
stream, cs, true /* save */, CS_GPR(i),
INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
cs = save_restore_register(
- stream, cs, true /* save */, MI_PREDICATE_RESULT_1,
+ stream, cs, true /* save */, MI_PREDICATE_RESULT_1(RENDER_RING_BASE),
INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
/* First timestamp snapshot location. */
@@ -1736,7 +1742,7 @@ retry:
*/
*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
- *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+ *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE));
/* Restart from the beginning if we had timestamps roll over. */
*cs++ = (GRAPHICS_VER(i915) < 8 ?
@@ -1773,7 +1779,7 @@ retry:
*/
*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
- *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+ *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE));
/* Predicate the jump. */
*cs++ = (GRAPHICS_VER(i915) < 8 ?
@@ -1789,7 +1795,7 @@ retry:
stream, cs, false /* restore */, CS_GPR(i),
INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
cs = save_restore_register(
- stream, cs, false /* restore */, MI_PREDICATE_RESULT_1,
+ stream, cs, false /* restore */, MI_PREDICATE_RESULT_1(RENDER_RING_BASE),
INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
/* And return to the ring. */
@@ -2114,7 +2120,7 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
/* The MMIO offsets for Flex EU registers aren't contiguous */
- i915_reg_t flex_regs[] = {
+ static const i915_reg_t flex_regs[] = {
EU_PERF_CNTL0,
EU_PERF_CNTL1,
EU_PERF_CNTL2,
@@ -2418,7 +2424,7 @@ gen12_configure_all_contexts(struct i915_perf_stream *stream,
{
struct flex regs[] = {
{
- GEN8_R_PWR_CLK_STATE,
+ GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE),
CTX_R_PWR_CLK_STATE,
},
};
@@ -2438,7 +2444,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
struct flex regs[] = {
{
- GEN8_R_PWR_CLK_STATE,
+ GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE),
CTX_R_PWR_CLK_STATE,
},
{
@@ -3542,7 +3548,7 @@ err:
static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
{
- return intel_gt_clock_interval_to_ns(perf->i915->ggtt.vm.gt,
+ return intel_gt_clock_interval_to_ns(to_gt(perf->i915),
2ULL << exponent); /* interval handed to the converter is 2^(exponent + 1) */
}
@@ -3862,80 +3868,116 @@ static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
return false;
}
-#define ADDR_IN_RANGE(addr, start, end) \
- ((addr) >= (start) && \
- (addr) <= (end))
+static bool reg_in_range_table(u32 addr, const struct i915_range *table)
+{ /* Returns true when addr lies inside any [start, end] entry of table. */
+ while (table->start || table->end) { /* an all-zero entry ends the walk */
+ if (addr >= table->start && addr <= table->end) /* both bounds inclusive */
+ return true;
-#define REG_IN_RANGE(addr, start, end) \
- ((addr) >= i915_mmio_reg_offset(start) && \
- (addr) <= i915_mmio_reg_offset(end))
+ table++;
+ }
+
+ return false; /* no entry matched */
+}
#define REG_EQUAL(addr, mmio) \
((addr) == i915_mmio_reg_offset(mmio))
-static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
-{
- return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) ||
- REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) ||
- REG_IN_RANGE(addr, OACEC0_0, OACEC7_1);
-}
+static const struct i915_range gen7_oa_b_counters[] = {
+ { .start = 0x2710, .end = 0x272c }, /* OASTARTTRIG[1-8] */
+ { .start = 0x2740, .end = 0x275c }, /* OAREPORTTRIG[1-8] */
+ { .start = 0x2770, .end = 0x27ac }, /* OACEC[0-7][0-1] */
+ {} /* all-zero terminator; reg_in_range_table() stops here */
+};
+
+static const struct i915_range gen12_oa_b_counters[] = {
+ { .start = 0x2b2c, .end = 0x2b2c }, /* GEN12_OAG_OA_PESS */
+ { .start = 0xd900, .end = 0xd91c }, /* GEN12_OAG_OASTARTTRIG[1-8] */
+ { .start = 0xd920, .end = 0xd93c }, /* GEN12_OAG_OAREPORTTRIG[1-8] */
+ { .start = 0xd940, .end = 0xd97c }, /* GEN12_OAG_CEC[0-7][0-1] */
+ { .start = 0xdc00, .end = 0xdc3c }, /* GEN12_OAG_SCEC[0-7][0-1] */
+ { .start = 0xdc40, .end = 0xdc40 }, /* GEN12_OAG_SPCTR_CNF */
+ { .start = 0xdc44, .end = 0xdc44 }, /* GEN12_OAA_DBG_REG */
+ {} /* all-zero terminator; reg_in_range_table() stops here */
+};
-static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
+static const struct i915_range gen7_oa_mux_regs[] = {
+ { .start = 0x91b8, .end = 0x91cc }, /* OA_PERFCNT[1-2], OA_PERFMATRIX */
+ { .start = 0x9800, .end = 0x9888 }, /* MICRO_BP0_0 - NOA_WRITE */
+ { .start = 0xe180, .end = 0xe180 }, /* HALF_SLICE_CHICKEN2 */
+ {} /* all-zero terminator; reg_in_range_table() stops here */
+};
+
+static const struct i915_range hsw_oa_mux_regs[] = {
+ { .start = 0x09e80, .end = 0x09ea4 }, /* HSW_MBVID2_NOA[0-9] */
+ { .start = 0x09ec0, .end = 0x09ec0 }, /* HSW_MBVID2_MISR0 */
+ { .start = 0x25100, .end = 0x2ff90 }, /* raw offsets; no register names given here */
+ {} /* all-zero terminator; reg_in_range_table() stops here */
+};
+
+static const struct i915_range chv_oa_mux_regs[] = {
+ { .start = 0x182300, .end = 0x1823a4 }, /* raw offsets; no register names given here */
+ {} /* all-zero terminator; reg_in_range_table() stops here */
+};
+
+static const struct i915_range gen8_oa_mux_regs[] = {
+ { .start = 0x0d00, .end = 0x0d2c }, /* RPM_CONFIG[0-1], NOA_CONFIG[0-8] */
+ { .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */
+ {} /* all-zero terminator; reg_in_range_table() stops here */
+};
+
+static const struct i915_range gen11_oa_mux_regs[] = {
+ { .start = 0x91c8, .end = 0x91dc }, /* OA_PERFCNT[3-4] */
+ {} /* all-zero terminator; reg_in_range_table() stops here */
+};
+
+static const struct i915_range gen12_oa_mux_regs[] = { /* entries are not sorted; lookup is a linear scan */
+ { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */
+ { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
+ { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */
+ { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE; presumably also GEN10_NOA_WRITE_HIGH - TODO confirm */
+ { .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */
+ {} /* all-zero terminator; reg_in_range_table() stops here */
+};
+
+static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
- return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) ||
- REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) ||
- REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) ||
- REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI);
+ return reg_in_range_table(addr, gen7_oa_b_counters); /* perf is not consulted */
}
static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return gen7_is_valid_mux_addr(perf, addr) ||
- REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) ||
- REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8));
+ return reg_in_range_table(addr, gen7_oa_mux_regs) || /* gen7 ranges stay valid */
+ reg_in_range_table(addr, gen8_oa_mux_regs); /* plus the gen8 additions */
}
static bool gen11_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return gen8_is_valid_mux_addr(perf, addr) ||
- REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) ||
- REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI);
+ return reg_in_range_table(addr, gen7_oa_mux_regs) || /* accepted set is the union of three tables */
+ reg_in_range_table(addr, gen8_oa_mux_regs) ||
+ reg_in_range_table(addr, gen11_oa_mux_regs);
}
static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return gen7_is_valid_mux_addr(perf, addr) ||
- ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) ||
- REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) ||
- REG_EQUAL(addr, HSW_MBVID2_MISR0);
+ return reg_in_range_table(addr, gen7_oa_mux_regs) || /* common gen7 ranges */
+ reg_in_range_table(addr, hsw_oa_mux_regs); /* plus the HSW table */
}
static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return gen7_is_valid_mux_addr(perf, addr) ||
- ADDR_IN_RANGE(addr, 0x182300, 0x1823A4);
+ return reg_in_range_table(addr, gen7_oa_mux_regs) || /* common gen7 ranges */
+ reg_in_range_table(addr, chv_oa_mux_regs); /* plus the CHV table */
}
static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
- return REG_IN_RANGE(addr, GEN12_OAG_OASTARTTRIG1, GEN12_OAG_OASTARTTRIG8) ||
- REG_IN_RANGE(addr, GEN12_OAG_OAREPORTTRIG1, GEN12_OAG_OAREPORTTRIG8) ||
- REG_IN_RANGE(addr, GEN12_OAG_CEC0_0, GEN12_OAG_CEC7_1) ||
- REG_IN_RANGE(addr, GEN12_OAG_SCEC0_0, GEN12_OAG_SCEC7_1) ||
- REG_EQUAL(addr, GEN12_OAA_DBG_REG) ||
- REG_EQUAL(addr, GEN12_OAG_OA_PESS) ||
- REG_EQUAL(addr, GEN12_OAG_SPCTR_CNF);
+ return reg_in_range_table(addr, gen12_oa_b_counters); /* perf is not consulted */
}
static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return REG_EQUAL(addr, NOA_WRITE) ||
- REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) ||
- REG_EQUAL(addr, GDT_CHICKEN_BITS) ||
- REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) ||
- REG_EQUAL(addr, RPM_CONFIG0) ||
- REG_EQUAL(addr, RPM_CONFIG1) ||
- REG_IN_RANGE(addr, NOA_CONFIG(0), NOA_CONFIG(8));
+ return reg_in_range_table(addr, gen12_oa_mux_regs); /* only the gen12 table; gen7/gen8 tables are not checked */
}
static u32 mask_reg_value(u32 reg, u32 val)
@@ -4332,6 +4374,10 @@ void i915_perf_init(struct drm_i915_private *i915)
/* XXX const struct i915_perf_ops! */
+ /* i915_perf is not enabled for DG2 yet */
+ if (IS_DG2(i915))
+ return;
+
perf->oa_formats = oa_formats;
if (IS_HASWELL(i915)) {
perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;