about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/drivers/gpu/drm/i915/i915_pmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_pmu.c')
-rw-r--r-- drivers/gpu/drm/i915/i915_pmu.c 145
1 files changed, 104 insertions, 41 deletions
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 6f09aa0be80a..aa729d04abe2 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -259,6 +259,16 @@ add_sample(struct i915_pmu_sample *sample, u32 val)
sample->cur += val;
}
+static bool exclusive_mmio_access(const struct drm_i915_private *i915)
+{
+ /*
+ * Report whether this device is gen7, in which case engines_sample()
+ * holds the engine's uncore lock around its mmio reads.
+ *
+ * We have to avoid concurrent mmio cache line access on gen7 or
+ * risk a machine hang. For a fun history lesson dig out the old
+ * userspace intel_gpu_top and run it on Ivybridge or Haswell!
+ */
+ return IS_GEN(i915, 7);
+}
+
static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
@@ -269,8 +279,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
return;
+ if (!intel_gt_pm_is_awake(gt))
+ return;
+
for_each_engine(engine, gt, id) {
struct intel_engine_pmu *pmu = &engine->pmu;
+ spinlock_t *mmio_lock;
unsigned long flags;
bool busy;
u32 val;
@@ -278,7 +292,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
if (!intel_engine_pm_get_if_awake(engine))
continue;
- spin_lock_irqsave(&engine->uncore->lock, flags);
+ mmio_lock = NULL;
+ if (exclusive_mmio_access(i915))
+ mmio_lock = &engine->uncore->lock;
+
+ if (unlikely(mmio_lock))
+ spin_lock_irqsave(mmio_lock, flags);
val = ENGINE_READ_FW(engine, RING_CTL);
if (val == 0) /* powerwell off => engine idle */
@@ -309,7 +328,8 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
skip:
- spin_unlock_irqrestore(&engine->uncore->lock, flags);
+ if (unlikely(mmio_lock))
+ spin_unlock_irqrestore(mmio_lock, flags);
intel_engine_pm_put_async(engine);
}
}
@@ -320,6 +340,13 @@ add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
sample->cur += mul_u32_u32(val, mul);
}
+static bool frequency_sampling_enabled(struct i915_pmu *pmu)
+{
+ return pmu->enable &
+ (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
+ config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY));
+}
+
static void
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
{
@@ -328,19 +355,33 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
struct i915_pmu *pmu = &i915->pmu;
struct intel_rps *rps = &gt->rps;
+ if (!frequency_sampling_enabled(pmu))
+ return;
+
+ /* Report 0/0 (actual/requested) frequency while parked. */
+ if (!intel_gt_pm_get_if_awake(gt))
+ return;
+
if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
u32 val;
- val = rps->cur_freq;
- if (intel_gt_pm_get_if_awake(gt)) {
- val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1);
- val = intel_get_cagf(rps, val);
- intel_gt_pm_put_async(gt);
- }
+ /*
+ * We take a quick peek here without using forcewake
+ * so that we don't perturb the system under observation
+ * (forcewake => !rc6 => increased power use). We expect
+ * that if the read fails because it is outside of the
+ * mmio power well, then it will return 0 -- in which
+ * case we assume the system is running at the intended
+ * frequency. Fortunately, the read should rarely fail!
+ */
+ val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
+ if (val)
+ val = intel_rps_get_cagf(rps, val);
+ else
+ val = rps->cur_freq;
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
- intel_gpu_freq(rps, val),
- period_ns / 1000);
+ intel_gpu_freq(rps, val), period_ns / 1000);
}
if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
@@ -348,6 +389,8 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
intel_gpu_freq(rps, rps->cur_freq),
period_ns / 1000);
}
+
+ intel_gt_pm_put_async(gt);
}
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
@@ -594,8 +637,10 @@ static void i915_pmu_enable(struct perf_event *event)
container_of(event->pmu, typeof(*i915), pmu.base);
unsigned int bit = event_enabled_bit(event);
struct i915_pmu *pmu = &i915->pmu;
+ intel_wakeref_t wakeref;
unsigned long flags;
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
spin_lock_irqsave(&pmu->lock, flags);
/*
@@ -605,6 +650,14 @@ static void i915_pmu_enable(struct perf_event *event)
BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
GEM_BUG_ON(pmu->enable_count[bit] == ~0);
+
+ if (pmu->enable_count[bit] == 0 &&
+ config_enabled_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) {
+ pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = 0;
+ pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
+ pmu->sleep_last = ktime_get();
+ }
+
pmu->enable |= BIT_ULL(bit);
pmu->enable_count[bit]++;
@@ -645,6 +698,8 @@ static void i915_pmu_enable(struct perf_event *event)
* an existing non-zero value.
*/
local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
+
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}
static void i915_pmu_disable(struct perf_event *event)
@@ -767,11 +822,6 @@ static ssize_t i915_pmu_event_show(struct device *dev,
return sprintf(buf, "config=0x%lx\n", eattr->val);
}
-static struct attribute_group i915_pmu_events_attr_group = {
- .name = "events",
- /* Patch in attrs at runtime. */
-};
-
static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
struct device_attribute *attr,
@@ -791,13 +841,6 @@ static const struct attribute_group i915_pmu_cpumask_attr_group = {
.attrs = i915_cpumask_attrs,
};
-static const struct attribute_group *i915_pmu_attr_groups[] = {
- &i915_pmu_format_attr_group,
- &i915_pmu_events_attr_group,
- &i915_pmu_cpumask_attr_group,
- NULL
-};
-
#define __event(__config, __name, __unit) \
{ \
.config = (__config), \
@@ -971,23 +1014,23 @@ err_alloc:
static void free_event_attributes(struct i915_pmu *pmu)
{
- struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;
+ struct attribute **attr_iter = pmu->events_attr_group.attrs;
for (; *attr_iter; attr_iter++)
kfree((*attr_iter)->name);
- kfree(i915_pmu_events_attr_group.attrs);
+ kfree(pmu->events_attr_group.attrs);
kfree(pmu->i915_attr);
kfree(pmu->pmu_attr);
- i915_pmu_events_attr_group.attrs = NULL;
+ pmu->events_attr_group.attrs = NULL;
pmu->i915_attr = NULL;
pmu->pmu_attr = NULL;
}
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
- struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
+ struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
GEM_BUG_ON(!pmu->base.event_init);
@@ -1000,7 +1043,7 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
- struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
+ struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
unsigned int target;
GEM_BUG_ON(!pmu->base.event_init);
@@ -1017,8 +1060,6 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
return 0;
}
-static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
-
static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
{
enum cpuhp_state slot;
@@ -1032,21 +1073,22 @@ static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
return ret;
slot = ret;
- ret = cpuhp_state_add_instance(slot, &pmu->node);
+ ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node);
if (ret) {
cpuhp_remove_multi_state(slot);
return ret;
}
- cpuhp_slot = slot;
+ pmu->cpuhp.slot = slot;
return 0;
}
static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
- WARN_ON(cpuhp_slot == CPUHP_INVALID);
- WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &pmu->node));
- cpuhp_remove_multi_state(cpuhp_slot);
+ WARN_ON(pmu->cpuhp.slot == CPUHP_INVALID);
+ WARN_ON(cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node));
+ cpuhp_remove_multi_state(pmu->cpuhp.slot);
+ pmu->cpuhp.slot = CPUHP_INVALID;
}
static bool is_igp(struct drm_i915_private *i915)
@@ -1063,6 +1105,13 @@ static bool is_igp(struct drm_i915_private *i915)
void i915_pmu_register(struct drm_i915_private *i915)
{
struct i915_pmu *pmu = &i915->pmu;
+ const struct attribute_group *attr_groups[] = {
+ &i915_pmu_format_attr_group,
+ &pmu->events_attr_group,
+ &i915_pmu_cpumask_attr_group,
+ NULL
+ };
+
int ret = -ENOMEM;
if (INTEL_GEN(i915) <= 2) {
@@ -1073,21 +1122,32 @@ void i915_pmu_register(struct drm_i915_private *i915)
spin_lock_init(&pmu->lock);
hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
pmu->timer.function = i915_sample;
+ pmu->cpuhp.slot = CPUHP_INVALID;
- if (!is_igp(i915))
+ if (!is_igp(i915)) {
pmu->name = kasprintf(GFP_KERNEL,
- "i915-%s",
+ "i915_%s",
dev_name(i915->drm.dev));
- else
+ if (pmu->name) {
+ /* tools/perf reserves colons as special. */
+ strreplace((char *)pmu->name, ':', '_');
+ }
+ } else {
pmu->name = "i915";
+ }
if (!pmu->name)
goto err;
- i915_pmu_events_attr_group.attrs = create_event_attributes(pmu);
- if (!i915_pmu_events_attr_group.attrs)
+ pmu->events_attr_group.name = "events";
+ pmu->events_attr_group.attrs = create_event_attributes(pmu);
+ if (!pmu->events_attr_group.attrs)
goto err_name;
- pmu->base.attr_groups = i915_pmu_attr_groups;
+ pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
+ GFP_KERNEL);
+ if (!pmu->base.attr_groups)
+ goto err_attr;
+
pmu->base.task_ctx_nr = perf_invalid_context;
pmu->base.event_init = i915_pmu_event_init;
pmu->base.add = i915_pmu_event_add;
@@ -1099,7 +1159,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
ret = perf_pmu_register(&pmu->base, pmu->name, -1);
if (ret)
- goto err_attr;
+ goto err_groups;
ret = i915_pmu_register_cpuhp_state(pmu);
if (ret)
@@ -1109,6 +1169,8 @@ void i915_pmu_register(struct drm_i915_private *i915)
err_unreg:
perf_pmu_unregister(&pmu->base);
+err_groups:
+ kfree(pmu->base.attr_groups);
err_attr:
pmu->base.event_init = NULL;
free_event_attributes(pmu);
@@ -1134,6 +1196,7 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
perf_pmu_unregister(&pmu->base);
pmu->base.event_init = NULL;
+ kfree(pmu->base.attr_groups);
if (!is_igp(i915))
kfree(pmu->name);
free_event_attributes(pmu);