Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_perf.c  836
1 file changed, 492 insertions(+), 344 deletions(-)
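
Before the diff proper, a condensed sketch of the per-stream state this patch introduces may help. Every field below is taken from a stream-> access in the hunks that follow, but the struct name, the field ordering and the omitted members are assumptions; the real definition lives in the new i915_perf.h.

/*
 * Condensed sketch of the per-stream OA state (names taken from the diff
 * below; ordering and omitted members are assumptions, the authoritative
 * layout is in the new i915_perf.h).
 */
#include <linux/hrtimer.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>

struct i915_perf_stream_sketch {
	/* periodic sampling configuration (was dev_priv->perf.oa.*) */
	bool periodic;
	int period_exponent;

	/* single-context filtering state */
	u32 specific_ctx_id;
	u32 specific_ctx_id_mask;
	struct intel_context *pinned_ctx;	/* opaque here */

	/* poll()/read() wakeup machinery, now one instance per stream */
	struct hrtimer poll_check_timer;
	wait_queue_head_t poll_wq;
	bool pollin;

	/* the OA buffer and its double-buffered ("aging") tail pointers */
	struct {
		struct i915_vma *vma;		/* opaque here */
		u8 *vaddr;
		u32 head;
		u32 format;
		int format_size;
		spinlock_t ptr_lock;
		struct {
			u32 offset;
		} tails[2];
		unsigned int aged_tail_idx;
		u64 aging_timestamp;
		u32 last_ctx_id;
	} oa_buffer;
};
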
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5140017f9a39..e42b86827d6b 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -200,20 +200,21 @@
#include "gt/intel_lrc_reg.h"
#include "i915_drv.h"
-#include "i915_oa_hsw.h"
-#include "i915_oa_bdw.h"
-#include "i915_oa_chv.h"
-#include "i915_oa_sklgt2.h"
-#include "i915_oa_sklgt3.h"
-#include "i915_oa_sklgt4.h"
-#include "i915_oa_bxt.h"
-#include "i915_oa_kblgt2.h"
-#include "i915_oa_kblgt3.h"
-#include "i915_oa_glk.h"
-#include "i915_oa_cflgt2.h"
-#include "i915_oa_cflgt3.h"
-#include "i915_oa_cnl.h"
-#include "i915_oa_icl.h"
+#include "i915_perf.h"
+#include "oa/i915_oa_hsw.h"
+#include "oa/i915_oa_bdw.h"
+#include "oa/i915_oa_chv.h"
+#include "oa/i915_oa_sklgt2.h"
+#include "oa/i915_oa_sklgt3.h"
+#include "oa/i915_oa_sklgt4.h"
+#include "oa/i915_oa_bxt.h"
+#include "oa/i915_oa_kblgt2.h"
+#include "oa/i915_oa_kblgt3.h"
+#include "oa/i915_oa_glk.h"
+#include "oa/i915_oa_cflgt2.h"
+#include "oa/i915_oa_cflgt3.h"
+#include "oa/i915_oa_cnl.h"
+#include "oa/i915_oa_icl.h"
/* HW requires this to be a power of two, between 128k and 16M, though driver
* is currently generally designed assuming the largest 16M size is used such
@@ -364,6 +365,8 @@ struct perf_open_properties {
int oa_period_exponent;
};
+static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
+
static void free_oa_config(struct drm_i915_private *dev_priv,
struct i915_oa_config *oa_config)
{
@@ -392,8 +395,8 @@ static int get_oa_config(struct drm_i915_private *dev_priv,
int ret;
if (metrics_set == 1) {
- *out_config = &dev_priv->perf.oa.test_config;
- atomic_inc(&dev_priv->perf.oa.test_config.ref_count);
+ *out_config = &dev_priv->perf.test_config;
+ atomic_inc(&dev_priv->perf.test_config.ref_count);
return 0;
}
@@ -412,13 +415,16 @@ static int get_oa_config(struct drm_i915_private *dev_priv,
return ret;
}
-static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
+static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+
return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
}
-static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
+static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
@@ -426,7 +432,7 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
/**
* oa_buffer_check_unlocked - check for data and update tail ptr state
- * @dev_priv: i915 device instance
+ * @stream: i915 stream instance
*
* This is either called via fops (for blocking reads in user ctx) or the poll
* check hrtimer (atomic ctx) to check the OA buffer tail pointer and check
@@ -448,9 +454,10 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
*
* Returns: %true if the OA buffer contains data, else %false
*/
-static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
+static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{
- int report_size = dev_priv->perf.oa.oa_buffer.format_size;
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+ int report_size = stream->oa_buffer.format_size;
unsigned long flags;
unsigned int aged_idx;
u32 head, hw_tail, aged_tail, aging_tail;
@@ -460,19 +467,19 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
* could result in an OA buffer reset which might reset the head,
* tails[] and aged_tail state.
*/
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
/* NB: The head we observe here might effectively be a little out of
* date (between head and tails[aged_idx].offset if there is currently
* a read() in progress.
*/
- head = dev_priv->perf.oa.oa_buffer.head;
+ head = stream->oa_buffer.head;
- aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
- aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;
+ aged_idx = stream->oa_buffer.aged_tail_idx;
+ aged_tail = stream->oa_buffer.tails[aged_idx].offset;
+ aging_tail = stream->oa_buffer.tails[!aged_idx].offset;
- hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
+ hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream);
/* The tail pointer increases in 64 byte increments,
* not in report_size steps...
@@ -492,16 +499,16 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
* available) without needing to wait for a later hrtimer callback.
*/
if (aging_tail != INVALID_TAIL_PTR &&
- ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
+ ((now - stream->oa_buffer.aging_timestamp) >
OA_TAIL_MARGIN_NSEC)) {
aged_idx ^= 1;
- dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
+ stream->oa_buffer.aged_tail_idx = aged_idx;
aged_tail = aging_tail;
/* Mark that we need a new pointer to start aging... */
- dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
aging_tail = INVALID_TAIL_PTR;
}
@@ -516,7 +523,7 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
if (aging_tail == INVALID_TAIL_PTR &&
(aged_tail == INVALID_TAIL_PTR ||
OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
- struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
+ struct i915_vma *vma = stream->oa_buffer.vma;
u32 gtt_offset = i915_ggtt_offset(vma);
/* Be paranoid and do a bounds check on the pointer read back
@@ -525,16 +532,16 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
*/
if (hw_tail >= gtt_offset &&
hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
- dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
+ stream->oa_buffer.tails[!aged_idx].offset =
aging_tail = hw_tail;
- dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
+ stream->oa_buffer.aging_timestamp = now;
} else {
DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
hw_tail);
}
}
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
return aged_tail == INVALID_TAIL_PTR ?
false : OA_TAKEN(aged_tail, head) >= report_size;
@@ -597,8 +604,7 @@ static int append_oa_sample(struct i915_perf_stream *stream,
size_t *offset,
const u8 *report)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
- int report_size = dev_priv->perf.oa.oa_buffer.format_size;
+ int report_size = stream->oa_buffer.format_size;
struct drm_i915_perf_record_header header;
u32 sample_flags = stream->sample_flags;
@@ -650,9 +656,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
size_t *offset)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- int report_size = dev_priv->perf.oa.oa_buffer.format_size;
- u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
- u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
+ int report_size = stream->oa_buffer.format_size;
+ u8 *oa_buf_base = stream->oa_buffer.vaddr;
+ u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
@@ -664,13 +670,13 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
if (WARN_ON(!stream->enabled))
return -EIO;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- head = dev_priv->perf.oa.oa_buffer.head;
- aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
+ head = stream->oa_buffer.head;
+ aged_tail_idx = stream->oa_buffer.aged_tail_idx;
+ tail = stream->oa_buffer.tails[aged_tail_idx].offset;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/*
* An invalid tail pointer here means we're still waiting for the poll
@@ -734,12 +740,12 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
OAREPORT_REASON_MASK);
if (reason == 0) {
- if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
+ if (__ratelimit(&dev_priv->perf.spurious_report_rs))
DRM_NOTE("Skipping spurious, invalid OA report\n");
continue;
}
- ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask;
+ ctx_id = report32[2] & stream->specific_ctx_id_mask;
/*
* Squash whatever is in the CTX_ID field if it's marked as
@@ -749,7 +755,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* Note: that we don't clear the valid_ctx_bit so userspace can
* understand that the ID has been squashed by the kernel.
*/
- if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
+ if (!(report32[0] & dev_priv->perf.gen8_valid_ctx_bit))
ctx_id = report32[2] = INVALID_CTX_ID;
/*
@@ -783,18 +789,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* switches since it's not-uncommon for periodic samples to
* identify a switch before any 'context switch' report.
*/
- if (!dev_priv->perf.oa.exclusive_stream->ctx ||
- dev_priv->perf.oa.specific_ctx_id == ctx_id ||
- (dev_priv->perf.oa.oa_buffer.last_ctx_id ==
- dev_priv->perf.oa.specific_ctx_id) ||
+ if (!dev_priv->perf.exclusive_stream->ctx ||
+ stream->specific_ctx_id == ctx_id ||
+ stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
reason & OAREPORT_REASON_CTX_SWITCH) {
/*
* While filtering for a single context we avoid
* leaking the IDs of other contexts.
*/
- if (dev_priv->perf.oa.exclusive_stream->ctx &&
- dev_priv->perf.oa.specific_ctx_id != ctx_id) {
+ if (dev_priv->perf.exclusive_stream->ctx &&
+ stream->specific_ctx_id != ctx_id) {
report32[2] = INVALID_CTX_ID;
}
@@ -803,7 +808,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
if (ret)
break;
- dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
+ stream->oa_buffer.last_ctx_id = ctx_id;
}
/*
@@ -817,7 +822,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
}
if (start_offset != *offset) {
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
/*
* We removed the gtt_offset for the copy loop above, indexing
@@ -826,9 +831,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
head += gtt_offset;
I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
- dev_priv->perf.oa.oa_buffer.head = head;
+ stream->oa_buffer.head = head;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
}
return ret;
@@ -863,7 +868,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
u32 oastatus;
int ret;
- if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
+ if (WARN_ON(!stream->oa_buffer.vaddr))
return -EIO;
oastatus = I915_READ(GEN8_OASTATUS);
@@ -889,10 +894,10 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
return ret;
DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
- dev_priv->perf.oa.period_exponent);
+ stream->period_exponent);
- dev_priv->perf.oa.ops.oa_disable(stream);
- dev_priv->perf.oa.ops.oa_enable(stream);
+ dev_priv->perf.ops.oa_disable(stream);
+ dev_priv->perf.ops.oa_enable(stream);
/*
* Note: .oa_enable() is expected to re-init the oabuffer and
@@ -939,9 +944,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
size_t *offset)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- int report_size = dev_priv->perf.oa.oa_buffer.format_size;
- u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
- u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
+ int report_size = stream->oa_buffer.format_size;
+ u8 *oa_buf_base = stream->oa_buffer.vaddr;
+ u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
@@ -953,13 +958,13 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
if (WARN_ON(!stream->enabled))
return -EIO;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- head = dev_priv->perf.oa.oa_buffer.head;
- aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
+ head = stream->oa_buffer.head;
+ aged_tail_idx = stream->oa_buffer.aged_tail_idx;
+ tail = stream->oa_buffer.tails[aged_tail_idx].offset;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/* An invalid tail pointer here means we're still waiting for the poll
* hrtimer callback to give us a pointer
@@ -1012,7 +1017,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
* copying it to userspace...
*/
if (report32[0] == 0) {
- if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
+ if (__ratelimit(&dev_priv->perf.spurious_report_rs))
DRM_NOTE("Skipping spurious, invalid OA report\n");
continue;
}
@@ -1031,7 +1036,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
}
if (start_offset != *offset) {
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
/* We removed the gtt_offset for the copy loop above, indexing
* relative to oa_buf_base so put back here...
@@ -1041,9 +1046,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
I915_WRITE(GEN7_OASTATUS2,
((head & GEN7_OASTATUS2_HEAD_MASK) |
GEN7_OASTATUS2_MEM_SELECT_GGTT));
- dev_priv->perf.oa.oa_buffer.head = head;
+ stream->oa_buffer.head = head;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
}
return ret;
@@ -1074,7 +1079,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
u32 oastatus1;
int ret;
- if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
+ if (WARN_ON(!stream->oa_buffer.vaddr))
return -EIO;
oastatus1 = I915_READ(GEN7_OASTATUS1);
@@ -1084,7 +1089,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
* may be updated asynchronously) so we ignore status bits
* that have already been reported to userspace.
*/
- oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1;
+ oastatus1 &= ~dev_priv->perf.gen7_latched_oastatus1;
/* We treat OABUFFER_OVERFLOW as a significant error:
*
@@ -1113,10 +1118,10 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
return ret;
DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
- dev_priv->perf.oa.period_exponent);
+ stream->period_exponent);
- dev_priv->perf.oa.ops.oa_disable(stream);
- dev_priv->perf.oa.ops.oa_enable(stream);
+ dev_priv->perf.ops.oa_disable(stream);
+ dev_priv->perf.ops.oa_enable(stream);
oastatus1 = I915_READ(GEN7_OASTATUS1);
}
@@ -1126,7 +1131,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
DRM_I915_PERF_RECORD_OA_REPORT_LOST);
if (ret)
return ret;
- dev_priv->perf.oa.gen7_latched_oastatus1 |=
+ dev_priv->perf.gen7_latched_oastatus1 |=
GEN7_OASTATUS1_REPORT_LOST;
}
@@ -1149,14 +1154,12 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
*/
static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
-
/* We would wait indefinitely if periodic sampling is not enabled */
- if (!dev_priv->perf.oa.periodic)
+ if (!stream->periodic)
return -EIO;
- return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
- oa_buffer_check_unlocked(dev_priv));
+ return wait_event_interruptible(stream->poll_wq,
+ oa_buffer_check_unlocked(stream));
}
/**
@@ -1173,9 +1176,7 @@ static void i915_oa_poll_wait(struct i915_perf_stream *stream,
struct file *file,
poll_table *wait)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
-
- poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
+ poll_wait(file, &stream->poll_wq, wait);
}
/**
@@ -1197,13 +1198,14 @@ static int i915_oa_read(struct i915_perf_stream *stream,
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
+ return dev_priv->perf.ops.read(stream, buf, count, offset);
}
-static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
- struct i915_gem_context *ctx)
+static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
{
struct i915_gem_engines_iter it;
+ struct drm_i915_private *i915 = stream->dev_priv;
+ struct i915_gem_context *ctx = stream->ctx;
struct intel_context *ce;
int err;
@@ -1221,7 +1223,7 @@ static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
*/
err = intel_context_pin(ce);
if (err == 0) {
- i915->perf.oa.pinned_ctx = ce;
+ stream->pinned_ctx = ce;
break;
}
}
@@ -1231,7 +1233,7 @@ static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
if (err)
return ERR_PTR(err);
- return i915->perf.oa.pinned_ctx;
+ return stream->pinned_ctx;
}
/**
@@ -1249,7 +1251,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
struct drm_i915_private *i915 = stream->dev_priv;
struct intel_context *ce;
- ce = oa_pin_context(i915, stream->ctx);
+ ce = oa_pin_context(stream);
if (IS_ERR(ce))
return PTR_ERR(ce);
@@ -1259,8 +1261,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
* On Haswell we don't do any post processing of the reports
* and don't need to use the mask.
*/
- i915->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state);
- i915->perf.oa.specific_ctx_id_mask = 0;
+ stream->specific_ctx_id = i915_ggtt_offset(ce->state);
+ stream->specific_ctx_id_mask = 0;
break;
}
@@ -1278,33 +1280,33 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
* dropped by GuC. They won't be part of the context
* ID in the OA reports, so squash those lower bits.
*/
- i915->perf.oa.specific_ctx_id =
+ stream->specific_ctx_id =
lower_32_bits(ce->lrc_desc) >> 12;
/*
* GuC uses the top bit to signal proxy submission, so
* ignore that bit.
*/
- i915->perf.oa.specific_ctx_id_mask =
+ stream->specific_ctx_id_mask =
(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
} else {
- i915->perf.oa.specific_ctx_id_mask =
+ stream->specific_ctx_id_mask =
(1U << GEN8_CTX_ID_WIDTH) - 1;
- i915->perf.oa.specific_ctx_id =
+ stream->specific_ctx_id =
upper_32_bits(ce->lrc_desc);
- i915->perf.oa.specific_ctx_id &=
- i915->perf.oa.specific_ctx_id_mask;
+ stream->specific_ctx_id &=
+ stream->specific_ctx_id_mask;
}
break;
case 11: {
- i915->perf.oa.specific_ctx_id_mask =
+ stream->specific_ctx_id_mask =
((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
- i915->perf.oa.specific_ctx_id = upper_32_bits(ce->lrc_desc);
- i915->perf.oa.specific_ctx_id &=
- i915->perf.oa.specific_ctx_id_mask;
+ stream->specific_ctx_id = upper_32_bits(ce->lrc_desc);
+ stream->specific_ctx_id &=
+ stream->specific_ctx_id_mask;
break;
}
@@ -1313,8 +1315,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
}
DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
- i915->perf.oa.specific_ctx_id,
- i915->perf.oa.specific_ctx_id_mask);
+ stream->specific_ctx_id,
+ stream->specific_ctx_id_mask);
return 0;
}
@@ -1331,10 +1333,10 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
struct drm_i915_private *dev_priv = stream->dev_priv;
struct intel_context *ce;
- dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
- dev_priv->perf.oa.specific_ctx_id_mask = 0;
+ stream->specific_ctx_id = INVALID_CTX_ID;
+ stream->specific_ctx_id_mask = 0;
- ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
+ ce = fetch_and_zero(&stream->pinned_ctx);
if (ce) {
mutex_lock(&dev_priv->drm.struct_mutex);
intel_context_unpin(ce);
@@ -1343,34 +1345,36 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
}
static void
-free_oa_buffer(struct drm_i915_private *i915)
+free_oa_buffer(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *i915 = stream->dev_priv;
+
mutex_lock(&i915->drm.struct_mutex);
- i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma,
+ i915_vma_unpin_and_release(&stream->oa_buffer.vma,
I915_VMA_RELEASE_MAP);
mutex_unlock(&i915->drm.struct_mutex);
- i915->perf.oa.oa_buffer.vaddr = NULL;
+ stream->oa_buffer.vaddr = NULL;
}
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);
+ BUG_ON(stream != dev_priv->perf.exclusive_stream);
/*
* Unset exclusive_stream first, it will be checked while disabling
* the metric set on gen8+.
*/
mutex_lock(&dev_priv->drm.struct_mutex);
- dev_priv->perf.oa.exclusive_stream = NULL;
- dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
+ dev_priv->perf.exclusive_stream = NULL;
+ dev_priv->perf.ops.disable_metric_set(stream);
mutex_unlock(&dev_priv->drm.struct_mutex);
- free_oa_buffer(dev_priv);
+ free_oa_buffer(stream);
intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
@@ -1380,41 +1384,42 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
put_oa_config(dev_priv, stream->oa_config);
- if (dev_priv->perf.oa.spurious_report_rs.missed) {
+ if (dev_priv->perf.spurious_report_rs.missed) {
DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
- dev_priv->perf.oa.spurious_report_rs.missed);
+ dev_priv->perf.spurious_report_rs.missed);
}
}
-static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
+static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
{
- u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+ u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
unsigned long flags;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
/* Pre-DevBDW: OABUFFER must be set with counters off,
* before OASTATUS1, but after OASTATUS2
*/
I915_WRITE(GEN7_OASTATUS2,
gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */
- dev_priv->perf.oa.oa_buffer.head = gtt_offset;
+ stream->oa_buffer.head = gtt_offset;
I915_WRITE(GEN7_OABUFFER, gtt_offset);
I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
/* Mark that we need updated tail pointers to read from... */
- dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
- dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/* On Haswell we have to track which OASTATUS1 flags we've
* already seen since they can't be cleared while periodic
* sampling is enabled.
*/
- dev_priv->perf.oa.gen7_latched_oastatus1 = 0;
+ dev_priv->perf.gen7_latched_oastatus1 = 0;
/* NB: although the OA buffer will initially be allocated
* zeroed via shmfs (and so this memset is redundant when
@@ -1427,24 +1432,25 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
* the assumption that new reports are being written to zeroed
* memory...
*/
- memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
+ memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
/* Maybe make ->pollin per-stream state if we support multiple
* concurrent streams in the future.
*/
- dev_priv->perf.oa.pollin = false;
+ stream->pollin = false;
}
-static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
+static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
{
- u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+ u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
unsigned long flags;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
I915_WRITE(GEN8_OASTATUS, 0);
I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
- dev_priv->perf.oa.oa_buffer.head = gtt_offset;
+ stream->oa_buffer.head = gtt_offset;
I915_WRITE(GEN8_OABUFFER_UDW, 0);
@@ -1461,17 +1467,17 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
- dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
- dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
/*
* Reset state used to recognise context switches, affecting which
* reports we will forward to userspace while filtering for a single
* context.
*/
- dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;
+ stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/*
* NB: although the OA buffer will initially be allocated
@@ -1485,22 +1491,23 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
* the assumption that new reports are being written to zeroed
* memory...
*/
- memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
+ memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
/*
* Maybe make ->pollin per-stream state if we support multiple
* concurrent streams in the future.
*/
- dev_priv->perf.oa.pollin = false;
+ stream->pollin = false;
}
-static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
+static int alloc_oa_buffer(struct i915_perf_stream *stream)
{
struct drm_i915_gem_object *bo;
+ struct drm_i915_private *dev_priv = stream->dev_priv;
struct i915_vma *vma;
int ret;
- if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma))
+ if (WARN_ON(stream->oa_buffer.vma))
return -ENODEV;
ret = i915_mutex_lock_interruptible(&dev_priv->drm);
@@ -1525,18 +1532,18 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
ret = PTR_ERR(vma);
goto err_unref;
}
- dev_priv->perf.oa.oa_buffer.vma = vma;
+ stream->oa_buffer.vma = vma;
- dev_priv->perf.oa.oa_buffer.vaddr =
+ stream->oa_buffer.vaddr =
i915_gem_object_pin_map(bo, I915_MAP_WB);
- if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
- ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
+ if (IS_ERR(stream->oa_buffer.vaddr)) {
+ ret = PTR_ERR(stream->oa_buffer.vaddr);
goto err_unpin;
}
DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
- i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
- dev_priv->perf.oa.oa_buffer.vaddr);
+ i915_ggtt_offset(stream->oa_buffer.vma),
+ stream->oa_buffer.vaddr);
goto unlock;
@@ -1546,8 +1553,8 @@ err_unpin:
err_unref:
i915_gem_object_put(bo);
- dev_priv->perf.oa.oa_buffer.vaddr = NULL;
- dev_priv->perf.oa.oa_buffer.vma = NULL;
+ stream->oa_buffer.vaddr = NULL;
+ stream->oa_buffer.vma = NULL;
unlock:
mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -1623,8 +1630,10 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream)
return 0;
}
-static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
+static void hsw_disable_metric_set(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+
I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
@@ -1634,6 +1643,27 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
~GT_NOA_ENABLE));
}
+static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
+ i915_reg_t reg)
+{
+ u32 mmio = i915_mmio_reg_offset(reg);
+ int i;
+
+ /*
+ * This arbitrary default will select the 'EU FPU0 Pipeline
+ * Active' event. In the future it's anticipated that there
+ * will be an explicit 'No Event' we can select, but not yet...
+ */
+ if (!oa_config)
+ return 0;
+
+ for (i = 0; i < oa_config->flex_regs_len; i++) {
+ if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio)
+ return oa_config->flex_regs[i].value;
+ }
+
+ return 0;
+}
/*
* NB: It must always remain pointer safe to run this even if the OA unit
* has been disabled.
@@ -1642,13 +1672,14 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
* It's fine to put out-of-date values into these per-context registers
* in the case that the OA unit has been disabled.
*/
static void
-gen8_update_reg_state_unlocked(struct intel_context *ce,
+gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
+ struct intel_context *ce,
u32 *reg_state,
const struct i915_oa_config *oa_config)
{
- struct drm_i915_private *i915 = ce->gem_context->i915;
- u32 ctx_oactxctrl = i915->perf.oa.ctx_oactxctrl_offset;
- u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset;
+ struct drm_i915_private *i915 = ce->engine->i915;
+ u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset;
+ u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset;
/* The MMIO offsets for Flex EU registers aren't contiguous */
i915_reg_t flex_regs[] = {
EU_PERF_CNTL0,
@@ -1662,38 +1693,143 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
int i;
CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
- (i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
- (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME);
for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
- u32 state_offset = ctx_flexeu0 + i * 2;
- u32 mmio = i915_mmio_reg_offset(flex_regs[i]);
+ CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i],
+ oa_config_flex_reg(oa_config, flex_regs[i]));
+ }
- /*
- * This arbitrary default will select the 'EU FPU0 Pipeline
- * Active' event. In the future it's anticipated that there
- * will be an explicit 'No Event' we can select, but not yet...
- */
- u32 value = 0;
+ CTX_REG(reg_state,
+ CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
+ intel_sseu_make_rpcs(i915, &ce->sseu));
+}
- if (oa_config) {
- u32 j;
+struct flex {
+ i915_reg_t reg;
+ u32 offset;
+ u32 value;
+};
- for (j = 0; j < oa_config->flex_regs_len; j++) {
- if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
- value = oa_config->flex_regs[j].value;
- break;
- }
- }
- }
+static int
+gen8_store_flex(struct i915_request *rq,
+ struct intel_context *ce,
+ const struct flex *flex, unsigned int count)
+{
+ u32 offset;
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 4 * count);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
+ do {
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = offset + (flex->offset + 1) * sizeof(u32);
+ *cs++ = 0;
+ *cs++ = flex->value;
+ } while (flex++, --count);
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int
+gen8_load_flex(struct i915_request *rq,
+ struct intel_context *ce,
+ const struct flex *flex, unsigned int count)
+{
+ u32 *cs;
+
+ GEM_BUG_ON(!count || count > 63);
+
+ cs = intel_ring_begin(rq, 2 * count + 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_LOAD_REGISTER_IMM(count);
+ do {
+ *cs++ = i915_mmio_reg_offset(flex->reg);
+ *cs++ = flex->value;
+ } while (flex++, --count);
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int gen8_modify_context(struct intel_context *ce,
+ const struct flex *flex, unsigned int count)
+{
+ struct i915_request *rq;
+ int err;
+
+ lockdep_assert_held(&ce->pin_mutex);
+
+ rq = i915_request_create(ce->engine->kernel_context);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ /* Serialise with the remote context */
+ err = intel_context_prepare_remote_request(ce, rq);
+ if (err == 0)
+ err = gen8_store_flex(rq, ce, flex, count);
+
+ i915_request_add(rq);
+ return err;
+}
+
+static int gen8_modify_self(struct intel_context *ce,
+ const struct flex *flex, unsigned int count)
+{
+ struct i915_request *rq;
+ int err;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ err = gen8_load_flex(rq, ce, flex, count);
+
+ i915_request_add(rq);
+ return err;
+}
+
+static int gen8_configure_context(struct i915_gem_context *ctx,
+ struct flex *flex, unsigned int count)
+{
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+ int err = 0;
+
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ GEM_BUG_ON(ce == ce->engine->kernel_context);
+
+ if (ce->engine->class != RENDER_CLASS)
+ continue;
- CTX_REG(reg_state, state_offset, flex_regs[i], value);
+ err = intel_context_lock_pinned(ce);
+ if (err)
+ break;
+
+ flex->value = intel_sseu_make_rpcs(ctx->i915, &ce->sseu);
+
+ /* Otherwise OA settings will be set upon first use */
+ if (intel_context_is_pinned(ce))
+ err = gen8_modify_context(ce, flex, count);
+
+ intel_context_unlock_pinned(ce);
+ if (err)
+ break;
}
+ i915_gem_context_unlock_engines(ctx);
- CTX_REG(reg_state,
- CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
- intel_sseu_make_rpcs(i915, &ce->sseu));
+ return err;
}
/*
@@ -1720,15 +1856,42 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
*
* Note: it's only the RCS/Render context that has any OA state.
*/
-static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
+static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
const struct i915_oa_config *oa_config)
{
- unsigned int map_type = i915_coherent_map_type(dev_priv);
+ struct drm_i915_private *i915 = stream->dev_priv;
+ /* The MMIO offsets for Flex EU registers aren't contiguous */
+ const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset;
+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N))
+ struct flex regs[] = {
+ {
+ GEN8_R_PWR_CLK_STATE,
+ CTX_R_PWR_CLK_STATE,
+ },
+ {
+ GEN8_OACTXCONTROL,
+ i915->perf.ctx_oactxctrl_offset,
+ ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+ GEN8_OA_COUNTER_RESUME)
+ },
+ { EU_PERF_CNTL0, ctx_flexeuN(0) },
+ { EU_PERF_CNTL1, ctx_flexeuN(1) },
+ { EU_PERF_CNTL2, ctx_flexeuN(2) },
+ { EU_PERF_CNTL3, ctx_flexeuN(3) },
+ { EU_PERF_CNTL4, ctx_flexeuN(4) },
+ { EU_PERF_CNTL5, ctx_flexeuN(5) },
+ { EU_PERF_CNTL6, ctx_flexeuN(6) },
+ };
+#undef ctx_flexeuN
+ struct intel_engine_cs *engine;
struct i915_gem_context *ctx;
- struct i915_request *rq;
- int ret;
+ int i;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
+ for (i = 2; i < ARRAY_SIZE(regs); i++)
+ regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+
+ lockdep_assert_held(&i915->drm.struct_mutex);
/*
* The OA register config is setup through the context image. This image
@@ -1740,58 +1903,41 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
* this might leave small interval of time where the OA unit is
* configured at an invalid sampling period.
*
- * So far the best way to work around this issue seems to be draining
- * the GPU from any submitted work.
+ * Note that since we emit all requests from a single ring, there
+ * is still an implicit global barrier here that may cause a high
+ * priority context to wait for an otherwise independent low priority
+ * context. Contexts idle at the time of reconfiguration are not
+ * trapped behind the barrier.
*/
- ret = i915_gem_wait_for_idle(dev_priv,
- I915_WAIT_LOCKED,
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- return ret;
-
- /* Update all contexts now that we've stalled the submission. */
- list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
- struct i915_gem_engines_iter it;
- struct intel_context *ce;
-
- for_each_gem_engine(ce,
- i915_gem_context_lock_engines(ctx),
- it) {
- u32 *regs;
-
- if (ce->engine->class != RENDER_CLASS)
- continue;
-
- /* OA settings will be set upon first use */
- if (!ce->state)
- continue;
-
- regs = i915_gem_object_pin_map(ce->state->obj,
- map_type);
- if (IS_ERR(regs)) {
- i915_gem_context_unlock_engines(ctx);
- return PTR_ERR(regs);
- }
-
- ce->state->obj->mm.dirty = true;
- regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
+ list_for_each_entry(ctx, &i915->contexts.list, link) {
+ int err;
- gen8_update_reg_state_unlocked(ce, regs, oa_config);
+ if (ctx == i915->kernel_context)
+ continue;
- i915_gem_object_unpin_map(ce->state->obj);
- }
- i915_gem_context_unlock_engines(ctx);
+ err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs));
+ if (err)
+ return err;
}
/*
- * Apply the configuration by doing one context restore of the edited
- * context image.
+ * After updating all other contexts, we need to modify ourselves.
+ * If we don't modify the kernel_context, we do not get events while
+ * idle.
*/
- rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
+ for_each_uabi_engine(engine, i915) {
+ struct intel_context *ce = engine->kernel_context;
+ int err;
- i915_request_add(rq);
+ if (engine->class != RENDER_CLASS)
+ continue;
+
+ regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
+
+ err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs));
+ if (err)
+ return err;
+ }
return 0;
}
@@ -1836,7 +1982,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
* to make sure all slices/subslices are ON before writing to NOA
* registers.
*/
- ret = gen8_configure_all_contexts(dev_priv, oa_config);
+ ret = gen8_configure_all_contexts(stream, oa_config);
if (ret)
return ret;
@@ -1849,19 +1995,23 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
return 0;
}
-static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
+static void gen8_disable_metric_set(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+
/* Reset all contexts' slices/subslices configurations. */
- gen8_configure_all_contexts(dev_priv, NULL);
+ gen8_configure_all_contexts(stream, NULL);
I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
~GT_NOA_ENABLE));
}
-static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
+static void gen10_disable_metric_set(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+
/* Reset all contexts' slices/subslices configurations. */
- gen8_configure_all_contexts(dev_priv, NULL);
+ gen8_configure_all_contexts(stream, NULL);
/* Make sure we disable noa to save power. */
I915_WRITE(RPM_CONFIG1,
@@ -1872,10 +2022,10 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
struct i915_gem_context *ctx = stream->ctx;
- u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;
- bool periodic = dev_priv->perf.oa.periodic;
- u32 period_exponent = dev_priv->perf.oa.period_exponent;
- u32 report_format = dev_priv->perf.oa.oa_buffer.format;
+ u32 ctx_id = stream->specific_ctx_id;
+ bool periodic = stream->periodic;
+ u32 period_exponent = stream->period_exponent;
+ u32 report_format = stream->oa_buffer.format;
/*
* Reset buf pointers so we don't forward reports from before now.
@@ -1886,7 +2036,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
* on the assumption that certain fields are written to zeroed
* memory which this helps maintains.
*/
- gen7_init_oa_buffer(dev_priv);
+ gen7_init_oa_buffer(stream);
I915_WRITE(GEN7_OACONTROL,
(ctx_id & GEN7_OACONTROL_CTX_MASK) |
@@ -1901,7 +2051,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
static void gen8_oa_enable(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- u32 report_format = dev_priv->perf.oa.oa_buffer.format;
+ u32 report_format = stream->oa_buffer.format;
/*
* Reset buf pointers so we don't forward reports from before now.
@@ -1912,7 +2062,7 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
* on the assumption that certain fields are written to zeroed
* memory which this helps maintains.
*/
- gen8_init_oa_buffer(dev_priv);
+ gen8_init_oa_buffer(stream);
/*
* Note: we don't rely on the hardware to perform single context
@@ -1937,10 +2087,10 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- dev_priv->perf.oa.ops.oa_enable(stream);
+ dev_priv->perf.ops.oa_enable(stream);
- if (dev_priv->perf.oa.periodic)
- hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
+ if (stream->periodic)
+ hrtimer_start(&stream->poll_check_timer,
ns_to_ktime(POLL_PERIOD),
HRTIMER_MODE_REL_PINNED);
}
@@ -1979,10 +2129,10 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- dev_priv->perf.oa.ops.oa_disable(stream);
+ dev_priv->perf.ops.oa_disable(stream);
- if (dev_priv->perf.oa.periodic)
- hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
+ if (stream->periodic)
+ hrtimer_cancel(&stream->poll_check_timer);
}
static const struct i915_perf_stream_ops i915_oa_stream_ops = {
@@ -2034,7 +2184,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return -EINVAL;
}
- if (!dev_priv->perf.oa.ops.enable_metric_set) {
+ if (!dev_priv->perf.ops.enable_metric_set) {
DRM_DEBUG("OA unit not supported\n");
return -ENODEV;
}
@@ -2043,7 +2193,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
* counter reports and marshal to the appropriate client
* we currently only allow exclusive access
*/
- if (dev_priv->perf.oa.exclusive_stream) {
+ if (dev_priv->perf.exclusive_stream) {
DRM_DEBUG("OA unit already in use\n");
return -EBUSY;
}
@@ -2053,43 +2203,23 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return -EINVAL;
}
- /* We set up some ratelimit state to potentially throttle any _NOTES
- * about spurious, invalid OA reports which we don't forward to
- * userspace.
- *
- * The initialization is associated with opening the stream (not driver
- * init) considering we print a _NOTE about any throttling when closing
- * the stream instead of waiting until driver _fini which no one would
- * ever see.
- *
- * Using the same limiting factors as printk_ratelimit()
- */
- ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs,
- 5 * HZ, 10);
- /* Since we use a DRM_NOTE for spurious reports it would be
- * inconsistent to let __ratelimit() automatically print a warning for
- * throttling.
- */
- ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs,
- RATELIMIT_MSG_ON_RELEASE);
-
stream->sample_size = sizeof(struct drm_i915_perf_record_header);
- format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;
+ format_size = dev_priv->perf.oa_formats[props->oa_format].size;
stream->sample_flags |= SAMPLE_OA_REPORT;
stream->sample_size += format_size;
- dev_priv->perf.oa.oa_buffer.format_size = format_size;
- if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0))
+ stream->oa_buffer.format_size = format_size;
+ if (WARN_ON(stream->oa_buffer.format_size == 0))
return -EINVAL;
- dev_priv->perf.oa.oa_buffer.format =
- dev_priv->perf.oa.oa_formats[props->oa_format].format;
+ stream->oa_buffer.format =
+ dev_priv->perf.oa_formats[props->oa_format].format;
- dev_priv->perf.oa.periodic = props->oa_periodic;
- if (dev_priv->perf.oa.periodic)
- dev_priv->perf.oa.period_exponent = props->oa_period_exponent;
+ stream->periodic = props->oa_periodic;
+ if (stream->periodic)
+ stream->period_exponent = props->oa_period_exponent;
if (stream->ctx) {
ret = oa_get_render_ctx_id(stream);
@@ -2120,7 +2250,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
- ret = alloc_oa_buffer(dev_priv);
+ ret = alloc_oa_buffer(stream);
if (ret)
goto err_oa_buf_alloc;
@@ -2129,9 +2259,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
goto err_lock;
stream->ops = &i915_oa_stream_ops;
- dev_priv->perf.oa.exclusive_stream = stream;
+ dev_priv->perf.exclusive_stream = stream;
- ret = dev_priv->perf.oa.ops.enable_metric_set(stream);
+ ret = dev_priv->perf.ops.enable_metric_set(stream);
if (ret) {
DRM_DEBUG("Unable to enable metric set\n");
goto err_enable;
@@ -2139,15 +2269,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
mutex_unlock(&dev_priv->drm.struct_mutex);
+ hrtimer_init(&stream->poll_check_timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ stream->poll_check_timer.function = oa_poll_check_timer_cb;
+ init_waitqueue_head(&stream->poll_wq);
+ spin_lock_init(&stream->oa_buffer.ptr_lock);
+
return 0;
err_enable:
- dev_priv->perf.oa.exclusive_stream = NULL;
- dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
+ dev_priv->perf.exclusive_stream = NULL;
+ dev_priv->perf.ops.disable_metric_set(stream);
mutex_unlock(&dev_priv->drm.struct_mutex);
err_lock:
- free_oa_buffer(dev_priv);
+ free_oa_buffer(stream);
err_oa_buf_alloc:
put_oa_config(dev_priv, stream->oa_config);
@@ -2171,9 +2307,9 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
if (engine->class != RENDER_CLASS)
return;
- stream = engine->i915->perf.oa.exclusive_stream;
+ stream = engine->i915->perf.exclusive_stream;
if (stream)
- gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
+ gen8_update_reg_state_unlocked(stream, ce, regs, stream->oa_config);
}
/**
@@ -2289,7 +2425,7 @@ static ssize_t i915_perf_read(struct file *file,
/* Maybe make ->pollin per-stream state if we support multiple
* concurrent streams in the future.
*/
- dev_priv->perf.oa.pollin = false;
+ stream->pollin = false;
}
return ret;
@@ -2297,13 +2433,12 @@ static ssize_t i915_perf_read(struct file *file,
static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
{
- struct drm_i915_private *dev_priv =
- container_of(hrtimer, typeof(*dev_priv),
- perf.oa.poll_check_timer);
+ struct i915_perf_stream *stream =
+ container_of(hrtimer, typeof(*stream), poll_check_timer);
- if (oa_buffer_check_unlocked(dev_priv)) {
- dev_priv->perf.oa.pollin = true;
- wake_up(&dev_priv->perf.oa.poll_wq);
+ if (oa_buffer_check_unlocked(stream)) {
+ stream->pollin = true;
+ wake_up(&stream->poll_wq);
}
hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
@@ -2342,7 +2477,7 @@ static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv,
* the hrtimer/oa_poll_check_timer_cb to notify us when there are
* samples to read.
*/
- if (dev_priv->perf.oa.pollin)
+ if (stream->pollin)
events |= EPOLLIN;
return events;
@@ -2768,7 +2903,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
value);
return -EINVAL;
}
- if (!dev_priv->perf.oa.oa_formats[value].size) {
+ if (!dev_priv->perf.oa_formats[value].size) {
DRM_DEBUG("Unsupported OA report format %llu\n",
value);
return -EINVAL;
@@ -2912,7 +3047,7 @@ void i915_perf_register(struct drm_i915_private *dev_priv)
if (!dev_priv->perf.metrics_kobj)
goto exit;
- sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr);
+ sysfs_attr_init(&dev_priv->perf.test_config.sysfs_metric_id.attr);
if (INTEL_GEN(dev_priv) >= 11) {
i915_perf_load_test_config_icl(dev_priv);
@@ -2947,15 +3082,15 @@ void i915_perf_register(struct drm_i915_private *dev_priv)
i915_perf_load_test_config_hsw(dev_priv);
}
- if (dev_priv->perf.oa.test_config.id == 0)
+ if (dev_priv->perf.test_config.id == 0)
goto sysfs_error;
ret = sysfs_create_group(dev_priv->perf.metrics_kobj,
- &dev_priv->perf.oa.test_config.sysfs_metric);
+ &dev_priv->perf.test_config.sysfs_metric);
if (ret)
goto sysfs_error;
- atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);
+ atomic_set(&dev_priv->perf.test_config.ref_count, 1);
goto exit;
@@ -2982,7 +3117,7 @@ void i915_perf_unregister(struct drm_i915_private *dev_priv)
return;
sysfs_remove_group(dev_priv->perf.metrics_kobj,
- &dev_priv->perf.oa.test_config.sysfs_metric);
+ &dev_priv->perf.test_config.sysfs_metric);
kobject_put(dev_priv->perf.metrics_kobj);
dev_priv->perf.metrics_kobj = NULL;
@@ -3227,7 +3362,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
oa_config->mux_regs_len = args->n_mux_regs;
oa_config->mux_regs =
alloc_oa_regs(dev_priv,
- dev_priv->perf.oa.ops.is_valid_mux_reg,
+ dev_priv->perf.ops.is_valid_mux_reg,
u64_to_user_ptr(args->mux_regs_ptr),
args->n_mux_regs);
@@ -3240,7 +3375,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
oa_config->b_counter_regs_len = args->n_boolean_regs;
oa_config->b_counter_regs =
alloc_oa_regs(dev_priv,
- dev_priv->perf.oa.ops.is_valid_b_counter_reg,
+ dev_priv->perf.ops.is_valid_b_counter_reg,
u64_to_user_ptr(args->boolean_regs_ptr),
args->n_boolean_regs);
@@ -3259,7 +3394,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
oa_config->flex_regs_len = args->n_flex_regs;
oa_config->flex_regs =
alloc_oa_regs(dev_priv,
- dev_priv->perf.oa.ops.is_valid_flex_reg,
+ dev_priv->perf.ops.is_valid_flex_reg,
u64_to_user_ptr(args->flex_regs_ptr),
args->n_flex_regs);
@@ -3426,20 +3561,20 @@ static struct ctl_table dev_root[] = {
void i915_perf_init(struct drm_i915_private *dev_priv)
{
if (IS_HASWELL(dev_priv)) {
- dev_priv->perf.oa.ops.is_valid_b_counter_reg =
+ dev_priv->perf.ops.is_valid_b_counter_reg =
gen7_is_valid_b_counter_addr;
- dev_priv->perf.oa.ops.is_valid_mux_reg =
+ dev_priv->perf.ops.is_valid_mux_reg =
hsw_is_valid_mux_addr;
- dev_priv->perf.oa.ops.is_valid_flex_reg = NULL;
- dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
- dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set;
- dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
- dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
- dev_priv->perf.oa.ops.read = gen7_oa_read;
- dev_priv->perf.oa.ops.oa_hw_tail_read =
+ dev_priv->perf.ops.is_valid_flex_reg = NULL;
+ dev_priv->perf.ops.enable_metric_set = hsw_enable_metric_set;
+ dev_priv->perf.ops.disable_metric_set = hsw_disable_metric_set;
+ dev_priv->perf.ops.oa_enable = gen7_oa_enable;
+ dev_priv->perf.ops.oa_disable = gen7_oa_disable;
+ dev_priv->perf.ops.read = gen7_oa_read;
+ dev_priv->perf.ops.oa_hw_tail_read =
gen7_oa_hw_tail_read;
- dev_priv->perf.oa.oa_formats = hsw_oa_formats;
+ dev_priv->perf.oa_formats = hsw_oa_formats;
} else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
/* Note: that although we could theoretically also support the
* legacy ringbuffer mode on BDW (and earlier iterations of
@@ -3447,71 +3582,65 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
* worth the complexity to maintain now that BDW+ enable
* execlist mode by default.
*/
- dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats;
+ dev_priv->perf.oa_formats = gen8_plus_oa_formats;
- dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable;
- dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable;
- dev_priv->perf.oa.ops.read = gen8_oa_read;
- dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
+ dev_priv->perf.ops.oa_enable = gen8_oa_enable;
+ dev_priv->perf.ops.oa_disable = gen8_oa_disable;
+ dev_priv->perf.ops.read = gen8_oa_read;
+ dev_priv->perf.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
if (IS_GEN_RANGE(dev_priv, 8, 9)) {
- dev_priv->perf.oa.ops.is_valid_b_counter_reg =
+ dev_priv->perf.ops.is_valid_b_counter_reg =
gen7_is_valid_b_counter_addr;
- dev_priv->perf.oa.ops.is_valid_mux_reg =
+ dev_priv->perf.ops.is_valid_mux_reg =
gen8_is_valid_mux_addr;
- dev_priv->perf.oa.ops.is_valid_flex_reg =
+ dev_priv->perf.ops.is_valid_flex_reg =
gen8_is_valid_flex_addr;
if (IS_CHERRYVIEW(dev_priv)) {
- dev_priv->perf.oa.ops.is_valid_mux_reg =
+ dev_priv->perf.ops.is_valid_mux_reg =
chv_is_valid_mux_addr;
}
- dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
- dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set;
+ dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set;
+ dev_priv->perf.ops.disable_metric_set = gen8_disable_metric_set;
if (IS_GEN(dev_priv, 8)) {
- dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
- dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;
+ dev_priv->perf.ctx_oactxctrl_offset = 0x120;
+ dev_priv->perf.ctx_flexeu0_offset = 0x2ce;
- dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
+ dev_priv->perf.gen8_valid_ctx_bit = BIT(25);
} else {
- dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
- dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
+ dev_priv->perf.ctx_oactxctrl_offset = 0x128;
+ dev_priv->perf.ctx_flexeu0_offset = 0x3de;
- dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
+ dev_priv->perf.gen8_valid_ctx_bit = BIT(16);
}
} else if (IS_GEN_RANGE(dev_priv, 10, 11)) {
- dev_priv->perf.oa.ops.is_valid_b_counter_reg =
+ dev_priv->perf.ops.is_valid_b_counter_reg =
gen7_is_valid_b_counter_addr;
- dev_priv->perf.oa.ops.is_valid_mux_reg =
+ dev_priv->perf.ops.is_valid_mux_reg =
gen10_is_valid_mux_addr;
- dev_priv->perf.oa.ops.is_valid_flex_reg =
+ dev_priv->perf.ops.is_valid_flex_reg =
gen8_is_valid_flex_addr;
- dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
- dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set;
+ dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set;
+ dev_priv->perf.ops.disable_metric_set = gen10_disable_metric_set;
if (IS_GEN(dev_priv, 10)) {
- dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
- dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
+ dev_priv->perf.ctx_oactxctrl_offset = 0x128;
+ dev_priv->perf.ctx_flexeu0_offset = 0x3de;
} else {
- dev_priv->perf.oa.ctx_oactxctrl_offset = 0x124;
- dev_priv->perf.oa.ctx_flexeu0_offset = 0x78e;
+ dev_priv->perf.ctx_oactxctrl_offset = 0x124;
+ dev_priv->perf.ctx_flexeu0_offset = 0x78e;
}
- dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
+ dev_priv->perf.gen8_valid_ctx_bit = BIT(16);
}
}
- if (dev_priv->perf.oa.ops.enable_metric_set) {
- hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
- CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
- init_waitqueue_head(&dev_priv->perf.oa.poll_wq);
-
+ if (dev_priv->perf.ops.enable_metric_set) {
INIT_LIST_HEAD(&dev_priv->perf.streams);
mutex_init(&dev_priv->perf.lock);
- spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
oa_sample_rate_hard_limit = 1000 *
(RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
@@ -3520,6 +3649,25 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
mutex_init(&dev_priv->perf.metrics_lock);
idr_init(&dev_priv->perf.metrics_idr);
+ /* We set up some ratelimit state to potentially throttle any
+ * _NOTES about spurious, invalid OA reports which we don't
+ * forward to userspace.
+ *
+ * We print a _NOTE about any throttling when closing the
+ * stream instead of waiting until driver _fini which no one
+ * would ever see.
+ *
+ * Using the same limiting factors as printk_ratelimit()
+ */
+ ratelimit_state_init(&dev_priv->perf.spurious_report_rs,
+ 5 * HZ, 10);
+ /* Since we use a DRM_NOTE for spurious reports it would be
+ * inconsistent to let __ratelimit() automatically print a
+ * warning for throttling.
+ */
+ ratelimit_set_flags(&dev_priv->perf.spurious_report_rs,
+ RATELIMIT_MSG_ON_RELEASE);
+
dev_priv->perf.initialized = true;
}
}
@@ -3548,7 +3696,7 @@ void i915_perf_fini(struct drm_i915_private *dev_priv)
unregister_sysctl_table(dev_priv->perf.sysctl_header);
- memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));
+ memset(&dev_priv->perf.ops, 0, sizeof(dev_priv->perf.ops));
dev_priv->perf.initialized = false;
}
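
For context, a minimal userspace consumer of the poll_wq/pollin machinery reworked above might look as follows. This is a sketch, not part of the patch: drm_fd is assumed to be an open i915 render-node fd, and the chosen property values (metrics set 1, i.e. the driver's test config as in get_oa_config(), the gen8+ report format, a sampling exponent of 16) as well as the uapi header path are illustrative assumptions.

#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/i915_drm.h>

static int read_one_batch_of_oa_reports(int drm_fd)
{
	uint64_t properties[] = {
		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
		DRM_I915_PERF_PROP_OA_METRICS_SET, 1,	/* test config */
		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
		DRM_I915_PERF_PROP_OA_EXPONENT, 16,	/* sampling period */
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC |
			 I915_PERF_FLAG_FD_NONBLOCK,
		.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
		.properties_ptr = (uintptr_t)properties,
	};
	struct pollfd pfd;
	char buf[16 * 4096];
	ssize_t n = 0;

	pfd.fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
	if (pfd.fd < 0)
		return -1;
	pfd.events = POLLIN;

	/* blocks until oa_poll_check_timer_cb() sets stream->pollin */
	if (poll(&pfd, 1, -1) > 0)
		n = read(pfd.fd, buf, sizeof(buf));	/* OA sample records */

	printf("read %zd bytes of drm_i915_perf_record_header records\n", n);
	close(pfd.fd);
	return 0;
}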