diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_perf.c | 836 |
1 files changed, 492 insertions, 344 deletions
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 5140017f9a39..e42b86827d6b 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -200,20 +200,21 @@ #include "gt/intel_lrc_reg.h" #include "i915_drv.h" -#include "i915_oa_hsw.h" -#include "i915_oa_bdw.h" -#include "i915_oa_chv.h" -#include "i915_oa_sklgt2.h" -#include "i915_oa_sklgt3.h" -#include "i915_oa_sklgt4.h" -#include "i915_oa_bxt.h" -#include "i915_oa_kblgt2.h" -#include "i915_oa_kblgt3.h" -#include "i915_oa_glk.h" -#include "i915_oa_cflgt2.h" -#include "i915_oa_cflgt3.h" -#include "i915_oa_cnl.h" -#include "i915_oa_icl.h" +#include "i915_perf.h" +#include "oa/i915_oa_hsw.h" +#include "oa/i915_oa_bdw.h" +#include "oa/i915_oa_chv.h" +#include "oa/i915_oa_sklgt2.h" +#include "oa/i915_oa_sklgt3.h" +#include "oa/i915_oa_sklgt4.h" +#include "oa/i915_oa_bxt.h" +#include "oa/i915_oa_kblgt2.h" +#include "oa/i915_oa_kblgt3.h" +#include "oa/i915_oa_glk.h" +#include "oa/i915_oa_cflgt2.h" +#include "oa/i915_oa_cflgt3.h" +#include "oa/i915_oa_cnl.h" +#include "oa/i915_oa_icl.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -364,6 +365,8 @@ struct perf_open_properties { int oa_period_exponent; }; +static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); + static void free_oa_config(struct drm_i915_private *dev_priv, struct i915_oa_config *oa_config) { @@ -392,8 +395,8 @@ static int get_oa_config(struct drm_i915_private *dev_priv, int ret; if (metrics_set == 1) { - *out_config = &dev_priv->perf.oa.test_config; - atomic_inc(&dev_priv->perf.oa.test_config.ref_count); + *out_config = &dev_priv->perf.test_config; + atomic_inc(&dev_priv->perf.test_config.ref_count); return 0; } @@ -412,13 +415,16 @@ static int get_oa_config(struct drm_i915_private *dev_priv, return ret; } -static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv) +static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) { + struct drm_i915_private *dev_priv = stream->dev_priv; + return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; } -static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv) +static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) { + struct drm_i915_private *dev_priv = stream->dev_priv; u32 oastatus1 = I915_READ(GEN7_OASTATUS1); return oastatus1 & GEN7_OASTATUS1_TAIL_MASK; @@ -426,7 +432,7 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv) /** * oa_buffer_check_unlocked - check for data and update tail ptr state - * @dev_priv: i915 device instance + * @stream: i915 stream instance * * This is either called via fops (for blocking reads in user ctx) or the poll * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check @@ -448,9 +454,10 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv) * * Returns: %true if the OA buffer contains data, else %false */ -static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) +static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) { - int report_size = dev_priv->perf.oa.oa_buffer.format_size; + struct drm_i915_private *dev_priv = stream->dev_priv; + int report_size = stream->oa_buffer.format_size; unsigned long flags; unsigned int aged_idx; u32 head, hw_tail, aged_tail, aging_tail; @@ -460,19 +467,19 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) * could result in an OA buffer reset which might reset the head, * tails[] and aged_tail state. */ - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); /* NB: The head we observe here might effectively be a little out of * date (between head and tails[aged_idx].offset if there is currently * a read() in progress. */ - head = dev_priv->perf.oa.oa_buffer.head; + head = stream->oa_buffer.head; - aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx; - aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset; - aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset; + aged_idx = stream->oa_buffer.aged_tail_idx; + aged_tail = stream->oa_buffer.tails[aged_idx].offset; + aging_tail = stream->oa_buffer.tails[!aged_idx].offset; - hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv); + hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream); /* The tail pointer increases in 64 byte increments, * not in report_size steps... @@ -492,16 +499,16 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) * available) without needing to wait for a later hrtimer callback. */ if (aging_tail != INVALID_TAIL_PTR && - ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) > + ((now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC)) { aged_idx ^= 1; - dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx; + stream->oa_buffer.aged_tail_idx = aged_idx; aged_tail = aging_tail; /* Mark that we need a new pointer to start aging... */ - dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR; aging_tail = INVALID_TAIL_PTR; } @@ -516,7 +523,7 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) if (aging_tail == INVALID_TAIL_PTR && (aged_tail == INVALID_TAIL_PTR || OA_TAKEN(hw_tail, aged_tail) >= report_size)) { - struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma; + struct i915_vma *vma = stream->oa_buffer.vma; u32 gtt_offset = i915_ggtt_offset(vma); /* Be paranoid and do a bounds check on the pointer read back @@ -525,16 +532,16 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) */ if (hw_tail >= gtt_offset && hw_tail < (gtt_offset + OA_BUFFER_SIZE)) { - dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = + stream->oa_buffer.tails[!aged_idx].offset = aging_tail = hw_tail; - dev_priv->perf.oa.oa_buffer.aging_timestamp = now; + stream->oa_buffer.aging_timestamp = now; } else { DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n", hw_tail); } } - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); return aged_tail == INVALID_TAIL_PTR ? false : OA_TAKEN(aged_tail, head) >= report_size; @@ -597,8 +604,7 @@ static int append_oa_sample(struct i915_perf_stream *stream, size_t *offset, const u8 *report) { - struct drm_i915_private *dev_priv = stream->dev_priv; - int report_size = dev_priv->perf.oa.oa_buffer.format_size; + int report_size = stream->oa_buffer.format_size; struct drm_i915_perf_record_header header; u32 sample_flags = stream->sample_flags; @@ -650,9 +656,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, size_t *offset) { struct drm_i915_private *dev_priv = stream->dev_priv; - int report_size = dev_priv->perf.oa.oa_buffer.format_size; - u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; - u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + int report_size = stream->oa_buffer.format_size; + u8 *oa_buf_base = stream->oa_buffer.vaddr; + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); u32 mask = (OA_BUFFER_SIZE - 1); size_t start_offset = *offset; unsigned long flags; @@ -664,13 +670,13 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, if (WARN_ON(!stream->enabled)) return -EIO; - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - head = dev_priv->perf.oa.oa_buffer.head; - aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx; - tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset; + head = stream->oa_buffer.head; + aged_tail_idx = stream->oa_buffer.aged_tail_idx; + tail = stream->oa_buffer.tails[aged_tail_idx].offset; - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* * An invalid tail pointer here means we're still waiting for the poll @@ -734,12 +740,12 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, reason = ((report32[0] >> OAREPORT_REASON_SHIFT) & OAREPORT_REASON_MASK); if (reason == 0) { - if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs)) + if (__ratelimit(&dev_priv->perf.spurious_report_rs)) DRM_NOTE("Skipping spurious, invalid OA report\n"); continue; } - ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask; + ctx_id = report32[2] & stream->specific_ctx_id_mask; /* * Squash whatever is in the CTX_ID field if it's marked as @@ -749,7 +755,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. */ - if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit)) + if (!(report32[0] & dev_priv->perf.gen8_valid_ctx_bit)) ctx_id = report32[2] = INVALID_CTX_ID; /* @@ -783,18 +789,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * switches since it's not-uncommon for periodic samples to * identify a switch before any 'context switch' report. */ - if (!dev_priv->perf.oa.exclusive_stream->ctx || - dev_priv->perf.oa.specific_ctx_id == ctx_id || - (dev_priv->perf.oa.oa_buffer.last_ctx_id == - dev_priv->perf.oa.specific_ctx_id) || + if (!dev_priv->perf.exclusive_stream->ctx || + stream->specific_ctx_id == ctx_id || + stream->oa_buffer.last_ctx_id == stream->specific_ctx_id || reason & OAREPORT_REASON_CTX_SWITCH) { /* * While filtering for a single context we avoid * leaking the IDs of other contexts. */ - if (dev_priv->perf.oa.exclusive_stream->ctx && - dev_priv->perf.oa.specific_ctx_id != ctx_id) { + if (dev_priv->perf.exclusive_stream->ctx && + stream->specific_ctx_id != ctx_id) { report32[2] = INVALID_CTX_ID; } @@ -803,7 +808,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, if (ret) break; - dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id; + stream->oa_buffer.last_ctx_id = ctx_id; } /* @@ -817,7 +822,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, } if (start_offset != *offset) { - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); /* * We removed the gtt_offset for the copy loop above, indexing @@ -826,9 +831,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, head += gtt_offset; I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK); - dev_priv->perf.oa.oa_buffer.head = head; + stream->oa_buffer.head = head; - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); } return ret; @@ -863,7 +868,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream, u32 oastatus; int ret; - if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) + if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; oastatus = I915_READ(GEN8_OASTATUS); @@ -889,10 +894,10 @@ static int gen8_oa_read(struct i915_perf_stream *stream, return ret; DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", - dev_priv->perf.oa.period_exponent); + stream->period_exponent); - dev_priv->perf.oa.ops.oa_disable(stream); - dev_priv->perf.oa.ops.oa_enable(stream); + dev_priv->perf.ops.oa_disable(stream); + dev_priv->perf.ops.oa_enable(stream); /* * Note: .oa_enable() is expected to re-init the oabuffer and @@ -939,9 +944,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, size_t *offset) { struct drm_i915_private *dev_priv = stream->dev_priv; - int report_size = dev_priv->perf.oa.oa_buffer.format_size; - u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; - u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + int report_size = stream->oa_buffer.format_size; + u8 *oa_buf_base = stream->oa_buffer.vaddr; + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); u32 mask = (OA_BUFFER_SIZE - 1); size_t start_offset = *offset; unsigned long flags; @@ -953,13 +958,13 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, if (WARN_ON(!stream->enabled)) return -EIO; - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - head = dev_priv->perf.oa.oa_buffer.head; - aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx; - tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset; + head = stream->oa_buffer.head; + aged_tail_idx = stream->oa_buffer.aged_tail_idx; + tail = stream->oa_buffer.tails[aged_tail_idx].offset; - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* An invalid tail pointer here means we're still waiting for the poll * hrtimer callback to give us a pointer @@ -1012,7 +1017,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, * copying it to userspace... */ if (report32[0] == 0) { - if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs)) + if (__ratelimit(&dev_priv->perf.spurious_report_rs)) DRM_NOTE("Skipping spurious, invalid OA report\n"); continue; } @@ -1031,7 +1036,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, } if (start_offset != *offset) { - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); /* We removed the gtt_offset for the copy loop above, indexing * relative to oa_buf_base so put back here... @@ -1041,9 +1046,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, I915_WRITE(GEN7_OASTATUS2, ((head & GEN7_OASTATUS2_HEAD_MASK) | GEN7_OASTATUS2_MEM_SELECT_GGTT)); - dev_priv->perf.oa.oa_buffer.head = head; + stream->oa_buffer.head = head; - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); } return ret; @@ -1074,7 +1079,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream, u32 oastatus1; int ret; - if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) + if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; oastatus1 = I915_READ(GEN7_OASTATUS1); @@ -1084,7 +1089,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream, * may be updated asynchronously) so we ignore status bits * that have already been reported to userspace. */ - oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1; + oastatus1 &= ~dev_priv->perf.gen7_latched_oastatus1; /* We treat OABUFFER_OVERFLOW as a significant error: * @@ -1113,10 +1118,10 @@ static int gen7_oa_read(struct i915_perf_stream *stream, return ret; DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", - dev_priv->perf.oa.period_exponent); + stream->period_exponent); - dev_priv->perf.oa.ops.oa_disable(stream); - dev_priv->perf.oa.ops.oa_enable(stream); + dev_priv->perf.ops.oa_disable(stream); + dev_priv->perf.ops.oa_enable(stream); oastatus1 = I915_READ(GEN7_OASTATUS1); } @@ -1126,7 +1131,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - dev_priv->perf.oa.gen7_latched_oastatus1 |= + dev_priv->perf.gen7_latched_oastatus1 |= GEN7_OASTATUS1_REPORT_LOST; } @@ -1149,14 +1154,12 @@ static int gen7_oa_read(struct i915_perf_stream *stream, */ static int i915_oa_wait_unlocked(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - /* We would wait indefinitely if periodic sampling is not enabled */ - if (!dev_priv->perf.oa.periodic) + if (!stream->periodic) return -EIO; - return wait_event_interruptible(dev_priv->perf.oa.poll_wq, - oa_buffer_check_unlocked(dev_priv)); + return wait_event_interruptible(stream->poll_wq, + oa_buffer_check_unlocked(stream)); } /** @@ -1173,9 +1176,7 @@ static void i915_oa_poll_wait(struct i915_perf_stream *stream, struct file *file, poll_table *wait) { - struct drm_i915_private *dev_priv = stream->dev_priv; - - poll_wait(file, &dev_priv->perf.oa.poll_wq, wait); + poll_wait(file, &stream->poll_wq, wait); } /** @@ -1197,13 +1198,14 @@ static int i915_oa_read(struct i915_perf_stream *stream, { struct drm_i915_private *dev_priv = stream->dev_priv; - return dev_priv->perf.oa.ops.read(stream, buf, count, offset); + return dev_priv->perf.ops.read(stream, buf, count, offset); } -static struct intel_context *oa_pin_context(struct drm_i915_private *i915, - struct i915_gem_context *ctx) +static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) { struct i915_gem_engines_iter it; + struct drm_i915_private *i915 = stream->dev_priv; + struct i915_gem_context *ctx = stream->ctx; struct intel_context *ce; int err; @@ -1221,7 +1223,7 @@ static struct intel_context *oa_pin_context(struct drm_i915_private *i915, */ err = intel_context_pin(ce); if (err == 0) { - i915->perf.oa.pinned_ctx = ce; + stream->pinned_ctx = ce; break; } } @@ -1231,7 +1233,7 @@ static struct intel_context *oa_pin_context(struct drm_i915_private *i915, if (err) return ERR_PTR(err); - return i915->perf.oa.pinned_ctx; + return stream->pinned_ctx; } /** @@ -1249,7 +1251,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) struct drm_i915_private *i915 = stream->dev_priv; struct intel_context *ce; - ce = oa_pin_context(i915, stream->ctx); + ce = oa_pin_context(stream); if (IS_ERR(ce)) return PTR_ERR(ce); @@ -1259,8 +1261,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * On Haswell we don't do any post processing of the reports * and don't need to use the mask. */ - i915->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state); - i915->perf.oa.specific_ctx_id_mask = 0; + stream->specific_ctx_id = i915_ggtt_offset(ce->state); + stream->specific_ctx_id_mask = 0; break; } @@ -1278,33 +1280,33 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * dropped by GuC. They won't be part of the context * ID in the OA reports, so squash those lower bits. */ - i915->perf.oa.specific_ctx_id = + stream->specific_ctx_id = lower_32_bits(ce->lrc_desc) >> 12; /* * GuC uses the top bit to signal proxy submission, so * ignore that bit. */ - i915->perf.oa.specific_ctx_id_mask = + stream->specific_ctx_id_mask = (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1; } else { - i915->perf.oa.specific_ctx_id_mask = + stream->specific_ctx_id_mask = (1U << GEN8_CTX_ID_WIDTH) - 1; - i915->perf.oa.specific_ctx_id = + stream->specific_ctx_id = upper_32_bits(ce->lrc_desc); - i915->perf.oa.specific_ctx_id &= - i915->perf.oa.specific_ctx_id_mask; + stream->specific_ctx_id &= + stream->specific_ctx_id_mask; } break; case 11: { - i915->perf.oa.specific_ctx_id_mask = + stream->specific_ctx_id_mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) | ((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) | ((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32); - i915->perf.oa.specific_ctx_id = upper_32_bits(ce->lrc_desc); - i915->perf.oa.specific_ctx_id &= - i915->perf.oa.specific_ctx_id_mask; + stream->specific_ctx_id = upper_32_bits(ce->lrc_desc); + stream->specific_ctx_id &= + stream->specific_ctx_id_mask; break; } @@ -1313,8 +1315,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) } DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n", - i915->perf.oa.specific_ctx_id, - i915->perf.oa.specific_ctx_id_mask); + stream->specific_ctx_id, + stream->specific_ctx_id_mask); return 0; } @@ -1331,10 +1333,10 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) struct drm_i915_private *dev_priv = stream->dev_priv; struct intel_context *ce; - dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; - dev_priv->perf.oa.specific_ctx_id_mask = 0; + stream->specific_ctx_id = INVALID_CTX_ID; + stream->specific_ctx_id_mask = 0; - ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx); + ce = fetch_and_zero(&stream->pinned_ctx); if (ce) { mutex_lock(&dev_priv->drm.struct_mutex); intel_context_unpin(ce); @@ -1343,34 +1345,36 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) } static void -free_oa_buffer(struct drm_i915_private *i915) +free_oa_buffer(struct i915_perf_stream *stream) { + struct drm_i915_private *i915 = stream->dev_priv; + mutex_lock(&i915->drm.struct_mutex); - i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma, + i915_vma_unpin_and_release(&stream->oa_buffer.vma, I915_VMA_RELEASE_MAP); mutex_unlock(&i915->drm.struct_mutex); - i915->perf.oa.oa_buffer.vaddr = NULL; + stream->oa_buffer.vaddr = NULL; } static void i915_oa_stream_destroy(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - BUG_ON(stream != dev_priv->perf.oa.exclusive_stream); + BUG_ON(stream != dev_priv->perf.exclusive_stream); /* * Unset exclusive_stream first, it will be checked while disabling * the metric set on gen8+. */ mutex_lock(&dev_priv->drm.struct_mutex); - dev_priv->perf.oa.exclusive_stream = NULL; - dev_priv->perf.oa.ops.disable_metric_set(dev_priv); + dev_priv->perf.exclusive_stream = NULL; + dev_priv->perf.ops.disable_metric_set(stream); mutex_unlock(&dev_priv->drm.struct_mutex); - free_oa_buffer(dev_priv); + free_oa_buffer(stream); intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); @@ -1380,41 +1384,42 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) put_oa_config(dev_priv, stream->oa_config); - if (dev_priv->perf.oa.spurious_report_rs.missed) { + if (dev_priv->perf.spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", - dev_priv->perf.oa.spurious_report_rs.missed); + dev_priv->perf.spurious_report_rs.missed); } } -static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) +static void gen7_init_oa_buffer(struct i915_perf_stream *stream) { - u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + struct drm_i915_private *dev_priv = stream->dev_priv; + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); unsigned long flags; - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); /* Pre-DevBDW: OABUFFER must be set with counters off, * before OASTATUS1, but after OASTATUS2 */ I915_WRITE(GEN7_OASTATUS2, gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */ - dev_priv->perf.oa.oa_buffer.head = gtt_offset; + stream->oa_buffer.head = gtt_offset; I915_WRITE(GEN7_OABUFFER, gtt_offset); I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ /* Mark that we need updated tail pointers to read from... */ - dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR; - dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* On Haswell we have to track which OASTATUS1 flags we've * already seen since they can't be cleared while periodic * sampling is enabled. */ - dev_priv->perf.oa.gen7_latched_oastatus1 = 0; + dev_priv->perf.gen7_latched_oastatus1 = 0; /* NB: although the OA buffer will initially be allocated * zeroed via shmfs (and so this memset is redundant when @@ -1427,24 +1432,25 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) * the assumption that new reports are being written to zeroed * memory... */ - memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); + memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); /* Maybe make ->pollin per-stream state if we support multiple * concurrent streams in the future. */ - dev_priv->perf.oa.pollin = false; + stream->pollin = false; } -static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) +static void gen8_init_oa_buffer(struct i915_perf_stream *stream) { - u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + struct drm_i915_private *dev_priv = stream->dev_priv; + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); unsigned long flags; - spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); I915_WRITE(GEN8_OASTATUS, 0); I915_WRITE(GEN8_OAHEADPTR, gtt_offset); - dev_priv->perf.oa.oa_buffer.head = gtt_offset; + stream->oa_buffer.head = gtt_offset; I915_WRITE(GEN8_OABUFFER_UDW, 0); @@ -1461,17 +1467,17 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ - dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR; - dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; /* * Reset state used to recognise context switches, affecting which * reports we will forward to userspace while filtering for a single * context. */ - dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID; + stream->oa_buffer.last_ctx_id = INVALID_CTX_ID; - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* * NB: although the OA buffer will initially be allocated @@ -1485,22 +1491,23 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) * the assumption that new reports are being written to zeroed * memory... */ - memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); + memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); /* * Maybe make ->pollin per-stream state if we support multiple * concurrent streams in the future. */ - dev_priv->perf.oa.pollin = false; + stream->pollin = false; } -static int alloc_oa_buffer(struct drm_i915_private *dev_priv) +static int alloc_oa_buffer(struct i915_perf_stream *stream) { struct drm_i915_gem_object *bo; + struct drm_i915_private *dev_priv = stream->dev_priv; struct i915_vma *vma; int ret; - if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma)) + if (WARN_ON(stream->oa_buffer.vma)) return -ENODEV; ret = i915_mutex_lock_interruptible(&dev_priv->drm); @@ -1525,18 +1532,18 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv) ret = PTR_ERR(vma); goto err_unref; } - dev_priv->perf.oa.oa_buffer.vma = vma; + stream->oa_buffer.vma = vma; - dev_priv->perf.oa.oa_buffer.vaddr = + stream->oa_buffer.vaddr = i915_gem_object_pin_map(bo, I915_MAP_WB); - if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) { - ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr); + if (IS_ERR(stream->oa_buffer.vaddr)) { + ret = PTR_ERR(stream->oa_buffer.vaddr); goto err_unpin; } DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n", - i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma), - dev_priv->perf.oa.oa_buffer.vaddr); + i915_ggtt_offset(stream->oa_buffer.vma), + stream->oa_buffer.vaddr); goto unlock; @@ -1546,8 +1553,8 @@ err_unpin: err_unref: i915_gem_object_put(bo); - dev_priv->perf.oa.oa_buffer.vaddr = NULL; - dev_priv->perf.oa.oa_buffer.vma = NULL; + stream->oa_buffer.vaddr = NULL; + stream->oa_buffer.vma = NULL; unlock: mutex_unlock(&dev_priv->drm.struct_mutex); @@ -1623,8 +1630,10 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) return 0; } -static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) +static void hsw_disable_metric_set(struct i915_perf_stream *stream) { + struct drm_i915_private *dev_priv = stream->dev_priv; + I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) & ~GEN6_CSUNIT_CLOCK_GATE_DISABLE)); I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) | @@ -1634,6 +1643,27 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) ~GT_NOA_ENABLE)); } +static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config, + i915_reg_t reg) +{ + u32 mmio = i915_mmio_reg_offset(reg); + int i; + + /* + * This arbitrary default will select the 'EU FPU0 Pipeline + * Active' event. In the future it's anticipated that there + * will be an explicit 'No Event' we can select, but not yet... + */ + if (!oa_config) + return 0; + + for (i = 0; i < oa_config->flex_regs_len; i++) { + if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio) + return oa_config->flex_regs[i].value; + } + + return 0; +} /* * NB: It must always remain pointer safe to run this even if the OA unit * has been disabled. @@ -1642,13 +1672,14 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) * in the case that the OA unit has been disabled. */ static void -gen8_update_reg_state_unlocked(struct intel_context *ce, +gen8_update_reg_state_unlocked(struct i915_perf_stream *stream, + struct intel_context *ce, u32 *reg_state, const struct i915_oa_config *oa_config) { - struct drm_i915_private *i915 = ce->gem_context->i915; - u32 ctx_oactxctrl = i915->perf.oa.ctx_oactxctrl_offset; - u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset; + struct drm_i915_private *i915 = ce->engine->i915; + u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset; + u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; /* The MMIO offsets for Flex EU registers aren't contiguous */ i915_reg_t flex_regs[] = { EU_PERF_CNTL0, @@ -1662,38 +1693,143 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, int i; CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, - (i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | GEN8_OA_COUNTER_RESUME); for (i = 0; i < ARRAY_SIZE(flex_regs); i++) { - u32 state_offset = ctx_flexeu0 + i * 2; - u32 mmio = i915_mmio_reg_offset(flex_regs[i]); + CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i], + oa_config_flex_reg(oa_config, flex_regs[i])); + } - /* - * This arbitrary default will select the 'EU FPU0 Pipeline - * Active' event. In the future it's anticipated that there - * will be an explicit 'No Event' we can select, but not yet... - */ - u32 value = 0; + CTX_REG(reg_state, + CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, + intel_sseu_make_rpcs(i915, &ce->sseu)); +} - if (oa_config) { - u32 j; +struct flex { + i915_reg_t reg; + u32 offset; + u32 value; +}; - for (j = 0; j < oa_config->flex_regs_len; j++) { - if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) { - value = oa_config->flex_regs[j].value; - break; - } - } - } +static int +gen8_store_flex(struct i915_request *rq, + struct intel_context *ce, + const struct flex *flex, unsigned int count) +{ + u32 offset; + u32 *cs; + + cs = intel_ring_begin(rq, 4 * count); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; + do { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = offset + (flex->offset + 1) * sizeof(u32); + *cs++ = 0; + *cs++ = flex->value; + } while (flex++, --count); + + intel_ring_advance(rq, cs); + + return 0; +} + +static int +gen8_load_flex(struct i915_request *rq, + struct intel_context *ce, + const struct flex *flex, unsigned int count) +{ + u32 *cs; + + GEM_BUG_ON(!count || count > 63); + + cs = intel_ring_begin(rq, 2 * count + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(count); + do { + *cs++ = i915_mmio_reg_offset(flex->reg); + *cs++ = flex->value; + } while (flex++, --count); + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + return 0; +} + +static int gen8_modify_context(struct intel_context *ce, + const struct flex *flex, unsigned int count) +{ + struct i915_request *rq; + int err; + + lockdep_assert_held(&ce->pin_mutex); + + rq = i915_request_create(ce->engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + /* Serialise with the remote context */ + err = intel_context_prepare_remote_request(ce, rq); + if (err == 0) + err = gen8_store_flex(rq, ce, flex, count); + + i915_request_add(rq); + return err; +} + +static int gen8_modify_self(struct intel_context *ce, + const struct flex *flex, unsigned int count) +{ + struct i915_request *rq; + int err; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + err = gen8_load_flex(rq, ce, flex, count); + + i915_request_add(rq); + return err; +} + +static int gen8_configure_context(struct i915_gem_context *ctx, + struct flex *flex, unsigned int count) +{ + struct i915_gem_engines_iter it; + struct intel_context *ce; + int err = 0; + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + GEM_BUG_ON(ce == ce->engine->kernel_context); + + if (ce->engine->class != RENDER_CLASS) + continue; - CTX_REG(reg_state, state_offset, flex_regs[i], value); + err = intel_context_lock_pinned(ce); + if (err) + break; + + flex->value = intel_sseu_make_rpcs(ctx->i915, &ce->sseu); + + /* Otherwise OA settings will be set upon first use */ + if (intel_context_is_pinned(ce)) + err = gen8_modify_context(ce, flex, count); + + intel_context_unlock_pinned(ce); + if (err) + break; } + i915_gem_context_unlock_engines(ctx); - CTX_REG(reg_state, - CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - intel_sseu_make_rpcs(i915, &ce->sseu)); + return err; } /* @@ -1720,15 +1856,42 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, * * Note: it's only the RCS/Render context that has any OA state. */ -static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, +static int gen8_configure_all_contexts(struct i915_perf_stream *stream, const struct i915_oa_config *oa_config) { - unsigned int map_type = i915_coherent_map_type(dev_priv); + struct drm_i915_private *i915 = stream->dev_priv; + /* The MMIO offsets for Flex EU registers aren't contiguous */ + const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; +#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N)) + struct flex regs[] = { + { + GEN8_R_PWR_CLK_STATE, + CTX_R_PWR_CLK_STATE, + }, + { + GEN8_OACTXCONTROL, + i915->perf.ctx_oactxctrl_offset, + ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME) + }, + { EU_PERF_CNTL0, ctx_flexeuN(0) }, + { EU_PERF_CNTL1, ctx_flexeuN(1) }, + { EU_PERF_CNTL2, ctx_flexeuN(2) }, + { EU_PERF_CNTL3, ctx_flexeuN(3) }, + { EU_PERF_CNTL4, ctx_flexeuN(4) }, + { EU_PERF_CNTL5, ctx_flexeuN(5) }, + { EU_PERF_CNTL6, ctx_flexeuN(6) }, + }; +#undef ctx_flexeuN + struct intel_engine_cs *engine; struct i915_gem_context *ctx; - struct i915_request *rq; - int ret; + int i; - lockdep_assert_held(&dev_priv->drm.struct_mutex); + for (i = 2; i < ARRAY_SIZE(regs); i++) + regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); + + lockdep_assert_held(&i915->drm.struct_mutex); /* * The OA register config is setup through the context image. This image @@ -1740,58 +1903,41 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, * this might leave small interval of time where the OA unit is * configured at an invalid sampling period. * - * So far the best way to work around this issue seems to be draining - * the GPU from any submitted work. + * Note that since we emit all requests from a single ring, there + * is still an implicit global barrier here that may cause a high + * priority context to wait for an otherwise independent low priority + * context. Contexts idle at the time of reconfiguration are not + * trapped behind the barrier. */ - ret = i915_gem_wait_for_idle(dev_priv, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - - /* Update all contexts now that we've stalled the submission. */ - list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - struct i915_gem_engines_iter it; - struct intel_context *ce; - - for_each_gem_engine(ce, - i915_gem_context_lock_engines(ctx), - it) { - u32 *regs; - - if (ce->engine->class != RENDER_CLASS) - continue; - - /* OA settings will be set upon first use */ - if (!ce->state) - continue; - - regs = i915_gem_object_pin_map(ce->state->obj, - map_type); - if (IS_ERR(regs)) { - i915_gem_context_unlock_engines(ctx); - return PTR_ERR(regs); - } - - ce->state->obj->mm.dirty = true; - regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); + list_for_each_entry(ctx, &i915->contexts.list, link) { + int err; - gen8_update_reg_state_unlocked(ce, regs, oa_config); + if (ctx == i915->kernel_context) + continue; - i915_gem_object_unpin_map(ce->state->obj); - } - i915_gem_context_unlock_engines(ctx); + err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs)); + if (err) + return err; } /* - * Apply the configuration by doing one context restore of the edited - * context image. + * After updating all other contexts, we need to modify ourselves. + * If we don't modify the kernel_context, we do not get events while + * idle. */ - rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context); - if (IS_ERR(rq)) - return PTR_ERR(rq); + for_each_uabi_engine(engine, i915) { + struct intel_context *ce = engine->kernel_context; + int err; - i915_request_add(rq); + if (engine->class != RENDER_CLASS) + continue; + + regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); + + err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs)); + if (err) + return err; + } return 0; } @@ -1836,7 +1982,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = gen8_configure_all_contexts(dev_priv, oa_config); + ret = gen8_configure_all_contexts(stream, oa_config); if (ret) return ret; @@ -1849,19 +1995,23 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) return 0; } -static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) +static void gen8_disable_metric_set(struct i915_perf_stream *stream) { + struct drm_i915_private *dev_priv = stream->dev_priv; + /* Reset all contexts' slices/subslices configurations. */ - gen8_configure_all_contexts(dev_priv, NULL); + gen8_configure_all_contexts(stream, NULL); I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & ~GT_NOA_ENABLE)); } -static void gen10_disable_metric_set(struct drm_i915_private *dev_priv) +static void gen10_disable_metric_set(struct i915_perf_stream *stream) { + struct drm_i915_private *dev_priv = stream->dev_priv; + /* Reset all contexts' slices/subslices configurations. */ - gen8_configure_all_contexts(dev_priv, NULL); + gen8_configure_all_contexts(stream, NULL); /* Make sure we disable noa to save power. */ I915_WRITE(RPM_CONFIG1, @@ -1872,10 +2022,10 @@ static void gen7_oa_enable(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; struct i915_gem_context *ctx = stream->ctx; - u32 ctx_id = dev_priv->perf.oa.specific_ctx_id; - bool periodic = dev_priv->perf.oa.periodic; - u32 period_exponent = dev_priv->perf.oa.period_exponent; - u32 report_format = dev_priv->perf.oa.oa_buffer.format; + u32 ctx_id = stream->specific_ctx_id; + bool periodic = stream->periodic; + u32 period_exponent = stream->period_exponent; + u32 report_format = stream->oa_buffer.format; /* * Reset buf pointers so we don't forward reports from before now. @@ -1886,7 +2036,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream) * on the assumption that certain fields are written to zeroed * memory which this helps maintains. */ - gen7_init_oa_buffer(dev_priv); + gen7_init_oa_buffer(stream); I915_WRITE(GEN7_OACONTROL, (ctx_id & GEN7_OACONTROL_CTX_MASK) | @@ -1901,7 +2051,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream) static void gen8_oa_enable(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - u32 report_format = dev_priv->perf.oa.oa_buffer.format; + u32 report_format = stream->oa_buffer.format; /* * Reset buf pointers so we don't forward reports from before now. @@ -1912,7 +2062,7 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) * on the assumption that certain fields are written to zeroed * memory which this helps maintains. */ - gen8_init_oa_buffer(dev_priv); + gen8_init_oa_buffer(stream); /* * Note: we don't rely on the hardware to perform single context @@ -1937,10 +2087,10 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - dev_priv->perf.oa.ops.oa_enable(stream); + dev_priv->perf.ops.oa_enable(stream); - if (dev_priv->perf.oa.periodic) - hrtimer_start(&dev_priv->perf.oa.poll_check_timer, + if (stream->periodic) + hrtimer_start(&stream->poll_check_timer, ns_to_ktime(POLL_PERIOD), HRTIMER_MODE_REL_PINNED); } @@ -1979,10 +2129,10 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - dev_priv->perf.oa.ops.oa_disable(stream); + dev_priv->perf.ops.oa_disable(stream); - if (dev_priv->perf.oa.periodic) - hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer); + if (stream->periodic) + hrtimer_cancel(&stream->poll_check_timer); } static const struct i915_perf_stream_ops i915_oa_stream_ops = { @@ -2034,7 +2184,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } - if (!dev_priv->perf.oa.ops.enable_metric_set) { + if (!dev_priv->perf.ops.enable_metric_set) { DRM_DEBUG("OA unit not supported\n"); return -ENODEV; } @@ -2043,7 +2193,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * counter reports and marshal to the appropriate client * we currently only allow exclusive access */ - if (dev_priv->perf.oa.exclusive_stream) { + if (dev_priv->perf.exclusive_stream) { DRM_DEBUG("OA unit already in use\n"); return -EBUSY; } @@ -2053,43 +2203,23 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } - /* We set up some ratelimit state to potentially throttle any _NOTES - * about spurious, invalid OA reports which we don't forward to - * userspace. - * - * The initialization is associated with opening the stream (not driver - * init) considering we print a _NOTE about any throttling when closing - * the stream instead of waiting until driver _fini which no one would - * ever see. - * - * Using the same limiting factors as printk_ratelimit() - */ - ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs, - 5 * HZ, 10); - /* Since we use a DRM_NOTE for spurious reports it would be - * inconsistent to let __ratelimit() automatically print a warning for - * throttling. - */ - ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs, - RATELIMIT_MSG_ON_RELEASE); - stream->sample_size = sizeof(struct drm_i915_perf_record_header); - format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size; + format_size = dev_priv->perf.oa_formats[props->oa_format].size; stream->sample_flags |= SAMPLE_OA_REPORT; stream->sample_size += format_size; - dev_priv->perf.oa.oa_buffer.format_size = format_size; - if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0)) + stream->oa_buffer.format_size = format_size; + if (WARN_ON(stream->oa_buffer.format_size == 0)) return -EINVAL; - dev_priv->perf.oa.oa_buffer.format = - dev_priv->perf.oa.oa_formats[props->oa_format].format; + stream->oa_buffer.format = + dev_priv->perf.oa_formats[props->oa_format].format; - dev_priv->perf.oa.periodic = props->oa_periodic; - if (dev_priv->perf.oa.periodic) - dev_priv->perf.oa.period_exponent = props->oa_period_exponent; + stream->periodic = props->oa_periodic; + if (stream->periodic) + stream->period_exponent = props->oa_period_exponent; if (stream->ctx) { ret = oa_get_render_ctx_id(stream); @@ -2120,7 +2250,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - ret = alloc_oa_buffer(dev_priv); + ret = alloc_oa_buffer(stream); if (ret) goto err_oa_buf_alloc; @@ -2129,9 +2259,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, goto err_lock; stream->ops = &i915_oa_stream_ops; - dev_priv->perf.oa.exclusive_stream = stream; + dev_priv->perf.exclusive_stream = stream; - ret = dev_priv->perf.oa.ops.enable_metric_set(stream); + ret = dev_priv->perf.ops.enable_metric_set(stream); if (ret) { DRM_DEBUG("Unable to enable metric set\n"); goto err_enable; @@ -2139,15 +2269,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, mutex_unlock(&dev_priv->drm.struct_mutex); + hrtimer_init(&stream->poll_check_timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + stream->poll_check_timer.function = oa_poll_check_timer_cb; + init_waitqueue_head(&stream->poll_wq); + spin_lock_init(&stream->oa_buffer.ptr_lock); + return 0; err_enable: - dev_priv->perf.oa.exclusive_stream = NULL; - dev_priv->perf.oa.ops.disable_metric_set(dev_priv); + dev_priv->perf.exclusive_stream = NULL; + dev_priv->perf.ops.disable_metric_set(stream); mutex_unlock(&dev_priv->drm.struct_mutex); err_lock: - free_oa_buffer(dev_priv); + free_oa_buffer(stream); err_oa_buf_alloc: put_oa_config(dev_priv, stream->oa_config); @@ -2171,9 +2307,9 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) return; - stream = engine->i915->perf.oa.exclusive_stream; + stream = engine->i915->perf.exclusive_stream; if (stream) - gen8_update_reg_state_unlocked(ce, regs, stream->oa_config); + gen8_update_reg_state_unlocked(stream, ce, regs, stream->oa_config); } /** @@ -2289,7 +2425,7 @@ static ssize_t i915_perf_read(struct file *file, /* Maybe make ->pollin per-stream state if we support multiple * concurrent streams in the future. */ - dev_priv->perf.oa.pollin = false; + stream->pollin = false; } return ret; @@ -2297,13 +2433,12 @@ static ssize_t i915_perf_read(struct file *file, static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) { - struct drm_i915_private *dev_priv = - container_of(hrtimer, typeof(*dev_priv), - perf.oa.poll_check_timer); + struct i915_perf_stream *stream = + container_of(hrtimer, typeof(*stream), poll_check_timer); - if (oa_buffer_check_unlocked(dev_priv)) { - dev_priv->perf.oa.pollin = true; - wake_up(&dev_priv->perf.oa.poll_wq); + if (oa_buffer_check_unlocked(stream)) { + stream->pollin = true; + wake_up(&stream->poll_wq); } hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD)); @@ -2342,7 +2477,7 @@ static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, * the hrtimer/oa_poll_check_timer_cb to notify us when there are * samples to read. */ - if (dev_priv->perf.oa.pollin) + if (stream->pollin) events |= EPOLLIN; return events; @@ -2768,7 +2903,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, value); return -EINVAL; } - if (!dev_priv->perf.oa.oa_formats[value].size) { + if (!dev_priv->perf.oa_formats[value].size) { DRM_DEBUG("Unsupported OA report format %llu\n", value); return -EINVAL; @@ -2912,7 +3047,7 @@ void i915_perf_register(struct drm_i915_private *dev_priv) if (!dev_priv->perf.metrics_kobj) goto exit; - sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr); + sysfs_attr_init(&dev_priv->perf.test_config.sysfs_metric_id.attr); if (INTEL_GEN(dev_priv) >= 11) { i915_perf_load_test_config_icl(dev_priv); @@ -2947,15 +3082,15 @@ void i915_perf_register(struct drm_i915_private *dev_priv) i915_perf_load_test_config_hsw(dev_priv); } - if (dev_priv->perf.oa.test_config.id == 0) + if (dev_priv->perf.test_config.id == 0) goto sysfs_error; ret = sysfs_create_group(dev_priv->perf.metrics_kobj, - &dev_priv->perf.oa.test_config.sysfs_metric); + &dev_priv->perf.test_config.sysfs_metric); if (ret) goto sysfs_error; - atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1); + atomic_set(&dev_priv->perf.test_config.ref_count, 1); goto exit; @@ -2982,7 +3117,7 @@ void i915_perf_unregister(struct drm_i915_private *dev_priv) return; sysfs_remove_group(dev_priv->perf.metrics_kobj, - &dev_priv->perf.oa.test_config.sysfs_metric); + &dev_priv->perf.test_config.sysfs_metric); kobject_put(dev_priv->perf.metrics_kobj); dev_priv->perf.metrics_kobj = NULL; @@ -3227,7 +3362,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, oa_config->mux_regs_len = args->n_mux_regs; oa_config->mux_regs = alloc_oa_regs(dev_priv, - dev_priv->perf.oa.ops.is_valid_mux_reg, + dev_priv->perf.ops.is_valid_mux_reg, u64_to_user_ptr(args->mux_regs_ptr), args->n_mux_regs); @@ -3240,7 +3375,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, oa_config->b_counter_regs_len = args->n_boolean_regs; oa_config->b_counter_regs = alloc_oa_regs(dev_priv, - dev_priv->perf.oa.ops.is_valid_b_counter_reg, + dev_priv->perf.ops.is_valid_b_counter_reg, u64_to_user_ptr(args->boolean_regs_ptr), args->n_boolean_regs); @@ -3259,7 +3394,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, oa_config->flex_regs_len = args->n_flex_regs; oa_config->flex_regs = alloc_oa_regs(dev_priv, - dev_priv->perf.oa.ops.is_valid_flex_reg, + dev_priv->perf.ops.is_valid_flex_reg, u64_to_user_ptr(args->flex_regs_ptr), args->n_flex_regs); @@ -3426,20 +3561,20 @@ static struct ctl_table dev_root[] = { void i915_perf_init(struct drm_i915_private *dev_priv) { if (IS_HASWELL(dev_priv)) { - dev_priv->perf.oa.ops.is_valid_b_counter_reg = + dev_priv->perf.ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; - dev_priv->perf.oa.ops.is_valid_mux_reg = + dev_priv->perf.ops.is_valid_mux_reg = hsw_is_valid_mux_addr; - dev_priv->perf.oa.ops.is_valid_flex_reg = NULL; - dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set; - dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set; - dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable; - dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable; - dev_priv->perf.oa.ops.read = gen7_oa_read; - dev_priv->perf.oa.ops.oa_hw_tail_read = + dev_priv->perf.ops.is_valid_flex_reg = NULL; + dev_priv->perf.ops.enable_metric_set = hsw_enable_metric_set; + dev_priv->perf.ops.disable_metric_set = hsw_disable_metric_set; + dev_priv->perf.ops.oa_enable = gen7_oa_enable; + dev_priv->perf.ops.oa_disable = gen7_oa_disable; + dev_priv->perf.ops.read = gen7_oa_read; + dev_priv->perf.ops.oa_hw_tail_read = gen7_oa_hw_tail_read; - dev_priv->perf.oa.oa_formats = hsw_oa_formats; + dev_priv->perf.oa_formats = hsw_oa_formats; } else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { /* Note: that although we could theoretically also support the * legacy ringbuffer mode on BDW (and earlier iterations of @@ -3447,71 +3582,65 @@ void i915_perf_init(struct drm_i915_private *dev_priv) * worth the complexity to maintain now that BDW+ enable * execlist mode by default. */ - dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; + dev_priv->perf.oa_formats = gen8_plus_oa_formats; - dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable; - dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable; - dev_priv->perf.oa.ops.read = gen8_oa_read; - dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read; + dev_priv->perf.ops.oa_enable = gen8_oa_enable; + dev_priv->perf.ops.oa_disable = gen8_oa_disable; + dev_priv->perf.ops.read = gen8_oa_read; + dev_priv->perf.ops.oa_hw_tail_read = gen8_oa_hw_tail_read; if (IS_GEN_RANGE(dev_priv, 8, 9)) { - dev_priv->perf.oa.ops.is_valid_b_counter_reg = + dev_priv->perf.ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; - dev_priv->perf.oa.ops.is_valid_mux_reg = + dev_priv->perf.ops.is_valid_mux_reg = gen8_is_valid_mux_addr; - dev_priv->perf.oa.ops.is_valid_flex_reg = + dev_priv->perf.ops.is_valid_flex_reg = gen8_is_valid_flex_addr; if (IS_CHERRYVIEW(dev_priv)) { - dev_priv->perf.oa.ops.is_valid_mux_reg = + dev_priv->perf.ops.is_valid_mux_reg = chv_is_valid_mux_addr; } - dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; - dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set; + dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set; + dev_priv->perf.ops.disable_metric_set = gen8_disable_metric_set; if (IS_GEN(dev_priv, 8)) { - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; + dev_priv->perf.ctx_oactxctrl_offset = 0x120; + dev_priv->perf.ctx_flexeu0_offset = 0x2ce; - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); + dev_priv->perf.gen8_valid_ctx_bit = BIT(25); } else { - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + dev_priv->perf.ctx_oactxctrl_offset = 0x128; + dev_priv->perf.ctx_flexeu0_offset = 0x3de; - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); + dev_priv->perf.gen8_valid_ctx_bit = BIT(16); } } else if (IS_GEN_RANGE(dev_priv, 10, 11)) { - dev_priv->perf.oa.ops.is_valid_b_counter_reg = + dev_priv->perf.ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; - dev_priv->perf.oa.ops.is_valid_mux_reg = + dev_priv->perf.ops.is_valid_mux_reg = gen10_is_valid_mux_addr; - dev_priv->perf.oa.ops.is_valid_flex_reg = + dev_priv->perf.ops.is_valid_flex_reg = gen8_is_valid_flex_addr; - dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; - dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set; + dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set; + dev_priv->perf.ops.disable_metric_set = gen10_disable_metric_set; if (IS_GEN(dev_priv, 10)) { - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + dev_priv->perf.ctx_oactxctrl_offset = 0x128; + dev_priv->perf.ctx_flexeu0_offset = 0x3de; } else { - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x124; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x78e; + dev_priv->perf.ctx_oactxctrl_offset = 0x124; + dev_priv->perf.ctx_flexeu0_offset = 0x78e; } - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); + dev_priv->perf.gen8_valid_ctx_bit = BIT(16); } } - if (dev_priv->perf.oa.ops.enable_metric_set) { - hrtimer_init(&dev_priv->perf.oa.poll_check_timer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); - dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb; - init_waitqueue_head(&dev_priv->perf.oa.poll_wq); - + if (dev_priv->perf.ops.enable_metric_set) { INIT_LIST_HEAD(&dev_priv->perf.streams); mutex_init(&dev_priv->perf.lock); - spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); oa_sample_rate_hard_limit = 1000 * (RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2); @@ -3520,6 +3649,25 @@ void i915_perf_init(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->perf.metrics_lock); idr_init(&dev_priv->perf.metrics_idr); + /* We set up some ratelimit state to potentially throttle any + * _NOTES about spurious, invalid OA reports which we don't + * forward to userspace. + * + * We print a _NOTE about any throttling when closing the + * stream instead of waiting until driver _fini which no one + * would ever see. + * + * Using the same limiting factors as printk_ratelimit() + */ + ratelimit_state_init(&dev_priv->perf.spurious_report_rs, + 5 * HZ, 10); + /* Since we use a DRM_NOTE for spurious reports it would be + * inconsistent to let __ratelimit() automatically print a + * warning for throttling. + */ + ratelimit_set_flags(&dev_priv->perf.spurious_report_rs, + RATELIMIT_MSG_ON_RELEASE); + dev_priv->perf.initialized = true; } } @@ -3548,7 +3696,7 @@ void i915_perf_fini(struct drm_i915_private *dev_priv) unregister_sysctl_table(dev_priv->perf.sysctl_header); - memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops)); + memset(&dev_priv->perf.ops, 0, sizeof(dev_priv->perf.ops)); dev_priv->perf.initialized = false; } |