Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c  1125
1 file changed, 800 insertions(+), 325 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d925a1035c9d..31455eceeb0c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -133,12 +133,11 @@
*/
#include <linux/interrupt.h>
-#include "gem/i915_gem_context.h"
-
#include "i915_drv.h"
#include "i915_perf.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
+#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
@@ -238,7 +237,8 @@ static void execlists_init_reg_state(u32 *reg_state,
bool close);
static void
__execlists_update_reg_state(const struct intel_context *ce,
- const struct intel_engine_cs *engine);
+ const struct intel_engine_cs *engine,
+ u32 head);
static void mark_eio(struct i915_request *rq)
{
@@ -489,17 +489,23 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
return desc;
}
-static u32 *set_offsets(u32 *regs,
+static inline unsigned int dword_in_page(void *addr)
+{
+ return offset_in_page(addr) / sizeof(u32);
+}
+
+static void set_offsets(u32 *regs,
const u8 *data,
- const struct intel_engine_cs *engine)
+ const struct intel_engine_cs *engine,
+ bool clear)
#define NOP(x) (BIT(7) | (x))
-#define LRI(count, flags) ((flags) << 6 | (count))
+#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
(((x) >> 2) & 0x7f)
-#define END() 0
+#define END(x) 0, (x)
{
const u32 base = engine->mmio_base;
@@ -507,7 +513,10 @@ static u32 *set_offsets(u32 *regs,
u8 count, flags;
if (*data & BIT(7)) { /* skip */
- regs += *data++ & ~BIT(7);
+ count = *data++ & ~BIT(7);
+ if (clear)
+ memset32(regs, MI_NOOP, count);
+ regs += count;
continue;
}
@@ -533,12 +542,25 @@ static u32 *set_offsets(u32 *regs,
offset |= v & ~BIT(7);
} while (v & BIT(7));
- *regs = base + (offset << 2);
+ regs[0] = base + (offset << 2);
+ if (clear)
+ regs[1] = 0;
regs += 2;
} while (--count);
}
- return regs;
+ if (clear) {
+ u8 count = *++data;
+
+ /* Clear past the tail for HW access */
+ GEM_BUG_ON(dword_in_page(regs) > count);
+ memset32(regs, MI_NOOP, count - dword_in_page(regs));
+
+ /* Close the batch; used mainly by live_lrc_layout() */
+ *regs = MI_BATCH_BUFFER_END;
+ if (INTEL_GEN(engine->i915) >= 10)
+ *regs |= BIT(0);
+ }
}
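The register tables below (gen8_xcs_offsets and friends) are byte-coded for set_offsets() above: NOP(n) sets BIT(7) and packs a dword skip count into the low 7 bits, LRI(count, flags) packs an MI_LOAD_REGISTER_IMM header, register offsets are emitted 7 bits per byte with BIT(7) as a continuation flag (REG for offsets below 0x200, REG16 for anything up to 0x10000), and END(x) now carries the total image size in dwords so the tail of the page can be cleared. As a rough, standalone illustration of the encoding only (this is not the kernel's set_offsets(); the table and mmio base are made up), a userspace decoder could look like:

#include <stdint.h>
#include <stdio.h>

#define BIT(n)          (1u << (n))
#define NOP(x)          (BIT(7) | (x))
#define LRI(count, fl)  ((fl) << 6 | (count))
#define POSTED          BIT(0)
#define REG(x)          ((x) >> 2)
#define REG16(x)        (((x) >> 9) | BIT(7)), (((x) >> 2) & 0x7f)
#define END(x)          0, (x)

/* A made-up table in the same byte format as gen8_xcs_offsets[]. */
static const uint8_t demo_offsets[] = {
        NOP(1),
        LRI(3, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        END(16)
};

/* Decode a byte-coded table and print the MMIO offsets it names. */
static void decode_table(const uint8_t *data, uint32_t mmio_base)
{
        unsigned int dw = 0;    /* dword index into the context image */

        while (*data) {
                if (*data & BIT(7)) {           /* NOP(n): skip n dwords */
                        dw += *data++ & ~BIT(7);
                        continue;
                }

                uint8_t count = *data & 0x3f;   /* LRI header */
                uint8_t flags = *data++ >> 6;

                printf("dw %2u: MI_LOAD_REGISTER_IMM(%u)%s\n",
                       dw++, count, (flags & POSTED) ? " [posted]" : "");

                while (count--) {
                        uint32_t offset = 0;
                        uint8_t v;

                        do {    /* 7 bits per byte, BIT(7) = continue */
                                v = *data++;
                                offset = (offset << 7) | (v & ~BIT(7));
                        } while (v & BIT(7));

                        printf("dw %2u: reg 0x%05x\n",
                               dw, (unsigned)(mmio_base + (offset << 2)));
                        dw += 2;                /* offset + value pair */
                }
        }

        /* END(x): the 0 terminator is followed by the image size in dwords */
        printf("context image is %u dwords\n", (unsigned)data[1]);
}

int main(void)
{
        decode_table(demo_offsets, 0x2000);     /* 0x2000: e.g. the render ring base */
        return 0;
}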
static const u8 gen8_xcs_offsets[] = {
@@ -573,7 +595,7 @@ static const u8 gen8_xcs_offsets[] = {
REG16(0x200),
REG(0x028),
- END(),
+ END(80)
};
static const u8 gen9_xcs_offsets[] = {
@@ -657,7 +679,7 @@ static const u8 gen9_xcs_offsets[] = {
REG16(0x67c),
REG(0x068),
- END(),
+ END(176)
};
static const u8 gen12_xcs_offsets[] = {
@@ -689,7 +711,7 @@ static const u8 gen12_xcs_offsets[] = {
REG16(0x274),
REG16(0x270),
- END(),
+ END(80)
};
static const u8 gen8_rcs_offsets[] = {
@@ -726,7 +748,91 @@ static const u8 gen8_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END(),
+ END(80)
+};
+
+static const u8 gen9_rcs_offsets[] = {
+ NOP(1),
+ LRI(14, POSTED),
+ REG16(0x244),
+ REG(0x34),
+ REG(0x30),
+ REG(0x38),
+ REG(0x3c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+
+ NOP(3),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(1, 0),
+ REG(0xc8),
+
+ NOP(13),
+ LRI(44, POSTED),
+ REG(0x28),
+ REG(0x9c),
+ REG(0xc0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x68),
+
+ END(176)
};
static const u8 gen11_rcs_offsets[] = {
@@ -767,7 +873,7 @@ static const u8 gen11_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END(),
+ END(80)
};
static const u8 gen12_rcs_offsets[] = {
@@ -808,7 +914,7 @@ static const u8 gen12_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END(),
+ END(80)
};
#undef END
@@ -833,6 +939,8 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
return gen12_rcs_offsets;
else if (INTEL_GEN(engine->i915) >= 11)
return gen11_rcs_offsets;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return gen9_rcs_offsets;
else
return gen8_rcs_offsets;
} else {
@@ -878,9 +986,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
list_move(&rq->sched.link, pl);
+ set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+
active = rq;
} else {
- struct intel_engine_cs *owner = rq->hw_context->engine;
+ struct intel_engine_cs *owner = rq->context->engine;
/*
* Decouple the virtual breadcrumb before moving it
@@ -983,6 +1093,58 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
+static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x60;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x54;
+ else if (engine->class == RENDER_CLASS)
+ return 0x58;
+ else
+ return -1;
+}
+
+static void
+execlists_check_context(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
+{
+ const struct intel_ring *ring = ce->ring;
+ u32 *regs = ce->lrc_reg_state;
+ bool valid = true;
+ int x;
+
+ if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
+ pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
+ engine->name,
+ regs[CTX_RING_START],
+ i915_ggtt_offset(ring->vma));
+ regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
+ valid = false;
+ }
+
+ if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
+ (RING_CTL_SIZE(ring->size) | RING_VALID)) {
+ pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
+ engine->name,
+ regs[CTX_RING_CTL],
+ (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ valid = false;
+ }
+
+ x = lrc_ring_mi_mode(engine);
+ if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
+ pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
+ engine->name, regs[x + 1]);
+ regs[x + 1] &= ~STOP_RING;
+ regs[x + 1] |= STOP_RING << 16;
+ valid = false;
+ }
+
+ WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
+}
+
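The STOP_RING fixup above relies on the hardware's masked-register convention: for registers such as RING_MI_MODE, the upper 16 bits of a written value select which of the lower 16 bits take effect, which is why the context image ends up with STOP_RING cleared in the low half and set in the mask half. The same convention appears later in this diff in init_common_reg_state() via _MASKED_BIT_ENABLE()/_MASKED_BIT_DISABLE(). A minimal standalone sketch of the idea (the helper names below are stand-ins, not the i915 macros):

#include <assert.h>
#include <stdint.h>

/* Stand-ins for the masked-bit helpers used elsewhere in this file. */
#define MASKED_FIELD(mask, value)   ((uint32_t)(mask) << 16 | (value))
#define MASKED_BIT_ENABLE(bit)      MASKED_FIELD((bit), (bit))
#define MASKED_BIT_DISABLE(bit)     MASKED_FIELD((bit), 0)

#define STOP_RING                   (1u << 8)   /* bit 8 of MI_MODE */

/* Model of how the HW applies a write to a masked register:
 * only bits whose mask (upper 16 bits) is set are updated. */
static uint32_t masked_write(uint32_t reg, uint32_t value)
{
        uint32_t mask = value >> 16;

        return (reg & ~mask) | (value & mask & 0xffff);
}

int main(void)
{
        uint32_t mi_mode = 0;

        /* What a "stop the ring" request looks like. */
        mi_mode = masked_write(mi_mode, MASKED_BIT_ENABLE(STOP_RING));
        assert(mi_mode & STOP_RING);

        /* execlists_check_context() stores STOP_RING cleared in the low
         * half and set in the mask half; restoring that value through a
         * masked write leaves the ring running again. */
        mi_mode = masked_write(mi_mode, MASKED_BIT_DISABLE(STOP_RING));
        assert(!(mi_mode & STOP_RING));

        return 0;
}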
static void restore_default_state(struct intel_context *ce,
struct intel_engine_cs *engine)
{
@@ -999,7 +1161,7 @@ static void restore_default_state(struct intel_context *ce,
static void reset_active(struct i915_request *rq,
struct intel_engine_cs *engine)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
u32 head;
/*
@@ -1017,20 +1179,19 @@ static void reset_active(struct i915_request *rq,
* remain correctly ordered. And we defer to __i915_request_submit()
* so that all asynchronous waits are correctly handled.
*/
- GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
- __func__, engine->name, rq->fence.context, rq->fence.seqno);
+ ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
+ rq->fence.context, rq->fence.seqno);
/* On resubmission of the active request, payload will be scrubbed */
if (i915_request_completed(rq))
head = rq->tail;
else
head = active_request(ce->timeline, rq)->head;
- ce->ring->head = intel_ring_wrap(ce->ring, head);
- intel_ring_update_space(ce->ring);
+ head = intel_ring_wrap(ce->ring, head);
/* Scrub the context image to prevent replaying the previous batch */
restore_default_state(ce, engine);
- __execlists_update_reg_state(ce, engine);
+ __execlists_update_reg_state(ce, engine, head);
/* We've switched away, so this should be a no-op, but intent matters */
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
@@ -1040,13 +1201,16 @@ static inline struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
struct intel_engine_cs * const engine = rq->engine;
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
intel_context_get(ce);
- if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
+ if (unlikely(intel_context_is_banned(ce)))
reset_active(rq, engine);
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ execlists_check_context(ce, engine);
+
if (ce->tag) {
/* Use a fixed tag for OA and friends */
ce->lrc_desc |= (u64)ce->tag << 32;
@@ -1054,12 +1218,12 @@ __execlists_schedule_in(struct i915_request *rq)
/* We don't need a strict matching tag, just different values */
ce->lrc_desc &= ~GENMASK_ULL(47, 37);
ce->lrc_desc |=
- (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
+ (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
GEN11_SW_CTX_ID_SHIFT;
BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
}
- intel_gt_pm_get(engine->gt);
+ __intel_gt_pm_get(engine->gt);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(engine);
@@ -1069,7 +1233,7 @@ __execlists_schedule_in(struct i915_request *rq)
static inline struct i915_request *
execlists_schedule_in(struct i915_request *rq, int idx)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
struct intel_engine_cs *old;
GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
@@ -1100,7 +1264,7 @@ static inline void
__execlists_schedule_out(struct i915_request *rq,
struct intel_engine_cs * const engine)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
/*
* NB process_csb() is not under the engine->active.lock and hence
@@ -1138,7 +1302,7 @@ __execlists_schedule_out(struct i915_request *rq,
static inline void
execlists_schedule_out(struct i915_request *rq)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
struct intel_engine_cs *cur, *old;
trace_i915_request_out(rq);
@@ -1155,9 +1319,9 @@ execlists_schedule_out(struct i915_request *rq)
static u64 execlists_update_context(struct i915_request *rq)
{
- struct intel_context *ce = rq->hw_context;
+ struct intel_context *ce = rq->context;
u64 desc = ce->lrc_desc;
- u32 tail;
+ u32 tail, prev;
/*
* WaIdleLiteRestore:bdw,skl
@@ -1170,9 +1334,15 @@ static u64 execlists_update_context(struct i915_request *rq)
* subsequent resubmissions (for lite restore). Should that fail us,
* and we try and submit the same tail again, force the context
* reload.
+ *
+ * If we need to return to a preempted context, we need to skip the
+ * lite-restore and force it to reload the RING_TAIL. Otherwise, the
+ * HW has a tendency to ignore us rewinding the TAIL to the end of
+ * an earlier request.
*/
tail = intel_ring_set_tail(rq->ring, rq->tail);
- if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+ prev = ce->lrc_reg_state[CTX_RING_TAIL];
+ if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
desc |= CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state[CTX_RING_TAIL] = tail;
rq->tail = rq->wa_tail;
@@ -1186,17 +1356,8 @@ static u64 execlists_update_context(struct i915_request *rq)
* may not be visible to the HW prior to the completion of the UC
* register write and that we may begin execution from the context
* before its image is complete leading to invalid PD chasing.
- *
- * Furthermore, Braswell, at least, wants a full mb to be sure that
- * the writes are coherent in memory (visible to the GPU) prior to
- * execution, and not just visible to other CPUs (as is the result of
- * wmb).
*/
- mb();
-
- /* Wa_1607138340:tgl */
- if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0))
- desc |= CTX_DESC_FORCE_RESTORE;
+ wmb();
ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
return desc;
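The lite-restore check above no longer compares the new tail for strict equality with the stored CTX_RING_TAIL; it asks whether the new tail would move backwards relative to the previously submitted one and forces a full restore if so, because the hardware tends to ignore a RING_TAIL that is rewound during a lite-restore. A standalone sketch of that kind of signed comparison on a power-of-two ring (this mirrors the intent of intel_ring_direction(), not its exact implementation):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Signed "direction" between two offsets on a power-of-two ring:
 * positive if @next is ahead of @prev (by less than half the ring),
 * negative if it is behind, zero if equal.
 */
static int ring_direction(uint32_t size, uint32_t next, uint32_t prev)
{
        uint32_t delta = (next - prev) & (size - 1);

        if (!delta)
                return 0;

        return delta < size / 2 ? 1 : -1;
}

int main(void)
{
        const uint32_t size = 0x1000;   /* 4 KiB ring, power of two */

        /* Normal forward progress: the new tail is ahead of the old one. */
        assert(ring_direction(size, 0x140, 0x100) > 0);

        /* Rewinding the tail (e.g. back to the end of an earlier request
         * after a preemption) goes "backwards", so the submission path
         * sets CTX_DESC_FORCE_RESTORE instead of risking a lite-restore. */
        assert(ring_direction(size, 0x0c0, 0x100) < 0);

        /* Wrapping past the end of the ring still counts as forwards. */
        assert(ring_direction(size, 0x020, 0xfe0) > 0);

        printf("ok\n");
        return 0;
}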
@@ -1224,15 +1385,14 @@ trace_ports(const struct intel_engine_execlists *execlists,
if (!ports[0])
return;
- GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
- engine->name, msg,
- ports[0]->fence.context,
- ports[0]->fence.seqno,
- i915_request_completed(ports[0]) ? "!" :
- i915_request_started(ports[0]) ? "*" :
- "",
- ports[1] ? ports[1]->fence.context : 0,
- ports[1] ? ports[1]->fence.seqno : 0);
+ ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
+ ports[0]->fence.context,
+ ports[0]->fence.seqno,
+ i915_request_completed(ports[0]) ? "!" :
+ i915_request_started(ports[0]) ? "*" :
+ "",
+ ports[1] ? ports[1]->fence.context : 0,
+ ports[1] ? ports[1]->fence.seqno : 0);
}
static __maybe_unused bool
@@ -1256,33 +1416,56 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
for (port = execlists->pending; (rq = *port); port++) {
- if (ce == rq->hw_context) {
- GEM_TRACE_ERR("Duplicate context in pending[%zd]\n",
+ unsigned long flags;
+ bool ok = true;
+
+ GEM_BUG_ON(!kref_read(&rq->fence.refcount));
+ GEM_BUG_ON(!i915_request_is_active(rq));
+
+ if (ce == rq->context) {
+ GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
return false;
}
+ ce = rq->context;
- ce = rq->hw_context;
- if (i915_request_completed(rq))
+ /* Hold tightly onto the lock to prevent concurrent retires! */
+ if (!spin_trylock_irqsave(&rq->lock, flags))
continue;
- if (i915_active_is_idle(&ce->active)) {
- GEM_TRACE_ERR("Inactive context in pending[%zd]\n",
+ if (i915_request_completed(rq))
+ goto unlock;
+
+ if (i915_active_is_idle(&ce->active) &&
+ !intel_context_is_barrier(ce)) {
+ GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
- return false;
+ ok = false;
+ goto unlock;
}
if (!i915_vma_is_pinned(ce->state)) {
- GEM_TRACE_ERR("Unpinned context in pending[%zd]\n",
+ GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
- return false;
+ ok = false;
+ goto unlock;
}
if (!i915_vma_is_pinned(ce->ring->vma)) {
- GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n",
+ GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
- return false;
+ ok = false;
+ goto unlock;
}
+
+unlock:
+ spin_unlock_irqrestore(&rq->lock, flags);
+ if (!ok)
+ return false;
}
return ce;
@@ -1327,7 +1510,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
static bool ctx_single_port_submission(const struct intel_context *ce)
{
return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
- i915_gem_context_force_single_submission(ce->gem_context));
+ intel_context_force_single_submission(ce));
}
static bool can_merge_ctx(const struct intel_context *prev,
@@ -1359,11 +1542,12 @@ static bool can_merge_rq(const struct i915_request *prev,
if (i915_request_completed(next))
return true;
- if (unlikely((prev->flags ^ next->flags) &
- (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL)))
+ if (unlikely((prev->fence.flags ^ next->fence.flags) &
+ (BIT(I915_FENCE_FLAG_NOPREEMPT) |
+ BIT(I915_FENCE_FLAG_SENTINEL))))
return false;
- if (!can_merge_ctx(prev->hw_context, next->hw_context))
+ if (!can_merge_ctx(prev->context, next->context))
return false;
return true;
@@ -1372,7 +1556,7 @@ static bool can_merge_rq(const struct i915_request *prev,
static void virtual_update_register_offsets(u32 *regs,
struct intel_engine_cs *engine)
{
- set_offsets(regs, reg_offsets(engine), engine);
+ set_offsets(regs, reg_offsets(engine), engine, false);
}
static bool virtual_matches(const struct virtual_engine *ve,
@@ -1411,21 +1595,15 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
if (!list_empty(&ve->context.signal_link)) {
list_move_tail(&ve->context.signal_link,
&engine->breadcrumbs.signalers);
- intel_engine_queue_breadcrumbs(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
spin_unlock(&old->breadcrumbs.irq_lock);
}
-static struct i915_request *
-last_active(const struct intel_engine_execlists *execlists)
-{
- struct i915_request * const *last = READ_ONCE(execlists->active);
-
- while (*last && i915_request_completed(*last))
- last++;
-
- return *last;
-}
+#define for_each_waiter(p__, rq__) \
+ list_for_each_entry_lockless(p__, \
+ &(rq__)->sched.waiters_list, \
+ wait_link)
static void defer_request(struct i915_request *rq, struct list_head * const pl)
{
@@ -1444,7 +1622,7 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
GEM_BUG_ON(i915_request_is_active(rq));
list_move_tail(&rq->sched.link, pl);
- list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ for_each_waiter(p, rq) {
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
@@ -1457,8 +1635,8 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
!i915_request_completed(rq));
GEM_BUG_ON(i915_request_is_active(w));
- if (list_empty(&w->sched.link))
- continue; /* Not yet submitted; unready */
+ if (!i915_request_is_ready(w))
+ continue;
if (rq_prio(w) < rq_prio(rq))
continue;
@@ -1490,11 +1668,9 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
if (!intel_engine_has_timeslices(engine))
return false;
- if (list_is_last(&rq->sched.link, &engine->active.requests))
- return false;
-
- hint = max(rq_prio(list_next_entry(rq, sched.link)),
- engine->execlists.queue_priority_hint);
+ hint = engine->execlists.queue_priority_hint;
+ if (!list_is_last(&rq->sched.link, &engine->active.requests))
+ hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
return hint >= effective_prio(rq);
}
@@ -1519,7 +1695,7 @@ active_timeslice(const struct intel_engine_cs *engine)
{
const struct i915_request *rq = *engine->execlists.active;
- if (i915_request_completed(rq))
+ if (!rq || i915_request_completed(rq))
return 0;
if (engine->execlists.switch_priority_hint < effective_prio(rq))
@@ -1536,33 +1712,49 @@ static void set_timeslice(struct intel_engine_cs *engine)
set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
}
+static void start_timeslice(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ execlists->switch_priority_hint = execlists->queue_priority_hint;
+
+ if (timer_pending(&execlists->timer))
+ return;
+
+ set_timer_ms(&execlists->timer, timeslice(engine));
+}
+
static void record_preemption(struct intel_engine_execlists *execlists)
{
(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
}
-static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
+static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
- struct i915_request *rq;
-
- rq = last_active(&engine->execlists);
if (!rq)
return 0;
/* Force a fast reset for terminated contexts (ignoring sysfs!) */
- if (unlikely(i915_gem_context_is_banned(rq->gem_context)))
+ if (unlikely(intel_context_is_banned(rq->context)))
return 1;
return READ_ONCE(engine->props.preempt_timeout_ms);
}
-static void set_preempt_timeout(struct intel_engine_cs *engine)
+static void set_preempt_timeout(struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
if (!intel_engine_has_preempt_reset(engine))
return;
set_timer_ms(&engine->execlists.preempt,
- active_preempt_timeout(engine));
+ active_preempt_timeout(engine, rq));
+}
+
+static inline void clear_ports(struct i915_request **ports, int count)
+{
+ memset_p((void **)ports, NULL, count);
}
static void execlists_dequeue(struct intel_engine_cs *engine)
@@ -1570,6 +1762,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request **port = execlists->pending;
struct i915_request ** const last_port = port + execlists->port_mask;
+ struct i915_request * const *active;
struct i915_request *last;
struct rb_node *rb;
bool submit = false;
@@ -1624,15 +1817,18 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* i.e. we will retrigger preemption following the ack in case
* of trouble.
*/
- last = last_active(execlists);
+ active = READ_ONCE(execlists->active);
+ while ((last = *active) && i915_request_completed(last))
+ active++;
+
if (last) {
if (need_preempt(engine, last, rb)) {
- GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n",
- engine->name,
- last->fence.context,
- last->fence.seqno,
- last->sched.attr.priority,
- execlists->queue_priority_hint);
+ ENGINE_TRACE(engine,
+ "preempting last=%llx:%lld, prio=%d, hint=%d\n",
+ last->fence.context,
+ last->fence.seqno,
+ last->sched.attr.priority,
+ execlists->queue_priority_hint);
record_preemption(execlists);
/*
@@ -1651,23 +1847,15 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
__unwind_incomplete_requests(engine);
- /*
- * If we need to return to the preempted context, we
- * need to skip the lite-restore and force it to
- * reload the RING_TAIL. Otherwise, the HW has a
- * tendency to ignore us rewinding the TAIL to the
- * end of an earlier request.
- */
- last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
last = NULL;
} else if (need_timeslice(engine, last) &&
timer_expired(&engine->execlists.timer)) {
- GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
- engine->name,
- last->fence.context,
- last->fence.seqno,
- last->sched.attr.priority,
- execlists->queue_priority_hint);
+ ENGINE_TRACE(engine,
+ "expired last=%llx:%lld, prio=%d, hint=%d\n",
+ last->fence.context,
+ last->fence.seqno,
+ last->sched.attr.priority,
+ execlists->queue_priority_hint);
ring_set_paused(engine, 1);
defer_active(engine);
@@ -1702,11 +1890,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* Even if ELSP[1] is occupied and not worthy
* of timeslices, our queue might be.
*/
- if (!execlists->timer.expires &&
- need_timeslice(engine, last))
- set_timer_ms(&execlists->timer,
- timeslice(engine));
-
+ start_timeslice(engine);
return;
}
}
@@ -1730,7 +1914,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(rq != ve->request);
GEM_BUG_ON(rq->engine != &ve->base);
- GEM_BUG_ON(rq->hw_context != &ve->context);
+ GEM_BUG_ON(rq->context != &ve->context);
if (rq_prio(rq) >= queue_prio(execlists)) {
if (!virtual_matches(ve, rq, engine)) {
@@ -1741,17 +1925,18 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
- return; /* leave this for another */
+ start_timeslice(engine);
+ return; /* leave this for another sibling */
}
- GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
- engine->name,
- rq->fence.context,
- rq->fence.seqno,
- i915_request_completed(rq) ? "!" :
- i915_request_started(rq) ? "*" :
- "",
- yesno(engine != ve->siblings[0]));
+ ENGINE_TRACE(engine,
+ "virtual rq=%llx:%lld%s, new engine? %s\n",
+ rq->fence.context,
+ rq->fence.seqno,
+ i915_request_completed(rq) ? "!" :
+ i915_request_started(rq) ? "*" :
+ "",
+ yesno(engine != ve->siblings[0]));
ve->request = NULL;
ve->base.execlists.queue_priority_hint = INT_MIN;
@@ -1849,7 +2034,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* same LRCA, i.e. we must submit 2 different
* contexts if we submit 2 ELSP.
*/
- if (last->hw_context == rq->hw_context)
+ if (last->context == rq->context)
goto done;
if (i915_request_has_sentinel(last))
@@ -1862,8 +2047,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* the same context (even though a different
* request) to the second port.
*/
- if (ctx_single_port_submission(last->hw_context) ||
- ctx_single_port_submission(rq->hw_context))
+ if (ctx_single_port_submission(last->context) ||
+ ctx_single_port_submission(rq->context))
goto done;
merge = false;
@@ -1877,8 +2062,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
GEM_BUG_ON(last &&
- !can_merge_ctx(last->hw_context,
- rq->hw_context));
+ !can_merge_ctx(last->context,
+ rq->context));
submit = true;
last = rq;
@@ -1907,9 +2092,6 @@ done:
* interrupt for secondary ports).
*/
execlists->queue_priority_hint = queue_prio(execlists);
- GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n",
- engine->name, execlists->queue_priority_hint,
- yesno(submit));
if (submit) {
*port = execlists_schedule_in(last, port - execlists->pending);
@@ -1920,7 +2102,7 @@ done:
* Skip if we ended up with exactly the same set of requests,
* e.g. trying to timeslice a pair of ordered contexts
*/
- if (!memcmp(execlists->active, execlists->pending,
+ if (!memcmp(active, execlists->pending,
(port - execlists->pending + 1) * sizeof(*port))) {
do
execlists_schedule_out(fetch_and_zero(port));
@@ -1928,11 +2110,10 @@ done:
goto skip_submit;
}
+ clear_ports(port + 1, last_port - port);
- memset(port + 1, 0, (last_port - port) * sizeof(*port));
execlists_submit_ports(engine);
-
- set_preempt_timeout(engine);
+ set_preempt_timeout(engine, *active);
} else {
skip_submit:
ring_set_paused(engine, 0);
@@ -1946,13 +2127,14 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
for (port = execlists->pending; *port; port++)
execlists_schedule_out(*port);
- memset(execlists->pending, 0, sizeof(execlists->pending));
+ clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
/* Mark the end of active before we overwrite *active */
for (port = xchg(&execlists->active, execlists->pending); *port; port++)
execlists_schedule_out(*port);
- WRITE_ONCE(execlists->active,
- memset(execlists->inflight, 0, sizeof(execlists->inflight)));
+ clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
+
+ WRITE_ONCE(execlists->active, execlists->inflight);
}
static inline void
@@ -2058,7 +2240,7 @@ static void process_csb(struct intel_engine_cs *engine)
*/
head = execlists->csb_head;
tail = READ_ONCE(*execlists->csb_write);
- GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
+ ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
if (unlikely(head == tail))
return;
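process_csb() above consumes the context-status buffer as a small circular array of paired status dwords, walking from the last-read index (head) up to the index last written by the hardware (tail) and wrapping at the end. The standalone toy below shows just that walk, with invented status values; the real buffer is larger (6 entries on Gen8-class hardware, 12 on Icelake and later, per csb_size):

#include <stdint.h>
#include <stdio.h>

#define NUM_ENTRIES 6   /* made-up size for the demo */

/* Each context-switch event is a pair of status dwords. */
static const uint32_t buf[2 * NUM_ENTRIES] = {
        /* invented status values */
        0x00000001, 0x00000000,
        0x00008002, 0x00000011,
        0x00004000, 0x00000012,
};

int main(void)
{
        int head = NUM_ENTRIES - 1;     /* last entry consumed; like
                                         * reset_csb_pointers(), start at
                                         * the final slot */
        int tail = 2;                   /* last entry written by the "HW" */

        if (head == tail)
                return 0;               /* nothing new */

        /* Advance before reading, so head always names the most recently
         * consumed entry, exactly as process_csb() does. */
        do {
                if (++head == NUM_ENTRIES)
                        head = 0;

                printf("csb[%d]: status=0x%08x:0x%08x\n",
                       head, buf[2 * head + 0], buf[2 * head + 1]);
        } while (head != tail);

        return 0;
}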
@@ -2096,9 +2278,8 @@ static void process_csb(struct intel_engine_cs *engine)
* status notifier.
*/
- GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
- engine->name, head,
- buf[2 * head + 0], buf[2 * head + 1]);
+ ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
+ head, buf[2 * head + 0], buf[2 * head + 1]);
if (INTEL_GEN(engine->i915) >= 12)
promote = gen12_csb_parse(execlists, buf + 2 * head);
@@ -2109,7 +2290,6 @@ static void process_csb(struct intel_engine_cs *engine)
/* Point active to the new ELSP; prevent overwriting */
WRITE_ONCE(execlists->active, execlists->pending);
- set_timeslice(engine);
if (!inject_preempt_hang(execlists))
ring_set_paused(engine, 0);
@@ -2150,6 +2330,7 @@ static void process_csb(struct intel_engine_cs *engine)
} while (head != tail);
execlists->csb_head = head;
+ set_timeslice(engine);
/*
* Gen11 has proven to fail wrt global observation point between
@@ -2175,6 +2356,310 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
}
}
+static void __execlists_hold(struct i915_request *rq)
+{
+ LIST_HEAD(list);
+
+ do {
+ struct i915_dependency *p;
+
+ if (i915_request_is_active(rq))
+ __i915_request_unsubmit(rq);
+
+ RQ_TRACE(rq, "on hold\n");
+ clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ list_move_tail(&rq->sched.link, &rq->engine->active.hold);
+ i915_request_set_hold(rq);
+
+ list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ struct i915_request *w =
+ container_of(p->waiter, typeof(*w), sched);
+
+ /* Leave semaphores spinning on the other engines */
+ if (w->engine != rq->engine)
+ continue;
+
+ if (!i915_request_is_ready(w))
+ continue;
+
+ if (i915_request_completed(w))
+ continue;
+
+ if (i915_request_on_hold(rq))
+ continue;
+
+ list_move_tail(&w->sched.link, &list);
+ }
+
+ rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+ } while (rq);
+}
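__execlists_hold() above (and __execlists_unhold() further down) suspends not just one request but everything on the same engine that transitively waits on it, using a local list as a worklist instead of recursion. The toy program below shows the same propagation pattern, with invented request names and a plain adjacency matrix standing in for the driver's intrusive dependency lists:

#include <stdbool.h>
#include <stdio.h>

#define NR 5

static const char *name[NR] = { "rq0", "rq1", "rq2", "rq3", "rq4" };

/* waits_on[w][s]: request w waits on (is a waiter of) request s */
static const bool waits_on[NR][NR] = {
        [1][0] = true,  /* rq1 waits on rq0 */
        [2][1] = true,  /* rq2 waits on rq1 */
        [4][3] = true,  /* rq4 waits on rq3 (independent chain) */
};

static bool on_hold[NR];

static void hold(int rq)
{
        int list[NR], head = 0, tail = 0;

        on_hold[rq] = true;
        list[tail++] = rq;              /* seed the worklist */

        while (head < tail) {
                int cur = list[head++];

                printf("%s: on hold\n", name[cur]);

                /* pull in every waiter of cur that is not already held */
                for (int w = 0; w < NR; w++) {
                        if (!waits_on[w][cur] || on_hold[w])
                                continue;
                        on_hold[w] = true;
                        list[tail++] = w;
                }
        }
}

int main(void)
{
        hold(0);        /* holds rq0, rq1, rq2; rq3/rq4 are untouched */
        return 0;
}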
+
+static bool execlists_hold(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ spin_lock_irq(&engine->active.lock);
+
+ if (i915_request_completed(rq)) { /* too late! */
+ rq = NULL;
+ goto unlock;
+ }
+
+ if (rq->engine != engine) { /* preempted virtual engine */
+ struct virtual_engine *ve = to_virtual_engine(rq->engine);
+
+ /*
+ * intel_context_inflight() is only protected by virtue
+ * of process_csb() being called only by the tasklet (or
+ * directly from inside reset while the tasklet is suspended).
+ * Assert that neither of those are allowed to run while we
+ * poke at the request queues.
+ */
+ GEM_BUG_ON(!reset_in_progress(&engine->execlists));
+
+ /*
+ * An unsubmitted request along a virtual engine will
+ * remain on the active (this) engine until we are able
+ * to process the context switch away (and so mark the
+ * context as no longer in flight). That cannot have happened
+ * yet, otherwise we would not be hanging!
+ */
+ spin_lock(&ve->base.active.lock);
+ GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
+ GEM_BUG_ON(ve->request != rq);
+ ve->request = NULL;
+ spin_unlock(&ve->base.active.lock);
+ i915_request_put(rq);
+
+ rq->engine = engine;
+ }
+
+ /*
+ * Transfer this request onto the hold queue to prevent it
+ * being resubmitted to HW (and potentially completed) before we have
+ * released it. Since we may have already submitted following
+ * requests, we need to remove those as well.
+ */
+ GEM_BUG_ON(i915_request_on_hold(rq));
+ GEM_BUG_ON(rq->engine != engine);
+ __execlists_hold(rq);
+
+unlock:
+ spin_unlock_irq(&engine->active.lock);
+ return rq;
+}
+
+static bool hold_request(const struct i915_request *rq)
+{
+ struct i915_dependency *p;
+
+ /*
+ * If one of our ancestors is on hold, we must also be on hold,
+ * otherwise we will bypass it and execute before it.
+ */
+ list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
+ const struct i915_request *s =
+ container_of(p->signaler, typeof(*s), sched);
+
+ if (s->engine != rq->engine)
+ continue;
+
+ if (i915_request_on_hold(s))
+ return true;
+ }
+
+ return false;
+}
+
+static void __execlists_unhold(struct i915_request *rq)
+{
+ LIST_HEAD(list);
+
+ do {
+ struct i915_dependency *p;
+
+ GEM_BUG_ON(!i915_request_on_hold(rq));
+ GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+
+ i915_request_clear_hold(rq);
+ list_move_tail(&rq->sched.link,
+ i915_sched_lookup_priolist(rq->engine,
+ rq_prio(rq)));
+ set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ RQ_TRACE(rq, "hold release\n");
+
+ /* Also release any children on this engine that are ready */
+ list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ struct i915_request *w =
+ container_of(p->waiter, typeof(*w), sched);
+
+ if (w->engine != rq->engine)
+ continue;
+
+ if (!i915_request_on_hold(rq))
+ continue;
+
+ /* Check that no other parents are also on hold */
+ if (hold_request(rq))
+ continue;
+
+ list_move_tail(&w->sched.link, &list);
+ }
+
+ rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+ } while (rq);
+}
+
+static void execlists_unhold(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ spin_lock_irq(&engine->active.lock);
+
+ /*
+ * Move this request back to the priority queue, and all of its
+ * children and grandchildren that were suspended along with it.
+ */
+ __execlists_unhold(rq);
+
+ if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
+ engine->execlists.queue_priority_hint = rq_prio(rq);
+ tasklet_hi_schedule(&engine->execlists.tasklet);
+ }
+
+ spin_unlock_irq(&engine->active.lock);
+}
+
+struct execlists_capture {
+ struct work_struct work;
+ struct i915_request *rq;
+ struct i915_gpu_coredump *error;
+};
+
+static void execlists_capture_work(struct work_struct *work)
+{
+ struct execlists_capture *cap = container_of(work, typeof(*cap), work);
+ const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
+ struct intel_engine_cs *engine = cap->rq->engine;
+ struct intel_gt_coredump *gt = cap->error->gt;
+ struct intel_engine_capture_vma *vma;
+
+ /* Compress all the objects attached to the request, slow! */
+ vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
+ if (vma) {
+ struct i915_vma_compress *compress =
+ i915_vma_capture_prepare(gt);
+
+ intel_engine_coredump_add_vma(gt->engine, vma, compress);
+ i915_vma_capture_finish(gt, compress);
+ }
+
+ gt->simulated = gt->engine->simulated;
+ cap->error->simulated = gt->simulated;
+
+ /* Publish the error state, and announce it to the world */
+ i915_error_state_store(cap->error);
+ i915_gpu_coredump_put(cap->error);
+
+ /* Return this request and all that depend upon it for signaling */
+ execlists_unhold(engine, cap->rq);
+ i915_request_put(cap->rq);
+
+ kfree(cap);
+}
+
+static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
+{
+ const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+ struct execlists_capture *cap;
+
+ cap = kmalloc(sizeof(*cap), gfp);
+ if (!cap)
+ return NULL;
+
+ cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
+ if (!cap->error)
+ goto err_cap;
+
+ cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
+ if (!cap->error->gt)
+ goto err_gpu;
+
+ cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
+ if (!cap->error->gt->engine)
+ goto err_gt;
+
+ return cap;
+
+err_gt:
+ kfree(cap->error->gt);
+err_gpu:
+ kfree(cap->error);
+err_cap:
+ kfree(cap);
+ return NULL;
+}
+
+static bool execlists_capture(struct intel_engine_cs *engine)
+{
+ struct execlists_capture *cap;
+
+ if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
+ return true;
+
+ /*
+ * We need to _quickly_ capture the engine state before we reset.
+ * We are inside an atomic section (softirq) here and we are delaying
+ * the forced preemption event.
+ */
+ cap = capture_regs(engine);
+ if (!cap)
+ return true;
+
+ cap->rq = execlists_active(&engine->execlists);
+ GEM_BUG_ON(!cap->rq);
+
+ rcu_read_lock();
+ cap->rq = active_request(cap->rq->context->timeline, cap->rq);
+ cap->rq = i915_request_get_rcu(cap->rq);
+ rcu_read_unlock();
+ if (!cap->rq)
+ goto err_free;
+
+ /*
+ * Remove the request from the execlists queue, and take ownership
+ * of the request. We pass it to our worker who will _slowly_ compress
+ * all the pages the _user_ requested for debugging their batch, after
+ * which we return it to the queue for signaling.
+ *
+ * By removing them from the execlists queue, we also remove the
+ * requests from being processed by __unwind_incomplete_requests()
+ * during the intel_engine_reset(), and so they will *not* be replayed
+ * afterwards.
+ *
+ * Note that because we have not yet reset the engine at this point,
+ * it is possible that the request we have identified as guilty
+ * did in fact complete, and we will then hit an arbitration
+ * point allowing the outstanding preemption to succeed. The likelihood
+ * of that is very low (as capturing of the engine registers should be
+ * fast enough to run inside an irq-off atomic section!), so we will
+ * simply hold that request accountable for being non-preemptible
+ * long enough to force the reset.
+ */
+ if (!execlists_hold(engine, cap->rq))
+ goto err_rq;
+
+ INIT_WORK(&cap->work, execlists_capture_work);
+ schedule_work(&cap->work);
+ return true;
+
+err_rq:
+ i915_request_put(cap->rq);
+err_free:
+ i915_gpu_coredump_put(cap->error);
+ kfree(cap);
+ return false;
+}
+
static noinline void preempt_reset(struct intel_engine_cs *engine)
{
const unsigned int bit = I915_RESET_ENGINE + engine->id;
@@ -2189,11 +2674,15 @@ static noinline void preempt_reset(struct intel_engine_cs *engine)
/* Mark this tasklet as disabled to avoid waiting for it to complete */
tasklet_disable_nosync(&engine->execlists.tasklet);
- GEM_TRACE("%s: preempt timeout %lu+%ums\n",
- engine->name,
- READ_ONCE(engine->props.preempt_timeout_ms),
- jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
- intel_engine_reset(engine, "preemption time out");
+ ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
+ READ_ONCE(engine->props.preempt_timeout_ms),
+ jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
+
+ ring_set_paused(engine, 1); /* Freeze the current request in place */
+ if (execlists_capture(engine))
+ intel_engine_reset(engine, "preemption time out");
+ else
+ ring_set_paused(engine, 0);
tasklet_enable(&engine->execlists.tasklet);
clear_and_wake_up_bit(bit, lock);
@@ -2255,11 +2744,12 @@ static void execlists_preempt(struct timer_list *timer)
}
static void queue_request(struct intel_engine_cs *engine,
- struct i915_sched_node *node,
- int prio)
+ struct i915_request *rq)
{
- GEM_BUG_ON(!list_empty(&node->link));
- list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
+ GEM_BUG_ON(!list_empty(&rq->sched.link));
+ list_add_tail(&rq->sched.link,
+ i915_sched_lookup_priolist(engine, rq_prio(rq)));
+ set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
}
static void __submit_queue_imm(struct intel_engine_cs *engine)
@@ -2287,6 +2777,13 @@ static void submit_queue(struct intel_engine_cs *engine,
__submit_queue_imm(engine);
}
+static bool ancestor_on_hold(const struct intel_engine_cs *engine,
+ const struct i915_request *rq)
+{
+ GEM_BUG_ON(i915_request_on_hold(rq));
+ return !list_empty(&engine->active.hold) && hold_request(rq);
+}
+
static void execlists_submit_request(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
@@ -2295,12 +2792,17 @@ static void execlists_submit_request(struct i915_request *request)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->active.lock, flags);
- queue_request(engine, &request->sched, rq_prio(request));
+ if (unlikely(ancestor_on_hold(engine, request))) {
+ list_add_tail(&request->sched.link, &engine->active.hold);
+ i915_request_set_hold(request);
+ } else {
+ queue_request(engine, request);
- GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
- GEM_BUG_ON(list_empty(&request->sched.link));
+ GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+ GEM_BUG_ON(list_empty(&request->sched.link));
- submit_queue(engine, request);
+ submit_queue(engine, request);
+ }
spin_unlock_irqrestore(&engine->active.lock, flags);
}
@@ -2333,7 +2835,7 @@ set_redzone(void *vaddr, const struct intel_engine_cs *engine)
vaddr += engine->context_size;
- memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
+ memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}
static void
@@ -2344,7 +2846,7 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
vaddr += engine->context_size;
- if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE))
+ if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
dev_err_once(engine->i915->drm.dev,
"%s context redzone overwritten!\n",
engine->name);
@@ -2356,21 +2858,21 @@ static void execlists_context_unpin(struct intel_context *ce)
ce->engine);
i915_gem_object_unpin_map(ce->state->obj);
- intel_ring_reset(ce->ring, ce->ring->tail);
}
static void
__execlists_update_reg_state(const struct intel_context *ce,
- const struct intel_engine_cs *engine)
+ const struct intel_engine_cs *engine,
+ u32 head)
{
struct intel_ring *ring = ce->ring;
u32 *regs = ce->lrc_reg_state;
- GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
+ GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
- regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma);
- regs[CTX_RING_HEAD] = ring->head;
+ regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
+ regs[CTX_RING_HEAD] = head;
regs[CTX_RING_TAIL] = ring->tail;
/* RPCS */
@@ -2387,33 +2889,21 @@ __execlists_context_pin(struct intel_context *ce,
struct intel_engine_cs *engine)
{
void *vaddr;
- int ret;
GEM_BUG_ON(!ce->state);
-
- ret = intel_context_active_acquire(ce);
- if (ret)
- goto err;
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
vaddr = i915_gem_object_pin_map(ce->state->obj,
i915_coherent_map_type(engine->i915) |
I915_MAP_OVERRIDE);
- if (IS_ERR(vaddr)) {
- ret = PTR_ERR(vaddr);
- goto unpin_active;
- }
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
- ce->lrc_desc = lrc_descriptor(ce, engine);
+ ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
- __execlists_update_reg_state(ce, engine);
+ __execlists_update_reg_state(ce, engine, ce->ring->tail);
return 0;
-
-unpin_active:
- intel_context_active_release(ce);
-err:
- return ret;
}
static int execlists_context_pin(struct intel_context *ce)
@@ -2428,6 +2918,9 @@ static int execlists_context_alloc(struct intel_context *ce)
static void execlists_context_reset(struct intel_context *ce)
{
+ CE_TRACE(ce, "reset\n");
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
+
/*
* Because we emit WA_TAIL_DWORDS there may be a disparity
* between our bookkeeping in ce->ring->head and ce->ring->tail and
@@ -2444,8 +2937,14 @@ static void execlists_context_reset(struct intel_context *ce)
* So to avoid that we reset the context images upon resume. For
* simplicity, we just zero everything out.
*/
- intel_ring_reset(ce->ring, 0);
- __execlists_update_reg_state(ce, ce->engine);
+ intel_ring_reset(ce->ring, ce->ring->emit);
+
+ /* Scrub away the garbage */
+ execlists_init_reg_state(ce->lrc_reg_state,
+ ce, ce->engine, ce->ring, true);
+ __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
+
+ ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
}
static const struct intel_context_ops execlists_context_ops = {
@@ -2497,7 +2996,7 @@ static int execlists_request_alloc(struct i915_request *request)
{
int ret;
- GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
+ GEM_BUG_ON(!intel_context_is_pinned(request->context));
/*
* Flush enough space to reduce the likelihood of waiting after
@@ -2867,6 +3366,8 @@ static void enable_execlists(struct intel_engine_cs *engine)
RING_HWS_PGA,
i915_ggtt_offset(engine->status_page.vma));
ENGINE_POSTING_READ(engine, RING_HWS_PGA);
+
+ engine->context_tag = 0;
}
static bool unexpected_starting_state(struct intel_engine_cs *engine)
@@ -2906,8 +3407,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
struct intel_engine_execlists * const execlists = &engine->execlists;
unsigned long flags;
- GEM_TRACE("%s: depth<-%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
+ ENGINE_TRACE(engine, "depth<-%d\n",
+ atomic_read(&execlists->tasklet.count));
/*
* Prevent request submission to the hardware until we have
@@ -2960,26 +3461,20 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
WRITE_ONCE(*execlists->csb_write, reset_value);
wmb(); /* Make sure this is visible to HW (paranoia?) */
+ /*
+ * Sometimes Icelake forgets to reset its pointers on a GPU reset.
+ * Bludgeon them with a mmio update to be sure.
+ */
+ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
+ reset_value << 8 | reset_value);
+ ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
invalidate_csb_entries(&execlists->csb_status[0],
&execlists->csb_status[reset_value]);
}
-static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
-{
- if (INTEL_GEN(engine->i915) >= 12)
- return 0x60;
- else if (INTEL_GEN(engine->i915) >= 9)
- return 0x54;
- else if (engine->class == RENDER_CLASS)
- return 0x58;
- else
- return -1;
-}
-
-static void __execlists_reset_reg_state(const struct intel_context *ce,
- const struct intel_engine_cs *engine)
+static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
- u32 *regs = ce->lrc_reg_state;
int x;
x = lrc_ring_mi_mode(engine);
@@ -2989,11 +3484,20 @@ static void __execlists_reset_reg_state(const struct intel_context *ce,
}
}
+static void __execlists_reset_reg_state(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
+{
+ u32 *regs = ce->lrc_reg_state;
+
+ __reset_stop_ring(regs, engine);
+}
+
static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct intel_context *ce;
struct i915_request *rq;
+ u32 head;
mb(); /* paranoia: read the CSB pointers from after the reset */
clflush(execlists->csb_write);
@@ -3016,20 +3520,20 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
/* We still have requests in-flight; the engine should be active */
GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
- ce = rq->hw_context;
+ ce = rq->context;
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
if (i915_request_completed(rq)) {
/* Idle context; tidy up the ring so we can restart afresh */
- ce->ring->head = intel_ring_wrap(ce->ring, rq->tail);
+ head = intel_ring_wrap(ce->ring, rq->tail);
goto out_replay;
}
/* Context has requests still in-flight; it should not be idle! */
GEM_BUG_ON(i915_active_is_idle(&ce->active));
rq = active_request(ce->timeline, rq);
- ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
- GEM_BUG_ON(ce->ring->head == ce->ring->tail);
+ head = intel_ring_wrap(ce->ring, rq->head);
+ GEM_BUG_ON(head == ce->ring->tail);
/*
* If this request hasn't started yet, e.g. it is waiting on a
@@ -3073,11 +3577,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
restore_default_state(ce, engine);
out_replay:
- GEM_TRACE("%s replay {head:%04x, tail:%04x}\n",
- engine->name, ce->ring->head, ce->ring->tail);
- intel_ring_update_space(ce->ring);
+ ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
+ head, ce->ring->tail);
__execlists_reset_reg_state(ce, engine);
- __execlists_update_reg_state(ce, engine);
+ __execlists_update_reg_state(ce, engine, head);
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
unwind:
@@ -3086,11 +3589,11 @@ unwind:
__unwind_incomplete_requests(engine);
}
-static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
+static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
unsigned long flags;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
spin_lock_irqsave(&engine->active.lock, flags);
@@ -3104,14 +3607,14 @@ static void nop_submission_tasklet(unsigned long data)
/* The driver is wedged; don't process any more events. */
}
-static void execlists_cancel_requests(struct intel_engine_cs *engine)
+static void execlists_reset_cancel(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request *rq, *rn;
struct rb_node *rb;
unsigned long flags;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
/*
* Before we call engine->cancel_requests(), we should have exclusive
@@ -3149,6 +3652,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
i915_priolist_free(p);
}
+ /* On-hold requests will be flushed to timeline upon their release */
+ list_for_each_entry(rq, &engine->active.hold, sched.link)
+ mark_eio(rq);
+
/* Cancel all attached virtual engines */
while ((rb = rb_first_cached(&execlists->virtual))) {
struct virtual_engine *ve =
@@ -3198,13 +3705,13 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
if (__tasklet_enable(&execlists->tasklet))
/* And kick in case we missed a new request submission. */
tasklet_hi_schedule(&execlists->tasklet);
- GEM_TRACE("%s: depth->%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
+ ENGINE_TRACE(engine, "depth->%d\n",
+ atomic_read(&execlists->tasklet.count));
}
-static int gen8_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- const unsigned int flags)
+static int gen8_emit_bb_start_noarb(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
{
u32 *cs;
@@ -3238,7 +3745,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
return 0;
}
-static int gen9_emit_bb_start(struct i915_request *rq,
+static int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
@@ -3493,26 +4000,6 @@ static int gen12_emit_flush_render(struct i915_request *request,
*cs++ = preparser_disable(false);
intel_ring_advance(request, cs);
-
- /*
- * Wa_1604544889:tgl
- */
- if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
- flags = 0;
- flags |= PIPE_CONTROL_CS_STALL;
- flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
-
- flags |= PIPE_CONTROL_STORE_DATA_INDEX;
- flags |= PIPE_CONTROL_QW_WRITE;
-
- cs = intel_ring_begin(request, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- cs = gen8_emit_pipe_control(cs, flags,
- LRC_PPHWSP_SCRATCH_ADDR);
- intel_ring_advance(request, cs);
- }
}
return 0;
@@ -3693,12 +4180,12 @@ static void execlists_park(struct intel_engine_cs *engine)
void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = execlists_submit_request;
- engine->cancel_requests = execlists_cancel_requests;
engine->schedule = i915_schedule;
engine->execlists.tasklet.func = execlists_submission_tasklet;
engine->reset.prepare = execlists_reset_prepare;
- engine->reset.reset = execlists_reset;
+ engine->reset.rewind = execlists_reset_rewind;
+ engine->reset.cancel = execlists_reset_cancel;
engine->reset.finish = execlists_reset_finish;
engine->park = execlists_park;
@@ -3713,13 +4200,27 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
if (INTEL_GEN(engine->i915) >= 12)
engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
+
+ if (intel_engine_has_preemption(engine))
+ engine->emit_bb_start = gen8_emit_bb_start;
+ else
+ engine->emit_bb_start = gen8_emit_bb_start_noarb;
}
-static void execlists_destroy(struct intel_engine_cs *engine)
+static void execlists_shutdown(struct intel_engine_cs *engine)
{
+ /* Synchronise with residual timers and any softirq they raise */
+ del_timer_sync(&engine->execlists.timer);
+ del_timer_sync(&engine->execlists.preempt);
+ tasklet_kill(&engine->execlists.tasklet);
+}
+
+static void execlists_release(struct intel_engine_cs *engine)
+{
+ execlists_shutdown(engine);
+
intel_engine_cleanup_common(engine);
lrc_destroy_wa_ctx(engine);
- kfree(engine);
}
static void
@@ -3727,13 +4228,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
{
/* Default vfuncs which can be overriden by each engine. */
- engine->destroy = execlists_destroy;
engine->resume = execlists_resume;
- engine->reset.prepare = execlists_reset_prepare;
- engine->reset.reset = execlists_reset;
- engine->reset.finish = execlists_reset_finish;
-
engine->cops = &execlists_context_ops;
engine->request_alloc = execlists_request_alloc;
@@ -3756,10 +4252,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
* until a more refined solution exists.
*/
}
- if (IS_GEN(engine->i915, 8))
- engine->emit_bb_start = gen8_emit_bb_start;
- else
- engine->emit_bb_start = gen9_emit_bb_start;
}
static inline void
@@ -3803,6 +4295,11 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
int intel_execlists_submission_setup(struct intel_engine_cs *engine)
{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct drm_i915_private *i915 = engine->i915;
+ struct intel_uncore *uncore = engine->uncore;
+ u32 base = engine->mmio_base;
+
tasklet_init(&engine->execlists.tasklet,
execlists_submission_tasklet, (unsigned long)engine);
timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
@@ -3814,21 +4311,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
if (engine->class == RENDER_CLASS)
rcs_submission_override(engine);
- return 0;
-}
-
-int intel_execlists_submission_init(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- struct drm_i915_private *i915 = engine->i915;
- struct intel_uncore *uncore = engine->uncore;
- u32 base = engine->mmio_base;
- int ret;
-
- ret = intel_engine_init_common(engine);
- if (ret)
- return ret;
-
if (intel_init_workaround_bb(engine))
/*
* We continue even if we fail to initialize WA batch
@@ -3860,6 +4342,9 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
reset_csb_pointers(engine);
+ /* Finally, take ownership and responsibility for cleanup! */
+ engine->release = execlists_release;
+
return 0;
}
@@ -3899,18 +4384,21 @@ static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
static void init_common_reg_state(u32 * const regs,
const struct intel_engine_cs *engine,
- const struct intel_ring *ring)
+ const struct intel_ring *ring,
+ bool inhibit)
{
- regs[CTX_CONTEXT_CONTROL] =
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
- _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+ u32 ctl;
+
+ ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+ ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+ if (inhibit)
+ ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
if (INTEL_GEN(engine->i915) < 11)
- regs[CTX_CONTEXT_CONTROL] |=
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
- CTX_CTRL_RS_CTX_ENABLE);
+ ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
+ CTX_CTRL_RS_CTX_ENABLE);
+ regs[CTX_CONTEXT_CONTROL] = ctl;
- regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID;
- regs[CTX_BB_STATE] = RING_BB_PPGTT;
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
}
static void init_wa_bb_reg_state(u32 * const regs,
@@ -3966,7 +4454,7 @@ static void execlists_init_reg_state(u32 *regs,
const struct intel_context *ce,
const struct intel_engine_cs *engine,
const struct intel_ring *ring,
- bool close)
+ bool inhibit)
{
/*
* A context is actually a big batch buffer with several
@@ -3978,21 +4466,17 @@ static void execlists_init_reg_state(u32 *regs,
*
* Must keep consistent with virtual_update_register_offsets().
*/
- u32 *bbe = set_offsets(regs, reg_offsets(engine), engine);
-
- if (close) { /* Close the batch; used mainly by live_lrc_layout() */
- *bbe = MI_BATCH_BUFFER_END;
- if (INTEL_GEN(engine->i915) >= 10)
- *bbe |= BIT(0);
- }
+ set_offsets(regs, reg_offsets(engine), engine, inhibit);
- init_common_reg_state(regs, engine, ring);
+ init_common_reg_state(regs, engine, ring, inhibit);
init_ppgtt_reg_state(regs, vm_alias(ce->vm));
init_wa_bb_reg_state(regs, engine,
INTEL_GEN(engine->i915) >= 12 ?
GEN12_CTX_BB_PER_CTX_PTR :
CTX_BB_PER_CTX_PTR);
+
+ __reset_stop_ring(regs, engine);
}
static int
@@ -4003,7 +4487,6 @@ populate_lr_context(struct intel_context *ce,
{
bool inhibit = true;
void *vaddr;
- u32 *regs;
int ret;
vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
@@ -4027,16 +4510,14 @@ populate_lr_context(struct intel_context *ce,
memcpy(vaddr, defaults, engine->context_size);
i915_gem_object_unpin_map(engine->default_state);
+ __set_bit(CONTEXT_VALID_BIT, &ce->flags);
inhibit = false;
}
/* The second page of the context object contains some fields which must
* be set up prior to the first execution. */
- regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
- execlists_init_reg_state(regs, ce, engine, ring, inhibit);
- if (inhibit)
- regs[CTX_CONTEXT_CONTROL] |=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+ execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
+ ce, engine, ring, inhibit);
ret = 0;
err_unpin_ctx:
@@ -4174,6 +4655,13 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
ve->siblings[0]);
}
+static int virtual_context_alloc(struct intel_context *ce)
+{
+ struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+
+ return __execlists_context_alloc(ce, ve->siblings[0]);
+}
+
static int virtual_context_pin(struct intel_context *ce)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
@@ -4211,6 +4699,8 @@ static void virtual_context_exit(struct intel_context *ce)
}
static const struct intel_context_ops virtual_context_ops = {
+ .alloc = virtual_context_alloc,
+
.pin = virtual_context_pin,
.unpin = execlists_context_unpin,
@@ -4237,10 +4727,9 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
mask = ve->siblings[0]->mask;
}
- GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
- ve->base.name,
- rq->fence.context, rq->fence.seqno,
- mask, ve->base.execlists.queue_priority_hint);
+ ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
+ rq->fence.context, rq->fence.seqno,
+ mask, ve->base.execlists.queue_priority_hint);
return mask;
}
@@ -4331,10 +4820,9 @@ static void virtual_submit_request(struct i915_request *rq)
struct i915_request *old;
unsigned long flags;
- GEM_TRACE("%s: rq=%llx:%lld\n",
- ve->base.name,
- rq->fence.context,
- rq->fence.seqno);
+ ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
+ rq->fence.context,
+ rq->fence.seqno);
GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
@@ -4402,8 +4890,7 @@ virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
}
struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs **siblings,
+intel_execlists_create_virtual(struct intel_engine_cs **siblings,
unsigned int count)
{
struct virtual_engine *ve;
@@ -4414,13 +4901,13 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
return ERR_PTR(-EINVAL);
if (count == 1)
- return intel_context_create(ctx, siblings[0]);
+ return intel_context_create(siblings[0]);
ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
if (!ve)
return ERR_PTR(-ENOMEM);
- ve->base.i915 = ctx->i915;
+ ve->base.i915 = siblings[0]->i915;
ve->base.gt = siblings[0]->gt;
ve->base.uncore = siblings[0]->uncore;
ve->base.id = -1;
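The virtual engine above is created with a single kzalloc(struct_size(ve, siblings, count), ...), i.e. one allocation covering struct virtual_engine plus its trailing flexible array of sibling pointers (struct_size() also guards against multiplication overflow, which the plain macro below does not). A standalone sketch of that allocation pattern, with a hypothetical struct and userspace calloc() standing in for kzalloc():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Fixed header plus a flexible trailing array, analogous to
 * struct virtual_engine and its siblings[]. */
struct toy_virtual {
        const char *name;
        unsigned int num_siblings;
        const char *siblings[];         /* flexible array member */
};

/* Userspace stand-in for struct_size(ve, siblings, count). */
#define toy_size(count) \
        (sizeof(struct toy_virtual) + (count) * sizeof(const char *))

int main(void)
{
        const char *engines[] = { "vcs0", "vcs1" };
        unsigned int count = 2;

        struct toy_virtual *ve = calloc(1, toy_size(count));
        if (!ve)
                return 1;

        ve->name = "virtual";
        ve->num_siblings = count;
        memcpy(ve->siblings, engines, count * sizeof(*engines));

        for (unsigned int n = 0; n < ve->num_siblings; n++)
                printf("%s -> %s\n", ve->name, ve->siblings[n]);

        free(ve);
        return 0;
}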
@@ -4449,7 +4936,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
intel_engine_init_breadcrumbs(&ve->base);
-
intel_engine_init_execlists(&ve->base);
ve->base.cops = &virtual_context_ops;
@@ -4465,7 +4951,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
virtual_submission_tasklet,
(unsigned long)ve);
- intel_context_init(&ve->context, ctx, &ve->base);
+ intel_context_init(&ve->context, &ve->base);
for (n = 0; n < count; n++) {
struct intel_engine_cs *sibling = siblings[n];
@@ -4532,12 +5018,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
- err = __execlists_context_alloc(&ve->context, siblings[0]);
- if (err)
- goto err_put;
-
- __set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
-
return &ve->context;
err_put:
@@ -4546,14 +5026,12 @@ err_put:
}
struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs *src)
+intel_execlists_clone_virtual(struct intel_engine_cs *src)
{
struct virtual_engine *se = to_virtual_engine(src);
struct intel_context *dst;
- dst = intel_execlists_create_virtual(ctx,
- se->siblings,
+ dst = intel_execlists_create_virtual(se->siblings,
se->num_siblings);
if (IS_ERR(dst))
return dst;
@@ -4725,10 +5203,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
restore_default_state(ce, engine);
/* Rerun the request; its payload has been neutered (if guilty). */
- ce->ring->head = head;
- intel_ring_update_space(ce->ring);
-
- __execlists_update_reg_state(ce, engine);
+ __execlists_update_reg_state(ce, engine, head);
}
bool