Diffstat:
-rw-r--r--  drivers/gpu/drm/i915/i915_cmd_parser.c | 318
1 file changed, 171 insertions(+), 147 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index f24096e27bef..a0e437aa65b7 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -235,7 +235,7 @@ static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = {
/*
* MI_BATCH_BUFFER_START requires some special handling. It's not
* really a 'skip' action but it doesn't seem like it's worth adding
- * a new action. See i915_parse_cmds().
+ * a new action. See intel_engine_cmd_parser().
*/
CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ),
};
@@ -731,7 +731,7 @@ static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
return 0xFF;
}
- DRM_DEBUG_DRIVER("CMD: Abnormal rcs cmd length! 0x%08X\n", cmd_header);
+ DRM_DEBUG("CMD: Abnormal rcs cmd length! 0x%08X\n", cmd_header);
return 0;
}
@@ -754,7 +754,7 @@ static u32 gen7_bsd_get_cmd_length_mask(u32 cmd_header)
return 0xFF;
}
- DRM_DEBUG_DRIVER("CMD: Abnormal bsd cmd length! 0x%08X\n", cmd_header);
+ DRM_DEBUG("CMD: Abnormal bsd cmd length! 0x%08X\n", cmd_header);
return 0;
}
@@ -767,7 +767,7 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
else if (client == INSTR_BC_CLIENT)
return 0xFF;
- DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
+ DRM_DEBUG("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
return 0;
}
@@ -778,7 +778,7 @@ static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header)
if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT)
return 0xFF;
- DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
+ DRM_DEBUG("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
return 0;
}
@@ -1127,79 +1127,71 @@ find_reg(const struct intel_engine_cs *engine, u32 addr)
/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
struct drm_i915_gem_object *src_obj,
- u32 batch_start_offset,
- u32 batch_len,
- bool *needs_clflush_after)
+ u32 offset, u32 length)
{
- unsigned int src_needs_clflush;
- unsigned int dst_needs_clflush;
+ bool needs_clflush;
void *dst, *src;
int ret;
- ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
- if (ret)
- return ERR_PTR(ret);
-
dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
- i915_gem_object_finish_access(dst_obj);
if (IS_ERR(dst))
return dst;
- ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
+ ret = i915_gem_object_pin_pages(src_obj);
if (ret) {
i915_gem_object_unpin_map(dst_obj);
return ERR_PTR(ret);
}
+ needs_clflush =
+ !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
+
src = ERR_PTR(-ENODEV);
- if (src_needs_clflush &&
- i915_can_memcpy_from_wc(NULL, batch_start_offset, 0)) {
+ if (needs_clflush && i915_has_memcpy_from_wc()) {
src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
if (!IS_ERR(src)) {
- i915_memcpy_from_wc(dst,
- src + batch_start_offset,
- ALIGN(batch_len, 16));
+ i915_unaligned_memcpy_from_wc(dst,
+ src + offset,
+ length);
i915_gem_object_unpin_map(src_obj);
}
}
if (IS_ERR(src)) {
void *ptr;
- int offset, n;
+ int x, n;
- offset = offset_in_page(batch_start_offset);
-
- /* We can avoid clflushing partial cachelines before the write
+ /*
+ * We can avoid clflushing partial cachelines before the write
* if we only ever write full cache-lines. Since we know that
* both the source and destination are in multiples of
* PAGE_SIZE, we can simply round up to the next cacheline.
* We don't care about copying too much here as we only
* validate up to the end of the batch.
*/
- if (dst_needs_clflush & CLFLUSH_BEFORE)
- batch_len = roundup(batch_len,
- boot_cpu_data.x86_clflush_size);
+ if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+ length = round_up(length,
+ boot_cpu_data.x86_clflush_size);
ptr = dst;
- for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
- int len = min_t(int, batch_len, PAGE_SIZE - offset);
+ x = offset_in_page(offset);
+ for (n = offset >> PAGE_SHIFT; length; n++) {
+ int len = min_t(int, length, PAGE_SIZE - x);
src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
- if (src_needs_clflush)
- drm_clflush_virt_range(src + offset, len);
- memcpy(ptr, src + offset, len);
+ if (needs_clflush)
+ drm_clflush_virt_range(src + x, len);
+ memcpy(ptr, src + x, len);
kunmap_atomic(src);
ptr += len;
- batch_len -= len;
- offset = 0;
+ length -= len;
+ x = 0;
}
}
- i915_gem_object_finish_access(src_obj);
+ i915_gem_object_unpin_pages(src_obj);
/* dst_obj is returned with vmap pinned */
- *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
-
return dst;
}
@@ -1211,7 +1203,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return true;
if (desc->flags & CMD_DESC_REJECT) {
- DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
+ DRM_DEBUG("CMD: Rejected command: 0x%08X\n", *cmd);
return false;
}
@@ -1231,8 +1223,8 @@ static bool check_cmd(const struct intel_engine_cs *engine,
find_reg(engine, reg_addr);
if (!reg) {
- DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
- reg_addr, *cmd, engine->name);
+ DRM_DEBUG("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
+ reg_addr, *cmd, engine->name);
return false;
}
@@ -1242,22 +1234,22 @@ static bool check_cmd(const struct intel_engine_cs *engine,
*/
if (reg->mask) {
if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
- DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n",
- reg_addr);
+ DRM_DEBUG("CMD: Rejected LRM to masked register 0x%08X\n",
+ reg_addr);
return false;
}
if (desc->cmd.value == MI_LOAD_REGISTER_REG) {
- DRM_DEBUG_DRIVER("CMD: Rejected LRR to masked register 0x%08X\n",
- reg_addr);
+ DRM_DEBUG("CMD: Rejected LRR to masked register 0x%08X\n",
+ reg_addr);
return false;
}
if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
(offset + 2 > length ||
(cmd[offset + 1] & reg->mask) != reg->value)) {
- DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
- reg_addr);
+ DRM_DEBUG("CMD: Rejected LRI to masked register 0x%08X\n",
+ reg_addr);
return false;
}
}
@@ -1284,8 +1276,8 @@ static bool check_cmd(const struct intel_engine_cs *engine,
}
if (desc->bits[i].offset >= length) {
- DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X, too short to check bitmask (%s)\n",
- *cmd, engine->name);
+ DRM_DEBUG("CMD: Rejected command 0x%08X, too short to check bitmask (%s)\n",
+ *cmd, engine->name);
return false;
}
@@ -1293,11 +1285,11 @@ static bool check_cmd(const struct intel_engine_cs *engine,
desc->bits[i].mask;
if (dword != desc->bits[i].expected) {
- DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n",
- *cmd,
- desc->bits[i].mask,
- desc->bits[i].expected,
- dword, engine->name);
+ DRM_DEBUG("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n",
+ *cmd,
+ desc->bits[i].mask,
+ desc->bits[i].expected,
+ dword, engine->name);
return false;
}
}
@@ -1306,17 +1298,17 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return true;
}
-static int check_bbstart(const struct i915_gem_context *ctx,
- u32 *cmd, u32 offset, u32 length,
- u32 batch_len,
- u64 batch_start,
- u64 shadow_batch_start)
+static int check_bbstart(u32 *cmd, u32 offset, u32 length,
+ u32 batch_length,
+ u64 batch_addr,
+ u64 shadow_addr,
+ const unsigned long *jump_whitelist)
{
u64 jump_offset, jump_target;
u32 target_cmd_offset, target_cmd_index;
/* For igt compatibility on older platforms */
- if (CMDPARSER_USES_GGTT(ctx->i915)) {
+ if (!jump_whitelist) {
DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n");
return -EACCES;
}
@@ -1327,14 +1319,14 @@ static int check_bbstart(const struct i915_gem_context *ctx,
return -EINVAL;
}
- jump_target = *(u64*)(cmd+1);
- jump_offset = jump_target - batch_start;
+ jump_target = *(u64 *)(cmd + 1);
+ jump_offset = jump_target - batch_addr;
/*
* Any underflow of jump_target is guaranteed to be outside the range
* of a u32, so >= test catches both too large and too small
*/
- if (jump_offset >= batch_len) {
+ if (jump_offset >= batch_length) {
DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n",
jump_target);
return -EINVAL;
@@ -1342,20 +1334,20 @@ static int check_bbstart(const struct i915_gem_context *ctx,
/*
* This cannot overflow a u32 because we already checked jump_offset
- * is within the BB, and the batch_len is a u32
+ * is within the BB, and the batch_length is a u32
*/
target_cmd_offset = lower_32_bits(jump_offset);
target_cmd_index = target_cmd_offset / sizeof(u32);
- *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset;
+ *(u64 *)(cmd + 1) = shadow_addr + target_cmd_offset;
if (target_cmd_index == offset)
return 0;
- if (ctx->jump_whitelist_cmds <= target_cmd_index) {
- DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n");
- return -EINVAL;
- } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) {
+ if (IS_ERR(jump_whitelist))
+ return PTR_ERR(jump_whitelist);
+
+ if (!test_bit(target_cmd_index, jump_whitelist)) {
DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
jump_target);
return -EINVAL;
@@ -1364,54 +1356,40 @@ static int check_bbstart(const struct i915_gem_context *ctx,
return 0;
}
-static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len)
+static unsigned long *alloc_whitelist(u32 batch_length)
{
- const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32));
- const u32 exact_size = BITS_TO_LONGS(batch_cmds);
- u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds));
- unsigned long *next_whitelist;
+ unsigned long *jmp;
- if (CMDPARSER_USES_GGTT(ctx->i915))
- return;
-
- if (batch_cmds <= ctx->jump_whitelist_cmds) {
- bitmap_zero(ctx->jump_whitelist, batch_cmds);
- return;
- }
-
-again:
- next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL);
- if (next_whitelist) {
- kfree(ctx->jump_whitelist);
- ctx->jump_whitelist = next_whitelist;
- ctx->jump_whitelist_cmds =
- next_size * BITS_PER_BYTE * sizeof(long);
- return;
- }
-
- if (next_size > exact_size) {
- next_size = exact_size;
- goto again;
- }
+ /*
+ * We expect batch_length to be less than 256KiB for known users,
+ * i.e. we need at most an 8KiB bitmap allocation which should be
+ * reasonably cheap due to kmalloc caches.
+ */
- DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
- bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds);
+ /* Prefer to report transient allocation failure rather than hit oom */
+ jmp = bitmap_zalloc(DIV_ROUND_UP(batch_length, sizeof(u32)),
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+ if (!jmp)
+ return ERR_PTR(-ENOMEM);
- return;
+ return jmp;
}
#define LENGTH_BIAS 2
+static bool shadow_needs_clflush(struct drm_i915_gem_object *obj)
+{
+ return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
+}
+
/**
- * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
- * @ctx: the context in which the batch is to execute
+ * intel_engine_cmd_parser() - parse a batch buffer for privilege violations
* @engine: the engine on which the batch is to execute
- * @batch_obj: the batch buffer in question
- * @batch_start: Canonical base address of batch
- * @batch_start_offset: byte offset in the batch at which execution starts
- * @batch_len: length of the commands in batch_obj
- * @shadow_batch_obj: copy of the batch buffer in question
- * @shadow_batch_start: Canonical base address of shadow_batch_obj
+ * @batch: the batch buffer in question
+ * @batch_offset: byte offset in the batch at which execution starts
+ * @batch_length: length of the commands in batch_obj
+ * @shadow: validated copy of the batch buffer in question
+ * @trampoline: whether to emit a conditional trampoline at the end of the batch
*
* Parses the specified batch buffer looking for privilege violations as
* described in the overview.
@@ -1419,38 +1397,46 @@ again:
* Return: non-zero if the parser finds violations or otherwise fails; -EACCES
* if the batch appears legal but should use hardware parsing
*/
-
-int intel_engine_cmd_parser(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine,
- struct drm_i915_gem_object *batch_obj,
- u64 batch_start,
- u32 batch_start_offset,
- u32 batch_len,
- struct drm_i915_gem_object *shadow_batch_obj,
- u64 shadow_batch_start)
+int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+ struct i915_vma *batch,
+ u32 batch_offset,
+ u32 batch_length,
+ struct i915_vma *shadow,
+ bool trampoline)
{
u32 *cmd, *batch_end, offset = 0;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = &default_desc;
- bool needs_clflush_after = false;
+ unsigned long *jump_whitelist;
+ u64 batch_addr, shadow_addr;
int ret = 0;
- cmd = copy_batch(shadow_batch_obj, batch_obj,
- batch_start_offset, batch_len,
- &needs_clflush_after);
+ GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd)));
+ GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd)));
+ GEM_BUG_ON(range_overflows_t(u64, batch_offset, batch_length,
+ batch->size));
+ GEM_BUG_ON(!batch_length);
+
+ cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length);
if (IS_ERR(cmd)) {
- DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
+ DRM_DEBUG("CMD: Failed to copy batch\n");
return PTR_ERR(cmd);
}
- init_whitelist(ctx, batch_len);
+ jump_whitelist = NULL;
+ if (!trampoline)
+ /* Defer failure until attempted use */
+ jump_whitelist = alloc_whitelist(batch_length);
+
+ shadow_addr = gen8_canonical_addr(shadow->node.start);
+ batch_addr = gen8_canonical_addr(batch->node.start + batch_offset);
/*
* We use the batch length as size because the shadow object is as
* large or larger and copy_batch() will write MI_NOPs to the extra
* space. Parsing should be faster in some cases this way.
*/
- batch_end = cmd + (batch_len / sizeof(*batch_end));
+ batch_end = cmd + batch_length / sizeof(*batch_end);
do {
u32 length;
@@ -1459,61 +1445,99 @@ int intel_engine_cmd_parser(struct i915_gem_context *ctx,
desc = find_cmd(engine, *cmd, desc, &default_desc);
if (!desc) {
- DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
- *cmd);
+ DRM_DEBUG("CMD: Unrecognized command: 0x%08X\n", *cmd);
ret = -EINVAL;
- goto err;
+ break;
}
if (desc->flags & CMD_DESC_FIXED)
length = desc->length.fixed;
else
- length = ((*cmd & desc->length.mask) + LENGTH_BIAS);
+ length = (*cmd & desc->length.mask) + LENGTH_BIAS;
if ((batch_end - cmd) < length) {
- DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n",
- *cmd,
- length,
- batch_end - cmd);
+ DRM_DEBUG("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n",
+ *cmd,
+ length,
+ batch_end - cmd);
ret = -EINVAL;
- goto err;
+ break;
}
if (!check_cmd(engine, desc, cmd, length)) {
ret = -EACCES;
- goto err;
+ break;
}
if (desc->cmd.value == MI_BATCH_BUFFER_START) {
- ret = check_bbstart(ctx, cmd, offset, length,
- batch_len, batch_start,
- shadow_batch_start);
-
- if (ret)
- goto err;
+ ret = check_bbstart(cmd, offset, length, batch_length,
+ batch_addr, shadow_addr,
+ jump_whitelist);
break;
}
- if (ctx->jump_whitelist_cmds > offset)
- set_bit(offset, ctx->jump_whitelist);
+ if (!IS_ERR_OR_NULL(jump_whitelist))
+ __set_bit(offset, jump_whitelist);
cmd += length;
offset += length;
if (cmd >= batch_end) {
- DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
+ DRM_DEBUG("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
ret = -EINVAL;
- goto err;
+ break;
}
} while (1);
- if (needs_clflush_after) {
- void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
+ if (trampoline) {
+ /*
+ * With the trampoline, the shadow is executed twice.
+ *
+ * 1 - starting at offset 0, in privileged mode
+ * 2 - starting at offset batch_len, as non-privileged
+ *
+ * Only if the batch is valid and safe to execute, do we
+ * allow the first privileged execution to proceed. If not,
+ * we terminate the first batch and use the second batchbuffer
+ * entry to chain to the original unsafe non-privileged batch,
+ * leaving it to the HW to validate.
+ */
+ *batch_end = MI_BATCH_BUFFER_END;
+
+ if (ret) {
+ /* Batch unsafe to execute with privileges, cancel! */
+ cmd = page_mask_bits(shadow->obj->mm.mapping);
+ *cmd = MI_BATCH_BUFFER_END;
+
+ /* If batch is unsafe but valid, jump to the original */
+ if (ret == -EACCES) {
+ unsigned int flags;
+
+ flags = MI_BATCH_NON_SECURE_I965;
+ if (IS_HASWELL(engine->i915))
+ flags = MI_BATCH_NON_SECURE_HSW;
+
+ GEM_BUG_ON(!IS_GEN_RANGE(engine->i915, 6, 7));
+ __gen6_emit_bb_start(batch_end,
+ batch_addr,
+ flags);
+
+ ret = 0; /* allow execution */
+ }
+ }
+
+ if (shadow_needs_clflush(shadow->obj))
+ drm_clflush_virt_range(batch_end, 8);
+ }
+
+ if (shadow_needs_clflush(shadow->obj)) {
+ void *ptr = page_mask_bits(shadow->obj->mm.mapping);
drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
}
-err:
- i915_gem_object_unpin_map(shadow_batch_obj);
+ if (!IS_ERR_OR_NULL(jump_whitelist))
+ kfree(jump_whitelist);
+ i915_gem_object_unpin_map(shadow->obj);
return ret;
}
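
As an aside for readers new to this code: the patch replaces the per-context jump whitelist with a bitmap allocated per parse (alloc_whitelist()), in which the parser marks the dword offset of every command it validates, and check_bbstart() only permits an MI_BATCH_BUFFER_START to branch to an offset that was marked earlier. The standalone userspace program below is a minimal sketch of that idea only; the bitmap layout and the helper names mark_cmd() and check_jump() are assumptions made for illustration and are not the kernel's API.

/* Userspace model of the parser's jump-whitelist check (illustrative only). */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define BITS_PER_WORD 64

/* Mark the dword offset of a command the parser has validated. */
static void mark_cmd(uint64_t *whitelist, uint32_t dword_offset)
{
	whitelist[dword_offset / BITS_PER_WORD] |=
		1ull << (dword_offset % BITS_PER_WORD);
}

/* A BB_START may only target a previously validated command. */
static int check_jump(const uint64_t *whitelist, uint32_t batch_length,
		      uint64_t batch_addr, uint64_t jump_target)
{
	uint64_t jump_offset = jump_target - batch_addr;
	uint32_t dword;

	/* Underflow wraps to a huge value, so >= catches both directions. */
	if (jump_offset >= batch_length)
		return -1;

	dword = jump_offset / sizeof(uint32_t);
	if (!(whitelist[dword / BITS_PER_WORD] &
	      (1ull << (dword % BITS_PER_WORD))))
		return -1;

	return 0;
}

int main(void)
{
	uint32_t batch_length = 64;	/* bytes */
	uint32_t ncmds = batch_length / sizeof(uint32_t);
	uint64_t *whitelist;

	whitelist = calloc((ncmds + BITS_PER_WORD - 1) / BITS_PER_WORD,
			   sizeof(*whitelist));
	if (!whitelist)
		return 1;

	mark_cmd(whitelist, 0);	/* e.g. command starting at dword 0 validated */
	mark_cmd(whitelist, 4);	/* command starting at dword 4 validated */

	printf("jump to +16 bytes: %s\n",
	       check_jump(whitelist, batch_length, 0x1000, 0x1010) ?
	       "rejected" : "ok");
	printf("jump to +8 bytes:  %s\n",
	       check_jump(whitelist, batch_length, 0x1000, 0x1008) ?
	       "rejected" : "ok");

	free(whitelist);
	return 0;
}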