From de4e783a3f86f63b03303b463cd7ef885e14b476 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Apr 2015 16:20:35 +0100 Subject: drm/i915: Tidy batch pool logic Move the madvise logic out of the execbuffer main path into the relatively rare allocation path, making the execbuffer manipulation less fragile. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_cmd_parser.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_cmd_parser.c') diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 61ae8ff4eaed..9605ff8f2fcd 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -869,6 +869,9 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, batch_len + batch_start_offset > src_obj->base.size) return ERR_PTR(-E2BIG); + if (WARN_ON(dest_obj->pages_pin_count == 0)) + return ERR_PTR(-ENODEV); + ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush); if (ret) { DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n"); @@ -882,13 +885,6 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, goto unpin_src; } - ret = i915_gem_object_get_pages(dest_obj); - if (ret) { - DRM_DEBUG_DRIVER("CMD: Failed to get pages for shadow batch\n"); - goto unmap_src; - } - i915_gem_object_pin_pages(dest_obj); - ret = i915_gem_object_set_to_cpu_domain(dest_obj, true); if (ret) { DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n"); @@ -898,7 +894,6 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, dst = vmap_batch(dest_obj, 0, batch_len); if (!dst) { DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n"); - i915_gem_object_unpin_pages(dest_obj); ret = -ENOMEM; goto unmap_src; } @@ -1129,7 +1124,6 @@ int i915_parse_cmds(struct intel_engine_cs *ring, } vunmap(batch_base); - i915_gem_object_unpin_pages(shadow_batch_obj); return ret; } -- cgit v1.2.3-59-g8ed1b From 8a389cac1fd7dd6411c7e586ab098938a75870e0 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 29 May 2015 16:44:13 +0300 Subject: drm/i915: Fix command parser to validate multiple register access with the same command. Until now the software command checker assumed that commands could read or write at most a single register per packet. This is not necessarily the case, MI_LOAD_REGISTER_IMM expects a variable-length list of offset/value pairs and writes them in sequence. The previous code would only check whether the first entry was valid, effectively allowing userspace to write unrestricted registers of the MMIO space by sending a multi-register write with a legal first register, with potential security implications on Gen6 and 7 hardware. Fix it by extending the drm_i915_cmd_descriptor table to represent multi-register access and making validate_cmd() iterate for all register offsets present in the command packet. Signed-off-by: Francisco Jerez Reviewed-by: Zhigang Gong Signed-off-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_cmd_parser.c | 74 ++++++++++++++++++++-------------- drivers/gpu/drm/i915/i915_drv.h | 5 +++ 2 files changed, 48 insertions(+), 31 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_cmd_parser.c') diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 9605ff8f2fcd..5fc49bbcdb9d 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -123,7 +123,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, - .reg = { .offset = 1, .mask = 0x007FFFFC } ), + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W | B, .reg = { .offset = 1, .mask = 0x007FFFFC }, .bits = {{ @@ -934,7 +934,7 @@ bool i915_needs_cmd_parser(struct intel_engine_cs *ring) static bool check_cmd(const struct intel_engine_cs *ring, const struct drm_i915_cmd_descriptor *desc, - const u32 *cmd, + const u32 *cmd, u32 length, const bool is_master, bool *oacontrol_set) { @@ -950,38 +950,49 @@ static bool check_cmd(const struct intel_engine_cs *ring, } if (desc->flags & CMD_DESC_REGISTER) { - u32 reg_addr = cmd[desc->reg.offset] & desc->reg.mask; - /* - * OACONTROL requires some special handling for writes. We - * want to make sure that any batch which enables OA also - * disables it before the end of the batch. The goal is to - * prevent one process from snooping on the perf data from - * another process. To do that, we need to check the value - * that will be written to the register. Hence, limit - * OACONTROL writes to only MI_LOAD_REGISTER_IMM commands. + * Get the distance between individual register offset + * fields if the command can perform more than one + * access at a time. */ - if (reg_addr == OACONTROL) { - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { - DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n"); - return false; + const u32 step = desc->reg.step ? desc->reg.step : length; + u32 offset; + + for (offset = desc->reg.offset; offset < length; + offset += step) { + const u32 reg_addr = cmd[offset] & desc->reg.mask; + + /* + * OACONTROL requires some special handling for + * writes. We want to make sure that any batch which + * enables OA also disables it before the end of the + * batch. The goal is to prevent one process from + * snooping on the perf data from another process. To do + * that, we need to check the value that will be written + * to the register. Hence, limit OACONTROL writes to + * only MI_LOAD_REGISTER_IMM commands. + */ + if (reg_addr == OACONTROL) { + if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { + DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n"); + return false; + } + + if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1)) + *oacontrol_set = (cmd[offset + 1] != 0); } - if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1)) - *oacontrol_set = (cmd[2] != 0); - } - - if (!valid_reg(ring->reg_table, - ring->reg_count, reg_addr)) { - if (!is_master || - !valid_reg(ring->master_reg_table, - ring->master_reg_count, - reg_addr)) { - DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", - reg_addr, - *cmd, - ring->id); - return false; + if (!valid_reg(ring->reg_table, + ring->reg_count, reg_addr)) { + if (!is_master || + !valid_reg(ring->master_reg_table, + ring->master_reg_count, + reg_addr)) { + DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", + reg_addr, *cmd, + ring->id); + return false; + } } } } @@ -1105,7 +1116,8 @@ int i915_parse_cmds(struct intel_engine_cs *ring, break; } - if (!check_cmd(ring, desc, cmd, is_master, &oacontrol_set)) { + if (!check_cmd(ring, desc, cmd, length, is_master, + &oacontrol_set)) { ret = -EINVAL; break; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 72f5a3f9dbf2..542fac628b28 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2300,10 +2300,15 @@ struct drm_i915_cmd_descriptor { * Describes where to find a register address in the command to check * against the ring's register whitelist. Only valid if flags has the * CMD_DESC_REGISTER bit set. + * + * A non-zero step value implies that the command may access multiple + * registers in sequence (e.g. LRI), in that case step gives the + * distance in dwords between individual offset fields. */ struct { u32 offset; u32 mask; + u32 step; } reg; #define MAX_CMD_DESC_BITMASKS 3 -- cgit v1.2.3-59-g8ed1b From c1091b25f547d45ffdcb443a3790aed652fb9b48 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 29 May 2015 16:44:14 +0300 Subject: drm/i915: Extend the parser to check register writes against a mask/value pair. In some cases it might be unnecessary or dangerous to give userspace the right to write arbitrary values to some register, even though it might be desirable to give it control of some of its bits. This patch extends the register whitelist entries to contain a mask/value pair in addition to the register offset. For registers with non-zero mask, any LRM writes and LRI writes where the bits of the immediate given by the mask don't match the specified value will be rejected. This will be used in my next patch to grant userspace partial write access to some sensitive registers. Signed-off-by: Francisco Jerez Reviewed-by: Zhigang Gong Signed-off-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_cmd_parser.c | 138 +++++++++++++++++++++----------- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +- 2 files changed, 96 insertions(+), 47 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_cmd_parser.c') diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 5fc49bbcdb9d..cafa3e2f16fc 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -395,16 +395,38 @@ static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { /* * Register whitelists, sorted by increasing register offset. + */ + +/* + * An individual whitelist entry granting access to register addr. If + * mask is non-zero the argument of immediate register writes will be + * AND-ed with mask, and the command will be rejected if the result + * doesn't match value. + * + * Registers with non-zero mask are only allowed to be written using + * LRI. + */ +struct drm_i915_reg_descriptor { + u32 addr; + u32 mask; + u32 value; +}; + +/* Convenience macro for adding 32-bit registers. */ +#define REG32(address, ...) \ + { .addr = address, __VA_ARGS__ } + +/* + * Convenience macro for adding 64-bit registers. * * Some registers that userspace accesses are 64 bits. The register * access commands only allow 32-bit accesses. Hence, we have to include * entries for both halves of the 64-bit registers. */ +#define REG64(addr) \ + REG32(addr), REG32(addr + sizeof(u32)) -/* Convenience macro for adding 64-bit registers */ -#define REG64(addr) (addr), (addr + sizeof(u32)) - -static const u32 gen7_render_regs[] = { +static const struct drm_i915_reg_descriptor gen7_render_regs[] = { REG64(GPGPU_THREADS_DISPATCHED), REG64(HS_INVOCATION_COUNT), REG64(DS_INVOCATION_COUNT), @@ -417,15 +439,15 @@ static const u32 gen7_render_regs[] = { REG64(CL_PRIMITIVES_COUNT), REG64(PS_INVOCATION_COUNT), REG64(PS_DEPTH_COUNT), - OACONTROL, /* Only allowed for LRI and SRM. See below. */ + REG32(OACONTROL), /* Only allowed for LRI and SRM. See below. */ REG64(MI_PREDICATE_SRC0), REG64(MI_PREDICATE_SRC1), - GEN7_3DPRIM_END_OFFSET, - GEN7_3DPRIM_START_VERTEX, - GEN7_3DPRIM_VERTEX_COUNT, - GEN7_3DPRIM_INSTANCE_COUNT, - GEN7_3DPRIM_START_INSTANCE, - GEN7_3DPRIM_BASE_VERTEX, + REG32(GEN7_3DPRIM_END_OFFSET), + REG32(GEN7_3DPRIM_START_VERTEX), + REG32(GEN7_3DPRIM_VERTEX_COUNT), + REG32(GEN7_3DPRIM_INSTANCE_COUNT), + REG32(GEN7_3DPRIM_START_INSTANCE), + REG32(GEN7_3DPRIM_BASE_VERTEX), REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)), REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)), REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)), @@ -434,33 +456,34 @@ static const u32 gen7_render_regs[] = { REG64(GEN7_SO_PRIM_STORAGE_NEEDED(1)), REG64(GEN7_SO_PRIM_STORAGE_NEEDED(2)), REG64(GEN7_SO_PRIM_STORAGE_NEEDED(3)), - GEN7_SO_WRITE_OFFSET(0), - GEN7_SO_WRITE_OFFSET(1), - GEN7_SO_WRITE_OFFSET(2), - GEN7_SO_WRITE_OFFSET(3), - GEN7_L3SQCREG1, - GEN7_L3CNTLREG2, - GEN7_L3CNTLREG3, + REG32(GEN7_SO_WRITE_OFFSET(0)), + REG32(GEN7_SO_WRITE_OFFSET(1)), + REG32(GEN7_SO_WRITE_OFFSET(2)), + REG32(GEN7_SO_WRITE_OFFSET(3)), + REG32(GEN7_L3SQCREG1), + REG32(GEN7_L3CNTLREG2), + REG32(GEN7_L3CNTLREG3), }; -static const u32 gen7_blt_regs[] = { - BCS_SWCTRL, +static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { + REG32(BCS_SWCTRL), }; -static const u32 ivb_master_regs[] = { - FORCEWAKE_MT, - DERRMR, - GEN7_PIPE_DE_LOAD_SL(PIPE_A), - GEN7_PIPE_DE_LOAD_SL(PIPE_B), - GEN7_PIPE_DE_LOAD_SL(PIPE_C), +static const struct drm_i915_reg_descriptor ivb_master_regs[] = { + REG32(FORCEWAKE_MT), + REG32(DERRMR), + REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)), + REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)), + REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)), }; -static const u32 hsw_master_regs[] = { - FORCEWAKE_MT, - DERRMR, +static const struct drm_i915_reg_descriptor hsw_master_regs[] = { + REG32(FORCEWAKE_MT), + REG32(DERRMR), }; #undef REG64 +#undef REG32 static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) { @@ -550,14 +573,16 @@ static bool validate_cmds_sorted(struct intel_engine_cs *ring, return ret; } -static bool check_sorted(int ring_id, const u32 *reg_table, int reg_count) +static bool check_sorted(int ring_id, + const struct drm_i915_reg_descriptor *reg_table, + int reg_count) { int i; u32 previous = 0; bool ret = true; for (i = 0; i < reg_count; i++) { - u32 curr = reg_table[i]; + u32 curr = reg_table[i].addr; if (curr < previous) { DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n", @@ -804,18 +829,20 @@ find_cmd(struct intel_engine_cs *ring, return default_desc; } -static bool valid_reg(const u32 *table, int count, u32 addr) +static const struct drm_i915_reg_descriptor * +find_reg(const struct drm_i915_reg_descriptor *table, + int count, u32 addr) { - if (table && count != 0) { + if (table) { int i; for (i = 0; i < count; i++) { - if (table[i] == addr) - return true; + if (table[i].addr == addr) + return &table[i]; } } - return false; + return NULL; } static u32 *vmap_batch(struct drm_i915_gem_object *obj, @@ -961,6 +988,20 @@ static bool check_cmd(const struct intel_engine_cs *ring, for (offset = desc->reg.offset; offset < length; offset += step) { const u32 reg_addr = cmd[offset] & desc->reg.mask; + const struct drm_i915_reg_descriptor *reg = + find_reg(ring->reg_table, ring->reg_count, + reg_addr); + + if (!reg && is_master) + reg = find_reg(ring->master_reg_table, + ring->master_reg_count, + reg_addr); + + if (!reg) { + DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", + reg_addr, *cmd, ring->id); + return false; + } /* * OACONTROL requires some special handling for @@ -982,15 +1023,22 @@ static bool check_cmd(const struct intel_engine_cs *ring, *oacontrol_set = (cmd[offset + 1] != 0); } - if (!valid_reg(ring->reg_table, - ring->reg_count, reg_addr)) { - if (!is_master || - !valid_reg(ring->master_reg_table, - ring->master_reg_count, - reg_addr)) { - DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", - reg_addr, *cmd, - ring->id); + /* + * Check the value written to the register against the + * allowed mask/value pair given in the whitelist entry. + */ + if (reg->mask) { + if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { + DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n", + reg_addr); + return false; + } + + if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) && + (offset + 2 > length || + (cmd[offset + 1] & reg->mask) != reg->value)) { + DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n", + reg_addr); return false; } } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 39f6dfc0ee54..e539314ae87e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -118,6 +118,7 @@ struct intel_ringbuffer { }; struct intel_context; +struct drm_i915_reg_descriptor; struct intel_engine_cs { const char *name; @@ -300,14 +301,14 @@ struct intel_engine_cs { /* * Table of registers allowed in commands that read/write registers. */ - const u32 *reg_table; + const struct drm_i915_reg_descriptor *reg_table; int reg_count; /* * Table of registers allowed in commands that read/write registers, but * only from the DRM master. */ - const u32 *master_reg_table; + const struct drm_i915_reg_descriptor *master_reg_table; int master_reg_count; /* -- cgit v1.2.3-59-g8ed1b From 41d232b7c891fcecb358784645633850936a95c7 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 29 May 2015 16:44:15 +0300 Subject: drm/i915: Add SCRATCH1 and ROW_CHICKEN3 to the register whitelist. Only bit 27 of SCRATCH1 and bit 6 of ROW_CHICKEN3 are allowed to be set because of security-sensitive bits we don't want userspace to mess with. On HSW hardware the whitelisted bits control whether atomic read-modify-write operations are performed on L3 or on GTI, and when set to L3 (which can be 10x-30x better performing than on GTI, depending on the application) require great care to avoid a system hang, so we currently program them to be handled on GTI by default. Beignet can immediately start taking advantage of this change to enable L3 atomics. Mesa should eventually switch to L3 atomics too, but a number of non-trivial changes are still required so it will continue using GTI atomics for now. Signed-off-by: Francisco Jerez Reviewed-by: Zhigang Gong Signed-off-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_cmd_parser.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_cmd_parser.c') diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index cafa3e2f16fc..306d9e4e5cf3 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -463,6 +463,13 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = { REG32(GEN7_L3SQCREG1), REG32(GEN7_L3CNTLREG2), REG32(GEN7_L3CNTLREG3), + REG32(HSW_SCRATCH1, + .mask = ~HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE, + .value = 0), + REG32(HSW_ROW_CHICKEN3, + .mask = ~(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE << 16 | + HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), + .value = 0), }; static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { -- cgit v1.2.3-59-g8ed1b