aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorAlex Deucher <alexander.deucher@amd.com>2019-10-30 14:40:09 -0400
committerAlex Deucher <alexander.deucher@amd.com>2019-11-06 16:27:48 -0500
commitef177d11d696fe0eea36ef74edfeaeb62cd89c35 (patch)
tree0afa217d955e8374209ca7a18dfeb93417269ed0 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parentdrm/radeon: fix si_enable_smc_cac() failed issue (diff)
downloadwireguard-linux-ef177d11d696fe0eea36ef74edfeaeb62cd89c35.tar.xz
wireguard-linux-ef177d11d696fe0eea36ef74edfeaeb62cd89c35.zip
drm/amdgpu: Improve RAS documentation (v2)
Clarify some areas, clean up formatting, add section for unrecoverable error handling. v2: fix grammatical errors Reviewed-by: Yong Zhao <yong.zhao@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c40
1 files changed, 33 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index dab90c280476..404483437bd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -220,7 +220,7 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
* As their names indicate, inject operation will write the
* value to the address.
*
- * Second member: struct ras_debug_if::op.
+ * The second member: struct ras_debug_if::op.
* It has three kinds of operations.
*
* - 0: disable RAS on the block. Take ::head as its data.
@@ -228,14 +228,20 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
* - 2: inject errors on the block. Take ::inject as its data.
*
* How to use the interface?
- * programs:
- * copy the struct ras_debug_if in your codes and initialize it.
- * write the struct to the control node.
+ *
+ * Programs
+ *
+ * Copy the struct ras_debug_if in your codes and initialize it.
+ * Write the struct to the control node.
+ *
+ * Shells
*
* .. code-block:: bash
*
* echo op block [error [sub_block address value]] > .../ras/ras_ctrl
*
+ * Parameters:
+ *
* op: disable, enable, inject
* disable: only block is needed
* enable: block and error are needed
@@ -265,8 +271,10 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
*
* .. note::
- * Operation is only allowed on blocks which are supported.
+ * Operations are only allowed on blocks which are supported.
* Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
+ * to see which blocks support RAS on a particular asic.
+ *
*/
static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
@@ -322,7 +330,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
* DOC: AMDGPU RAS debugfs EEPROM table reset interface
*
* Some boards contain an EEPROM which is used to persistently store a list of
- * bad pages containing ECC errors detected in vram. This interface provides
+ * bad pages which experiences ECC errors in vram. This interface provides
* a way to reset the EEPROM, e.g., after testing error injection.
*
* Usage:
@@ -362,7 +370,7 @@ static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
/**
* DOC: AMDGPU RAS sysfs Error Count Interface
*
- * It allows user to read the error count for each IP block on the gpu through
+ * It allows the user to read the error count for each IP block on the gpu through
* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
*
* It outputs the multiple lines which report the uncorrected (ue) and corrected
@@ -1027,6 +1035,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
}
/* sysfs end */
+/**
+ * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
+ *
+ * Normally when there is an uncorrectable error, the driver will reset
+ * the GPU to recover. However, in the event of an unrecoverable error,
+ * the driver provides an interface to reboot the system automatically
+ * in that event.
+ *
+ * The following file in debugfs provides that interface:
+ * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ * echo true > .../ras/auto_reboot
+ *
+ */
/* debugfs begin */
static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{