aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/misc/habanalabs/firmware_if.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/firmware_if.c')
-rw-r--r--drivers/misc/habanalabs/firmware_if.c297
1 files changed, 295 insertions, 2 deletions
diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c
index f5bd03171dac..baf790cf4b78 100644
--- a/drivers/misc/habanalabs/firmware_if.c
+++ b/drivers/misc/habanalabs/firmware_if.c
@@ -6,20 +6,22 @@
*/
#include "habanalabs.h"
+#include "include/hl_boot_if.h"
#include <linux/firmware.h>
#include <linux/genalloc.h>
#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/slab.h>
/**
- * hl_fw_push_fw_to_device() - Push FW code to device.
+ * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
* @hdev: pointer to hl_device structure.
*
* Copy fw code from firmware file to device memory.
*
* Return: 0 on success, non-zero for failure.
*/
-int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
+int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
void __iomem *dst)
{
const struct firmware *fw;
@@ -129,6 +131,68 @@ out:
return rc;
}
+int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
+{
+ struct armcp_packet pkt;
+ long result;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.value = cpu_to_le64(event_type);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_DEVICE_TIMEOUT_USEC, &result);
+
+ if (rc)
+ dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
+
+ return rc;
+}
+
+int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
+ size_t irq_arr_size)
+{
+ struct armcp_unmask_irq_arr_packet *pkt;
+ size_t total_pkt_size;
+ long result;
+ int rc;
+
+ total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
+ irq_arr_size;
+
+ /* data should be aligned to 8 bytes in order to ArmCP to copy it */
+ total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
+
+ /* total_pkt_size is casted to u16 later on */
+ if (total_pkt_size > USHRT_MAX) {
+ dev_err(hdev->dev, "too many elements in IRQ array\n");
+ return -EINVAL;
+ }
+
+ pkt = kzalloc(total_pkt_size, GFP_KERNEL);
+ if (!pkt)
+ return -ENOMEM;
+
+ pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
+ memcpy(&pkt->irqs, irq_arr, irq_arr_size);
+
+ pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
+ total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result);
+
+ if (rc)
+ dev_err(hdev->dev, "failed to unmask IRQ array\n");
+
+ kfree(pkt);
+
+ return rc;
+}
+
int hl_fw_test_cpu_queue(struct hl_device *hdev)
{
struct armcp_packet test_pkt = {};
@@ -286,3 +350,232 @@ out:
return rc;
}
+
+static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
+{
+ u32 err_val;
+
+ /* Some of the firmware status codes are deprecated in newer f/w
+ * versions. In those versions, the errors are reported
+ * in different registers. Therefore, we need to check those
+ * registers and print the exact errors. Moreover, there
+ * may be multiple errors, so we need to report on each error
+ * separately. Some of the error codes might indicate a state
+ * that is not an error per-se, but it is an error in production
+ * environment
+ */
+ err_val = RREG32(boot_err0_reg);
+ if (!(err_val & CPU_BOOT_ERR0_ENABLED))
+ return;
+
+ if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
+ dev_err(hdev->dev,
+ "Device boot error - DRAM initialization failed\n");
+ if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
+ dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
+ if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
+ dev_err(hdev->dev,
+ "Device boot error - Thermal Sensor initialization failed\n");
+ if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
+ dev_warn(hdev->dev,
+ "Device boot warning - Skipped DRAM initialization\n");
+ if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
+ dev_warn(hdev->dev,
+ "Device boot error - Skipped waiting for BMC\n");
+ if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
+ dev_err(hdev->dev,
+ "Device boot error - Serdes data from BMC not available\n");
+ if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
+ dev_err(hdev->dev,
+ "Device boot error - NIC F/W initialization failed\n");
+}
+
+int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
+ u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
+ u32 boot_err0_reg, bool skip_bmc,
+ u32 cpu_timeout, u32 boot_fit_timeout)
+{
+ u32 status;
+ int rc;
+
+ dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
+ cpu_timeout / USEC_PER_SEC);
+
+ /* Wait for boot FIT request */
+ rc = hl_poll_timeout(
+ hdev,
+ cpu_boot_status_reg,
+ status,
+ status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
+ 10000,
+ boot_fit_timeout);
+
+ if (rc) {
+ dev_dbg(hdev->dev,
+ "No boot fit request received, resuming boot\n");
+ } else {
+ rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
+ if (rc)
+ goto out;
+
+ /* Clear device CPU message status */
+ WREG32(cpu_msg_status_reg, CPU_MSG_CLR);
+
+ /* Signal device CPU that boot loader is ready */
+ WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
+
+ /* Poll for CPU device ack */
+ rc = hl_poll_timeout(
+ hdev,
+ cpu_msg_status_reg,
+ status,
+ status == CPU_MSG_OK,
+ 10000,
+ boot_fit_timeout);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Timeout waiting for boot fit load ack\n");
+ goto out;
+ }
+
+ /* Clear message */
+ WREG32(msg_to_cpu_reg, KMD_MSG_NA);
+ }
+
+ /* Make sure CPU boot-loader is running */
+ rc = hl_poll_timeout(
+ hdev,
+ cpu_boot_status_reg,
+ status,
+ (status == CPU_BOOT_STATUS_DRAM_RDY) ||
+ (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
+ (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
+ (status == CPU_BOOT_STATUS_SRAM_AVAIL),
+ 10000,
+ cpu_timeout);
+
+ /* Read U-Boot, preboot versions now in case we will later fail */
+ hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
+ hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
+
+ /* Some of the status codes below are deprecated in newer f/w
+ * versions but we keep them here for backward compatibility
+ */
+ if (rc) {
+ switch (status) {
+ case CPU_BOOT_STATUS_NA:
+ dev_err(hdev->dev,
+ "Device boot error - BTL did NOT run\n");
+ break;
+ case CPU_BOOT_STATUS_IN_WFE:
+ dev_err(hdev->dev,
+ "Device boot error - Stuck inside WFE loop\n");
+ break;
+ case CPU_BOOT_STATUS_IN_BTL:
+ dev_err(hdev->dev,
+ "Device boot error - Stuck in BTL\n");
+ break;
+ case CPU_BOOT_STATUS_IN_PREBOOT:
+ dev_err(hdev->dev,
+ "Device boot error - Stuck in Preboot\n");
+ break;
+ case CPU_BOOT_STATUS_IN_SPL:
+ dev_err(hdev->dev,
+ "Device boot error - Stuck in SPL\n");
+ break;
+ case CPU_BOOT_STATUS_IN_UBOOT:
+ dev_err(hdev->dev,
+ "Device boot error - Stuck in u-boot\n");
+ break;
+ case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
+ dev_err(hdev->dev,
+ "Device boot error - DRAM initialization failed\n");
+ break;
+ case CPU_BOOT_STATUS_UBOOT_NOT_READY:
+ dev_err(hdev->dev,
+ "Device boot error - u-boot stopped by user\n");
+ break;
+ case CPU_BOOT_STATUS_TS_INIT_FAIL:
+ dev_err(hdev->dev,
+ "Device boot error - Thermal Sensor initialization failed\n");
+ break;
+ default:
+ dev_err(hdev->dev,
+ "Device boot error - Invalid status code %d\n",
+ status);
+ break;
+ }
+
+ rc = -EIO;
+ goto out;
+ }
+
+ if (!hdev->fw_loading) {
+ dev_info(hdev->dev, "Skip loading FW\n");
+ goto out;
+ }
+
+ if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
+ goto out;
+
+ dev_info(hdev->dev,
+ "Loading firmware to device, may take some time...\n");
+
+ rc = hdev->asic_funcs->load_firmware_to_device(hdev);
+ if (rc)
+ goto out;
+
+ if (skip_bmc) {
+ WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
+
+ rc = hl_poll_timeout(
+ hdev,
+ cpu_boot_status_reg,
+ status,
+ (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
+ 10000,
+ cpu_timeout);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to get ACK on skipping BMC, %d\n",
+ status);
+ WREG32(msg_to_cpu_reg, KMD_MSG_NA);
+ rc = -EIO;
+ goto out;
+ }
+ }
+
+ WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
+
+ rc = hl_poll_timeout(
+ hdev,
+ cpu_boot_status_reg,
+ status,
+ (status == CPU_BOOT_STATUS_SRAM_AVAIL),
+ 10000,
+ cpu_timeout);
+
+ /* Clear message */
+ WREG32(msg_to_cpu_reg, KMD_MSG_NA);
+
+ if (rc) {
+ if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
+ dev_err(hdev->dev,
+ "Device reports FIT image is corrupted\n");
+ else
+ dev_err(hdev->dev,
+ "Device failed to load, %d\n", status);
+
+ rc = -EIO;
+ goto out;
+ }
+
+ dev_info(hdev->dev, "Successfully loaded firmware to device\n");
+
+out:
+ fw_read_errors(hdev, boot_err0_reg);
+
+ return rc;
+}