| author | 2022-08-02 10:06:12 -0700 |
|---|---|
| committer | 2022-08-02 10:06:12 -0700 |
| commit | 8bb5e7f4dcd9b9ef22a3ea25c9066a8a968f12dd (patch) |
| tree | 0f1383880607a227142f9388a066959926233ff1 /drivers/misc |
| parent | Input: document the units for resolution of size axes (diff) |
| parent | Input: adc-joystick - fix ordering in adc_joystick_probe() (diff) |
Merge branch 'next' into for-linus
Prepare input updates for 5.20 (or 6.0) merge window.
Diffstat (limited to 'drivers/misc')
102 files changed, 5702 insertions, 3194 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 0f5a49fc7c9e..41d2bb0ae23a 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -259,6 +259,7 @@ config QCOM_FASTRPC
     depends on ARCH_QCOM || COMPILE_TEST
     depends on RPMSG
     select DMA_SHARED_BUFFER
+    select QCOM_SCM
     help
       Provides a communication mechanism that allows for clients to
       make remote method invocations across processor boundary to
@@ -470,6 +471,18 @@ config HISI_HIKEY_USB
       switching between the dual-role USB-C port and the USB-A host ports
       using only one USB controller.

+config OPEN_DICE
+    tristate "Open Profile for DICE driver"
+    depends on OF_RESERVED_MEM
+    help
+      This driver exposes a DICE reserved memory region to userspace via
+      a character device. The memory region contains Compound Device
+      Identifiers (CDIs) generated by firmware as an output of DICE
+      measured boot flow. Userspace can use CDIs for remote attestation
+      and sealing.
+
+      If unsure, say N.
+
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index a086197af544..70e800e9127f 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -59,3 +59,4 @@ obj-$(CONFIG_UACCE) += uacce/
 obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o
 obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o
 obj-$(CONFIG_HI6421V600_IRQ) += hi6421v600-irq.o
+obj-$(CONFIG_OPEN_DICE) += open-dice.o
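The OPEN_DICE help text above defines the whole userspace contract: firmware-generated CDIs become readable through a character device backed by the reserved-memory region. As a rough sketch of a consumer — the /dev/open-dice0 node name and the 4 KiB buffer bound are assumptions for illustration, not taken from this diff:

```c
/* Hypothetical userspace consumer of the Open Profile for DICE
 * character device. Node name and buffer size are assumptions. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    unsigned char cdi[4096];        /* assumed upper bound on the CDI blob */
    int fd = open("/dev/open-dice0", O_RDONLY);
    ssize_t n;

    if (fd < 0) {
        perror("open");
        return 1;
    }
    n = read(fd, cdi, sizeof(cdi)); /* CDIs written by firmware at boot */
    if (n > 0)
        printf("read %zd bytes of CDI material\n", n);
    close(fd);
    return n > 0 ? 0 : 1;
}
```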
diff --git a/drivers/misc/ad525x_dpot-spi.c b/drivers/misc/ad525x_dpot-spi.c
index a9e75d80ad36..263055bda48b 100644
--- a/drivers/misc/ad525x_dpot-spi.c
+++ b/drivers/misc/ad525x_dpot-spi.c
@@ -90,10 +90,9 @@ static int ad_dpot_spi_probe(struct spi_device *spi)
             spi_get_device_id(spi)->name);
 }

-static int ad_dpot_spi_remove(struct spi_device *spi)
+static void ad_dpot_spi_remove(struct spi_device *spi)
 {
     ad_dpot_remove(&spi->dev);
-    return 0;
 }

 static const struct spi_device_id ad_dpot_spi_id[] = {
diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c
index 92c0611034b0..075f3a36d512 100644
--- a/drivers/misc/altera-stapl/altera.c
+++ b/drivers/misc/altera-stapl/altera.c
@@ -530,11 +530,8 @@ exit_done:
             }
             break;
         case OP_SWP:
-            if (altera_check_stack(stack_ptr, 2, &status)) {
-                long_tmp = stack[stack_ptr - 2];
-                stack[stack_ptr - 2] = stack[stack_ptr - 1];
-                stack[stack_ptr - 1] = long_tmp;
-            }
+            if (altera_check_stack(stack_ptr, 2, &status))
+                swap(stack[stack_ptr - 2], stack[stack_ptr - 1]);
             break;
         case OP_ADD:
             if (altera_check_stack(stack_ptr, 2, &status)) {
@@ -912,34 +909,22 @@ exit_done:
              */

            /* SWP */
-            if (altera_check_stack(stack_ptr, 2, &status)) {
-                long_tmp = stack[stack_ptr - 2];
-                stack[stack_ptr - 2] = stack[stack_ptr - 1];
-                stack[stack_ptr - 1] = long_tmp;
-            }
+            if (altera_check_stack(stack_ptr, 2, &status))
+                swap(stack[stack_ptr - 2], stack[stack_ptr - 1]);

            /* SWPN 7 */
            index = 7 + 1;
-            if (altera_check_stack(stack_ptr, index, &status)) {
-                long_tmp = stack[stack_ptr - index];
-                stack[stack_ptr - index] = stack[stack_ptr - 1];
-                stack[stack_ptr - 1] = long_tmp;
-            }
+            if (altera_check_stack(stack_ptr, index, &status))
+                swap(stack[stack_ptr - index], stack[stack_ptr - 1]);

            /* SWP */
-            if (altera_check_stack(stack_ptr, 2, &status)) {
-                long_tmp = stack[stack_ptr - 2];
-                stack[stack_ptr - 2] = stack[stack_ptr - 1];
-                stack[stack_ptr - 1] = long_tmp;
-            }
+            if (altera_check_stack(stack_ptr, 2, &status))
+                swap(stack[stack_ptr - 2], stack[stack_ptr - 1]);

            /* SWPN 6 */
            index = 6 + 1;
-            if (altera_check_stack(stack_ptr, index, &status)) {
-                long_tmp = stack[stack_ptr - index];
-                stack[stack_ptr - index] = stack[stack_ptr - 1];
-                stack[stack_ptr - 1] = long_tmp;
-            }
+            if (altera_check_stack(stack_ptr, index, &status))
+                swap(stack[stack_ptr - index], stack[stack_ptr - 1]);

            /* DUPN 8 */
            index = 8 + 1;
@@ -950,18 +935,12 @@ exit_done:

            /* SWPN 2 */
            index = 2 + 1;
-            if (altera_check_stack(stack_ptr, index, &status)) {
-                long_tmp = stack[stack_ptr - index];
-                stack[stack_ptr - index] = stack[stack_ptr - 1];
-                stack[stack_ptr - 1] = long_tmp;
-            }
+            if (altera_check_stack(stack_ptr, index, &status))
+                swap(stack[stack_ptr - index], stack[stack_ptr - 1]);

            /* SWP */
-            if (altera_check_stack(stack_ptr, 2, &status)) {
-                long_tmp = stack[stack_ptr - 2];
-                stack[stack_ptr - 2] = stack[stack_ptr - 1];
-                stack[stack_ptr - 1] = long_tmp;
-            }
+            if (altera_check_stack(stack_ptr, 2, &status))
+                swap(stack[stack_ptr - 2], stack[stack_ptr - 1]);

            /* DUPN 6 */
            index = 6 + 1;
@@ -1075,11 +1054,8 @@ exit_done:
             * to swap with top element
             */
            index = (args[0]) + 1;
-            if (altera_check_stack(stack_ptr, index, &status)) {
-                long_tmp = stack[stack_ptr - index];
-                stack[stack_ptr - index] = stack[stack_ptr - 1];
-                stack[stack_ptr - 1] = long_tmp;
-            }
+            if (altera_check_stack(stack_ptr, index, &status))
+                swap(stack[stack_ptr - index], stack[stack_ptr - 1]);
            break;
        case OP_DUPN:
            /*
diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c
index d6cd5537126c..69f9b0336410 100644
--- a/drivers/misc/atmel-ssc.c
+++ b/drivers/misc/atmel-ssc.c
@@ -232,9 +232,9 @@ static int ssc_probe(struct platform_device *pdev)
     clk_disable_unprepare(ssc->clk);

     ssc->irq = platform_get_irq(pdev, 0);
-    if (!ssc->irq) {
+    if (ssc->irq < 0) {
         dev_dbg(&pdev->dev, "could not get irq\n");
-        return -ENXIO;
+        return ssc->irq;
     }

     mutex_lock(&user_lock);
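The atmel-ssc hunk above is a common probe-time correctness fix: platform_get_irq() reports failure with a negative errno, so testing for a zero return both misses real errors and invents -ENXIO. A minimal sketch of the corrected idiom (the probe function name is illustrative):

```c
#include <linux/platform_device.h>

static int example_probe(struct platform_device *pdev)
{
    int irq;

    /* platform_get_irq() returns the IRQ number or a negative errno;
     * propagate that errno instead of a made-up -ENXIO */
    irq = platform_get_irq(pdev, 0);
    if (irq < 0)
        return irq;

    /* ... request the IRQ and continue probing ... */
    return 0;
}
```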
diff --git a/drivers/misc/bcm-vk/bcm_vk.h b/drivers/misc/bcm-vk/bcm_vk.h
index a1338f375589..25d51222eedf 100644
--- a/drivers/misc/bcm-vk/bcm_vk.h
+++ b/drivers/misc/bcm-vk/bcm_vk.h
@@ -311,7 +311,7 @@ struct bcm_vk_peer_log {
     u32 wr_idx;
     u32 buf_size;
     u32 mask;
-    char data[0];
+    char data[];
 };

 /* max buf size allowed */
diff --git a/drivers/misc/bcm-vk/bcm_vk_dev.c b/drivers/misc/bcm-vk/bcm_vk_dev.c
index ad639ee85b2a..a16b99bdaa13 100644
--- a/drivers/misc/bcm-vk/bcm_vk_dev.c
+++ b/drivers/misc/bcm-vk/bcm_vk_dev.c
@@ -1633,7 +1633,6 @@ static void bcm_vk_shutdown(struct pci_dev *pdev)

 static const struct pci_device_id bcm_vk_ids[] = {
     { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_VALKYRIE), },
-    { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_VIPER), },
     { }
 };
 MODULE_DEVICE_TABLE(pci, bcm_vk_ids);
diff --git a/drivers/misc/bcm-vk/bcm_vk_msg.c b/drivers/misc/bcm-vk/bcm_vk_msg.c
index 066b9ef7fcd7..3c081504f38c 100644
--- a/drivers/misc/bcm-vk/bcm_vk_msg.c
+++ b/drivers/misc/bcm-vk/bcm_vk_msg.c
@@ -757,20 +757,19 @@ static struct bcm_vk_wkent *bcm_vk_dequeue_pending(struct bcm_vk *vk,
                            u16 q_num,
                            u16 msg_id)
 {
-    bool found = false;
-    struct bcm_vk_wkent *entry;
+    struct bcm_vk_wkent *entry = NULL, *iter;

     spin_lock(&chan->pendq_lock);
-    list_for_each_entry(entry, &chan->pendq[q_num], node) {
-        if (get_msg_id(&entry->to_v_msg[0]) == msg_id) {
-            list_del(&entry->node);
-            found = true;
+    list_for_each_entry(iter, &chan->pendq[q_num], node) {
+        if (get_msg_id(&iter->to_v_msg[0]) == msg_id) {
+            list_del(&iter->node);
+            entry = iter;
             bcm_vk_msgid_bitmap_clear(vk, msg_id, 1);
             break;
         }
     }
     spin_unlock(&chan->pendq_lock);
-    return ((found) ? entry : NULL);
+    return entry;
 }

 s32 bcm_to_h_msg_dequeue(struct bcm_vk *vk)
@@ -1010,16 +1009,14 @@ ssize_t bcm_vk_read(struct file *p_file,
                          miscdev);
     struct device *dev = &vk->pdev->dev;
     struct bcm_vk_msg_chan *chan = &vk->to_h_msg_chan;
-    struct bcm_vk_wkent *entry = NULL;
+    struct bcm_vk_wkent *entry = NULL, *iter;
     u32 q_num;
     u32 rsp_length;
-    bool found = false;

     if (!bcm_vk_drv_access_ok(vk))
         return -EPERM;

     dev_dbg(dev, "Buf count %zu\n", count);
-    found = false;

     /*
      * search through the pendq on the to_h chan, and return only those
@@ -1028,13 +1025,13 @@ ssize_t bcm_vk_read(struct file *p_file,
      */
     spin_lock(&chan->pendq_lock);
     for (q_num = 0; q_num < chan->q_nr; q_num++) {
-        list_for_each_entry(entry, &chan->pendq[q_num], node) {
-            if (entry->ctx->idx == ctx->idx) {
+        list_for_each_entry(iter, &chan->pendq[q_num], node) {
+            if (iter->ctx->idx == ctx->idx) {
                 if (count >=
-                    (entry->to_h_blks * VK_MSGQ_BLK_SIZE)) {
-                    list_del(&entry->node);
+                    (iter->to_h_blks * VK_MSGQ_BLK_SIZE)) {
+                    list_del(&iter->node);
                     atomic_dec(&ctx->pend_cnt);
-                    found = true;
+                    entry = iter;
                 } else {
                     /* buffer not big enough */
                     rc = -EMSGSIZE;
@@ -1046,7 +1043,7 @@ ssize_t bcm_vk_read(struct file *p_file,
 read_loop_exit:
     spin_unlock(&chan->pendq_lock);

-    if (found) {
+    if (entry) {
         /* retrieve the passed down msg_id */
         set_msg_id(&entry->to_h_msg[0], entry->usr_msg_id);
         rsp_length = entry->to_h_blks * VK_MSGQ_BLK_SIZE;
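Both bcm_vk hunks above apply the same tree-wide hardening: the list_for_each_entry() cursor must not be used after the loop, because when nothing matches it points at a bogus head-embedded "entry". The rewrite keeps a dedicated result pointer that is assigned only on a hit. A generic sketch of the pattern:

```c
#include <linux/list.h>

struct item {
    struct list_head node;
    int id;
};

/* 'iter' never escapes the loop; 'found' stays NULL unless a match exists */
static struct item *find_item(struct list_head *head, int id)
{
    struct item *found = NULL, *iter;

    list_for_each_entry(iter, head, node) {
        if (iter->id == id) {
            found = iter;
            break;
        }
    }
    return found;
}
```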
diff --git a/drivers/misc/cardreader/alcor_pci.c b/drivers/misc/cardreader/alcor_pci.c
index de6d44a158bb..9080f9f150a2 100644
--- a/drivers/misc/cardreader/alcor_pci.c
+++ b/drivers/misc/cardreader/alcor_pci.c
@@ -266,7 +266,7 @@ static int alcor_pci_probe(struct pci_dev *pdev,
     if (!priv)
         return -ENOMEM;

-    ret = ida_simple_get(&alcor_pci_idr, 0, 0, GFP_KERNEL);
+    ret = ida_alloc(&alcor_pci_idr, GFP_KERNEL);
     if (ret < 0)
         return ret;
     priv->id = ret;
@@ -280,7 +280,8 @@ static int alcor_pci_probe(struct pci_dev *pdev,
     ret = pci_request_regions(pdev, DRV_NAME_ALCOR_PCI);
     if (ret) {
         dev_err(&pdev->dev, "Cannot request region\n");
-        return -ENOMEM;
+        ret = -ENOMEM;
+        goto error_free_ida;
     }

     if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
@@ -316,14 +317,19 @@ static int alcor_pci_probe(struct pci_dev *pdev,
     ret = mfd_add_devices(&pdev->dev, priv->id, alcor_pci_cells,
                   ARRAY_SIZE(alcor_pci_cells), NULL, 0, NULL);
     if (ret < 0)
-        goto error_release_regions;
+        goto error_clear_drvdata;

     alcor_pci_aspm_ctrl(priv, 0);

     return 0;

+error_clear_drvdata:
+    pci_clear_master(pdev);
+    pci_set_drvdata(pdev, NULL);
 error_release_regions:
     pci_release_regions(pdev);
+error_free_ida:
+    ida_free(&alcor_pci_idr, priv->id);
     return ret;
 }

@@ -337,9 +343,10 @@ static void alcor_pci_remove(struct pci_dev *pdev)

     mfd_remove_devices(&pdev->dev);

-    ida_simple_remove(&alcor_pci_idr, priv->id);
+    ida_free(&alcor_pci_idr, priv->id);

     pci_release_regions(pdev);
+    pci_clear_master(pdev);
     pci_set_drvdata(pdev, NULL);
 }
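The alcor_pci hunks migrate from the deprecated ida_simple_get()/ida_simple_remove() helpers to ida_alloc()/ida_free(), and make every probe error path unwind the allocated ID. A minimal sketch of the allocate/check/free lifecycle (the IDA and function names are illustrative):

```c
#include <linux/idr.h>

static DEFINE_IDA(example_ida);    /* illustrative IDA, not from the driver */

static int example_claim_id(void)
{
    /* ida_alloc() returns a non-negative ID or a negative errno */
    int id = ida_alloc(&example_ida, GFP_KERNEL);

    if (id < 0)
        return id;

    /*
     * ... use the ID; any later failure must reach a path that calls
     * ida_free(), exactly as the new goto labels above guarantee ...
     */
    ida_free(&example_ida, id);
    return 0;
}
```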
diff --git a/drivers/misc/cardreader/rtl8411.c b/drivers/misc/cardreader/rtl8411.c
index 4c5621b17a6f..06457e875a90 100644
--- a/drivers/misc/cardreader/rtl8411.c
+++ b/drivers/misc/cardreader/rtl8411.c
@@ -76,7 +76,7 @@ static void rtl8411b_fetch_vendor_settings(struct rtsx_pcr *pcr)
         map_sd_drive(rtl8411b_reg_to_sd30_drive_sel_3v3(reg));
 }

-static void rtl8411_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
+static void rtl8411_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
 {
     rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07);
 }
diff --git a/drivers/misc/cardreader/rts5209.c b/drivers/misc/cardreader/rts5209.c
index 29f5414072bf..52b0a476ba51 100644
--- a/drivers/misc/cardreader/rts5209.c
+++ b/drivers/misc/cardreader/rts5209.c
@@ -47,7 +47,7 @@ static void rts5209_fetch_vendor_settings(struct rtsx_pcr *pcr)
     }
 }

-static void rts5209_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
+static void rts5209_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
 {
     rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07);
 }
diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c
index 4bcfbc9afbac..d676cf63a966 100644
--- a/drivers/misc/cardreader/rts5227.c
+++ b/drivers/misc/cardreader/rts5227.c
@@ -72,6 +72,8 @@ static void rts5227_fetch_vendor_settings(struct rtsx_pcr *pcr)
     pci_read_config_dword(pdev, PCR_SETTING_REG2, &reg);
     pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);

+    if (CHK_PCI_PID(pcr, 0x522A))
+        pcr->rtd3_en = rtsx_reg_to_rtd3(reg);
     if (rtsx_check_mmc_support(reg))
         pcr->extra_caps |= EXTRA_CAPS_NO_MMC;
     pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg);
@@ -171,6 +173,28 @@ static int rts5227_extra_init_hw(struct rtsx_pcr *pcr)
     else
         rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x30, 0x00);

+    if (CHK_PCI_PID(pcr, 0x522A))
+        rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_AUTOLOAD_CFG1,
+                 CD_RESUME_EN_MASK, CD_RESUME_EN_MASK);
+
+    if (pcr->rtd3_en) {
+        if (CHK_PCI_PID(pcr, 0x522A)) {
+            rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PM_CTRL3, 0x01, 0x01);
+            rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PME_FORCE_CTL, 0x30, 0x30);
+        } else {
+            rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PM_CTRL3, 0x01, 0x01);
+            rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PME_FORCE_CTL, 0xFF, 0x33);
+        }
+    } else {
+        if (CHK_PCI_PID(pcr, 0x522A)) {
+            rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PM_CTRL3, 0x01, 0x00);
+            rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PME_FORCE_CTL, 0x30, 0x20);
+        } else {
+            rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PME_FORCE_CTL, 0xFF, 0x30);
+            rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PM_CTRL3, 0x01, 0x00);
+        }
+    }
+
     if (option->force_clkreq_0)
         rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG,
                  FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
@@ -438,6 +462,28 @@ static int rts522a_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
     return rtsx_pci_send_cmd(pcr, 100);
 }

+static void rts522a_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
+{
+    /* Set relink_time to 0 */
+    rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
+    rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0);
+    rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3,
+            RELINK_TIME_MASK, 0);
+
+    rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3,
+            D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);
+
+    if (!runtime) {
+        rtsx_pci_write_register(pcr, RTS522A_AUTOLOAD_CFG1,
+                CD_RESUME_EN_MASK, 0);
+        rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3, 0x01, 0x00);
+        rtsx_pci_write_register(pcr, RTS522A_PME_FORCE_CTL, 0x30, 0x20);
+    }
+
+    rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN);
+}
+
+
 static void rts522a_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active)
 {
     struct rtsx_cr_option *option = &pcr->option;
@@ -473,6 +519,7 @@ static const struct pcr_ops rts522a_pcr_ops = {
     .card_power_on = rts5227_card_power_on,
     .card_power_off = rts5227_card_power_off,
     .switch_output_voltage = rts522a_switch_output_voltage,
+    .force_power_down = rts522a_force_power_down,
     .cd_deglitch = NULL,
     .conv_clk_and_div_n = NULL,
     .set_l1off_cfg_sub_d0 = rts522a_set_l1off_cfg_sub_d0,
diff --git a/drivers/misc/cardreader/rts5228.c b/drivers/misc/cardreader/rts5228.c
index ffc128278613..cfebad51d1d8 100644
--- a/drivers/misc/cardreader/rts5228.c
+++ b/drivers/misc/cardreader/rts5228.c
@@ -91,7 +91,7 @@ static int rts5228_optimize_phy(struct rtsx_pcr *pcr)
     return rtsx_pci_write_phy_register(pcr, 0x07, 0x8F40);
 }

-static void rts5228_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
+static void rts5228_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
 {
     /* Set relink_time to 0 */
     rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
@@ -102,6 +102,14 @@ static void rts5228_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
     rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3,
             D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);

+    if (!runtime) {
+        rtsx_pci_write_register(pcr, RTS5228_AUTOLOAD_CFG1,
+                CD_RESUME_EN_MASK, 0);
+        rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00);
+        rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL,
+                FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
+    }
+
     rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, SSC_POWER_DOWN);
 }
@@ -480,9 +488,18 @@ static int rts5228_extra_init_hw(struct rtsx_pcr *pcr)
                  FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);

     rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
-    rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
-    rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL,
-            FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
+
+    if (pcr->rtd3_en) {
+        rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x01);
+        rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL,
+                FORCE_PM_CONTROL | FORCE_PM_VALUE,
+                FORCE_PM_CONTROL | FORCE_PM_VALUE);
+    } else {
+        rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00);
+        rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL,
+                FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
+    }
+    rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, D3_DELINK_MODE_EN, 0x00);

     return 0;
 }
diff --git a/drivers/misc/cardreader/rts5229.c b/drivers/misc/cardreader/rts5229.c
index c748eaf1ec1f..b0edd8006d52 100644
--- a/drivers/misc/cardreader/rts5229.c
+++ b/drivers/misc/cardreader/rts5229.c
@@ -44,7 +44,7 @@ static void rts5229_fetch_vendor_settings(struct rtsx_pcr *pcr)
         map_sd_drive(rtsx_reg_to_sd30_drive_sel_3v3(reg));
 }

-static void rts5229_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
+static void rts5229_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
 {
     rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03);
 }
diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c
index 53f3a1f45c4a..91d240dd68fa 100644
--- a/drivers/misc/cardreader/rts5249.c
+++ b/drivers/misc/cardreader/rts5249.c
@@ -74,7 +74,8 @@ static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr)
     pci_read_config_dword(pdev, PCR_SETTING_REG2, &reg);
     pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);

-    pcr->rtd3_en = rtsx_reg_to_rtd3_uhsii(reg);
+    if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A))
+        pcr->rtd3_en = rtsx_reg_to_rtd3_uhsii(reg);

     if (rtsx_check_mmc_support(reg))
         pcr->extra_caps |= EXTRA_CAPS_NO_MMC;
@@ -143,6 +144,27 @@ static int rts5249_init_from_hw(struct rtsx_pcr *pcr)
     return 0;
 }

+static void rts52xa_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
+{
+    /* Set relink_time to 0 */
+    rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
+    rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0);
+    rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3,
+            RELINK_TIME_MASK, 0);
+
+    rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3,
+            D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);
+
+    if (!runtime) {
+        rtsx_pci_write_register(pcr, RTS524A_AUTOLOAD_CFG1,
+                CD_RESUME_EN_MASK, 0);
+        rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, 0x01, 0x00);
+        rtsx_pci_write_register(pcr, RTS524A_PME_FORCE_CTL, 0x30, 0x20);
+    }
+
+    rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN);
+}
+
 static void rts52xa_save_content_from_efuse(struct rtsx_pcr *pcr)
 {
     u8 cnt, sv;
@@ -281,8 +303,11 @@ static int rts5249_extra_init_hw(struct rtsx_pcr *pcr)

     rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF);

-    if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A))
+    if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) {
         rtsx_pci_write_register(pcr, REG_VREF, PWD_SUSPND_EN, PWD_SUSPND_EN);
+        rtsx_pci_write_register(pcr, RTS524A_AUTOLOAD_CFG1,
+                CD_RESUME_EN_MASK, CD_RESUME_EN_MASK);
+    }

     if (pcr->rtd3_en) {
         if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) {
@@ -724,6 +749,7 @@ static const struct pcr_ops rts524a_pcr_ops = {
     .card_power_on = rtsx_base_card_power_on,
     .card_power_off = rtsx_base_card_power_off,
     .switch_output_voltage = rtsx_base_switch_output_voltage,
+    .force_power_down = rts52xa_force_power_down,
     .set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0,
 };

@@ -841,6 +867,7 @@ static const struct pcr_ops rts525a_pcr_ops = {
     .card_power_on = rts525a_card_power_on,
     .card_power_off = rtsx_base_card_power_off,
     .switch_output_voltage = rts525a_switch_output_voltage,
+    .force_power_down = rts52xa_force_power_down,
     .set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0,
 };
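The recurring change across the rtsx variants above is the new bool runtime argument to ->force_power_down(): system suspend/shutdown (runtime == false) disarms card-detect resume and forces the PM rails down, while runtime suspend leaves wakeup armed. A condensed, illustrative-only sketch of the callback shape, using the 522A register names introduced in this series:

```c
/* Condensed from the rts522a/rts52xa hunks above; not a real driver op */
static void example_force_power_down(struct rtsx_pcr *pcr, u8 pm_state,
                                     bool runtime)
{
    if (!runtime) {
        /* system sleep: no card-detect wakeup wanted */
        rtsx_pci_write_register(pcr, RTS522A_AUTOLOAD_CFG1,
                                CD_RESUME_EN_MASK, 0);
    }
    /* common final step: gate the power rails */
    rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN);
}
```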
diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c
index 1fd4e0e50730..b1e76030cafd 100644
--- a/drivers/misc/cardreader/rts5261.c
+++ b/drivers/misc/cardreader/rts5261.c
@@ -57,41 +57,7 @@ static void rts5261_fill_driving(struct rtsx_pcr *pcr, u8 voltage)
              0xFF, driving[drive_sel][2]);
 }

-static void rtsx5261_fetch_vendor_settings(struct rtsx_pcr *pcr)
-{
-    struct pci_dev *pdev = pcr->pci;
-    u32 reg;
-
-    /* 0x814~0x817 */
-    pci_read_config_dword(pdev, PCR_SETTING_REG2, &reg);
-    pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);
-
-    if (!rts5261_vendor_setting_valid(reg)) {
-        /* Not support MMC default */
-        pcr->extra_caps |= EXTRA_CAPS_NO_MMC;
-        pcr_dbg(pcr, "skip fetch vendor setting\n");
-        return;
-    }
-
-    if (!rts5261_reg_check_mmc_support(reg))
-        pcr->extra_caps |= EXTRA_CAPS_NO_MMC;
-
-    /* TO do: need to add rtd3 function */
-    pcr->rtd3_en = rts5261_reg_to_rtd3(reg);
-
-    if (rts5261_reg_check_reverse_socket(reg))
-        pcr->flags |= PCR_REVERSE_SOCKET;
-
-    /* 0x724~0x727 */
-    pci_read_config_dword(pdev, PCR_SETTING_REG1, &reg);
-    pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg);
-
-    pcr->aspm_en = rts5261_reg_to_aspm(reg);
-    pcr->sd30_drive_sel_1v8 = rts5261_reg_to_sd30_drive_sel_1v8(reg);
-    pcr->sd30_drive_sel_3v3 = rts5261_reg_to_sd30_drive_sel_3v3(reg);
-}
-
-static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
+static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
 {
     /* Set relink_time to 0 */
     rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
@@ -103,6 +69,24 @@ static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
     rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3,
             D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);

+    if (!runtime) {
+        rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1,
+                CD_RESUME_EN_MASK, 0);
+        rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00);
+        rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
+                FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
+
+    } else {
+        rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
+                FORCE_PM_CONTROL | FORCE_PM_VALUE, 0);
+
+        rtsx_pci_write_register(pcr, RTS5261_FW_CTL,
+                RTS5261_INFORM_RTD3_COLD, RTS5261_INFORM_RTD3_COLD);
+        rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4,
+                RTS5261_FORCE_PRSNT_LOW, RTS5261_FORCE_PRSNT_LOW);
+
+    }
+
     rtsx_pci_write_register(pcr, RTS5261_REG_FPDCTL,
             SSC_POWER_DOWN, SSC_POWER_DOWN);
 }
@@ -373,11 +357,11 @@ static void rts5261_process_ocp(struct rtsx_pcr *pcr)

 }

-static int rts5261_init_from_hw(struct rtsx_pcr *pcr)
+static void rts5261_init_from_hw(struct rtsx_pcr *pcr)
 {
     struct pci_dev *pdev = pcr->pci;
-    int retval;
-    u32 lval, i;
+    u32 lval1, lval2, i;
+    u16 setting_reg1, setting_reg2;
     u8 valid, efuse_valid, tmp;

     rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
@@ -400,26 +384,72 @@ static int rts5261_init_from_hw(struct rtsx_pcr *pcr)
     efuse_valid = ((tmp & 0x0C) >> 2);
     pcr_dbg(pcr, "Load efuse valid: 0x%x\n", efuse_valid);

-    if (efuse_valid == 0) {
-        retval = pci_read_config_dword(pdev, PCR_SETTING_REG2, &lval);
-        if (retval != 0)
-            pcr_dbg(pcr, "read 0x814 DW fail\n");
-        pcr_dbg(pcr, "DW from 0x814: 0x%x\n", lval);
-        /* 0x816 */
-        valid = (u8)((lval >> 16) & 0x03);
-        pcr_dbg(pcr, "0x816: %d\n", valid);
-    }
+    pci_read_config_dword(pdev, PCR_SETTING_REG2, &lval2);
+    pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, lval2);
+    /* 0x816 */
+    valid = (u8)((lval2 >> 16) & 0x03);
+
     rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
         REG_EFUSE_POR, 0);
     pcr_dbg(pcr, "Disable efuse por!\n");

-    pci_read_config_dword(pdev, PCR_SETTING_REG2, &lval);
-    lval = lval & 0x00FFFFFF;
-    retval = pci_write_config_dword(pdev, PCR_SETTING_REG2, lval);
-    if (retval != 0)
-        pcr_dbg(pcr, "write config fail\n");
+    if (efuse_valid == 2 || efuse_valid == 3) {
+        if (valid == 3) {
+            /* Bypass efuse */
+            setting_reg1 = PCR_SETTING_REG1;
+            setting_reg2 = PCR_SETTING_REG2;
+        } else {
+            /* Use efuse data */
+            setting_reg1 = PCR_SETTING_REG4;
+            setting_reg2 = PCR_SETTING_REG5;
+        }
+    } else if (efuse_valid == 0) {
+        // default
+        setting_reg1 = PCR_SETTING_REG1;
+        setting_reg2 = PCR_SETTING_REG2;
+    } else {
+        return;
+    }
+
+    pci_read_config_dword(pdev, setting_reg2, &lval2);
+    pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", setting_reg2, lval2);

-    return retval;
+    if (!rts5261_vendor_setting_valid(lval2)) {
+        /* Not support MMC default */
+        pcr->extra_caps |= EXTRA_CAPS_NO_MMC;
+        pcr_dbg(pcr, "skip fetch vendor setting\n");
+        return;
+    }
+
+    if (!rts5261_reg_check_mmc_support(lval2))
+        pcr->extra_caps |= EXTRA_CAPS_NO_MMC;
+
+    pcr->rtd3_en = rts5261_reg_to_rtd3(lval2);
+
+    if (rts5261_reg_check_reverse_socket(lval2))
+        pcr->flags |= PCR_REVERSE_SOCKET;
+
+    pci_read_config_dword(pdev, setting_reg1, &lval1);
+    pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", setting_reg1, lval1);
+
+    pcr->aspm_en = rts5261_reg_to_aspm(lval1);
+    pcr->sd30_drive_sel_1v8 = rts5261_reg_to_sd30_drive_sel_1v8(lval1);
+    pcr->sd30_drive_sel_3v3 = rts5261_reg_to_sd30_drive_sel_3v3(lval1);
+
+    if (setting_reg1 == PCR_SETTING_REG1) {
+        /* store setting */
+        rtsx_pci_write_register(pcr, 0xFF0C, 0xFF, (u8)(lval1 & 0xFF));
+        rtsx_pci_write_register(pcr, 0xFF0D, 0xFF, (u8)((lval1 >> 8) & 0xFF));
+        rtsx_pci_write_register(pcr, 0xFF0E, 0xFF, (u8)((lval1 >> 16) & 0xFF));
+        rtsx_pci_write_register(pcr, 0xFF0F, 0xFF, (u8)((lval1 >> 24) & 0xFF));
+        rtsx_pci_write_register(pcr, 0xFF10, 0xFF, (u8)(lval2 & 0xFF));
+        rtsx_pci_write_register(pcr, 0xFF11, 0xFF, (u8)((lval2 >> 8) & 0xFF));
+        rtsx_pci_write_register(pcr, 0xFF12, 0xFF, (u8)((lval2 >> 16) & 0xFF));
+
+        pci_write_config_dword(pdev, PCR_SETTING_REG4, lval1);
+        lval2 = lval2 & 0x00FFFFFF;
+        pci_write_config_dword(pdev, PCR_SETTING_REG5, lval2);
+    }
 }

 static void rts5261_init_from_cfg(struct rtsx_pcr *pcr)
@@ -536,9 +566,18 @@ static int rts5261_extra_init_hw(struct rtsx_pcr *pcr)
              FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);

     rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
-    rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
-    rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
-            FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
+
+    if (pcr->rtd3_en) {
+        rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x01);
+        rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
+                FORCE_PM_CONTROL | FORCE_PM_VALUE,
+                FORCE_PM_CONTROL | FORCE_PM_VALUE);
+    } else {
+        rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00);
+        rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
+                FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
+    }
+    rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, D3_DELINK_MODE_EN, 0x00);

     /* Clear Enter RTD3_cold Information*/
     rtsx_pci_write_register(pcr, RTS5261_FW_CTL,
@@ -609,7 +648,6 @@ static void rts5261_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active)
 }

 static const struct pcr_ops rts5261_pcr_ops = {
-    .fetch_vendor_settings = rtsx5261_fetch_vendor_settings,
     .turn_on_led = rts5261_turn_on_led,
     .turn_off_led = rts5261_turn_off_led,
     .extra_init_hw = rts5261_extra_init_hw,
diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
index 6ac509c1821c..2a2619e3c72c 100644
--- a/drivers/misc/cardreader/rtsx_pcr.c
+++ b/drivers/misc/cardreader/rtsx_pcr.c
@@ -152,20 +152,12 @@ void rtsx_pci_start_run(struct rtsx_pcr *pcr)
     if (pcr->remove_pci)
         return;

-    if (pcr->rtd3_en)
-        if (pcr->is_runtime_suspended) {
-            pm_runtime_get(&(pcr->pci->dev));
-            pcr->is_runtime_suspended = false;
-        }
-
     if (pcr->state != PDEV_STAT_RUN) {
         pcr->state = PDEV_STAT_RUN;
         if (pcr->ops->enable_auto_blink)
             pcr->ops->enable_auto_blink(pcr);
         rtsx_pm_full_on(pcr);
     }
-
-    mod_delayed_work(system_wq, &pcr->idle_work, msecs_to_jiffies(200));
 }
 EXPORT_SYMBOL_GPL(rtsx_pci_start_run);

@@ -1062,73 +1054,7 @@ static int rtsx_pci_acquire_irq(struct rtsx_pcr *pcr)
     return 0;
 }

-static void rtsx_enable_aspm(struct rtsx_pcr *pcr)
-{
-    if (pcr->ops->set_aspm)
-        pcr->ops->set_aspm(pcr, true);
-    else
-        rtsx_comm_set_aspm(pcr, true);
-}
-
-static void rtsx_comm_pm_power_saving(struct rtsx_pcr *pcr)
-{
-    struct rtsx_cr_option *option = &pcr->option;
-
-    if (option->ltr_enabled) {
-        u32 latency = option->ltr_l1off_latency;
-
-        if (rtsx_check_dev_flag(pcr, L1_SNOOZE_TEST_EN))
-            mdelay(option->l1_snooze_delay);
-
-        rtsx_set_ltr_latency(pcr, latency);
-    }
-
-    if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN))
-        rtsx_set_l1off_sub_cfg_d0(pcr, 0);
-
-    rtsx_enable_aspm(pcr);
-}
-
-static void rtsx_pm_power_saving(struct rtsx_pcr *pcr)
-{
-    rtsx_comm_pm_power_saving(pcr);
-}
-
-static void rtsx_pci_rtd3_work(struct work_struct *work)
-{
-    struct delayed_work *dwork = to_delayed_work(work);
-    struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, rtd3_work);
-
-    pcr_dbg(pcr, "--> %s\n", __func__);
-    if (!pcr->is_runtime_suspended)
-        pm_runtime_put(&(pcr->pci->dev));
-}
-
-static void rtsx_pci_idle_work(struct work_struct *work)
-{
-    struct delayed_work *dwork = to_delayed_work(work);
-    struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, idle_work);
-
-    pcr_dbg(pcr, "--> %s\n", __func__);
-
-    mutex_lock(&pcr->pcr_mutex);
-
-    pcr->state = PDEV_STAT_IDLE;
-
-    if (pcr->ops->disable_auto_blink)
-        pcr->ops->disable_auto_blink(pcr);
-    if (pcr->ops->turn_off_led)
-        pcr->ops->turn_off_led(pcr);
-
-    rtsx_pm_power_saving(pcr);
-
-    mutex_unlock(&pcr->pcr_mutex);
-
-    if (pcr->rtd3_en)
-        mod_delayed_work(system_wq, &pcr->rtd3_work, msecs_to_jiffies(10000));
-}
-
-static void rtsx_base_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
+static void rtsx_base_force_power_down(struct rtsx_pcr *pcr)
 {
     /* Set relink_time to 0 */
     rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
@@ -1142,7 +1068,7 @@ static void rtsx_base_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
     rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN);
 }

-static void __maybe_unused rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state)
+static void __maybe_unused rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
 {
     if (pcr->ops->turn_off_led)
         pcr->ops->turn_off_led(pcr);
@@ -1154,9 +1080,9 @@ static void __maybe_unused rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state)
     rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, pm_state);

     if (pcr->ops->force_power_down)
-        pcr->ops->force_power_down(pcr, pm_state);
+        pcr->ops->force_power_down(pcr, pm_state, runtime);
     else
-        rtsx_base_force_power_down(pcr, pm_state);
+        rtsx_base_force_power_down(pcr);
 }

 void rtsx_pci_enable_ocp(struct rtsx_pcr *pcr)
@@ -1598,7 +1524,6 @@ static int rtsx_pci_probe(struct pci_dev *pcidev,
     pcr->card_inserted = 0;
     pcr->card_removed = 0;
     INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect);
-    INIT_DELAYED_WORK(&pcr->idle_work, rtsx_pci_idle_work);

     pcr->msi_en = msi_en;
     if (pcr->msi_en) {
@@ -1623,20 +1548,14 @@ static int rtsx_pci_probe(struct pci_dev *pcidev,
         rtsx_pcr_cells[i].pdata_size = sizeof(*handle);
     }

-    if (pcr->rtd3_en) {
-        INIT_DELAYED_WORK(&pcr->rtd3_work, rtsx_pci_rtd3_work);
-        pm_runtime_allow(&pcidev->dev);
-        pm_runtime_enable(&pcidev->dev);
-        pcr->is_runtime_suspended = false;
-    }
-
     ret = mfd_add_devices(&pcidev->dev, pcr->id, rtsx_pcr_cells,
                   ARRAY_SIZE(rtsx_pcr_cells), NULL, 0, NULL);
     if (ret < 0)
         goto free_slots;

-    schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
+    pm_runtime_allow(&pcidev->dev);
+    pm_runtime_put(&pcidev->dev);

     return 0;

@@ -1668,11 +1587,11 @@ static void rtsx_pci_remove(struct pci_dev *pcidev)
     struct pcr_handle *handle = pci_get_drvdata(pcidev);
     struct rtsx_pcr *pcr = handle->pcr;

-    if (pcr->rtd3_en)
-        pm_runtime_get_noresume(&pcr->pci->dev);
-
     pcr->remove_pci = true;

+    pm_runtime_get_sync(&pcidev->dev);
+    pm_runtime_forbid(&pcidev->dev);
+
     /* Disable interrupts at the pcr level */
     spin_lock_irq(&pcr->lock);
     rtsx_pci_writel(pcr, RTSX_BIER, 0);
@@ -1680,9 +1599,6 @@ static void rtsx_pci_remove(struct pci_dev *pcidev)
     spin_unlock_irq(&pcr->lock);

     cancel_delayed_work_sync(&pcr->carddet_work);
-    cancel_delayed_work_sync(&pcr->idle_work);
-    if (pcr->rtd3_en)
-        cancel_delayed_work_sync(&pcr->rtd3_work);

     mfd_remove_devices(&pcidev->dev);

@@ -1700,11 +1616,6 @@ static void rtsx_pci_remove(struct pci_dev *pcidev)
     idr_remove(&rtsx_pci_idr, pcr->id);
     spin_unlock(&rtsx_pci_lock);

-    if (pcr->rtd3_en) {
-        pm_runtime_disable(&pcr->pci->dev);
-        pm_runtime_put_noidle(&pcr->pci->dev);
-    }
-
     kfree(pcr->slots);
     kfree(pcr);
     kfree(handle);
@@ -1717,22 +1628,16 @@ static void rtsx_pci_remove(struct pci_dev *pcidev)
 static int __maybe_unused rtsx_pci_suspend(struct device *dev_d)
 {
     struct pci_dev *pcidev = to_pci_dev(dev_d);
-    struct pcr_handle *handle;
-    struct rtsx_pcr *pcr;
+    struct pcr_handle *handle = pci_get_drvdata(pcidev);
+    struct rtsx_pcr *pcr = handle->pcr;

     dev_dbg(&(pcidev->dev), "--> %s\n", __func__);

-    handle = pci_get_drvdata(pcidev);
-    pcr = handle->pcr;
-
-    cancel_delayed_work(&pcr->carddet_work);
-    cancel_delayed_work(&pcr->idle_work);
+    cancel_delayed_work_sync(&pcr->carddet_work);

     mutex_lock(&pcr->pcr_mutex);

-    rtsx_pci_power_off(pcr, HOST_ENTER_S3);
-
-    device_wakeup_disable(dev_d);
+    rtsx_pci_power_off(pcr, HOST_ENTER_S3, false);

     mutex_unlock(&pcr->pcr_mutex);
     return 0;
@@ -1741,15 +1646,12 @@ static int __maybe_unused rtsx_pci_suspend(struct device *dev_d)
 static int __maybe_unused rtsx_pci_resume(struct device *dev_d)
 {
     struct pci_dev *pcidev = to_pci_dev(dev_d);
-    struct pcr_handle *handle;
-    struct rtsx_pcr *pcr;
+    struct pcr_handle *handle = pci_get_drvdata(pcidev);
+    struct rtsx_pcr *pcr = handle->pcr;
     int ret = 0;

     dev_dbg(&(pcidev->dev), "--> %s\n", __func__);

-    handle = pci_get_drvdata(pcidev);
-    pcr = handle->pcr;
-
     mutex_lock(&pcr->pcr_mutex);

     ret = rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00);
@@ -1760,8 +1662,6 @@ static int __maybe_unused rtsx_pci_resume(struct device *dev_d)
     if (ret)
         goto out;

-    schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
-
 out:
     mutex_unlock(&pcr->pcr_mutex);
     return ret;
@@ -1769,16 +1669,46 @@ out:

 #ifdef CONFIG_PM

+static void rtsx_enable_aspm(struct rtsx_pcr *pcr)
+{
+    if (pcr->ops->set_aspm)
+        pcr->ops->set_aspm(pcr, true);
+    else
+        rtsx_comm_set_aspm(pcr, true);
+}
+
+static void rtsx_comm_pm_power_saving(struct rtsx_pcr *pcr)
+{
+    struct rtsx_cr_option *option = &pcr->option;
+
+    if (option->ltr_enabled) {
+        u32 latency = option->ltr_l1off_latency;
+
+        if (rtsx_check_dev_flag(pcr, L1_SNOOZE_TEST_EN))
+            mdelay(option->l1_snooze_delay);
+
+        rtsx_set_ltr_latency(pcr, latency);
+    }
+
+    if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN))
+        rtsx_set_l1off_sub_cfg_d0(pcr, 0);
+
+    rtsx_enable_aspm(pcr);
+}
+
+static void rtsx_pm_power_saving(struct rtsx_pcr *pcr)
+{
+    rtsx_comm_pm_power_saving(pcr);
+}
+
 static void rtsx_pci_shutdown(struct pci_dev *pcidev)
 {
-    struct pcr_handle *handle;
-    struct rtsx_pcr *pcr;
+    struct pcr_handle *handle = pci_get_drvdata(pcidev);
+    struct rtsx_pcr *pcr = handle->pcr;

     dev_dbg(&(pcidev->dev), "--> %s\n", __func__);

-    handle = pci_get_drvdata(pcidev);
-    pcr = handle->pcr;
-    rtsx_pci_power_off(pcr, HOST_ENTER_S1);
+    rtsx_pci_power_off(pcr, HOST_ENTER_S1, false);

     pci_disable_device(pcidev);
     free_irq(pcr->irq, (void *)pcr);
@@ -1786,47 +1716,63 @@ static void rtsx_pci_shutdown(struct pci_dev *pcidev)
         pci_disable_msi(pcr->pci);
 }

+static int rtsx_pci_runtime_idle(struct device *device)
+{
+    struct pci_dev *pcidev = to_pci_dev(device);
+    struct pcr_handle *handle = pci_get_drvdata(pcidev);
+    struct rtsx_pcr *pcr = handle->pcr;
+
+    dev_dbg(device, "--> %s\n", __func__);
+
+    mutex_lock(&pcr->pcr_mutex);
+
+    pcr->state = PDEV_STAT_IDLE;
+
+    if (pcr->ops->disable_auto_blink)
+        pcr->ops->disable_auto_blink(pcr);
+    if (pcr->ops->turn_off_led)
+        pcr->ops->turn_off_led(pcr);
+
+    rtsx_pm_power_saving(pcr);
+
+    mutex_unlock(&pcr->pcr_mutex);
+
+    if (pcr->rtd3_en)
+        pm_schedule_suspend(device, 10000);
+
+    return -EBUSY;
+}
+
 static int rtsx_pci_runtime_suspend(struct device *device)
 {
     struct pci_dev *pcidev = to_pci_dev(device);
-    struct pcr_handle *handle;
-    struct rtsx_pcr *pcr;
+    struct pcr_handle *handle = pci_get_drvdata(pcidev);
+    struct rtsx_pcr *pcr = handle->pcr;

-    handle = pci_get_drvdata(pcidev);
-    pcr = handle->pcr;
-    dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
+    dev_dbg(device, "--> %s\n", __func__);

-    cancel_delayed_work(&pcr->carddet_work);
-    cancel_delayed_work(&pcr->rtd3_work);
-    cancel_delayed_work(&pcr->idle_work);
+    cancel_delayed_work_sync(&pcr->carddet_work);

     mutex_lock(&pcr->pcr_mutex);
-    rtsx_pci_power_off(pcr, HOST_ENTER_S3);
+    rtsx_pci_power_off(pcr, HOST_ENTER_S3, true);

     mutex_unlock(&pcr->pcr_mutex);

-    pcr->is_runtime_suspended = true;
-
     return 0;
 }

 static int rtsx_pci_runtime_resume(struct device *device)
 {
     struct pci_dev *pcidev = to_pci_dev(device);
-    struct pcr_handle *handle;
-    struct rtsx_pcr *pcr;
+    struct pcr_handle *handle = pci_get_drvdata(pcidev);
+    struct rtsx_pcr *pcr = handle->pcr;

-    handle = pci_get_drvdata(pcidev);
-    pcr = handle->pcr;
-    dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
+    dev_dbg(device, "--> %s\n", __func__);

     mutex_lock(&pcr->pcr_mutex);

     rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00);

-    if (pcr->ops->fetch_vendor_settings)
-        pcr->ops->fetch_vendor_settings(pcr);
-
     rtsx_pci_init_hw(pcr);

     if (pcr->slots[RTSX_SD_CARD].p_dev != NULL) {
@@ -1834,8 +1780,6 @@ static int rtsx_pci_runtime_resume(struct device *device)
                  pcr->slots[RTSX_SD_CARD].p_dev);
     }

-    schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
-
     mutex_unlock(&pcr->pcr_mutex);
     return 0;
 }
@@ -1850,7 +1794,7 @@ static int rtsx_pci_runtime_resume(struct device *device)

 static const struct dev_pm_ops rtsx_pci_pm_ops = {
     SET_SYSTEM_SLEEP_PM_OPS(rtsx_pci_suspend, rtsx_pci_resume)
-    SET_RUNTIME_PM_OPS(rtsx_pci_runtime_suspend, rtsx_pci_runtime_resume, NULL)
+    SET_RUNTIME_PM_OPS(rtsx_pci_runtime_suspend, rtsx_pci_runtime_resume, rtsx_pci_runtime_idle)
 };

 static struct pci_driver rtsx_pci_driver = {
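The rtsx_pcr.c rework above deletes the homegrown idle_work/rtd3_work machinery and hands idle detection to the runtime-PM core: probe calls pm_runtime_allow()/pm_runtime_put(), and the new runtime_idle callback does the idle housekeeping, schedules a delayed suspend, and returns -EBUSY so the core does not suspend immediately. A stripped-down sketch of that idle-callback idiom on a generic device:

```c
#include <linux/pm_runtime.h>

static int example_runtime_idle(struct device *dev)
{
    /* cheap idle-time housekeeping would go here
     * (LEDs off, ASPM on, ...) */

    /* ask the PM core to runtime-suspend us 10 s from now ... */
    pm_schedule_suspend(dev, 10000);

    /* ... and veto the immediate suspend the idle path would trigger */
    return -EBUSY;
}
```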
diff --git a/drivers/misc/cardreader/rtsx_pcr.h b/drivers/misc/cardreader/rtsx_pcr.h
index daf057c4eea6..37d1f316ae17 100644
--- a/drivers/misc/cardreader/rtsx_pcr.h
+++ b/drivers/misc/cardreader/rtsx_pcr.h
@@ -15,6 +15,8 @@
 #define MIN_DIV_N_PCR		80
 #define MAX_DIV_N_PCR		208

+#define RTS522A_PME_FORCE_CTL		0xFF78
+#define RTS522A_AUTOLOAD_CFG1		0xFF7C
 #define RTS522A_PM_CTRL3		0xFF7E

 #define RTS524A_PME_FORCE_CTL		0xFF78
@@ -25,6 +27,7 @@
 #define REG_EFUSE_POWEROFF		0x00
 #define RTS5250_CLK_CFG3		0xFF79
 #define RTS525A_CFG_MEM_PD		0xF0
+#define RTS524A_AUTOLOAD_CFG1		0xFF7C
 #define RTS524A_PM_CTRL3		0xFF7E
 #define RTS525A_BIOS_CFG		0xFF2D
 #define RTS525A_LOAD_BIOS_FLAG	0x01
diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c
index 59eda55d92a3..1ef9b61077c4 100644
--- a/drivers/misc/cardreader/rtsx_usb.c
+++ b/drivers/misc/cardreader/rtsx_usb.c
@@ -667,6 +667,7 @@ static int rtsx_usb_probe(struct usb_interface *intf,
     return 0;

 out_init_fail:
+    usb_set_intfdata(ucr->pusb_intf, NULL);
     usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf,
             ucr->iobuf_dma);
     return ret;
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index b493de962153..d85c56530863 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -12,6 +12,7 @@
 #include <linux/pseudo_fs.h>
 #include <linux/sched/mm.h>
 #include <linux/mmu_context.h>
+#include <linux/irqdomain.h>

 #include "cxl.h"
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 5dc0f6093f9d..7a6dd91987fd 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -25,6 +25,8 @@

 extern uint cxl_verbose;

+struct property;
+
 #define CXL_TIMEOUT 5

 /*
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index 53b919856426..e5fe0a171472 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -5,6 +5,7 @@

 #include <linux/hugetlb.h>
 #include <linux/sched/mm.h>
+#include <asm/opal-api.h>
 #include <asm/pnv-pci.h>
 #include <misc/cxllib.h>
diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c
index 5b93ff51d82a..eee9decc121e 100644
--- a/drivers/misc/cxl/flash.c
+++ b/drivers/misc/cxl/flash.c
@@ -4,6 +4,7 @@
 #include <linux/semaphore.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/of.h>
 #include <asm/rtas.h>

 #include "cxl.h"
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index 9d485c9e3fff..3321c014913c 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -6,6 +6,8 @@
 #include <linux/spinlock.h>
 #include <linux/uaccess.h>
 #include <linux/delay.h>
+#include <linux/irqdomain.h>
+#include <linux/platform_device.h>

 #include "cxl.h"
 #include "hcalls.h"
diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
index 4cb829d5d873..5f0e2dcebb34 100644
--- a/drivers/misc/cxl/irq.c
+++ b/drivers/misc/cxl/irq.c
@@ -4,6 +4,7 @@
  */

 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/workqueue.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index 43b312d06e3e..c1fbf6f588f7 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/pci.h>
+#include <linux/platform_device.h>
 #include <linux/sched/task.h>

 #include <asm/cputable.h>
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 1a7f22836041..50b0c44bb8d7 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -11,6 +11,7 @@
 #include <linux/mm.h>
 #include <linux/uaccess.h>
 #include <linux/delay.h>
+#include <linux/irqdomain.h>
 #include <asm/synch.h>
 #include <asm/switch_to.h>
 #include <misc/cxl-base.h>
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index bee727ed98db..c9c56fd194c1 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -31,6 +31,8 @@
  */

 #define	FM25_SN_LEN	8		/* serial number length */

+#define EE_MAXADDRLEN	3		/* 24 bit addresses, up to 2 MBytes */
+
 struct at25_data {
     struct spi_eeprom	chip;
     struct spi_device	*spi;
@@ -39,6 +41,7 @@ struct at25_data {
     struct nvmem_config	nvmem_config;
     struct nvmem_device	*nvmem;
     u8 sernum[FM25_SN_LEN];
+    u8 command[EE_MAXADDRLEN + 1];
 };

 #define	AT25_WREN	0x06		/* latch the write enable */
@@ -61,8 +64,6 @@ struct at25_data {

 #define	FM25_ID_LEN	9		/* ID length */

-#define	EE_MAXADDRLEN	3		/* 24 bit addresses, up to 2 MBytes */
-
 /*
  * Specs often allow 5ms for a page write, sometimes 20ms;
  * it's important to recover from write timeouts.
@@ -78,7 +79,11 @@ static int at25_ee_read(void *priv, unsigned int offset,
 {
     struct at25_data *at25 = priv;
     char *buf = val;
-    u8 command[EE_MAXADDRLEN + 1];
+    size_t max_chunk = spi_max_transfer_size(at25->spi);
+    size_t num_msgs = DIV_ROUND_UP(count, max_chunk);
+    size_t nr_bytes = 0;
+    unsigned int msg_offset;
+    size_t msg_count;
     u8 *cp;
     ssize_t status;
     struct spi_transfer t[2];
@@ -92,53 +97,59 @@ static int at25_ee_read(void *priv, unsigned int offset,
     if (unlikely(!count))
         return -EINVAL;

-    cp = command;
+    msg_offset = (unsigned int)offset;
+    msg_count = min(count, max_chunk);
+    while (num_msgs) {
+        cp = at25->command;

-    instr = AT25_READ;
-    if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
-        if (offset >= BIT(at25->addrlen * 8))
-            instr |= AT25_INSTR_BIT3;
-    *cp++ = instr;
+        instr = AT25_READ;
+        if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
+            if (msg_offset >= BIT(at25->addrlen * 8))
+                instr |= AT25_INSTR_BIT3;

-    /* 8/16/24-bit address is written MSB first */
-    switch (at25->addrlen) {
-    default:	/* case 3 */
-        *cp++ = offset >> 16;
-        fallthrough;
-    case 2:
-        *cp++ = offset >> 8;
-        fallthrough;
-    case 1:
-    case 0:	/* can't happen: for better code generation */
-        *cp++ = offset >> 0;
-    }
+        mutex_lock(&at25->lock);

-    spi_message_init(&m);
-    memset(t, 0, sizeof(t));
+        *cp++ = instr;

-    t[0].tx_buf = command;
-    t[0].len = at25->addrlen + 1;
-    spi_message_add_tail(&t[0], &m);
+        /* 8/16/24-bit address is written MSB first */
+        switch (at25->addrlen) {
+        default:	/* case 3 */
+            *cp++ = msg_offset >> 16;
+            fallthrough;
+        case 2:
+            *cp++ = msg_offset >> 8;
+            fallthrough;
+        case 1:
+        case 0:	/* can't happen: for better code generation */
+            *cp++ = msg_offset >> 0;
+        }

-    t[1].rx_buf = buf;
-    t[1].len = count;
-    spi_message_add_tail(&t[1], &m);
+        spi_message_init(&m);
+        memset(t, 0, sizeof(t));

-    mutex_lock(&at25->lock);
+        t[0].tx_buf = at25->command;
+        t[0].len = at25->addrlen + 1;
+        spi_message_add_tail(&t[0], &m);

-    /*
-     * Read it all at once.
-     *
-     * REVISIT that's potentially a problem with large chips, if
-     * other devices on the bus need to be accessed regularly or
-     * this chip is clocked very slowly.
-     */
-    status = spi_sync(at25->spi, &m);
-    dev_dbg(&at25->spi->dev, "read %zu bytes at %d --> %zd\n",
-        count, offset, status);
+        t[1].rx_buf = buf + nr_bytes;
+        t[1].len = msg_count;
+        spi_message_add_tail(&t[1], &m);

-    mutex_unlock(&at25->lock);
-    return status;
+        status = spi_sync(at25->spi, &m);
+
+        mutex_unlock(&at25->lock);
+
+        if (status)
+            return status;
+
+        --num_msgs;
+        msg_offset += msg_count;
+        nr_bytes += msg_count;
+    }
+
+    dev_dbg(&at25->spi->dev, "read %zu bytes at %d\n",
+        count, offset);
+    return 0;
 }

 /* Read extra registers as ID or serial number */
@@ -152,7 +163,7 @@ static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command,
     spi_message_init(&m);
     memset(t, 0, sizeof(t));

-    t[0].tx_buf = &command;
+    t[0].tx_buf = at25->command;
     t[0].len = 1;
     spi_message_add_tail(&t[0], &m);

@@ -162,6 +173,8 @@ static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command,

     mutex_lock(&at25->lock);

+    at25->command[0] = command;
+
     status = spi_sync(at25->spi, &m);
     dev_dbg(&at25->spi->dev, "read %d aux bytes --> %d\n", len, status);

@@ -187,6 +200,7 @@ ATTRIBUTE_GROUPS(sernum);
 static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
 {
     struct at25_data *at25 = priv;
+    size_t maxsz = spi_max_transfer_size(at25->spi);
     const char *buf = val;
     int			status = 0;
     unsigned		buf_size;
@@ -250,6 +264,8 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
         segment = buf_size - (offset % buf_size);
         if (segment > count)
             segment = count;
+        if (segment > maxsz)
+            segment = maxsz;
         memcpy(cp, buf, segment);
         status = spi_write(at25->spi, bounce,
                    segment + at25->addrlen + 1);
@@ -309,7 +325,7 @@ static int at25_fw_to_chip(struct device *dev, struct spi_eeprom *chip)
     u32 val;
     int err;

-    strncpy(chip->name, "at25", sizeof(chip->name));
+    strscpy(chip->name, "at25", sizeof(chip->name));

     err = device_property_read_u32(dev, "size", &val);
     if (err)
@@ -370,7 +386,7 @@ static int at25_fram_to_chip(struct device *dev, struct spi_eeprom *chip)
     u8 id[FM25_ID_LEN];
     int i;

-    strncpy(chip->name, "fm25", sizeof(chip->name));
+    strscpy(chip->name, "fm25", sizeof(chip->name));

     /* Get ID of chip */
     fm25_aux_read(at25, id, FM25_RDID, FM25_ID_LEN);
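The at25 read path now respects the controller's transfer limit: spi_max_transfer_size() caps each message, DIV_ROUND_UP(count, max_chunk) gives the message count, and the command buffer moves into at25_data so it stays alive across messages. A hedged sketch of just the chunking skeleton — issue_one_read() is a placeholder for building and spi_sync()ing one two-transfer message, as the loop above does:

```c
#include <linux/minmax.h>
#include <linux/spi/spi.h>

/* placeholder for one address+data SPI message, per the loop above */
static int issue_one_read(struct spi_device *spi, unsigned int offset,
                          void *buf, size_t len);

static int example_chunked_read(struct spi_device *spi, unsigned int offset,
                                void *buf, size_t count)
{
    size_t max_chunk = spi_max_transfer_size(spi);
    size_t done = 0;

    while (done < count) {
        size_t chunk = min(count - done, max_chunk);
        int ret = issue_one_read(spi, offset + done, buf + done, chunk);

        if (ret)
            return ret;
        done += chunk;  /* advance by exactly one transfer's worth */
    }
    return 0;
}
```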
FASTRPC_INIT_HANDLE 1 +#define FASTRPC_DSP_UTILITIES_HANDLE 2 #define FASTRPC_CTXID_MASK (0xFF0) #define INIT_FILELEN_MAX (2 * 1024 * 1024) #define FASTRPC_DEVICE_NAME "fastrpc" #define ADSP_MMAP_ADD_PAGES 0x1000 +#define DSP_UNSUPPORTED_API (0x80000414) +/* MAX NUMBER of DSP ATTRIBUTES SUPPORTED */ +#define FASTRPC_MAX_DSP_ATTRIBUTES (256) +#define FASTRPC_MAX_DSP_ATTRIBUTES_LEN (sizeof(u32) * FASTRPC_MAX_DSP_ATTRIBUTES) /* Retrives number of input buffers from the scalars parameter */ #define REMOTE_SCALARS_INBUFS(sc) (((sc) >> 16) & 0x0ff) @@ -72,13 +79,15 @@ #define FASTRPC_RMID_INIT_CREATE 6 #define FASTRPC_RMID_INIT_CREATE_ATTR 7 #define FASTRPC_RMID_INIT_CREATE_STATIC 8 +#define FASTRPC_RMID_INIT_MEM_MAP 10 +#define FASTRPC_RMID_INIT_MEM_UNMAP 11 /* Protection Domain(PD) ids */ #define AUDIO_PD (0) /* also GUEST_OS PD? */ #define USER_PD (1) #define SENSORS_PD (2) -#define miscdev_to_cctx(d) container_of(d, struct fastrpc_channel_ctx, miscdev) +#define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev) static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp", "sdsp", "cdsp"}; @@ -92,9 +101,20 @@ struct fastrpc_invoke_buf { u32 pgidx; /* index to start of contiguous region */ }; -struct fastrpc_remote_arg { - u64 pv; - u64 len; +struct fastrpc_remote_dmahandle { + s32 fd; /* dma handle fd */ + u32 offset; /* dma handle offset */ + u32 len; /* dma handle length */ +}; + +struct fastrpc_remote_buf { + u64 pv; /* buffer pointer */ + u64 len; /* length of buffer */ +}; + +union fastrpc_remote_arg { + struct fastrpc_remote_buf buf; + struct fastrpc_remote_dmahandle dma; }; struct fastrpc_mmap_rsp_msg { @@ -108,12 +128,29 @@ struct fastrpc_mmap_req_msg { s32 num; }; +struct fastrpc_mem_map_req_msg { + s32 pgid; + s32 fd; + s32 offset; + u32 flags; + u64 vaddrin; + s32 num; + s32 data_len; +}; + struct fastrpc_munmap_req_msg { s32 pgid; u64 vaddr; u64 size; }; +struct fastrpc_mem_unmap_req_msg { + s32 pgid; + s32 fd; + u64 vaddrin; + u64 len; +}; + struct fastrpc_msg { int pid; /* process group id */ int tid; /* thread id */ @@ -170,6 +207,8 @@ struct fastrpc_map { u64 size; void *va; u64 len; + u64 raddr; + u32 attr; struct kref refcount; }; @@ -189,7 +228,7 @@ struct fastrpc_invoke_ctx { struct work_struct put_work; struct fastrpc_msg msg; struct fastrpc_user *fl; - struct fastrpc_remote_arg *rpra; + union fastrpc_remote_arg *rpra; struct fastrpc_map **maps; struct fastrpc_buf *buf; struct fastrpc_invoke_args *args; @@ -207,13 +246,28 @@ struct fastrpc_session_ctx { struct fastrpc_channel_ctx { int domain_id; int sesscount; + int vmcount; + u32 perms; + struct qcom_scm_vmperm vmperms[FASTRPC_MAX_VMIDS]; struct rpmsg_device *rpdev; struct fastrpc_session_ctx session[FASTRPC_MAX_SESSIONS]; spinlock_t lock; struct idr ctx_idr; struct list_head users; - struct miscdevice miscdev; struct kref refcount; + /* Flag if dsp attributes are cached */ + bool valid_attributes; + u32 dsp_attributes[FASTRPC_MAX_DSP_ATTRIBUTES]; + struct fastrpc_device *secure_fdevice; + struct fastrpc_device *fdevice; + bool secure; + bool unsigned_support; +}; + +struct fastrpc_device { + struct fastrpc_channel_ctx *cctx; + struct miscdevice miscdev; + bool secure; }; struct fastrpc_user { @@ -228,6 +282,7 @@ struct fastrpc_user { int tgid; int pd; + bool is_secure_dev; /* Lock for lists */ spinlock_t lock; /* lock for allocations */ @@ -241,6 +296,20 @@ static void fastrpc_free_map(struct kref *ref) map = container_of(ref, struct fastrpc_map, refcount); if (map->table) { + if (map->attr & 
FASTRPC_ATTR_SECUREMAP) { + struct qcom_scm_vmperm perm; + int err = 0; + + perm.vmid = QCOM_SCM_VMID_HLOS; + perm.perm = QCOM_SCM_PERM_RWX; + err = qcom_scm_assign_mem(map->phys, map->size, + &(map->fl->cctx->vmperms[0].vmid), &perm, 1); + if (err) { + dev_err(map->fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d", + map->phys, map->size, err); + return; + } + } dma_buf_unmap_attachment(map->attach, map->table, DMA_BIDIRECTIONAL); dma_buf_detach(map->buf, map->attach); @@ -262,7 +331,8 @@ static void fastrpc_map_get(struct fastrpc_map *map) kref_get(&map->refcount); } -static int fastrpc_map_find(struct fastrpc_user *fl, int fd, + +static int fastrpc_map_lookup(struct fastrpc_user *fl, int fd, struct fastrpc_map **ppmap) { struct fastrpc_map *map = NULL; @@ -270,7 +340,6 @@ static int fastrpc_map_find(struct fastrpc_user *fl, int fd, mutex_lock(&fl->mutex); list_for_each_entry(map, &fl->maps, node) { if (map->fd == fd) { - fastrpc_map_get(map); *ppmap = map; mutex_unlock(&fl->mutex); return 0; @@ -281,6 +350,17 @@ static int fastrpc_map_find(struct fastrpc_user *fl, int fd, return -ENOENT; } +static int fastrpc_map_find(struct fastrpc_user *fl, int fd, + struct fastrpc_map **ppmap) +{ + int ret = fastrpc_map_lookup(fl, fd, ppmap); + + if (!ret) + fastrpc_map_get(*ppmap); + + return ret; +} + static void fastrpc_buf_free(struct fastrpc_buf *buf) { dma_free_coherent(buf->dev, buf->size, buf->virt, @@ -353,7 +433,7 @@ static void fastrpc_context_free(struct kref *ref) ctx = container_of(ref, struct fastrpc_invoke_ctx, refcount); cctx = ctx->cctx; - for (i = 0; i < ctx->nscalars; i++) + for (i = 0; i < ctx->nbufs; i++) fastrpc_map_put(ctx->maps[i]); if (ctx->buf) @@ -587,11 +667,11 @@ static void fastrpc_dma_buf_detatch(struct dma_buf *dmabuf, kfree(a); } -static int fastrpc_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map) +static int fastrpc_vmap(struct dma_buf *dmabuf, struct iosys_map *map) { struct fastrpc_buf *buf = dmabuf->priv; - dma_buf_map_set_vaddr(map, buf->virt); + iosys_map_set_vaddr(map, buf->virt); return 0; } @@ -617,7 +697,7 @@ static const struct dma_buf_ops fastrpc_dma_buf_ops = { }; static int fastrpc_map_create(struct fastrpc_user *fl, int fd, - u64 len, struct fastrpc_map **ppmap) + u64 len, u32 attr, struct fastrpc_map **ppmap) { struct fastrpc_session_ctx *sess = fl->sctx; struct fastrpc_map *map = NULL; @@ -659,6 +739,22 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd, map->len = len; kref_init(&map->refcount); + if (attr & FASTRPC_ATTR_SECUREMAP) { + /* + * If subsystem VMIDs are defined in DTSI, then do + * hyp_assign from HLOS to those VM(s) + */ + unsigned int perms = BIT(QCOM_SCM_VMID_HLOS); + + map->attr = attr; + err = qcom_scm_assign_mem(map->phys, (u64)map->size, &perms, + fl->cctx->vmperms, fl->cctx->vmcount); + if (err) { + dev_err(sess->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d", + map->phys, map->size, err); + goto map_err; + } + } spin_lock(&fl->lock); list_add_tail(&map->node, &fl->maps); spin_unlock(&fl->lock); @@ -682,7 +778,7 @@ get_err: * >>>>>> START of METADATA <<<<<<<<< * +---------------------------------+ * | Arguments | - * | type:(struct fastrpc_remote_arg)| + * | type:(union fastrpc_remote_arg)| * | (0 - N) | * +---------------------------------+ * | Invoke Buffer list | @@ -707,7 +803,7 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx) { int size = 0; - size = (sizeof(struct fastrpc_remote_arg) + + size = (sizeof(struct fastrpc_remote_buf) + sizeof(struct 
fastrpc_invoke_buf) + sizeof(struct fastrpc_phy_page)) * ctx->nscalars + sizeof(u64) * FASTRPC_MAX_FDLIST + @@ -743,16 +839,13 @@ static int fastrpc_create_maps(struct fastrpc_invoke_ctx *ctx) int i, err; for (i = 0; i < ctx->nscalars; ++i) { - /* Make sure reserved field is set to 0 */ - if (ctx->args[i].reserved) - return -EINVAL; if (ctx->args[i].fd == 0 || ctx->args[i].fd == -1 || ctx->args[i].length == 0) continue; err = fastrpc_map_create(ctx->fl, ctx->args[i].fd, - ctx->args[i].length, &ctx->maps[i]); + ctx->args[i].length, ctx->args[i].attr, &ctx->maps[i]); if (err) { dev_err(dev, "Error Creating map %d\n", err); return -EINVAL; @@ -762,10 +855,20 @@ static int fastrpc_create_maps(struct fastrpc_invoke_ctx *ctx) return 0; } +static struct fastrpc_invoke_buf *fastrpc_invoke_buf_start(union fastrpc_remote_arg *pra, int len) +{ + return (struct fastrpc_invoke_buf *)(&pra[len]); +} + +static struct fastrpc_phy_page *fastrpc_phy_page_start(struct fastrpc_invoke_buf *buf, int len) +{ + return (struct fastrpc_phy_page *)(&buf[len]); +} + static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) { struct device *dev = ctx->fl->sctx->dev; - struct fastrpc_remote_arg *rpra; + union fastrpc_remote_arg *rpra; struct fastrpc_invoke_buf *list; struct fastrpc_phy_page *pages; int inbufs, i, oix, err = 0; @@ -789,9 +892,8 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) return err; rpra = ctx->buf->virt; - list = ctx->buf->virt + ctx->nscalars * sizeof(*rpra); - pages = ctx->buf->virt + ctx->nscalars * (sizeof(*list) + - sizeof(*rpra)); + list = fastrpc_invoke_buf_start(rpra, ctx->nscalars); + pages = fastrpc_phy_page_start(list, ctx->nscalars); args = (uintptr_t)ctx->buf->virt + metalen; rlen = pkt_size - metalen; ctx->rpra = rpra; @@ -802,8 +904,8 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) i = ctx->olaps[oix].raix; len = ctx->args[i].length; - rpra[i].pv = 0; - rpra[i].len = len; + rpra[i].buf.pv = 0; + rpra[i].buf.len = len; list[i].num = len ? 1 : 0; list[i].pgidx = i; @@ -813,7 +915,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) if (ctx->maps[i]) { struct vm_area_struct *vma = NULL; - rpra[i].pv = (u64) ctx->args[i].ptr; + rpra[i].buf.pv = (u64) ctx->args[i].ptr; pages[i].addr = ctx->maps[i]->phys; mmap_read_lock(current->mm); @@ -840,7 +942,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) if (rlen < mlen) goto bail; - rpra[i].pv = args - ctx->olaps[oix].offset; + rpra[i].buf.pv = args - ctx->olaps[oix].offset; pages[i].addr = ctx->buf->phys - ctx->olaps[oix].offset + (pkt_size - rlen); @@ -854,7 +956,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) } if (i < inbufs && !ctx->maps[i]) { - void *dst = (void *)(uintptr_t)rpra[i].pv; + void *dst = (void *)(uintptr_t)rpra[i].buf.pv; void *src = (void *)(uintptr_t)ctx->args[i].ptr; if (!kernel) { @@ -870,12 +972,15 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) } for (i = ctx->nbufs; i < ctx->nscalars; ++i) { - rpra[i].pv = (u64) ctx->args[i].ptr; - rpra[i].len = ctx->args[i].length; list[i].num = ctx->args[i].length ? 
1 : 0; list[i].pgidx = i; - pages[i].addr = ctx->maps[i]->phys; - pages[i].size = ctx->maps[i]->size; + if (ctx->maps[i]) { + pages[i].addr = ctx->maps[i]->phys; + pages[i].size = ctx->maps[i]->size; + } + rpra[i].dma.fd = ctx->args[i].fd; + rpra[i].dma.len = ctx->args[i].length; + rpra[i].dma.offset = (u64) ctx->args[i].ptr; } bail: @@ -888,16 +993,26 @@ bail: static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx, u32 kernel) { - struct fastrpc_remote_arg *rpra = ctx->rpra; - int i, inbufs; + union fastrpc_remote_arg *rpra = ctx->rpra; + struct fastrpc_user *fl = ctx->fl; + struct fastrpc_map *mmap = NULL; + struct fastrpc_invoke_buf *list; + struct fastrpc_phy_page *pages; + u64 *fdlist; + int i, inbufs, outbufs, handles; inbufs = REMOTE_SCALARS_INBUFS(ctx->sc); + outbufs = REMOTE_SCALARS_OUTBUFS(ctx->sc); + handles = REMOTE_SCALARS_INHANDLES(ctx->sc) + REMOTE_SCALARS_OUTHANDLES(ctx->sc); + list = fastrpc_invoke_buf_start(rpra, ctx->nscalars); + pages = fastrpc_phy_page_start(list, ctx->nscalars); + fdlist = (uint64_t *)(pages + inbufs + outbufs + handles); for (i = inbufs; i < ctx->nbufs; ++i) { if (!ctx->maps[i]) { - void *src = (void *)(uintptr_t)rpra[i].pv; + void *src = (void *)(uintptr_t)rpra[i].buf.pv; void *dst = (void *)(uintptr_t)ctx->args[i].ptr; - u64 len = rpra[i].len; + u64 len = rpra[i].buf.len; if (!kernel) { if (copy_to_user((void __user *)dst, src, len)) @@ -908,6 +1023,13 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx, } } + for (i = 0; i < FASTRPC_MAX_FDLIST; i++) { + if (!fdlist[i]) + break; + if (!fastrpc_map_lookup(fl, (int)fdlist[i], &mmap)) + fastrpc_map_put(mmap); + } + return 0; } @@ -1016,6 +1138,24 @@ bail: return err; } +static bool is_session_rejected(struct fastrpc_user *fl, bool unsigned_pd_request) +{ + /* Check if the device node is non-secure and channel is secure*/ + if (!fl->is_secure_dev && fl->cctx->secure) { + /* + * Allow untrusted applications to offload only to Unsigned PD when + * channel is configured as secure and block untrusted apps on channel + * that does not support unsigned PD offload + */ + if (!fl->cctx->unsigned_support || !unsigned_pd_request) { + dev_err(&fl->cctx->rpdev->dev, "Error: Untrusted application trying to offload to signed PD"); + return true; + } + } + + return false; +} + static int fastrpc_init_create_process(struct fastrpc_user *fl, char __user *argp) { @@ -1035,6 +1175,7 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl, u32 siglen; } inbuf; u32 sc; + bool unsigned_module = false; args = kcalloc(FASTRPC_CREATE_PROCESS_NARGS, sizeof(*args), GFP_KERNEL); if (!args) @@ -1045,6 +1186,14 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl, goto err; } + if (init.attrs & FASTRPC_MODE_UNSIGNED_MODULE) + unsigned_module = true; + + if (is_session_rejected(fl, unsigned_module)) { + err = -ECONNREFUSED; + goto err; + } + if (init.filelen > INIT_FILELEN_MAX) { err = -EINVAL; goto err; @@ -1059,7 +1208,7 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl, fl->pd = USER_PD; if (init.filelen && init.filefd) { - err = fastrpc_map_create(fl, init.filefd, init.filelen, &map); + err = fastrpc_map_create(fl, init.filefd, init.filelen, 0, &map); if (err) goto err; } @@ -1168,7 +1317,6 @@ static int fastrpc_release_current_dsp_process(struct fastrpc_user *fl) args[0].ptr = (u64)(uintptr_t) &tgid; args[0].length = sizeof(tgid); args[0].fd = -1; - args[0].reserved = 0; sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_RELEASE, 1, 0); return fastrpc_internal_invoke(fl, true, 
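is_session_rejected() above encodes a small policy matrix: a process that opened the non-secure device node may use a secure channel only to offload to an unsigned PD, and only when the channel advertises unsigned-PD support. Distilled into a sketch (the helper name is hypothetical):

/* Sketch of the policy enforced by is_session_rejected() above. */
static bool offload_allowed(bool secure_dev, bool secure_channel,
			    bool unsigned_support, bool unsigned_pd_request)
{
	/* Trusted node, or nothing on the channel to protect: allow. */
	if (secure_dev || !secure_channel)
		return true;

	/* Untrusted node on a secure channel: unsigned PD only. */
	return unsigned_support && unsigned_pd_request;
}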
FASTRPC_INIT_HANDLE, @@ -1220,10 +1368,14 @@ static int fastrpc_device_release(struct inode *inode, struct file *file) static int fastrpc_device_open(struct inode *inode, struct file *filp) { - struct fastrpc_channel_ctx *cctx = miscdev_to_cctx(filp->private_data); + struct fastrpc_channel_ctx *cctx; + struct fastrpc_device *fdevice; struct fastrpc_user *fl = NULL; unsigned long flags; + fdevice = miscdev_to_fdevice(filp->private_data); + cctx = fdevice->cctx; + fl = kzalloc(sizeof(*fl), GFP_KERNEL); if (!fl) return -ENOMEM; @@ -1240,6 +1392,7 @@ static int fastrpc_device_open(struct inode *inode, struct file *filp) INIT_LIST_HEAD(&fl->user); fl->tgid = current->tgid; fl->cctx = cctx; + fl->is_secure_dev = fdevice->secure; fl->sctx = fastrpc_session_alloc(cctx); if (!fl->sctx) { @@ -1311,7 +1464,6 @@ static int fastrpc_init_attach(struct fastrpc_user *fl, int pd) args[0].ptr = (u64)(uintptr_t) &tgid; args[0].length = sizeof(tgid); args[0].fd = -1; - args[0].reserved = 0; sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_ATTACH, 1, 0); fl->pd = pd; @@ -1349,21 +1501,123 @@ static int fastrpc_invoke(struct fastrpc_user *fl, char __user *argp) return err; } +static int fastrpc_get_info_from_dsp(struct fastrpc_user *fl, uint32_t *dsp_attr_buf, + uint32_t dsp_attr_buf_len) +{ + struct fastrpc_invoke_args args[2] = { 0 }; + + /* Capability filled in userspace */ + dsp_attr_buf[0] = 0; + + args[0].ptr = (u64)(uintptr_t)&dsp_attr_buf_len; + args[0].length = sizeof(dsp_attr_buf_len); + args[0].fd = -1; + args[1].ptr = (u64)(uintptr_t)&dsp_attr_buf[1]; + args[1].length = dsp_attr_buf_len; + args[1].fd = -1; + fl->pd = 1; + + return fastrpc_internal_invoke(fl, true, FASTRPC_DSP_UTILITIES_HANDLE, + FASTRPC_SCALARS(0, 1, 1), args); +} + +static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap, + struct fastrpc_user *fl) +{ + struct fastrpc_channel_ctx *cctx = fl->cctx; + uint32_t attribute_id = cap->attribute_id; + uint32_t *dsp_attributes; + unsigned long flags; + uint32_t domain = cap->domain; + int err; + + spin_lock_irqsave(&cctx->lock, flags); + /* check if we already have queried dsp for attributes */ + if (cctx->valid_attributes) { + spin_unlock_irqrestore(&cctx->lock, flags); + goto done; + } + spin_unlock_irqrestore(&cctx->lock, flags); + + dsp_attributes = kzalloc(FASTRPC_MAX_DSP_ATTRIBUTES_LEN, GFP_KERNEL); + if (!dsp_attributes) + return -ENOMEM; + + err = fastrpc_get_info_from_dsp(fl, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES_LEN); + if (err == DSP_UNSUPPORTED_API) { + dev_info(&cctx->rpdev->dev, + "Warning: DSP capabilities not supported on domain: %d\n", domain); + kfree(dsp_attributes); + return -EOPNOTSUPP; + } else if (err) { + dev_err(&cctx->rpdev->dev, "Error: dsp information is incorrect err: %d\n", err); + kfree(dsp_attributes); + return err; + } + + spin_lock_irqsave(&cctx->lock, flags); + memcpy(cctx->dsp_attributes, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES_LEN); + cctx->valid_attributes = true; + spin_unlock_irqrestore(&cctx->lock, flags); + kfree(dsp_attributes); +done: + cap->capability = cctx->dsp_attributes[attribute_id]; + return 0; +} + +static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_ioctl_capability cap = {0}; + int err = 0; + + if (copy_from_user(&cap, argp, sizeof(cap))) + return -EFAULT; + + cap.capability = 0; + if (cap.domain >= FASTRPC_DEV_MAX) { + dev_err(&fl->cctx->rpdev->dev, "Error: Invalid domain id:%d, err:%d\n", + cap.domain, err); + return -ECHRNG; + } + + /* Fastrpc Capablities does not support 
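fastrpc_get_info_from_kernel() above queries the DSP for its attribute table at most once per channel: the fast path serves cap->capability from cctx->dsp_attributes, and the slow path fills a temporary buffer without the spinlock held (the remote call sleeps) before publishing it under the lock. The shape of the pattern, with hypothetical names; two racing slow paths are harmless because both fetch and publish the same immutable table:

struct chan_cache {
	spinlock_t lock;
	bool valid;
	u32 attrs[FASTRPC_MAX_DSP_ATTRIBUTES]; /* name from the hunks */
};

static int get_cached_attr(struct chan_cache *c, u32 id, u32 *out)
{
	u32 *tmp;

	spin_lock(&c->lock);
	if (c->valid) {                        /* fast path */
		*out = c->attrs[id];
		spin_unlock(&c->lock);
		return 0;
	}
	spin_unlock(&c->lock);

	tmp = kzalloc(sizeof(c->attrs), GFP_KERNEL); /* may sleep */
	if (!tmp)
		return -ENOMEM;
	/* ... fill tmp via the (sleeping) remote call ... */

	spin_lock(&c->lock);
	memcpy(c->attrs, tmp, sizeof(c->attrs));     /* publish */
	c->valid = true;
	*out = c->attrs[id];
	spin_unlock(&c->lock);

	kfree(tmp);
	return 0;
}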
modem domain */ + if (cap.domain == MDSP_DOMAIN_ID) { + dev_err(&fl->cctx->rpdev->dev, "Error: modem not supported %d\n", err); + return -ECHRNG; + } + + if (cap.attribute_id >= FASTRPC_MAX_DSP_ATTRIBUTES) { + dev_err(&fl->cctx->rpdev->dev, "Error: invalid attribute: %d, err: %d\n", + cap.attribute_id, err); + return -EOVERFLOW; + } + + err = fastrpc_get_info_from_kernel(&cap, fl); + if (err) + return err; + + if (copy_to_user(argp, &cap.capability, sizeof(cap.capability))) + return -EFAULT; + + return 0; +} + static int fastrpc_req_munmap_impl(struct fastrpc_user *fl, struct fastrpc_req_munmap *req) { struct fastrpc_invoke_args args[1] = { [0] = { 0 } }; - struct fastrpc_buf *buf, *b; + struct fastrpc_buf *buf = NULL, *iter, *b; struct fastrpc_munmap_req_msg req_msg; struct device *dev = fl->sctx->dev; int err; u32 sc; spin_lock(&fl->lock); - list_for_each_entry_safe(buf, b, &fl->mmaps, node) { - if ((buf->raddr == req->vaddrout) && (buf->size == req->size)) + list_for_each_entry_safe(iter, b, &fl->mmaps, node) { + if ((iter->raddr == req->vaddrout) && (iter->size == req->size)) { + buf = iter; break; - buf = NULL; + } } spin_unlock(&fl->lock); @@ -1491,6 +1745,135 @@ err_invoke: return err; } +static int fastrpc_req_mem_unmap_impl(struct fastrpc_user *fl, struct fastrpc_mem_unmap *req) +{ + struct fastrpc_invoke_args args[1] = { [0] = { 0 } }; + struct fastrpc_map *map = NULL, *iter, *m; + struct fastrpc_mem_unmap_req_msg req_msg = { 0 }; + int err = 0; + u32 sc; + struct device *dev = fl->sctx->dev; + + spin_lock(&fl->lock); + list_for_each_entry_safe(iter, m, &fl->maps, node) { + if ((req->fd < 0 || iter->fd == req->fd) && (iter->raddr == req->vaddr)) { + map = iter; + break; + } + } + + spin_unlock(&fl->lock); + + if (!map) { + dev_err(dev, "map not in list\n"); + return -EINVAL; + } + + req_msg.pgid = fl->tgid; + req_msg.len = map->len; + req_msg.vaddrin = map->raddr; + req_msg.fd = map->fd; + + args[0].ptr = (u64) (uintptr_t) &req_msg; + args[0].length = sizeof(req_msg); + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MEM_UNMAP, 1, 0); + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, + &args[0]); + fastrpc_map_put(map); + if (err) + dev_err(dev, "unmmap\tpt fd = %d, 0x%09llx error\n", map->fd, map->raddr); + + return err; +} + +static int fastrpc_req_mem_unmap(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_mem_unmap req; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + return fastrpc_req_mem_unmap_impl(fl, &req); +} + +static int fastrpc_req_mem_map(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_invoke_args args[4] = { [0 ... 
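Both unmap paths above adopt the same fix: the list_for_each_entry_safe() cursor is no longer used after the loop. When such a loop terminates without hitting break, the cursor does not point at a valid element, so the match is now recorded in a separate, NULL-initialized pointer. The idiom in isolation:

/* Find-in-list idiom used by the munmap hunks above. */
struct fastrpc_buf *buf = NULL, *iter, *tmp;

list_for_each_entry_safe(iter, tmp, &fl->mmaps, node) {
	if (iter->raddr == req->vaddrout && iter->size == req->size) {
		buf = iter;            /* record the match explicitly */
		break;
	}
}

if (!buf)                              /* 'iter' is meaningless here */
	return -EINVAL;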
3] = { 0 } }; + struct fastrpc_mem_map_req_msg req_msg = { 0 }; + struct fastrpc_mmap_rsp_msg rsp_msg = { 0 }; + struct fastrpc_mem_unmap req_unmap = { 0 }; + struct fastrpc_phy_page pages = { 0 }; + struct fastrpc_mem_map req; + struct device *dev = fl->sctx->dev; + struct fastrpc_map *map = NULL; + int err; + u32 sc; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + /* create SMMU mapping */ + err = fastrpc_map_create(fl, req.fd, req.length, 0, &map); + if (err) { + dev_err(dev, "failed to map buffer, fd = %d\n", req.fd); + return err; + } + + req_msg.pgid = fl->tgid; + req_msg.fd = req.fd; + req_msg.offset = req.offset; + req_msg.vaddrin = req.vaddrin; + map->va = (void *) (uintptr_t) req.vaddrin; + req_msg.flags = req.flags; + req_msg.num = sizeof(pages); + req_msg.data_len = 0; + + args[0].ptr = (u64) (uintptr_t) &req_msg; + args[0].length = sizeof(req_msg); + + pages.addr = map->phys; + pages.size = map->size; + + args[1].ptr = (u64) (uintptr_t) &pages; + args[1].length = sizeof(pages); + + args[2].ptr = (u64) (uintptr_t) &pages; + args[2].length = 0; + + args[3].ptr = (u64) (uintptr_t) &rsp_msg; + args[3].length = sizeof(rsp_msg); + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MEM_MAP, 3, 1); + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, &args[0]); + if (err) { + dev_err(dev, "mem mmap error, fd %d, vaddr %llx, size %lld\n", + req.fd, req.vaddrin, map->size); + goto err_invoke; + } + + /* update the buffer to be able to deallocate the memory on the DSP */ + map->raddr = rsp_msg.vaddr; + + /* let the client know the address to use */ + req.vaddrout = rsp_msg.vaddr; + + if (copy_to_user((void __user *)argp, &req, sizeof(req))) { + /* unmap the memory and release the buffer */ + req_unmap.vaddr = (uintptr_t) rsp_msg.vaddr; + req_unmap.length = map->size; + fastrpc_req_mem_unmap_impl(fl, &req_unmap); + return -EFAULT; + } + + return 0; + +err_invoke: + fastrpc_map_put(map); + + return err; +} + static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -1520,6 +1903,15 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, case FASTRPC_IOCTL_MUNMAP: err = fastrpc_req_munmap(fl, argp); break; + case FASTRPC_IOCTL_MEM_MAP: + err = fastrpc_req_mem_map(fl, argp); + break; + case FASTRPC_IOCTL_MEM_UNMAP: + err = fastrpc_req_mem_unmap(fl, argp); + break; + case FASTRPC_IOCTL_GET_DSP_INFO: + err = fastrpc_get_dsp_info(fl, argp); + break; default: err = -ENOTTY; break; @@ -1615,12 +2007,41 @@ static struct platform_driver fastrpc_cb_driver = { }, }; +static int fastrpc_device_register(struct device *dev, struct fastrpc_channel_ctx *cctx, + bool is_secured, const char *domain) +{ + struct fastrpc_device *fdev; + int err; + + fdev = devm_kzalloc(dev, sizeof(*fdev), GFP_KERNEL); + if (!fdev) + return -ENOMEM; + + fdev->secure = is_secured; + fdev->cctx = cctx; + fdev->miscdev.minor = MISC_DYNAMIC_MINOR; + fdev->miscdev.fops = &fastrpc_fops; + fdev->miscdev.name = devm_kasprintf(dev, GFP_KERNEL, "fastrpc-%s%s", + domain, is_secured ? 
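fastrpc_req_mem_map() above builds a four-argument remote call (request, page list, an empty attribute slot, response), stores the DSP-side address from the response in map->raddr, and unwinds with a remote unmap if the final copy_to_user() fails. From userspace the whole flow is one ioctl. A hedged sketch, assuming the uapi header exposes struct fastrpc_mem_map with the field names used in the hunks, and that a CDSP node was registered as /dev/fastrpc-cdsp per the naming scheme below:

/* Hypothetical userspace sketch; error handling trimmed. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <misc/fastrpc.h>   /* uapi header, assumed installed */

int map_dmabuf_on_dsp(int buf_fd, unsigned long long len)
{
	struct fastrpc_mem_map req = {
		.fd = buf_fd,   /* dma-buf to hand to the DSP */
		.offset = 0,
		.vaddrin = 0,
		.length = len,
		.flags = 0,     /* a value from enum fastrpc_map_flags */
	};
	int dsp = open("/dev/fastrpc-cdsp", O_RDWR);

	if (dsp < 0 || ioctl(dsp, FASTRPC_IOCTL_MEM_MAP, &req) < 0)
		return -1;

	/* remote (DSP-side) address chosen by the firmware */
	printf("mapped at 0x%llx\n", (unsigned long long)req.vaddrout);
	return 0;
}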
"-secure" : ""); + err = misc_register(&fdev->miscdev); + if (!err) { + if (is_secured) + cctx->secure_fdevice = fdev; + else + cctx->fdevice = fdev; + } + + return err; +} + static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev) { struct device *rdev = &rpdev->dev; struct fastrpc_channel_ctx *data; - int i, err, domain_id = -1; + int i, err, domain_id = -1, vmcount; const char *domain; + bool secure_dsp; + unsigned int vmids[FASTRPC_MAX_VMIDS]; err = of_property_read_string(rdev->of_node, "label", &domain); if (err) { @@ -1640,18 +2061,53 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev) return -EINVAL; } + vmcount = of_property_read_variable_u32_array(rdev->of_node, + "qcom,vmids", &vmids[0], 0, FASTRPC_MAX_VMIDS); + if (vmcount < 0) + vmcount = 0; + else if (!qcom_scm_is_available()) + return -EPROBE_DEFER; + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; - data->miscdev.minor = MISC_DYNAMIC_MINOR; - data->miscdev.name = devm_kasprintf(rdev, GFP_KERNEL, "fastrpc-%s", - domains[domain_id]); - data->miscdev.fops = &fastrpc_fops; - err = misc_register(&data->miscdev); - if (err) { - kfree(data); - return err; + if (vmcount) { + data->vmcount = vmcount; + data->perms = BIT(QCOM_SCM_VMID_HLOS); + for (i = 0; i < data->vmcount; i++) { + data->vmperms[i].vmid = vmids[i]; + data->vmperms[i].perm = QCOM_SCM_PERM_RWX; + } + } + + secure_dsp = !(of_property_read_bool(rdev->of_node, "qcom,non-secure-domain")); + data->secure = secure_dsp; + + switch (domain_id) { + case ADSP_DOMAIN_ID: + case MDSP_DOMAIN_ID: + case SDSP_DOMAIN_ID: + /* Unsigned PD offloading is only supported on CDSP*/ + data->unsigned_support = false; + err = fastrpc_device_register(rdev, data, secure_dsp, domains[domain_id]); + if (err) + goto fdev_error; + break; + case CDSP_DOMAIN_ID: + data->unsigned_support = true; + /* Create both device nodes so that we can allow both Signed and Unsigned PD */ + err = fastrpc_device_register(rdev, data, true, domains[domain_id]); + if (err) + goto fdev_error; + + err = fastrpc_device_register(rdev, data, false, domains[domain_id]); + if (err) + goto fdev_error; + break; + default: + err = -EINVAL; + goto fdev_error; } kref_init(&data->refcount); @@ -1665,6 +2121,9 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev) data->rpdev = rpdev; return of_platform_populate(rdev->of_node, NULL, NULL, rdev); +fdev_error: + kfree(data); + return err; } static void fastrpc_notify_users(struct fastrpc_user *user) @@ -1688,7 +2147,12 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev) fastrpc_notify_users(user); spin_unlock_irqrestore(&cctx->lock, flags); - misc_deregister(&cctx->miscdev); + if (cctx->fdevice) + misc_deregister(&cctx->fdevice->miscdev); + + if (cctx->secure_fdevice) + misc_deregister(&cctx->secure_fdevice->miscdev); + of_platform_depopulate(&rpdev->dev); cctx->rpdev = NULL; diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile index 82c3824cad00..934a3a4aedc9 100644 --- a/drivers/misc/habanalabs/common/Makefile +++ b/drivers/misc/habanalabs/common/Makefile @@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \ common/command_buffer.o common/hw_queue.o common/irq.o \ common/sysfs.o common/hwmon.o common/memory.o \ common/command_submission.o common/firmware_if.o \ - common/state_dump.o common/hwmgr.o + common/state_dump.o common/memory_mgr.o diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c 
index 3c0ae07a2d80..e13b2b39c058 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -160,24 +160,6 @@ static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb) } } -static void cb_release(struct kref *ref) -{ - struct hl_device *hdev; - struct hl_cb *cb; - - cb = container_of(ref, struct hl_cb, refcount); - hdev = cb->hdev; - - hl_debugfs_remove_cb(cb); - - if (cb->is_mmu_mapped) - cb_unmap_mem(cb->ctx, cb); - - hl_ctx_put(cb->ctx); - - cb_do_release(hdev, cb); -} - static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, int ctx_id, bool internal_cb) { @@ -238,168 +220,175 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, return cb; } -int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, - struct hl_ctx *ctx, u32 cb_size, bool internal_cb, - bool map_cb, u64 *handle) +struct hl_cb_mmap_mem_alloc_args { + struct hl_device *hdev; + struct hl_ctx *ctx; + u32 cb_size; + bool internal_cb; + bool map_cb; +}; + +static void hl_cb_mmap_mem_release(struct hl_mmap_mem_buf *buf) { - struct hl_cb *cb; - bool alloc_new_cb = true; - int rc, ctx_id = ctx->asid; + struct hl_cb *cb = buf->private; - /* - * Can't use generic function to check this because of special case - * where we create a CB as part of the reset process - */ - if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) { - dev_warn_ratelimited(hdev->dev, - "Device is disabled or in reset. Can't create new CBs\n"); - rc = -EBUSY; - goto out_err; - } + hl_debugfs_remove_cb(cb); - if (cb_size > SZ_2M) { - dev_err(hdev->dev, "CB size %d must be less than %d\n", - cb_size, SZ_2M); - rc = -EINVAL; - goto out_err; - } + if (cb->is_mmu_mapped) + cb_unmap_mem(cb->ctx, cb); + + hl_ctx_put(cb->ctx); - if (!internal_cb) { + cb_do_release(cb->hdev, cb); +} + +static int hl_cb_mmap_mem_alloc(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args) +{ + struct hl_cb_mmap_mem_alloc_args *cb_args = args; + struct hl_cb *cb; + int rc, ctx_id = cb_args->ctx->asid; + bool alloc_new_cb = true; + + if (!cb_args->internal_cb) { /* Minimum allocation must be PAGE SIZE */ - if (cb_size < PAGE_SIZE) - cb_size = PAGE_SIZE; + if (cb_args->cb_size < PAGE_SIZE) + cb_args->cb_size = PAGE_SIZE; if (ctx_id == HL_KERNEL_ASID_ID && - cb_size <= hdev->asic_prop.cb_pool_cb_size) { + cb_args->cb_size <= cb_args->hdev->asic_prop.cb_pool_cb_size) { - spin_lock(&hdev->cb_pool_lock); - if (!list_empty(&hdev->cb_pool)) { - cb = list_first_entry(&hdev->cb_pool, + spin_lock(&cb_args->hdev->cb_pool_lock); + if (!list_empty(&cb_args->hdev->cb_pool)) { + cb = list_first_entry(&cb_args->hdev->cb_pool, typeof(*cb), pool_list); list_del(&cb->pool_list); - spin_unlock(&hdev->cb_pool_lock); + spin_unlock(&cb_args->hdev->cb_pool_lock); alloc_new_cb = false; } else { - spin_unlock(&hdev->cb_pool_lock); - dev_dbg(hdev->dev, "CB pool is empty\n"); + spin_unlock(&cb_args->hdev->cb_pool_lock); + dev_dbg(cb_args->hdev->dev, "CB pool is empty\n"); } } } if (alloc_new_cb) { - cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb); - if (!cb) { - rc = -ENOMEM; - goto out_err; - } + cb = hl_cb_alloc(cb_args->hdev, cb_args->cb_size, ctx_id, cb_args->internal_cb); + if (!cb) + return -ENOMEM; } - cb->hdev = hdev; - cb->ctx = ctx; - hl_ctx_get(hdev, cb->ctx); + cb->hdev = cb_args->hdev; + cb->ctx = cb_args->ctx; + cb->buf = buf; + cb->buf->mappable_size = cb->size; + cb->buf->private = cb; + + hl_ctx_get(cb->ctx); - if (map_cb) { + if (cb_args->map_cb) { if 
(ctx_id == HL_KERNEL_ASID_ID) { - dev_err(hdev->dev, + dev_err(cb_args->hdev->dev, "CB mapping is not supported for kernel context\n"); rc = -EINVAL; goto release_cb; } - rc = cb_map_mem(ctx, cb); + rc = cb_map_mem(cb_args->ctx, cb); if (rc) goto release_cb; } - spin_lock(&mgr->cb_lock); - rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); - spin_unlock(&mgr->cb_lock); - - if (rc < 0) { - dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n"); - goto unmap_mem; - } - - cb->id = (u64) rc; - - kref_init(&cb->refcount); - spin_lock_init(&cb->lock); - - /* - * idr is 32-bit so we can safely OR it with a mask that is above - * 32 bit - */ - *handle = cb->id | HL_MMAP_TYPE_CB; - *handle <<= PAGE_SHIFT; - hl_debugfs_add_cb(cb); return 0; -unmap_mem: - if (cb->is_mmu_mapped) - cb_unmap_mem(cb->ctx, cb); release_cb: hl_ctx_put(cb->ctx); - cb_do_release(hdev, cb); -out_err: - *handle = 0; + cb_do_release(cb_args->hdev, cb); return rc; } -int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle) +static int hl_cb_mmap(struct hl_mmap_mem_buf *buf, + struct vm_area_struct *vma, void *args) { - struct hl_cb *cb; - u32 handle; - int rc = 0; + struct hl_cb *cb = buf->private; - /* - * handle was given to user to do mmap, I need to shift it back to - * how the idr module gave it to me - */ - cb_handle >>= PAGE_SHIFT; - handle = (u32) cb_handle; + return cb->hdev->asic_funcs->mmap(cb->hdev, vma, cb->kernel_address, + cb->bus_address, cb->size); +} - spin_lock(&mgr->cb_lock); +static struct hl_mmap_mem_buf_behavior cb_behavior = { + .topic = "CB", + .mem_id = HL_MMAP_TYPE_CB, + .alloc = hl_cb_mmap_mem_alloc, + .release = hl_cb_mmap_mem_release, + .mmap = hl_cb_mmap, +}; - cb = idr_find(&mgr->cb_handles, handle); - if (cb) { - idr_remove(&mgr->cb_handles, handle); - spin_unlock(&mgr->cb_lock); - kref_put(&cb->refcount, cb_release); - } else { - spin_unlock(&mgr->cb_lock); - dev_err(hdev->dev, - "CB destroy failed, no match to handle 0x%x\n", handle); - rc = -EINVAL; +int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg, + struct hl_ctx *ctx, u32 cb_size, bool internal_cb, + bool map_cb, u64 *handle) +{ + struct hl_cb_mmap_mem_alloc_args args = { + .hdev = hdev, + .ctx = ctx, + .cb_size = cb_size, + .internal_cb = internal_cb, + .map_cb = map_cb, + }; + struct hl_mmap_mem_buf *buf; + int ctx_id = ctx->asid; + + if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) { + dev_warn_ratelimited(hdev->dev, + "Device is disabled or in reset. Can't create new CBs\n"); + return -EBUSY; } - return rc; + if (cb_size > SZ_2M) { + dev_err(hdev->dev, "CB size %d must be less than %d\n", + cb_size, SZ_2M); + return -EINVAL; + } + + buf = hl_mmap_mem_buf_alloc( + mmg, &cb_behavior, + ctx_id == HL_KERNEL_ASID_ID ? 
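cb_behavior above is the pivot of this refactor: per-object IDR plumbing, kref handling and PAGE_SHIFT handle mangling all move into the common memory manager, and a buffer type is now described by one hl_mmap_mem_buf_behavior with alloc/release/mmap callbacks. A minimal sketch of wiring up another buffer type the same way (the payload type and the mem_id value are placeholders):

struct my_buf {
	void *cpu_addr;
	u32 size;
};

static int my_alloc(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
{
	struct my_buf *m = kzalloc(sizeof(*m), gfp);

	if (!m)
		return -ENOMEM;

	buf->private = m;        /* handed back to release()/mmap() later */
	buf->mappable_size = 0;  /* nothing user-mappable in this sketch */
	return 0;
}

static void my_release(struct hl_mmap_mem_buf *buf)
{
	kfree(buf->private);
}

static struct hl_mmap_mem_buf_behavior my_behavior = {
	.topic = "MYBUF",
	.mem_id = HL_MMAP_TYPE_CB, /* placeholder: real types get their own id */
	.alloc = my_alloc,
	.release = my_release,
};

Allocation then becomes buf = hl_mmap_mem_buf_alloc(mmg, &my_behavior, GFP_KERNEL, NULL), the user-visible token is buf->handle, and the lifetime ends with hl_mmap_mem_buf_put_handle().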
GFP_ATOMIC : GFP_KERNEL, &args); + if (!buf) + return -ENOMEM; + + *handle = buf->handle; + + return 0; +} + +int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle) +{ + int rc; + + rc = hl_mmap_mem_buf_put_handle(mmg, cb_handle); + if (rc < 0) + return rc; /* Invalid handle */ + + if (rc == 0) + dev_dbg(mmg->dev, "CB 0x%llx is destroyed while still in use\n", cb_handle); + + return 0; } -static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr, - u64 cb_handle, u32 flags, u32 *usage_cnt, u64 *device_va) +static int hl_cb_info(struct hl_mem_mgr *mmg, + u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va) { struct hl_vm_va_block *va_block; struct hl_cb *cb; - u32 handle; int rc = 0; - /* The CB handle was given to user to do mmap, so need to shift it back - * to the value which was allocated by the IDR module. - */ - cb_handle >>= PAGE_SHIFT; - handle = (u32) cb_handle; - - spin_lock(&mgr->cb_lock); - - cb = idr_find(&mgr->cb_handles, handle); + cb = hl_cb_get(mmg, handle); if (!cb) { - dev_err(hdev->dev, - "CB info failed, no match to handle 0x%x\n", handle); - rc = -EINVAL; - goto out; + dev_err(mmg->dev, + "CB info failed, no match to handle 0x%llx\n", handle); + return -EINVAL; } if (flags & HL_CB_FLAGS_GET_DEVICE_VA) { @@ -407,7 +396,7 @@ static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr, if (va_block) { *device_va = va_block->start; } else { - dev_err(hdev->dev, "CB is not mapped to the device's MMU\n"); + dev_err(mmg->dev, "CB is not mapped to the device's MMU\n"); rc = -EINVAL; goto out; } @@ -416,7 +405,7 @@ static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr, } out: - spin_unlock(&mgr->cb_lock); + hl_cb_put(cb); return rc; } @@ -424,8 +413,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) { union hl_cb_args *args = data; struct hl_device *hdev = hpriv->hdev; + u64 handle = 0, device_va = 0; enum hl_device_status status; - u64 handle = 0, device_va; u32 usage_cnt = 0; int rc; @@ -444,7 +433,7 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) args->in.cb_size, HL_MAX_CB_SIZE); rc = -EINVAL; } else { - rc = hl_cb_create(hdev, &hpriv->cb_mgr, hpriv->ctx, + rc = hl_cb_create(hdev, &hpriv->mem_mgr, hpriv->ctx, args->in.cb_size, false, !!(args->in.flags & HL_CB_FLAGS_MAP), &handle); @@ -455,15 +444,17 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) break; case HL_CB_OP_DESTROY: - rc = hl_cb_destroy(hdev, &hpriv->cb_mgr, + rc = hl_cb_destroy(&hpriv->mem_mgr, args->in.cb_handle); break; case HL_CB_OP_INFO: - rc = hl_cb_info(hdev, &hpriv->cb_mgr, args->in.cb_handle, + rc = hl_cb_info(&hpriv->mem_mgr, args->in.cb_handle, args->in.flags, &usage_cnt, &device_va); + if (rc) + break; memset(&args->out, 0, sizeof(args->out)); @@ -481,163 +472,20 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) return rc; } -static void cb_vm_close(struct vm_area_struct *vma) -{ - struct hl_cb *cb = (struct hl_cb *) vma->vm_private_data; - long new_mmap_size; - - new_mmap_size = cb->mmap_size - (vma->vm_end - vma->vm_start); - - if (new_mmap_size > 0) { - cb->mmap_size = new_mmap_size; - return; - } - - spin_lock(&cb->lock); - cb->mmap = false; - spin_unlock(&cb->lock); - - hl_cb_put(cb); - vma->vm_private_data = NULL; -} - -static const struct vm_operations_struct cb_vm_ops = { - .close = cb_vm_close -}; - -int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) -{ - struct hl_device *hdev = hpriv->hdev; - struct hl_cb *cb; - u32 handle, user_cb_size; - int rc; - - /* We use the page offset to hold the idr and thus we need to 
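Note the return-value convention hl_cb_destroy() now relies on: hl_mmap_mem_buf_put_handle() returns a negative errno for an unknown handle, and by the logic above returns 0 when the handle was dropped while other references still keep the object alive, which is only worth a debug message. Lookups pair the same way:

/* get/put pairing used by hl_cb_info() above. */
struct hl_cb *cb = hl_cb_get(mmg, handle);  /* takes a reference */

if (!cb)
	return -EINVAL;

/* ... inspect cb->kernel_address, cb->size ... */

hl_cb_put(cb);                              /* drops the reference */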
clear - * it before doing the mmap itself - */ - handle = vma->vm_pgoff; - vma->vm_pgoff = 0; - - /* reference was taken here */ - cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle); - if (!cb) { - dev_err(hdev->dev, - "CB mmap failed, no match to handle 0x%x\n", handle); - return -EINVAL; - } - - /* Validation check */ - user_cb_size = vma->vm_end - vma->vm_start; - if (user_cb_size != ALIGN(cb->size, PAGE_SIZE)) { - dev_err(hdev->dev, - "CB mmap failed, mmap size 0x%lx != 0x%x cb size\n", - vma->vm_end - vma->vm_start, cb->size); - rc = -EINVAL; - goto put_cb; - } - - if (!access_ok((void __user *) (uintptr_t) vma->vm_start, - user_cb_size)) { - dev_err(hdev->dev, - "user pointer is invalid - 0x%lx\n", - vma->vm_start); - - rc = -EINVAL; - goto put_cb; - } - - spin_lock(&cb->lock); - - if (cb->mmap) { - dev_err(hdev->dev, - "CB mmap failed, CB already mmaped to user\n"); - rc = -EINVAL; - goto release_lock; - } - - cb->mmap = true; - - spin_unlock(&cb->lock); - - vma->vm_ops = &cb_vm_ops; - - /* - * Note: We're transferring the cb reference to - * vma->vm_private_data here. - */ - - vma->vm_private_data = cb; - - rc = hdev->asic_funcs->mmap(hdev, vma, cb->kernel_address, - cb->bus_address, cb->size); - if (rc) { - spin_lock(&cb->lock); - cb->mmap = false; - goto release_lock; - } - - cb->mmap_size = cb->size; - vma->vm_pgoff = handle; - - return 0; - -release_lock: - spin_unlock(&cb->lock); -put_cb: - hl_cb_put(cb); - return rc; -} - -struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr, - u32 handle) +struct hl_cb *hl_cb_get(struct hl_mem_mgr *mmg, u64 handle) { - struct hl_cb *cb; + struct hl_mmap_mem_buf *buf; - spin_lock(&mgr->cb_lock); - cb = idr_find(&mgr->cb_handles, handle); - - if (!cb) { - spin_unlock(&mgr->cb_lock); - dev_warn(hdev->dev, - "CB get failed, no match to handle 0x%x\n", handle); + buf = hl_mmap_mem_buf_get(mmg, handle); + if (!buf) return NULL; - } - - kref_get(&cb->refcount); - - spin_unlock(&mgr->cb_lock); - - return cb; + return buf->private; } void hl_cb_put(struct hl_cb *cb) { - kref_put(&cb->refcount, cb_release); -} - -void hl_cb_mgr_init(struct hl_cb_mgr *mgr) -{ - spin_lock_init(&mgr->cb_lock); - idr_init(&mgr->cb_handles); -} - -void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr) -{ - struct hl_cb *cb; - struct idr *idp; - u32 id; - - idp = &mgr->cb_handles; - - idr_for_each_entry(idp, cb, id) { - if (kref_put(&cb->refcount, cb_release) != 1) - dev_err(hdev->dev, - "CB %d for CTX ID %d is still alive\n", - id, cb->ctx->asid); - } - - idr_destroy(&mgr->cb_handles); + hl_mmap_mem_buf_put(cb->buf); } struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, @@ -647,7 +495,7 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, struct hl_cb *cb; int rc; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, cb_size, + rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, cb_size, internal_cb, false, &cb_handle); if (rc) { dev_err(hdev->dev, @@ -655,8 +503,7 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, return NULL; } - cb_handle >>= PAGE_SHIFT; - cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle); + cb = hl_cb_get(&hdev->kernel_mem_mgr, cb_handle); /* hl_cb_get should never fail here */ if (!cb) { dev_crit(hdev->dev, "Kernel CB handle invalid 0x%x\n", @@ -667,7 +514,7 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, return cb; destroy_cb: - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb_handle << PAGE_SHIFT); + 
hl_cb_destroy(&hdev->kernel_mem_mgr, cb_handle); return NULL; } diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 0a4ef13d9ac4..fb30b7de4aab 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -14,6 +14,8 @@ #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ HL_CS_FLAGS_COLLECTIVE_WAIT) +#define MAX_TS_ITER_NUM 10 + /** * enum hl_cs_wait_status - cs wait status * @CS_WAIT_STATUS_BUSY: cs was not completed yet @@ -405,8 +407,7 @@ static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs) static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) { - bool next_entry_found = false; - struct hl_cs *next, *first_cs; + struct hl_cs *next = NULL, *iter, *first_cs; if (!cs_needs_timeout(cs)) return; @@ -441,13 +442,13 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) spin_lock(&hdev->cs_mirror_lock); /* queue TDR for next CS */ - list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node) - if (cs_needs_timeout(next)) { - next_entry_found = true; + list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node) + if (cs_needs_timeout(iter)) { + next = iter; break; } - if (next_entry_found && !next->tdr_active) { + if (next && !next->tdr_active) { next->tdr_active = true; schedule_delayed_work(&next->work_tdr, next->timeout_jiffies); } @@ -734,11 +735,10 @@ static void cs_timedout(struct work_struct *work) hdev = cs->ctx->hdev; /* Save only the first CS timeout parameters */ - rc = atomic_cmpxchg(&hdev->last_error.cs_write_disable, 0, 1); + rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_disable, 0, 1); if (!rc) { - hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime; - hdev->last_error.cs_timeout_timestamp = ktime_get(); - hdev->last_error.cs_timeout_seq = cs->sequence; + hdev->last_error.cs_timeout.timestamp = ktime_get(); + hdev->last_error.cs_timeout.seq = cs->sequence; } switch (cs->type) { @@ -804,7 +804,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, } /* increment refcnt for context */ - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); cs->ctx = ctx; cs->submitted = false; @@ -919,18 +919,21 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) complete_job(hdev, job); } -void hl_cs_rollback_all(struct hl_device *hdev) +void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush) { int i; struct hl_cs *cs, *tmp; - flush_workqueue(hdev->sob_reset_wq); + if (!skip_wq_flush) { + flush_workqueue(hdev->ts_free_obj_wq); - /* flush all completions before iterating over the CS mirror list in - * order to avoid a race with the release functions - */ - for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) - flush_workqueue(hdev->cq_wq[i]); + /* flush all completions before iterating over the CS mirror list in + * order to avoid a race with the release functions + */ + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + flush_workqueue(hdev->cq_wq[i]); + + } /* Make sure we don't have leftovers in the CS mirror list */ list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { @@ -948,13 +951,19 @@ void hl_cs_rollback_all(struct hl_device *hdev) static void wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) { - struct hl_user_pending_interrupt *pend; + struct hl_user_pending_interrupt *pend, *temp; unsigned long flags; spin_lock_irqsave(&interrupt->wait_list_lock, flags); - 
list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) { - pend->fence.error = -EIO; - complete_all(&pend->fence.completion); + list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) { + if (pend->ts_reg_info.buf) { + list_del(&pend->wait_list_node); + hl_mmap_mem_buf_put(pend->ts_reg_info.buf); + hl_cb_put(pend->ts_reg_info.cq_cb); + } else { + pend->fence.error = -EIO; + complete_all(&pend->fence.completion); + } } spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); } @@ -1061,17 +1070,14 @@ static int validate_queue_index(struct hl_device *hdev, } static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev, - struct hl_cb_mgr *cb_mgr, + struct hl_mem_mgr *mmg, struct hl_cs_chunk *chunk) { struct hl_cb *cb; - u32 cb_handle; - - cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT); - cb = hl_cb_get(hdev, cb_mgr, cb_handle); + cb = hl_cb_get(mmg, chunk->cb_handle); if (!cb) { - dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle); + dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle); return NULL; } @@ -1333,7 +1339,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, } if (is_kernel_allocated_cb) { - cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); + cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk); if (!cb) { atomic64_inc( &ctx->cs_counters.validation_drop_cnt); @@ -1761,7 +1767,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, */ job->patched_cb = job->user_cb; job->job_cb_size = job->user_cb_size; - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); /* increment refcount as for external queues we get completion */ cs_get(cs); @@ -1823,7 +1829,7 @@ static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv, handle->count = count; - hl_ctx_get(hdev, hpriv->ctx); + hl_ctx_get(hpriv->ctx); handle->ctx = hpriv->ctx; mgr = &hpriv->ctx->sig_mgr; @@ -2063,13 +2069,16 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, idp = &ctx->sig_mgr.handles; idr_for_each_entry(idp, encaps_sig_hdl, id) { if (encaps_sig_hdl->cs_seq == signal_seq) { - handle_found = true; - /* get refcount to protect removing - * this handle from idr, needed when - * multiple wait cs are used with offset + /* get refcount to protect removing this handle from idr, + * needed when multiple wait cs are used with offset * to wait on reserved encaps signals. + * Since kref_put of this handle is executed outside the + * current lock, it is possible that the handle refcount + * is 0 but it yet to be removed from the list. In this + * case need to consider the handle as not valid. 
*/ - kref_get(&encaps_sig_hdl->refcount); + if (kref_get_unless_zero(&encaps_sig_hdl->refcount)) + handle_found = true; break; } } @@ -2514,7 +2523,7 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, if (timestamp) *timestamp = 0; - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); fence = hl_ctx_get_fence(ctx, seq); @@ -2654,7 +2663,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) { struct multi_cs_completion *mcs_compl; struct hl_device *hdev = hpriv->hdev; - struct multi_cs_data mcs_data = {0}; + struct multi_cs_data mcs_data = {}; union hl_wait_cs_args *args = data; struct hl_ctx *ctx = hpriv->ctx; struct hl_fence **fence_arr; @@ -2705,7 +2714,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) mcs_data.fence_arr = fence_arr; mcs_data.arr_len = seq_arr_len; - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); /* wait (with timeout) for the first CS to be completed */ mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us); @@ -2739,7 +2748,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) mcs_data.update_ts = false; rc = hl_cs_poll_fences(&mcs_data, mcs_compl); - if (mcs_data.completion_bitmap) + if (rc || mcs_data.completion_bitmap) break; /* @@ -2854,64 +2863,174 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) return 0; } +static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf, + struct hl_cb *cq_cb, + u64 ts_offset, u64 cq_offset, u64 target_value, + spinlock_t *wait_list_lock, + struct hl_user_pending_interrupt **pend) +{ + struct hl_ts_buff *ts_buff = buf->private; + struct hl_user_pending_interrupt *requested_offset_record = + (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + + ts_offset; + struct hl_user_pending_interrupt *cb_last = + (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + + (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)); + unsigned long flags, iter_counter = 0; + u64 current_cq_counter; + + /* Validate ts_offset not exceeding last max */ + if (requested_offset_record > cb_last) { + dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n", + (u64)(uintptr_t)cb_last); + return -EINVAL; + } + +start_over: + spin_lock_irqsave(wait_list_lock, flags); + + /* Unregister only if we didn't reach the target value + * since in this case there will be no handling in irq context + * and then it's safe to delete the node out of the interrupt list + * then re-use it on other interrupt + */ + if (requested_offset_record->ts_reg_info.in_use) { + current_cq_counter = *requested_offset_record->cq_kernel_addr; + if (current_cq_counter < requested_offset_record->cq_target_value) { + list_del(&requested_offset_record->wait_list_node); + spin_unlock_irqrestore(wait_list_lock, flags); + + hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf); + hl_cb_put(requested_offset_record->ts_reg_info.cq_cb); + + dev_dbg(buf->mmg->dev, + "ts node removed from interrupt list now can re-use\n"); + } else { + dev_dbg(buf->mmg->dev, + "ts node in middle of irq handling\n"); + + /* irq handling in the middle give it time to finish */ + spin_unlock_irqrestore(wait_list_lock, flags); + usleep_range(1, 10); + if (++iter_counter == MAX_TS_ITER_NUM) { + dev_err(buf->mmg->dev, + "handling registration interrupt took too long!!\n"); + return -EINVAL; + } + + goto start_over; + } + } else { + spin_unlock_irqrestore(wait_list_lock, flags); + } + + /* Fill up the new registration node info */ + 
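The comment and one-line change above are a classic lookup-versus-teardown race fix: the final kref_put() on an encaps handle runs outside the IDR lock, so a reader can still find the handle in the IDR while its refcount is already zero, and a plain kref_get() would resurrect a dying object. kref_get_unless_zero() makes the lookup fail instead. The general idiom:

struct my_obj {
	struct kref refcount;
	/* ... */
};

/* Safe lookup when release may run outside 'lock'. */
static struct my_obj *obj_lookup(struct idr *idr, spinlock_t *lock, int id)
{
	struct my_obj *obj;

	spin_lock(lock);
	obj = idr_find(idr, id);
	if (obj && !kref_get_unless_zero(&obj->refcount))
		obj = NULL;   /* found, but destruction is in flight */
	spin_unlock(lock);

	return obj;
}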
requested_offset_record->ts_reg_info.in_use = 1; + requested_offset_record->ts_reg_info.buf = buf; + requested_offset_record->ts_reg_info.cq_cb = cq_cb; + requested_offset_record->ts_reg_info.timestamp_kernel_addr = + (u64 *) ts_buff->user_buff_address + ts_offset; + requested_offset_record->cq_kernel_addr = + (u64 *) cq_cb->kernel_address + cq_offset; + requested_offset_record->cq_target_value = target_value; + + *pend = requested_offset_record; + + dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB(0x%llx)\n", + (u64)(uintptr_t)requested_offset_record); + return 0; +} + static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, - struct hl_cb_mgr *cb_mgr, u64 timeout_us, - u64 cq_counters_handle, u64 cq_counters_offset, + struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg, + u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset, u64 target_value, struct hl_user_interrupt *interrupt, - u32 *status, - u64 *timestamp) + bool register_ts_record, u64 ts_handle, u64 ts_offset, + u32 *status, u64 *timestamp) { struct hl_user_pending_interrupt *pend; + struct hl_mmap_mem_buf *buf; + struct hl_cb *cq_cb; unsigned long timeout, flags; long completion_rc; - struct hl_cb *cb; int rc = 0; - u32 handle; timeout = hl_usecs64_to_jiffies(timeout_us); - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); - cq_counters_handle >>= PAGE_SHIFT; - handle = (u32) cq_counters_handle; - - cb = hl_cb_get(hdev, cb_mgr, handle); - if (!cb) { - hl_ctx_put(ctx); - return -EINVAL; + cq_cb = hl_cb_get(cb_mmg, cq_counters_handle); + if (!cq_cb) { + rc = -EINVAL; + goto put_ctx; } - pend = kzalloc(sizeof(*pend), GFP_KERNEL); - if (!pend) { - hl_cb_put(cb); - hl_ctx_put(ctx); - return -ENOMEM; - } + if (register_ts_record) { + dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n", + interrupt->interrupt_id, ts_offset, cq_counters_offset); + buf = hl_mmap_mem_buf_get(mmg, ts_handle); + if (!buf) { + rc = -EINVAL; + goto put_cq_cb; + } - hl_fence_init(&pend->fence, ULONG_MAX); + /* Find first available record */ + rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset, + cq_counters_offset, target_value, + &interrupt->wait_list_lock, &pend); + if (rc) + goto put_ts_buff; + } else { + pend = kzalloc(sizeof(*pend), GFP_KERNEL); + if (!pend) { + rc = -ENOMEM; + goto put_cq_cb; + } + hl_fence_init(&pend->fence, ULONG_MAX); + pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset; + pend->cq_target_value = target_value; + } - pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset; - pend->cq_target_value = target_value; + spin_lock_irqsave(&interrupt->wait_list_lock, flags); /* We check for completion value as interrupt could have been received * before we added the node to the wait list */ if (*pend->cq_kernel_addr >= target_value) { + if (register_ts_record) + pend->ts_reg_info.in_use = 0; + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + *status = HL_WAIT_CS_STATUS_COMPLETED; - /* There was no interrupt, we assume the completion is now. 
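ts_buff_get_kernel_ts_record() above recycles preallocated registration records that live inside the user-supplied timestamp buffer. A record still marked in_use may be stolen only while its CQ counter has not reached the target value, because past that point the interrupt handler owns the node; otherwise the caller briefly sleeps and retries, up to MAX_TS_ITER_NUM times. The recycle test, condensed (field names as in the hunks; the helper itself is hypothetical):

/* Condensed recycle test; unlinking and ref-dropping elided. */
static bool ts_record_stealable(struct hl_user_pending_interrupt *rec)
{
	if (!rec->ts_reg_info.in_use)
		return true;   /* never handed to the IRQ path */

	/*
	 * In use, but the counter is still below the target: the IRQ
	 * handler will not complete this node, so it can be unlinked
	 * from the wait list and reused. Once the counter reaches the
	 * target, the IRQ handler may be mid-flight: wait and retry.
	 */
	return *rec->cq_kernel_addr < rec->cq_target_value;
}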
*/ - pend->fence.timestamp = ktime_get(); - } - if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED)) + if (register_ts_record) { + *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); + goto put_ts_buff; + } else { + pend->fence.timestamp = ktime_get(); + goto set_timestamp; + } + } else if (!timeout_us) { + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + *status = HL_WAIT_CS_STATUS_BUSY; + pend->fence.timestamp = ktime_get(); goto set_timestamp; + } /* Add pending user interrupt to relevant list for the interrupt - * handler to monitor + * handler to monitor. + * Note that we cannot have sorted list by target value, + * in order to shorten the list pass loop, since + * same list could have nodes for different cq counter handle. */ - spin_lock_irqsave(&interrupt->wait_list_lock, flags); list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + if (register_ts_record) { + rc = *status = HL_WAIT_CS_STATUS_COMPLETED; + goto ts_registration_exit; + } + /* Wait for interrupt handler to signal completion */ completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, timeout); @@ -2932,23 +3051,41 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, rc = -EIO; *status = HL_WAIT_CS_STATUS_ABORTED; } else { - dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n", - interrupt->interrupt_id); - rc = -ETIMEDOUT; + /* The wait has timed-out. We don't know anything beyond that + * because the workload wasn't submitted through the driver. + * Therefore, from driver's perspective, the workload is still + * executing. + */ + rc = 0; + *status = HL_WAIT_CS_STATUS_BUSY; } - *status = HL_WAIT_CS_STATUS_BUSY; } } + /* + * We keep removing the node from list here, and not at the irq handler + * for completion timeout case. and if it's a registration + * for ts record, the node will be deleted in the irq handler after + * we reach the target value. + */ spin_lock_irqsave(&interrupt->wait_list_lock, flags); list_del(&pend->wait_list_node); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); set_timestamp: *timestamp = ktime_to_ns(pend->fence.timestamp); - kfree(pend); - hl_cb_put(cb); + hl_cb_put(cq_cb); +ts_registration_exit: + hl_ctx_put(ctx); + + return rc; + +put_ts_buff: + hl_mmap_mem_buf_put(buf); +put_cq_cb: + hl_cb_put(cq_cb); +put_ctx: hl_ctx_put(ctx); return rc; @@ -2969,7 +3106,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ timeout = hl_usecs64_to_jiffies(timeout_us); - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); pend = kzalloc(sizeof(*pend), GFP_KERNEL); if (!pend) { @@ -3049,6 +3186,12 @@ wait_again: interrupt->interrupt_id); rc = -EINTR; } else { + /* The wait has timed-out. We don't know anything beyond that + * because the workload wasn't submitted through the driver. + * Therefore, from driver's perspective, the workload is still + * executing. 
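Both timeout branches above change the contract of the wait ioctls: a timeout is no longer -ETIMEDOUT but a successful return with HL_WAIT_CS_STATUS_BUSY, since the driver never saw the workload being submitted and cannot declare it dead. Callers are expected to poll. A hedged userspace sketch of the resulting loop, assuming some wrapper around the wait ioctl:

/* Hypothetical caller-side loop; wait_cs_ioctl() is an assumed wrapper
 * that fills args.out from the wait-ioctl arguments.
 */
for (;;) {
	int rc = wait_cs_ioctl(fd, &args);

	if (rc)
		return rc;      /* a real driver/device error */
	if (args.out.status == HL_WAIT_CS_STATUS_COMPLETED)
		break;          /* target value reached */
	/* HL_WAIT_CS_STATUS_BUSY: timed out, workload may still run */
}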
+ */ + rc = 0; *status = HL_WAIT_CS_STATUS_BUSY; } @@ -3101,23 +3244,20 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt]; if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) - rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, + rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr, args->in.interrupt_timeout_us, args->in.cq_counters_handle, args->in.cq_counters_offset, - args->in.target, interrupt, &status, - ×tamp); + args->in.target, interrupt, + !!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT), + args->in.timestamp_handle, args->in.timestamp_offset, + &status, ×tamp); else rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, args->in.interrupt_timeout_us, args->in.addr, args->in.target, interrupt, &status, ×tamp); - if (rc) { - if (rc != -EINTR) - dev_err_ratelimited(hdev->dev, - "interrupt_wait_ioctl failed (%d)\n", rc); - + if (rc) return rc; - } memset(args, 0, sizeof(*args)); args->out.status = status; diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index c6360e33bce8..ed2cfd0c6e99 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -262,7 +262,7 @@ err_hw_block_mem_fini: return rc; } -void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx) +void hl_ctx_get(struct hl_ctx *ctx) { kref_get(&ctx->refcount); } @@ -284,7 +284,7 @@ struct hl_ctx *hl_get_compute_ctx(struct hl_device *hdev) * immediately once we find him */ ctx = hpriv->ctx; - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); break; } diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index fc084ee5106e..c6744bfc6da4 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -11,6 +11,7 @@ #include <linux/pci.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> +#include <linux/iommu.h> #define MMU_ADDR_BUF_SIZE 40 #define MMU_ASID_BUF_SIZE 10 @@ -125,9 +126,9 @@ static int command_buffers_show(struct seq_file *s, void *data) } seq_printf(s, " %03llu %d 0x%08x %d %d %d\n", - cb->id, cb->ctx->asid, cb->size, - kref_read(&cb->refcount), - cb->mmap, atomic_read(&cb->cs_cnt)); + cb->buf->handle, cb->ctx->asid, cb->size, + kref_read(&cb->buf->refcount), + atomic_read(&cb->buf->mmap), atomic_read(&cb->cs_cnt)); } spin_unlock(&dev_entry->cb_spinlock); @@ -369,8 +370,7 @@ static int userptr_lookup_show(struct seq_file *s, void *data) if (dev_entry->userptr_lookup >= userptr->addr && dev_entry->userptr_lookup < userptr->addr + userptr->size) { total_npages = 0; - for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, - i) { + for_each_sgtable_dma_sg(userptr->sgt, sg, i) { npages = hl_get_sg_info(sg, &dma_addr); sg_start = userptr->addr + total_npages * PAGE_SIZE; @@ -538,6 +538,39 @@ static int engines_show(struct seq_file *s, void *data) return 0; } +static ssize_t hl_memory_scrub(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u64 val = entry->memory_scrub_val; + int rc; + + if (!hl_device_operational(hdev, NULL)) { + dev_warn_ratelimited(hdev->dev, "Can't scrub memory, device is not operational\n"); + return -EIO; + } + + mutex_lock(&hdev->fpriv_list_lock); + if (hdev->is_compute_ctx_active) { + mutex_unlock(&hdev->fpriv_list_lock); + dev_err(hdev->dev, "can't 
scrub dram, context exist\n"); + return -EBUSY; + } + hdev->is_in_dram_scrub = true; + mutex_unlock(&hdev->fpriv_list_lock); + + rc = hdev->asic_funcs->scrub_device_dram(hdev, val); + + mutex_lock(&hdev->fpriv_list_lock); + hdev->is_in_dram_scrub = false; + mutex_unlock(&hdev->fpriv_list_lock); + + if (rc) + return rc; + return count; +} + static bool hl_is_device_va(struct hl_device *hdev, u64 addr) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -647,13 +680,105 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size, return rc; } +static int hl_access_dev_mem_by_region(struct hl_device *hdev, u64 addr, + u64 *val, enum debugfs_access_type acc_type, bool *found) +{ + size_t acc_size = (acc_type == DEBUGFS_READ64 || acc_type == DEBUGFS_WRITE64) ? + sizeof(u64) : sizeof(u32); + struct pci_mem_region *mem_reg; + int i; + + for (i = 0; i < PCI_REGION_NUMBER; i++) { + mem_reg = &hdev->pci_mem_region[i]; + if (!mem_reg->used) + continue; + if (addr >= mem_reg->region_base && + addr <= mem_reg->region_base + mem_reg->region_size - acc_size) { + *found = true; + return hdev->asic_funcs->access_dev_mem(hdev, mem_reg, i, + addr, val, acc_type); + } + } + return 0; +} + +static void hl_access_host_mem(struct hl_device *hdev, u64 addr, u64 *val, + enum debugfs_access_type acc_type) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 offset = prop->device_dma_offset_for_host_access; + + switch (acc_type) { + case DEBUGFS_READ32: + *val = *(u32 *) phys_to_virt(addr - offset); + break; + case DEBUGFS_WRITE32: + *(u32 *) phys_to_virt(addr - offset) = *val; + break; + case DEBUGFS_READ64: + *val = *(u64 *) phys_to_virt(addr - offset); + break; + case DEBUGFS_WRITE64: + *(u64 *) phys_to_virt(addr - offset) = *val; + break; + default: + dev_err(hdev->dev, "hostmem access-type %d id not supported\n", acc_type); + break; + } +} + +static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val, + enum debugfs_access_type acc_type) +{ + size_t acc_size = (acc_type == DEBUGFS_READ64 || acc_type == DEBUGFS_WRITE64) ? 
+ sizeof(u64) : sizeof(u32); + u64 host_start = hdev->asic_prop.host_base_address; + u64 host_end = hdev->asic_prop.host_end_address; + bool user_address, found = false; + int rc; + + user_address = hl_is_device_va(hdev, addr); + if (user_address) { + rc = device_va_to_pa(hdev, addr, acc_size, &addr); + if (rc) + return rc; + } + + rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found); + if (rc) { + dev_err(hdev->dev, + "Failed reading addr %#llx from dev mem (%d)\n", + addr, rc); + return rc; + } + + if (found) + return 0; + + if (!user_address || device_iommu_mapped(&hdev->pdev->dev)) { + rc = -EINVAL; + goto err; + } + + if (addr >= host_start && addr <= host_end - acc_size) { + hl_access_host_mem(hdev, addr, val, acc_type); + } else { + rc = -EINVAL; + goto err; + } + + return 0; +err: + dev_err(hdev->dev, "invalid addr %#llx\n", addr); + return rc; +} + static ssize_t hl_data_read32(struct file *f, char __user *buf, size_t count, loff_t *ppos) { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - u64 addr = entry->addr; - bool user_address; + u64 value64, addr = entry->addr; char tmp_buf[32]; ssize_t rc; u32 val; @@ -666,18 +791,11 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, if (*ppos) return 0; - user_address = hl_is_device_va(hdev, addr); - if (user_address) { - rc = device_va_to_pa(hdev, addr, sizeof(val), &addr); - if (rc) - return rc; - } - - rc = hdev->asic_funcs->debugfs_read32(hdev, addr, user_address, &val); - if (rc) { - dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); + rc = hl_access_mem(hdev, addr, &value64, DEBUGFS_READ32); + if (rc) return rc; - } + + val = value64; /* downcast back to 32 */ sprintf(tmp_buf, "0x%08x\n", val); return simple_read_from_buffer(buf, count, ppos, tmp_buf, @@ -689,8 +807,7 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - u64 addr = entry->addr; - bool user_address; + u64 value64, addr = entry->addr; u32 value; ssize_t rc; @@ -703,19 +820,10 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, if (rc) return rc; - user_address = hl_is_device_va(hdev, addr); - if (user_address) { - rc = device_va_to_pa(hdev, addr, sizeof(value), &addr); - if (rc) - return rc; - } - - rc = hdev->asic_funcs->debugfs_write32(hdev, addr, user_address, value); - if (rc) { - dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n", - value, addr); + value64 = value; + rc = hl_access_mem(hdev, addr, &value64, DEBUGFS_WRITE32); + if (rc) return rc; - } return count; } @@ -726,7 +834,6 @@ static ssize_t hl_data_read64(struct file *f, char __user *buf, struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; u64 addr = entry->addr; - bool user_address; char tmp_buf[32]; ssize_t rc; u64 val; @@ -739,18 +846,9 @@ static ssize_t hl_data_read64(struct file *f, char __user *buf, if (*ppos) return 0; - user_address = hl_is_device_va(hdev, addr); - if (user_address) { - rc = device_va_to_pa(hdev, addr, sizeof(val), &addr); - if (rc) - return rc; - } - - rc = hdev->asic_funcs->debugfs_read64(hdev, addr, user_address, &val); - if (rc) { - dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); + rc = hl_access_mem(hdev, addr, &val, DEBUGFS_READ64); + if (rc) return rc; - } sprintf(tmp_buf, "0x%016llx\n", val); return simple_read_from_buffer(buf, count, ppos, tmp_buf, @@ -763,7 +861,6 @@ 
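hl_access_mem() above funnels every debugfs read and write through one resolution order, replacing the four per-width asic callbacks that the later hunks delete. Condensed, with the bounds and user-address checks elided:

/* Resolution order of hl_access_mem(), sketched. */
static int access_mem_sketch(struct hl_device *hdev, u64 addr, u64 *val,
			     enum debugfs_access_type acc_type)
{
	bool found = false;
	int rc;

	/* 1. A device VA from a user context? Translate to a PA first. */
	if (hl_is_device_va(hdev, addr)) {
		rc = device_va_to_pa(hdev, addr, sizeof(u32), &addr);
		if (rc)
			return rc;
	}

	/* 2. Try every used PCI memory region (SRAM, DRAM, CFG, ...). */
	rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found);
	if (rc || found)
		return rc;

	/* 3. Fall back to host memory, unless an IOMMU invalidates the
	 *    simple phys_to_virt() offset trick.
	 */
	if (device_iommu_mapped(&hdev->pdev->dev))
		return -EINVAL;

	hl_access_host_mem(hdev, addr, val, acc_type);
	return 0;
}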
static ssize_t hl_data_write64(struct file *f, const char __user *buf, struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; u64 addr = entry->addr; - bool user_address; u64 value; ssize_t rc; @@ -776,19 +873,9 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf, if (rc) return rc; - user_address = hl_is_device_va(hdev, addr); - if (user_address) { - rc = device_va_to_pa(hdev, addr, sizeof(value), &addr); - if (rc) - return rc; - } - - rc = hdev->asic_funcs->debugfs_write64(hdev, addr, user_address, value); - if (rc) { - dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n", - value, addr); + rc = hl_access_mem(hdev, addr, &value, DEBUGFS_WRITE64); + if (rc) return rc; - } return count; } @@ -829,23 +916,67 @@ static ssize_t hl_dma_size_write(struct file *f, const char __user *buf, } /* Free the previous allocation, if there was any */ - entry->blob_desc.size = 0; - vfree(entry->blob_desc.data); + entry->data_dma_blob_desc.size = 0; + vfree(entry->data_dma_blob_desc.data); - entry->blob_desc.data = vmalloc(size); - if (!entry->blob_desc.data) + entry->data_dma_blob_desc.data = vmalloc(size); + if (!entry->data_dma_blob_desc.data) return -ENOMEM; rc = hdev->asic_funcs->debugfs_read_dma(hdev, addr, size, - entry->blob_desc.data); + entry->data_dma_blob_desc.data); if (rc) { dev_err(hdev->dev, "Failed to DMA from 0x%010llx\n", addr); - vfree(entry->blob_desc.data); - entry->blob_desc.data = NULL; + vfree(entry->data_dma_blob_desc.data); + entry->data_dma_blob_desc.data = NULL; return -EIO; } - entry->blob_desc.size = size; + entry->data_dma_blob_desc.size = size; + + return count; +} + +static ssize_t hl_monitor_dump_trigger(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 size, trig; + ssize_t rc; + + if (hdev->reset_info.in_reset) { + dev_warn_ratelimited(hdev->dev, "Can't dump monitors during reset\n"); + return 0; + } + rc = kstrtouint_from_user(buf, count, 10, &trig); + if (rc) + return rc; + + if (trig != 1) { + dev_err(hdev->dev, "Must write 1 to trigger monitor dump\n"); + return -EINVAL; + } + + size = sizeof(struct cpucp_monitor_dump); + + /* Free the previous allocation, if there was any */ + entry->mon_dump_blob_desc.size = 0; + vfree(entry->mon_dump_blob_desc.data); + + entry->mon_dump_blob_desc.data = vmalloc(size); + if (!entry->mon_dump_blob_desc.data) + return -ENOMEM; + + rc = hdev->asic_funcs->get_monitor_dump(hdev, entry->mon_dump_blob_desc.data); + if (rc) { + dev_err(hdev->dev, "Failed to dump monitors\n"); + vfree(entry->mon_dump_blob_desc.data); + entry->mon_dump_blob_desc.data = NULL; + return -EIO; + } + + entry->mon_dump_blob_desc.size = size; return count; } @@ -890,6 +1021,8 @@ static ssize_t hl_set_power_state(struct file *f, const char __user *buf, pci_set_power_state(hdev->pdev, PCI_D0); pci_restore_state(hdev->pdev); rc = pci_enable_device(hdev->pdev); + if (rc < 0) + return rc; } else if (value == 2) { pci_save_state(hdev->pdev); pci_disable_device(hdev->pdev); @@ -1054,42 +1187,12 @@ static ssize_t hl_device_write(struct file *f, const char __user *buf, static ssize_t hl_clk_gate_read(struct file *f, char __user *buf, size_t count, loff_t *ppos) { - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - char tmp_buf[200]; - ssize_t rc; - - if (*ppos) - return 0; - - sprintf(tmp_buf, "0x%llx\n", 
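hl_dma_size_write() and the new hl_monitor_dump_trigger() above share one snapshot pattern: invalidate and free the previous vmalloc()ed buffer, allocate and fill a new one, then publish it through a debugfs_blob_wrapper that was registered once with debugfs_create_blob(). In isolation:

#include <linux/debugfs.h>
#include <linux/vmalloc.h>

static struct debugfs_blob_wrapper snap;  /* backs a read-only blob file */

/* Registered once at init:
 * debugfs_create_blob("snapshot", 0400, parent, &snap);
 */

static int refill_snapshot(size_t size)
{
	snap.size = 0;            /* invalidate while swapping buffers */
	vfree(snap.data);

	snap.data = vmalloc(size);
	if (!snap.data)
		return -ENOMEM;

	/* ... fill snap.data from the device ... */

	snap.size = size;         /* publish the new snapshot */
	return 0;
}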
hdev->clock_gating_mask); - rc = simple_read_from_buffer(buf, count, ppos, tmp_buf, - strlen(tmp_buf) + 1); - - return rc; + return 0; } static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, size_t count, loff_t *ppos) { - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - u64 value; - ssize_t rc; - - if (hdev->reset_info.in_reset) { - dev_warn_ratelimited(hdev->dev, - "Can't change clock gating during reset\n"); - return 0; - } - - rc = kstrtoull_from_user(buf, count, 16, &value); - if (rc) - return rc; - - hdev->clock_gating_mask = value; - hdev->asic_funcs->set_clock_gating(hdev); - return count; } @@ -1101,6 +1204,9 @@ static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf, char tmp_buf[200]; ssize_t rc; + if (!hdev->asic_prop.configurable_stop_on_err) + return -EOPNOTSUPP; + if (*ppos) return 0; @@ -1119,6 +1225,9 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf, u32 value; ssize_t rc; + if (!hdev->asic_prop.configurable_stop_on_err) + return -EOPNOTSUPP; + if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, "Can't change stop on error during reset\n"); @@ -1240,6 +1349,11 @@ static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf, return count; } +static const struct file_operations hl_mem_scrub_fops = { + .owner = THIS_MODULE, + .write = hl_memory_scrub, +}; + static const struct file_operations hl_data32b_fops = { .owner = THIS_MODULE, .read = hl_data_read32, @@ -1257,6 +1371,11 @@ static const struct file_operations hl_dma_size_fops = { .write = hl_dma_size_write }; +static const struct file_operations hl_monitor_dump_fops = { + .owner = THIS_MODULE, + .write = hl_monitor_dump_trigger +}; + static const struct file_operations hl_i2c_data_fops = { .owner = THIS_MODULE, .read = hl_i2c_data_read, @@ -1372,8 +1491,10 @@ void hl_debugfs_add_device(struct hl_device *hdev) if (!dev_entry->entry_arr) return; - dev_entry->blob_desc.size = 0; - dev_entry->blob_desc.data = NULL; + dev_entry->data_dma_blob_desc.size = 0; + dev_entry->data_dma_blob_desc.data = NULL; + dev_entry->mon_dump_blob_desc.size = 0; + dev_entry->mon_dump_blob_desc.data = NULL; INIT_LIST_HEAD(&dev_entry->file_list); INIT_LIST_HEAD(&dev_entry->cb_list); @@ -1392,6 +1513,17 @@ void hl_debugfs_add_device(struct hl_device *hdev) dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), hl_debug_root); + debugfs_create_x64("memory_scrub_val", + 0644, + dev_entry->root, + &dev_entry->memory_scrub_val); + + debugfs_create_file("memory_scrub", + 0200, + dev_entry->root, + dev_entry, + &hl_mem_scrub_fops); + debugfs_create_x64("addr", 0644, dev_entry->root, @@ -1492,7 +1624,18 @@ void hl_debugfs_add_device(struct hl_device *hdev) debugfs_create_blob("data_dma", 0400, dev_entry->root, - &dev_entry->blob_desc); + &dev_entry->data_dma_blob_desc); + + debugfs_create_file("monitor_dump_trig", + 0200, + dev_entry->root, + dev_entry, + &hl_monitor_dump_fops); + + debugfs_create_blob("monitor_dump", + 0400, + dev_entry->root, + &dev_entry->mon_dump_blob_desc); debugfs_create_x8("skip_reset_on_timeout", 0644, @@ -1531,7 +1674,8 @@ void hl_debugfs_remove_device(struct hl_device *hdev) mutex_destroy(&entry->file_mutex); - vfree(entry->blob_desc.data); + vfree(entry->data_dma_blob_desc.data); + vfree(entry->mon_dump_blob_desc.data); for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i) vfree(entry->state_dump[i]); diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c index 733338ab6f1d..b4f14c6d3970 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -13,6 +13,184 @@ #include <linux/pci.h> #include <linux/hwmon.h> +#define HL_RESET_DELAY_USEC 10000 /* 10ms */ + +/* + * hl_set_dram_bar- sets the bar to allow later access to address + * + * @hdev: pointer to habanalabs device structure + * @addr: the address the caller wants to access. + * + * @return: the old BAR base address on success, U64_MAX for failure. + * The caller should set it back to the old address after use. + * + * In case the bar space does not cover the whole address space, + * the bar base address should be set to allow access to a given address. + * This function can be called also if the bar doesn't need to be set, + * in that case it just won't change the base. + */ +static uint64_t hl_set_dram_bar(struct hl_device *hdev, u64 addr) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 bar_base_addr; + + bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); + + return hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); +} + + +static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, + enum debugfs_access_type acc_type, enum pci_region region_type) +{ + struct pci_mem_region *region = &hdev->pci_mem_region[region_type]; + u64 old_base, rc; + + if (region_type == PCI_REGION_DRAM) { + old_base = hl_set_dram_bar(hdev, addr); + if (old_base == U64_MAX) + return -EIO; + } + + switch (acc_type) { + case DEBUGFS_READ8: + *val = readb(hdev->pcie_bar[region->bar_id] + + addr - region->region_base + region->offset_in_bar); + break; + case DEBUGFS_WRITE8: + writeb(*val, hdev->pcie_bar[region->bar_id] + + addr - region->region_base + region->offset_in_bar); + break; + case DEBUGFS_READ32: + *val = readl(hdev->pcie_bar[region->bar_id] + + addr - region->region_base + region->offset_in_bar); + break; + case DEBUGFS_WRITE32: + writel(*val, hdev->pcie_bar[region->bar_id] + + addr - region->region_base + region->offset_in_bar); + break; + case DEBUGFS_READ64: + *val = readq(hdev->pcie_bar[region->bar_id] + + addr - region->region_base + region->offset_in_bar); + break; + case DEBUGFS_WRITE64: + writeq(*val, hdev->pcie_bar[region->bar_id] + + addr - region->region_base + region->offset_in_bar); + break; + } + + if (region_type == PCI_REGION_DRAM) { + rc = hl_set_dram_bar(hdev, old_base); + if (rc == U64_MAX) + return -EIO; + } + + return 0; +} + +int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct scatterlist *sg; + int rc, i; + + rc = dma_map_sgtable(&hdev->pdev->dev, sgt, dir, 0); + if (rc) + return rc; + + /* Shift to the device's base physical address of host memory if necessary */ + if (prop->device_dma_offset_for_host_access) + for_each_sgtable_dma_sg(sgt, sg, i) + sg->dma_address += prop->device_dma_offset_for_host_access; + + return 0; +} + +void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct scatterlist *sg; + int i; + + /* Cancel the device's base physical address of host memory if necessary */ + if (prop->device_dma_offset_for_host_access) + 
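hl_set_dram_bar() rounds the target address down to a window boundary with `addr & ~(dram_pci_bar_size - 1)`, which is only valid when the BAR size is a power of two. The arithmetic in isolation, with a worked example; note the caller is expected to restore the old base afterwards, as the kernel-doc above says:

#include <assert.h>
#include <stdint.h>

/* With a 512 MB DRAM BAR, an access to 0x32f000000 moves the window
 * base to 0x320000000 and the in-window offset is 0x0f000000. */
static uint64_t window_base(uint64_t addr, uint64_t bar_size)
{
	/* the mask trick requires a power-of-two window */
	assert(bar_size && !(bar_size & (bar_size - 1)));
	return addr & ~(bar_size - 1);
}

static uint64_t window_offset(uint64_t addr, uint64_t bar_size)
{
	return addr & (bar_size - 1);
}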
for_each_sgtable_dma_sg(sgt, sg, i) + sg->dma_address -= prop->device_dma_offset_for_host_access; + + dma_unmap_sgtable(&hdev->pdev->dev, sgt, dir, 0); +} + +/* + * hl_access_cfg_region - access the config region + * + * @hdev: pointer to habanalabs device structure + * @addr: the address to access + * @val: the value to write from or read to + * @acc_type: the type of access (read/write 64/32) + */ +int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val, + enum debugfs_access_type acc_type) +{ + struct pci_mem_region *cfg_region = &hdev->pci_mem_region[PCI_REGION_CFG]; + u32 val_h, val_l; + + if (!IS_ALIGNED(addr, sizeof(u32))) { + dev_err(hdev->dev, "address %#llx not a multiple of %zu\n", addr, sizeof(u32)); + return -EINVAL; + } + + switch (acc_type) { + case DEBUGFS_READ32: + *val = RREG32(addr - cfg_region->region_base); + break; + case DEBUGFS_WRITE32: + WREG32(addr - cfg_region->region_base, *val); + break; + case DEBUGFS_READ64: + val_l = RREG32(addr - cfg_region->region_base); + val_h = RREG32(addr + sizeof(u32) - cfg_region->region_base); + + *val = (((u64) val_h) << 32) | val_l; + break; + case DEBUGFS_WRITE64: + WREG32(addr - cfg_region->region_base, lower_32_bits(*val)); + WREG32(addr + sizeof(u32) - cfg_region->region_base, upper_32_bits(*val)); + break; + default: + dev_err(hdev->dev, "access type %d is not supported\n", acc_type); + return -EOPNOTSUPP; + } + + return 0; +} + +/* + * hl_access_dev_mem - access device memory + * + * @hdev: pointer to habanalabs device structure + * @region: the memory region the address belongs to + * @region_type: the type of the region the address belongs to + * @addr: the address to access + * @val: the value to write from or read to + * @acc_type: the type of access (r/w, 32/64) + */ +int hl_access_dev_mem(struct hl_device *hdev, struct pci_mem_region *region, + enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type) +{ + switch (region_type) { + case PCI_REGION_CFG: + return hl_access_cfg_region(hdev, addr, val, acc_type); + case PCI_REGION_SRAM: + case PCI_REGION_DRAM: + return hl_access_sram_dram_region(hdev, addr, val, acc_type, + region_type); + default: + return -EFAULT; + } + + return 0; +} + enum hl_device_status hl_device_status(struct hl_device *hdev) { enum hl_device_status status; @@ -105,6 +283,14 @@ static void hpriv_release(struct kref *ref) hdev->is_compute_ctx_active = false; mutex_unlock(&hdev->fpriv_list_lock); + hdev->compute_ctx_in_release = 0; + + /* release the eventfd */ + if (hpriv->notifier_event.eventfd) + eventfd_ctx_put(hpriv->notifier_event.eventfd); + + mutex_destroy(&hpriv->notifier_event.lock); + kfree(hpriv); } @@ -144,9 +330,11 @@ static int hl_device_release(struct inode *inode, struct file *filp) */ hl_release_pending_user_interrupts(hpriv->hdev); - hl_cb_mgr_fini(hdev, &hpriv->cb_mgr); + hl_mem_mgr_fini(&hpriv->mem_mgr); hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); + hdev->compute_ctx_in_release = 1; + if (!hl_hpriv_put(hpriv)) dev_notice(hdev->dev, "User process closed FD but device still in use\n"); @@ -173,6 +361,11 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp) list_del(&hpriv->dev_node); mutex_unlock(&hdev->fpriv_ctrl_list_lock); out: + /* release the eventfd */ + if (hpriv->notifier_event.eventfd) + eventfd_ctx_put(hpriv->notifier_event.eventfd); + + mutex_destroy(&hpriv->notifier_event.lock); put_pid(hpriv->taskpid); kfree(hpriv); @@ -201,14 +394,15 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma) } vm_pgoff 
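For the CFG region, hl_access_cfg_region() synthesizes 64-bit accesses from two 32-bit register operations. Since the two transactions are not atomic, a register that changes between the low and high reads can yield a torn value. The composition reduced to standalone form:

#include <stdint.h>

/* DEBUGFS_READ64 path: combine two 32-bit reads into one value. */
static uint64_t combine_u64(uint32_t lo, uint32_t hi)
{
	return ((uint64_t)hi << 32) | lo;
}

/* DEBUGFS_WRITE64 path: split one value into two 32-bit writes. */
static void split_u64(uint64_t v, uint32_t *lo, uint32_t *hi)
{
	*lo = (uint32_t)v;		/* lower_32_bits() */
	*hi = (uint32_t)(v >> 32);	/* upper_32_bits() */
}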
= vma->vm_pgoff; - vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff); switch (vm_pgoff & HL_MMAP_TYPE_MASK) { - case HL_MMAP_TYPE_CB: - return hl_cb_mmap(hpriv, vma); - case HL_MMAP_TYPE_BLOCK: + vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff); return hl_hw_block_mmap(hpriv, vma); + + case HL_MMAP_TYPE_CB: + case HL_MMAP_TYPE_TS_BUFF: + return hl_mem_mgr_mmap(&hpriv->mem_mgr, vma, NULL); } return -EINVAL; @@ -410,26 +604,33 @@ static int device_early_init(struct hl_device *hdev) goto free_cq_wq; } - hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0); - if (!hdev->sob_reset_wq) { + hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0); + if (!hdev->ts_free_obj_wq) { dev_err(hdev->dev, - "Failed to allocate SOB reset workqueue\n"); + "Failed to allocate Timestamp registration free workqueue\n"); rc = -ENOMEM; goto free_eq_wq; } + hdev->pf_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0); + if (!hdev->pf_wq) { + dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n"); + rc = -ENOMEM; + goto free_ts_free_wq; + } + hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), GFP_KERNEL); if (!hdev->hl_chip_info) { rc = -ENOMEM; - goto free_sob_reset_wq; + goto free_pf_wq; } rc = hl_mmu_if_set_funcs(hdev); if (rc) goto free_chip_info; - hl_cb_mgr_init(&hdev->kernel_cb_mgr); + hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr); hdev->device_reset_work.wq = create_singlethread_workqueue("hl_device_reset"); @@ -458,11 +659,13 @@ static int device_early_init(struct hl_device *hdev) return 0; free_cb_mgr: - hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); + hl_mem_mgr_fini(&hdev->kernel_mem_mgr); free_chip_info: kfree(hdev->hl_chip_info); -free_sob_reset_wq: - destroy_workqueue(hdev->sob_reset_wq); +free_pf_wq: + destroy_workqueue(hdev->pf_wq); +free_ts_free_wq: + destroy_workqueue(hdev->ts_free_obj_wq); free_eq_wq: destroy_workqueue(hdev->eq_wq); free_cq_wq: @@ -497,11 +700,12 @@ static void device_early_fini(struct hl_device *hdev) mutex_destroy(&hdev->clk_throttling.lock); - hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); + hl_mem_mgr_fini(&hdev->kernel_mem_mgr); kfree(hdev->hl_chip_info); - destroy_workqueue(hdev->sob_reset_wq); + destroy_workqueue(hdev->pf_wq); + destroy_workqueue(hdev->ts_free_obj_wq); destroy_workqueue(hdev->eq_wq); destroy_workqueue(hdev->device_reset_work.wq); @@ -610,7 +814,7 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization) u64 max_power, curr_power, dc_power, dividend; int rc; - max_power = hdev->asic_prop.max_power_default; + max_power = hdev->max_power; dc_power = hdev->asic_prop.dc_power_default; rc = hl_fw_cpucp_power_get(hdev, &curr_power); @@ -644,9 +848,6 @@ int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool en hdev->in_debug = 0; - if (!hdev->reset_info.hard_reset_pending) - hdev->asic_funcs->set_clock_gating(hdev); - goto out; } @@ -657,7 +858,6 @@ int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool en goto out; } - hdev->asic_funcs->disable_clock_gating(hdev); hdev->in_debug = 1; out: @@ -685,7 +885,8 @@ static void take_release_locks(struct hl_device *hdev) mutex_unlock(&hdev->fpriv_ctrl_list_lock); } -static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset) +static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset, + bool skip_wq_flush) { if (hard_reset) device_late_fini(hdev); @@ -698,7 +899,10 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r 
hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset); /* Go over all the queues, release all CS and their jobs */ - hl_cs_rollback_all(hdev); + hl_cs_rollback_all(hdev, skip_wq_flush); + + /* flush the MMU prefetch workqueue */ + flush_workqueue(hdev->pf_wq); /* Release all pending user interrupts, each pending user interrupt * holds a reference to user context @@ -844,10 +1048,13 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool put_task_struct(task); } else { - dev_warn(hdev->dev, - "Can't get task struct for PID so giving up on killing process\n"); - mutex_unlock(fd_lock); - return -ETIME; + /* + * If we got here, it means that process was killed from outside the driver + * right after it started looping on fd_list and before get_pid_task, thus + * we don't need to kill it. + */ + dev_dbg(hdev->dev, + "Can't get task struct for user process, assuming process was killed from outside the driver\n"); } } @@ -978,7 +1185,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags) int hl_device_reset(struct hl_device *hdev, u32 flags) { bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false, - reset_upon_device_release = false, schedule_hard_reset = false; + reset_upon_device_release = false, schedule_hard_reset = false, + skip_wq_flush, delay_reset; u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; struct hl_ctx *ctx; int i, rc; @@ -991,6 +1199,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) hard_reset = !!(flags & HL_DRV_RESET_HARD); from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR); fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW); + skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE); + delay_reset = !!(flags & HL_DRV_RESET_DELAY); if (!hard_reset && !hdev->asic_prop.supports_soft_reset) { hard_instead_soft = true; @@ -1040,6 +1250,9 @@ do_reset: hdev->reset_info.in_reset = 1; spin_unlock(&hdev->reset_info.lock); + if (delay_reset) + usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1); + handle_reset_trigger(hdev, flags); /* This still allows the completion of some KDMA ops */ @@ -1053,9 +1266,9 @@ do_reset: if (hard_reset) dev_info(hdev->dev, "Going to reset device\n"); else if (reset_upon_device_release) - dev_info(hdev->dev, "Going to reset device after release by user\n"); + dev_dbg(hdev->dev, "Going to reset device after release by user\n"); else - dev_info(hdev->dev, "Going to reset engines of inference device\n"); + dev_dbg(hdev->dev, "Going to reset engines of inference device\n"); } again: @@ -1076,7 +1289,7 @@ again: return 0; } - cleanup_resources(hdev, hard_reset, fw_reset); + cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush); kill_processes: if (hard_reset) { @@ -1232,7 +1445,7 @@ kill_processes: goto out_err; } - hl_set_max_power(hdev); + hl_fw_set_max_power(hdev); } else { rc = hdev->asic_funcs->non_hard_reset_late_init(hdev); if (rc) { @@ -1261,7 +1474,10 @@ kill_processes: hdev->reset_info.needs_reset = false; - dev_notice(hdev->dev, "Successfully finished resetting the device\n"); + if (hard_reset) + dev_info(hdev->dev, "Successfully finished resetting the device\n"); + else + dev_dbg(hdev->dev, "Successfully finished resetting the device\n"); if (hard_reset) { hdev->reset_info.hard_reset_cnt++; @@ -1297,11 +1513,14 @@ out_err: hdev->reset_info.hard_reset_cnt++; } else if (reset_upon_device_release) { dev_err(hdev->dev, "Failed to reset device after user release\n"); + flags |= HL_DRV_RESET_HARD; + flags &= ~HL_DRV_RESET_DEV_RELEASE; hard_reset 
= true; goto again; } else { dev_err(hdev->dev, "Failed to do soft-reset\n"); hdev->reset_info.soft_reset_cnt++; + flags |= HL_DRV_RESET_HARD; hard_reset = true; goto again; } @@ -1311,6 +1530,43 @@ out_err: return rc; } +static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event) +{ + mutex_lock(¬ifier_event->lock); + notifier_event->events_mask |= event; + if (notifier_event->eventfd) + eventfd_signal(notifier_event->eventfd, 1); + + mutex_unlock(¬ifier_event->lock); +} + +/* + * hl_notifier_event_send_all - notify all user processes via eventfd + * + * @hdev: pointer to habanalabs device structure + * @event: the occurred event + * Returns 0 for success or an error on failure. + */ +void hl_notifier_event_send_all(struct hl_device *hdev, u64 event) +{ + struct hl_fpriv *hpriv; + + mutex_lock(&hdev->fpriv_list_lock); + + list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) + hl_notifier_event_send(&hpriv->notifier_event, event); + + mutex_unlock(&hdev->fpriv_list_lock); + + /* control device */ + mutex_lock(&hdev->fpriv_ctrl_list_lock); + + list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node) + hl_notifier_event_send(&hpriv->notifier_event, event); + + mutex_unlock(&hdev->fpriv_ctrl_list_lock); +} + /* * hl_device_init - main initialization function for habanalabs device * @@ -1538,7 +1794,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) /* Need to call this again because the max power might change, * depending on card type for certain ASICs */ - hl_set_max_power(hdev); + if (hdev->asic_prop.set_max_power_on_device_init) + hl_fw_set_max_power(hdev); /* * hl_hwmon_init() must be called after device_late_init(), because only @@ -1682,7 +1939,7 @@ void hl_device_fini(struct hl_device *hdev) hl_hwmon_fini(hdev); - cleanup_resources(hdev, true, false); + cleanup_resources(hdev, true, false, false); /* Kill processes here after CS rollback. This is because the process * can't really exit until all its CSs are done, which is what we diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 6775c5c3166b..828a36af5b14 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. 
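hl_notifier_event_send_all() walks both file-private lists and signals each registered eventfd. The consumer side is plain eventfd usage: block in read() until the driver calls eventfd_signal(), then inspect the accumulated events mask. An illustrative consumer, assuming the fd was already handed to the driver through its notifier registration ioctl (registration is outside this hunk):

#include <stdint.h>
#include <stdio.h>
#include <sys/eventfd.h>
#include <unistd.h>

/* Typical setup, done once:  int efd = eventfd(0, EFD_CLOEXEC);  */
static int wait_for_device_event(int efd)
{
	uint64_t count;

	/* Blocks until the driver does eventfd_signal(ctx, 1). */
	if (read(efd, &count, sizeof(count)) != sizeof(count))
		return -1;

	printf("device raised %llu notification(s)\n",
	       (unsigned long long)count);
	/* Which events fired is reported separately, via the
	 * events_mask the driver keeps per notifier. */
	return 0;
}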
*/ @@ -18,8 +18,9 @@ static char *extract_fw_ver_from_str(const char *fw_str) { char *str, *fw_ver, *whitespace; + u32 ver_offset; - fw_ver = kmalloc(16, GFP_KERNEL); + fw_ver = kmalloc(VERSION_MAX_LEN, GFP_KERNEL); if (!fw_ver) return NULL; @@ -29,9 +30,10 @@ static char *extract_fw_ver_from_str(const char *fw_str) /* Skip the fw- part */ str += 3; + ver_offset = str - fw_str; /* Copy until the next whitespace */ - whitespace = strnstr(str, " ", 15); + whitespace = strnstr(str, " ", VERSION_MAX_LEN - ver_offset); if (!whitespace) goto free_fw_ver; @@ -214,7 +216,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, dma_addr_t pkt_dma_addr; struct hl_bd *sent_bd; u32 tmp, expected_ack_val, pi; - int rc = 0; + int rc; pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr); @@ -228,8 +230,11 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, mutex_lock(&hdev->send_cpu_message_lock); - if (hdev->disabled) + /* CPU-CP messages can be sent during soft-reset */ + if (hdev->disabled && !hdev->reset_info.is_in_soft_reset) { + rc = 0; goto out; + } if (hdev->device_cpu_disabled) { rc = -EIO; @@ -816,6 +821,54 @@ out: return rc; } +int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data) +{ + struct cpucp_monitor_dump *mon_dump_cpu_addr; + dma_addr_t mon_dump_dma_addr; + struct cpucp_packet pkt = {}; + size_t data_size; + __le32 *src_ptr; + u32 *dst_ptr; + u64 result; + int i, rc; + + data_size = sizeof(struct cpucp_monitor_dump); + mon_dump_cpu_addr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, data_size, + &mon_dump_dma_addr); + if (!mon_dump_cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for CPU-CP monitor-dump packet\n"); + return -ENOMEM; + } + + memset(mon_dump_cpu_addr, 0, data_size); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_MONITOR_DUMP_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(mon_dump_dma_addr); + pkt.data_max_size = cpu_to_le32(data_size); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_MON_DUMP_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, "Failed to handle CPU-CP monitor-dump packet, error %d\n", rc); + goto out; + } + + /* result contains the actual size */ + src_ptr = (__le32 *) mon_dump_cpu_addr; + dst_ptr = data; + for (i = 0; i < (data_size / sizeof(u32)); i++) { + *dst_ptr = le32_to_cpu(*src_ptr); + src_ptr++; + dst_ptr++; + } + +out: + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, data_size, mon_dump_cpu_addr); + + return rc; +} + int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters) { @@ -958,15 +1011,17 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); - if (rc) + if (rc) { dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc); + return rc; + } pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result); pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result); pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result); pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result); - return rc; + return 0; } int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power) @@ -1202,8 +1257,6 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev, hdev, cpu_boot_status_reg, status, - (status == CPU_BOOT_STATUS_IN_UBOOT) || - (status == CPU_BOOT_STATUS_DRAM_RDY) || (status == 
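hl_fw_get_monitor_dump() copies the result out of the DMA buffer one 32-bit word at a time through le32_to_cpu(), so the caller sees host-endian data on any platform. The same fixup in standalone form (the userspace analogue of le32_to_cpu is le32toh):

#include <endian.h>
#include <stddef.h>
#include <stdint.h>

/* Endian-safe copy of a little-endian word array, as done above. */
static void copy_le32_array(uint32_t *dst, const uint32_t *src_le,
			    size_t words)
{
	size_t i;

	for (i = 0; i < words; i++)
		dst[i] = le32toh(src_le[i]);
}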
CPU_BOOT_STATUS_NIC_FW_RDY) || (status == CPU_BOOT_STATUS_READY_TO_BOOT) || (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT), @@ -1536,7 +1589,7 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev, le32_to_cpu(dyn_regs->cpu_cmd_status_to_host), status, FIELD_GET(COMMS_STATUS_STATUS_MASK, status) == expected_status, - hdev->fw_poll_interval_usec, + hdev->fw_comms_poll_interval_usec, timeout); if (rc) { @@ -1906,7 +1959,7 @@ static int hl_fw_dynamic_request_descriptor(struct hl_device *hdev, * @fwc: the firmware component * @fw_version: fw component's version string */ -static void hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev, +static int hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev, enum hl_fw_component fwc, const char *fw_version) { @@ -1930,23 +1983,33 @@ static void hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev, VERSION_MAX_LEN); if (preboot_ver && preboot_ver != prop->preboot_ver) { strscpy(btl_ver, prop->preboot_ver, - min((int) (preboot_ver - prop->preboot_ver), - 31)); + min((int) (preboot_ver - prop->preboot_ver), 31)); dev_info(hdev->dev, "%s\n", btl_ver); } preboot_ver = extract_fw_ver_from_str(prop->preboot_ver); if (preboot_ver) { - dev_info(hdev->dev, "preboot version %s\n", - preboot_ver); + char major[8]; + int rc; + + dev_info(hdev->dev, "preboot version %s\n", preboot_ver); + sprintf(major, "%.2s", preboot_ver); kfree(preboot_ver); + + rc = kstrtou32(major, 10, &hdev->fw_major_version); + if (rc) { + dev_err(hdev->dev, "Error %d parsing preboot major version\n", rc); + return rc; + } } break; default: dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc); - return; + return -EINVAL; } + + return 0; } /** @@ -2118,9 +2181,10 @@ static int hl_fw_dynamic_load_image(struct hl_device *hdev, goto release_fw; /* read preboot version */ - hl_fw_dynamic_read_device_fw_version(hdev, cur_fwc, + rc = hl_fw_dynamic_read_device_fw_version(hdev, cur_fwc, fw_loader->dynamic_loader.comm_desc.cur_fw_ver); - + if (rc) + goto release_fw; /* update state according to boot stage */ if (cur_fwc == FW_COMP_BOOT_FIT) { @@ -2387,9 +2451,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, goto protocol_err; /* read preboot version */ - hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT, + return hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT, fw_loader->dynamic_loader.comm_desc.cur_fw_ver); - return 0; } /* load boot fit to FW */ @@ -2682,3 +2745,138 @@ int hl_fw_init_cpu(struct hl_device *hdev) hl_fw_dynamic_init_cpu(hdev, fw_loader) : hl_fw_static_init_cpu(hdev, fw_loader); } + +void hl_fw_set_pll_profile(struct hl_device *hdev) +{ + hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index, + hdev->asic_prop.max_freq_value); +} + +int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) +{ + long value; + + if (!hl_device_operational(hdev, NULL)) + return -ENODEV; + + if (!hdev->pdev) { + *cur_clk = 0; + *max_clk = 0; + return 0; + } + + value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false); + + if (value < 0) { + dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", value); + return value; + } + + *max_clk = (value / 1000 / 1000); + + value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true); + + if (value < 0) { + dev_err(hdev->dev, "Failed to retrieve device current clock %ld\n", value); + return value; + } + + *cur_clk = (value / 1000 / 1000); + + return 0; +} + +long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) +{ + 
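When reading the preboot component version, the driver now extracts the numeric major with sprintf("%.2s") followed by kstrtou32(), and fails the load if the parse fails. A userspace analogue of that parse:

#include <stdio.h>
#include <stdlib.h>

/* Take the first two characters of e.g. "42.3-..." as the major. */
static unsigned int parse_major(const char *ver)
{
	char buf[8];

	snprintf(buf, sizeof(buf), "%.2s", ver);	/* -> "42" */
	return (unsigned int)strtoul(buf, NULL, 10);
}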
struct cpucp_packet pkt; + u32 used_pll_idx; + u64 result; + int rc; + + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return rc; + + memset(&pkt, 0, sizeof(pkt)); + + if (curr) + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + else + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); + + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); + + if (rc) { + dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n", + used_pll_idx, rc); + return rc; + } + + return (long) result; +} + +void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) +{ + struct cpucp_packet pkt; + u32 used_pll_idx; + int rc; + + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); + pkt.value = cpu_to_le64(freq); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + + if (rc) + dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n", + used_pll_idx, rc); +} + +long hl_fw_get_max_power(struct hl_device *hdev) +{ + struct cpucp_packet pkt; + u64 result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); + + if (rc) { + dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); + return rc; + } + + return result; +} + +void hl_fw_set_max_power(struct hl_device *hdev) +{ + struct cpucp_packet pkt; + int rc; + + /* TODO: remove this after simulator supports this packet */ + if (!hdev->pdev) + return; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.value = cpu_to_le64(hdev->max_power); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + + if (rc) + dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); +} diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index cb710fd478b6..b0b0f3f89865 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -21,6 +21,7 @@ #include <linux/hashtable.h> #include <linux/debugfs.h> #include <linux/rwsem.h> +#include <linux/eventfd.h> #include <linux/bitfield.h> #include <linux/genalloc.h> #include <linux/sched/signal.h> @@ -31,14 +32,15 @@ #define HL_NAME "habanalabs" /* Use upper bits of mmap offset to store habana driver specific information. - * bits[63:61] - Encode mmap type + * bits[63:59] - Encode mmap type * bits[45:0] - mmap offset value * * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. 
Hence, these * defines are w.r.t to PAGE_SIZE */ -#define HL_MMAP_TYPE_SHIFT (61 - PAGE_SHIFT) -#define HL_MMAP_TYPE_MASK (0x7ull << HL_MMAP_TYPE_SHIFT) +#define HL_MMAP_TYPE_SHIFT (59 - PAGE_SHIFT) +#define HL_MMAP_TYPE_MASK (0x1full << HL_MMAP_TYPE_SHIFT) +#define HL_MMAP_TYPE_TS_BUFF (0x10ull << HL_MMAP_TYPE_SHIFT) #define HL_MMAP_TYPE_BLOCK (0x4ull << HL_MMAP_TYPE_SHIFT) #define HL_MMAP_TYPE_CB (0x2ull << HL_MMAP_TYPE_SHIFT) @@ -60,8 +62,10 @@ #define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ #define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ +#define HL_CPUCP_MON_DUMP_TIMEOUT_USEC 10000000 /* 10s */ #define HL_FW_STATUS_POLL_INTERVAL_USEC 10000 /* 10ms */ +#define HL_FW_COMMS_STATUS_PLDM_POLL_INTERVAL_USEC 1000000 /* 1s */ #define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */ @@ -141,6 +145,9 @@ enum hl_mmu_page_table_location { * * - HL_DRV_RESET_FW_FATAL_ERR * Set if reset is due to a fatal error from FW + * + * - HL_DRV_RESET_DELAY + * Set if a delay should be added before the reset */ #define HL_DRV_RESET_HARD (1 << 0) @@ -150,6 +157,7 @@ enum hl_mmu_page_table_location { #define HL_DRV_RESET_DEV_RELEASE (1 << 4) #define HL_DRV_RESET_BYPASS_REQ_TO_FW (1 << 5) #define HL_DRV_RESET_FW_FATAL_ERR (1 << 6) +#define HL_DRV_RESET_DELAY (1 << 7) #define HL_MAX_SOBS_PER_MONITOR 8 @@ -389,42 +397,28 @@ enum hl_device_hw_state { * struct hl_mmu_properties - ASIC specific MMU address translation properties. * @start_addr: virtual start address of the memory region. * @end_addr: virtual end address of the memory region. - * @hop0_shift: shift of hop 0 mask. - * @hop1_shift: shift of hop 1 mask. - * @hop2_shift: shift of hop 2 mask. - * @hop3_shift: shift of hop 3 mask. - * @hop4_shift: shift of hop 4 mask. - * @hop5_shift: shift of hop 5 mask. - * @hop0_mask: mask to get the PTE address in hop 0. - * @hop1_mask: mask to get the PTE address in hop 1. - * @hop2_mask: mask to get the PTE address in hop 2. - * @hop3_mask: mask to get the PTE address in hop 3. - * @hop4_mask: mask to get the PTE address in hop 4. - * @hop5_mask: mask to get the PTE address in hop 5. + * @hop_shifts: array holds HOPs shifts. + * @hop_masks: array holds HOPs masks. * @last_mask: mask to get the bit indicating this is the last hop. + * @pgt_size: size for page tables. * @page_size: default page size used to allocate memory. * @num_hops: The amount of hops supported by the translation table. + * @hop_table_size: HOP table size. + * @hop0_tables_total_size: total size for all HOP0 tables. * @host_resident: Should the MMU page table reside in host memory or in the * device DRAM. */ struct hl_mmu_properties { u64 start_addr; u64 end_addr; - u64 hop0_shift; - u64 hop1_shift; - u64 hop2_shift; - u64 hop3_shift; - u64 hop4_shift; - u64 hop5_shift; - u64 hop0_mask; - u64 hop1_mask; - u64 hop2_mask; - u64 hop3_mask; - u64 hop4_mask; - u64 hop5_mask; + u64 hop_shifts[MMU_HOP_MAX]; + u64 hop_masks[MMU_HOP_MAX]; u64 last_mask; + u64 pgt_size; u32 page_size; u32 num_hops; + u32 hop_table_size; + u32 hop0_tables_total_size; u8 host_resident; }; @@ -475,8 +469,10 @@ struct hl_hints_range { * the device's MMU. * @dram_hints_align_mask: dram va hint addresses alignment mask which is used * for hints validity check. - * device_dma_offset_for_host_access: the offset to add to host DMA addresses - * to enable the device to access them. + * @device_dma_offset_for_host_access: the offset to add to host DMA addresses + * to enable the device to access them. 
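Widening the mmap type field from bits [63:61] to [63:59] makes room for HL_MMAP_TYPE_TS_BUFF (0x10) alongside BLOCK (0x4) and CB (0x2). Because vm_pgoff is expressed in pages, the shifts subtract PAGE_SHIFT. The packing, with concrete values assuming PAGE_SHIFT == 12:

#include <stdint.h>

#define PAGE_SHIFT		12
#define HL_MMAP_TYPE_SHIFT	(59 - PAGE_SHIFT)		/* 47 */
#define HL_MMAP_TYPE_MASK	(0x1full << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_TS_BUFF	(0x10ull << HL_MMAP_TYPE_SHIFT)

/* Pack a type tag and a page offset into a vm_pgoff-style value. */
static uint64_t encode_pgoff(uint64_t type, uint64_t offset_pages)
{
	return type | (offset_pages & ~HL_MMAP_TYPE_MASK);
}

/* Recover the type tag, as the hl_mmap() switch does. */
static uint64_t decode_type(uint64_t vm_pgoff)
{
	return vm_pgoff & HL_MMAP_TYPE_MASK;
}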
+ * @host_base_address: host physical start address for host DMA from device + * @host_end_address: host physical end address for host DMA from device * @max_freq_value: current max clk frequency. * @clk_pll_index: clock PLL index that specify which PLL determines the clock * we display to the user @@ -517,6 +513,10 @@ struct hl_hints_range { * @fw_app_cpu_boot_dev_sts1: bitmap representation of application security * status reported by FW, bit description can be * found in CPU_BOOT_DEV_STS1 + * @device_mem_alloc_default_page_size: may be different than dram_page_size only for ASICs for + * which the property supports_user_set_page_size is true + * (i.e. the DRAM supports multiple page sizes), otherwise + * it will shall be equal to dram_page_size. * @collective_first_sob: first sync object available for collective use * @collective_first_mon: first monitor available for collective use * @sync_stream_first_sob: first sync object available for sync stream use @@ -554,6 +554,10 @@ struct hl_hints_range { * use-case of doing soft-reset in training (due * to the fact that training runs on multiple * devices) + * @configurable_stop_on_err: is stop-on-error option configurable via debugfs. + * @set_max_power_on_device_init: true if need to set max power in F/W on device init. + * @supports_user_set_page_size: true if user can set the allocation page size. + * @dma_mask: the dma mask to be set for this device */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -585,6 +589,8 @@ struct asic_fixed_properties { u64 cb_va_end_addr; u64 dram_hints_align_mask; u64 device_dma_offset_for_host_access; + u64 host_base_address; + u64 host_end_address; u64 max_freq_value; u32 clk_pll_index; u32 mmu_pgt_size; @@ -612,6 +618,7 @@ struct asic_fixed_properties { u32 fw_bootfit_cpu_boot_dev_sts1; u32 fw_app_cpu_boot_dev_sts0; u32 fw_app_cpu_boot_dev_sts1; + u32 device_mem_alloc_default_page_size; u16 collective_first_sob; u16 collective_first_mon; u16 sync_stream_first_sob; @@ -637,6 +644,10 @@ struct asic_fixed_properties { u8 use_get_power_for_reset_history; u8 supports_soft_reset; u8 allow_inference_soft_reset; + u8 configurable_stop_on_err; + u8 set_max_power_on_device_init; + u8 supports_user_set_page_size; + u8 dma_mask; }; /** @@ -694,51 +705,102 @@ struct hl_cs_compl { */ /** - * struct hl_cb_mgr - describes a Command Buffer Manager. - * @cb_lock: protects cb_handles. - * @cb_handles: an idr to hold all command buffer handles. + * struct hl_ts_buff - describes a timestamp buffer. + * @kernel_buff_address: Holds the internal buffer's kernel virtual address. + * @user_buff_address: Holds the user buffer's kernel virtual address. + * @kernel_buff_size: Holds the internal kernel buffer size. + */ +struct hl_ts_buff { + void *kernel_buff_address; + void *user_buff_address; + u32 kernel_buff_size; +}; + +struct hl_mmap_mem_buf; + +/** + * struct hl_mem_mgr - describes unified memory manager for mappable memory chunks. + * @dev: back pointer to the owning device + * @lock: protects handles + * @handles: an idr holding all active handles to the memory buffers in the system. + */ +struct hl_mem_mgr { + struct device *dev; + spinlock_t lock; + struct idr handles; +}; + +/** + * struct hl_mmap_mem_buf_behavior - describes unified memory manager buffer behavior + * @topic: string identifier used for logging + * @mem_id: memory type identifier, embedded in the handle and used to identify + * the memory type by handle. 
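An hl_mmap_mem_buf_behavior is effectively a small vtable the unified memory manager invokes on allocate/mmap/release. A skeleton implementation follows; all example_* names and the vmalloc backing are hypothetical, only the structure layout comes from this patch:

#include <linux/mm.h>
#include <linux/sizes.h>
#include <linux/vmalloc.h>
#include "habanalabs.h"		/* struct definitions from this patch */

static int example_alloc(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
{
	buf->private = vmalloc(SZ_64K);		/* any backing works */
	if (!buf->private)
		return -ENOMEM;
	buf->mappable_size = SZ_64K;
	return 0;
}

static int example_mmap(struct hl_mmap_mem_buf *buf,
			struct vm_area_struct *vma, void *args)
{
	return remap_vmalloc_range(vma, buf->private, 0);
}

static void example_release(struct hl_mmap_mem_buf *buf)
{
	vfree(buf->private);
}

static struct hl_mmap_mem_buf_behavior example_behavior = {
	.topic	 = "EXAMPLE",
	.mem_id	 = HL_MMAP_TYPE_TS_BUFF,	/* embedded in handles */
	.alloc	 = example_alloc,
	.mmap	 = example_mmap,
	.release = example_release,
};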
+ * @alloc: callback executed on buffer allocation, shall allocate the memory, + * set it under buffer private, and set mappable size. + * @mmap: callback executed on mmap, must map the buffer to vma + * @release: callback executed on release, must free the resources used by the buffer */ -struct hl_cb_mgr { - spinlock_t cb_lock; - struct idr cb_handles; /* protected by cb_lock */ +struct hl_mmap_mem_buf_behavior { + const char *topic; + u64 mem_id; + + int (*alloc)(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args); + int (*mmap)(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args); + void (*release)(struct hl_mmap_mem_buf *buf); +}; + +/** + * struct hl_mmap_mem_buf - describes a single unified memory buffer + * @behavior: buffer behavior + * @mmg: back pointer to the unified memory manager + * @refcount: reference counter for buffer users + * @private: pointer to buffer behavior private data + * @mmap: atomic boolean indicating whether or not the buffer is mapped right now + * @real_mapped_size: the actual size of buffer mapped, after part of it may be released, + * may change at runtime. + * @mappable_size: the original mappable size of the buffer, does not change after + * the allocation. + * @handle: the buffer id in mmg handles store + */ +struct hl_mmap_mem_buf { + struct hl_mmap_mem_buf_behavior *behavior; + struct hl_mem_mgr *mmg; + struct kref refcount; + void *private; + atomic_t mmap; + u64 real_mapped_size; + u64 mappable_size; + u64 handle; }; /** * struct hl_cb - describes a Command Buffer. - * @refcount: reference counter for usage of the CB. * @hdev: pointer to device this CB belongs to. * @ctx: pointer to the CB owner's context. - * @lock: spinlock to protect mmap flows. + * @buf: back pointer to the parent mappable memory buffer * @debugfs_list: node in debugfs list of command buffers. * @pool_list: node in pool list of command buffers. * @va_block_list: list of virtual addresses blocks of the CB if it is mapped to * the device's MMU. - * @id: the CB's ID. * @kernel_address: Holds the CB's kernel virtual address. * @bus_address: Holds the CB's DMA address. - * @mmap_size: Holds the CB's size that was mmaped. * @size: holds the CB's size. * @cs_cnt: holds number of CS that this CB participates in. - * @mmap: true if the CB is currently mmaped to user. * @is_pool: true if CB was acquired from the pool, false otherwise. * @is_internal: internaly allocated * @is_mmu_mapped: true if the CB is mapped to the device's MMU. 
*/ struct hl_cb { - struct kref refcount; struct hl_device *hdev; struct hl_ctx *ctx; - spinlock_t lock; + struct hl_mmap_mem_buf *buf; struct list_head debugfs_list; struct list_head pool_list; struct list_head va_block_list; - u64 id; void *kernel_address; dma_addr_t bus_address; - u32 mmap_size; u32 size; atomic_t cs_cnt; - u8 mmap; u8 is_pool; u8 is_internal; u8 is_mmu_mapped; @@ -881,8 +943,53 @@ struct hl_user_interrupt { }; /** + * struct timestamp_reg_free_node - holds the timestamp registration free objects node + * @free_objects_node: node in the list free_obj_jobs + * @cq_cb: pointer to cq command buffer to be freed + * @buf: pointer to timestamp buffer to be freed + */ +struct timestamp_reg_free_node { + struct list_head free_objects_node; + struct hl_cb *cq_cb; + struct hl_mmap_mem_buf *buf; +}; + +/* struct timestamp_reg_work_obj - holds the timestamp registration free objects job + * the job will be to pass over the free_obj_jobs list and put refcount to objects + * in each node of the list + * @free_obj: workqueue object to free timestamp registration node objects + * @hdev: pointer to the device structure + * @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node) + */ +struct timestamp_reg_work_obj { + struct work_struct free_obj; + struct hl_device *hdev; + struct list_head *free_obj_head; +}; + +/* struct timestamp_reg_info - holds the timestamp registration related data. + * @buf: pointer to the timestamp buffer which include both user/kernel buffers. + * relevant only when doing timestamps records registration. + * @cq_cb: pointer to CQ counter CB. + * @timestamp_kernel_addr: timestamp handle address, where to set timestamp + * relevant only when doing timestamps records + * registration. + * @in_use: indicates if the node already in use. relevant only when doing + * timestamps records registration, since in this case the driver + * will have it's own buffer which serve as a records pool instead of + * allocating records dynamically. + */ +struct timestamp_reg_info { + struct hl_mmap_mem_buf *buf; + struct hl_cb *cq_cb; + u64 *timestamp_kernel_addr; + u8 in_use; +}; + +/** * struct hl_user_pending_interrupt - holds a context to a user thread * pending on an interrupt + * @ts_reg_info: holds the timestamps registration nodes info * @wait_list_node: node in the list of user threads pending on an interrupt * @fence: hl fence object for interrupt completion * @cq_target_value: CQ target value @@ -890,10 +997,11 @@ struct hl_user_interrupt { * handler for taget value comparison */ struct hl_user_pending_interrupt { - struct list_head wait_list_node; - struct hl_fence fence; - u64 cq_target_value; - u64 *cq_kernel_addr; + struct timestamp_reg_info ts_reg_info; + struct list_head wait_list_node; + struct hl_fence fence; + u64 cq_target_value; + u64 *cq_kernel_addr; }; /** @@ -971,6 +1079,15 @@ enum div_select_defs { DIV_SEL_DIVIDED_PLL = 3, }; +enum debugfs_access_type { + DEBUGFS_READ8, + DEBUGFS_WRITE8, + DEBUGFS_READ32, + DEBUGFS_WRITE32, + DEBUGFS_READ64, + DEBUGFS_WRITE64, +}; + enum pci_region { PCI_REGION_CFG, PCI_REGION_SRAM, @@ -1132,6 +1249,7 @@ struct fw_load_mgr { * its implementation is not trivial when the driver * is loaded in simulation mode (not upstreamed). * @scrub_device_mem: Scrub device memory given an address and size + * @scrub_device_dram: Scrub the dram memory of the device. * @get_int_queue_base: get the internal queue base address. * @test_queues: run simple test on all queues for sanity check. 
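The two structs above imply a deferred-free flow: interrupt context queues timestamp_reg_free_node entries, and a worker later drops the references each node holds. One plausible shape of that worker is sketched below; the actual handler lives elsewhere in the driver, and hl_cb_put()/hl_mmap_mem_buf_put() are assumed driver helpers:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include "habanalabs.h"

static void ts_free_obj_work_sketch(struct work_struct *work)
{
	struct timestamp_reg_work_obj *job =
		container_of(work, struct timestamp_reg_work_obj, free_obj);
	struct timestamp_reg_free_node *node, *tmp;

	list_for_each_entry_safe(node, tmp, job->free_obj_head,
				 free_objects_node) {
		/* drop the references the registration was holding */
		hl_cb_put(node->cq_cb);
		hl_mmap_mem_buf_put(node->buf);
		list_del(&node->free_objects_node);
		kfree(node);
	}
	kfree(job->free_obj_head);
	kfree(job);
}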
* @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool. @@ -1139,34 +1257,26 @@ struct fw_load_mgr { * @asic_dma_pool_free: free small DMA allocation from pool. * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool. * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool. - * @hl_dma_unmap_sg: DMA unmap scatter-gather list. + * @hl_dma_unmap_sgtable: DMA unmap scatter-gather table. * @cs_parser: parse Command Submission. - * @asic_dma_map_sg: DMA map scatter-gather list. + * @asic_dma_map_sgtable: DMA map scatter-gather table. * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB. * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it. * @update_eq_ci: update event queue CI. * @context_switch: called upon ASID context switch. * @restore_phase_topology: clear all SOBs amd MONs. - * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM/Host memory. - * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM/Host memory. - * @debugfs_read64: debug interface for reading u64 from DRAM/SRAM/Host memory. - * @debugfs_write64: debug interface for writing u64 to DRAM/SRAM/Host memory. * @debugfs_read_dma: debug interface for reading up to 2MB from the device's * internal memory via DMA engine. * @add_device_attr: add ASIC specific device attributes. * @handle_eqe: handle event queue entry (IRQ) from CPU-CP. - * @set_pll_profile: change PLL profile (manual/automatic). * @get_events_stat: retrieve event queue entries histogram. * @read_pte: read MMU page table entry from DRAM. * @write_pte: write MMU page table entry to DRAM. * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft * (L1 only) or hard (L0 & L1) flush. - * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with - * ASID-VA-size mask. + * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with ASID-VA-size mask. + * @mmu_prefetch_cache_range: pre-fetch specific MMU STLB cache lines with ASID-VA-size mask. * @send_heartbeat: send is-alive packet to CPU-CP and verify response. - * @set_clock_gating: enable/disable clock gating per engine according to - * clock gating mask in hdev - * @disable_clock_gating: disable clock gating completely * @debug_coresight: perform certain actions on Coresight for debugging. * @is_device_idle: return true if device is idle, false otherwise. * @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset @@ -1174,6 +1284,7 @@ struct fw_load_mgr { * @hw_queues_unlock: release H/W queues lock. * @get_pci_id: retrieve PCI ID. * @get_eeprom_data: retrieve EEPROM data from F/W. + * @get_monitor_dump: retrieve monitor registers dump from F/W. * @send_cpu_message: send message to F/W. If the message is timedout, the * driver will eventually reset the device. The timeout can * be determined by the calling function or it can be 0 and @@ -1187,7 +1298,6 @@ struct fw_load_mgr { * @halt_coresight: stop the ETF and ETR traces. * @ctx_init: context dependent initialization. * @ctx_fini: context dependent cleanup. - * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index. * @load_firmware_to_device: load the firmware to the device's memory * @load_boot_fit_to_device: load boot fit to device's memory @@ -1197,8 +1307,6 @@ struct fw_load_mgr { * @gen_wait_cb: Generate a wait CB. * @reset_sob: Reset a SOB. 
* @reset_sob_group: Reset SOB group - * @set_dma_mask_from_fw: set the DMA mask in the driver according to the - * firmware configuration * @get_device_time: Get the device time. * @collective_wait_init_cs: Generate collective master/slave packets * and place them in the relevant cs jobs @@ -1225,6 +1333,11 @@ struct fw_load_mgr { * @get_sob_addr: get SOB base address offset. * @set_pci_memory_regions: setting properties of PCI memory regions * @get_stream_master_qid_arr: get pointer to stream masters QID array + * @is_valid_dram_page_size: return true if page size is supported in device + * memory allocation, otherwise false. + * @get_valid_dram_page_orders: get valid device memory allocation page orders + * @access_dev_mem: access device memory + * @set_dram_bar_base: set the base of the DRAM BAR */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -1248,6 +1361,7 @@ struct hl_asic_funcs { void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size, void *cpu_addr, dma_addr_t dma_handle); int (*scrub_device_mem)(struct hl_device *hdev, u64 addr, u64 size); + int (*scrub_device_dram)(struct hl_device *hdev, u64 val); void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id, dma_addr_t *dma_handle, u16 *queue_len); int (*test_queues)(struct hl_device *hdev); @@ -1259,12 +1373,11 @@ struct hl_asic_funcs { size_t size, dma_addr_t *dma_handle); void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev, size_t size, void *vaddr); - void (*hl_dma_unmap_sg)(struct hl_device *hdev, - struct scatterlist *sgl, int nents, + void (*hl_dma_unmap_sgtable)(struct hl_device *hdev, + struct sg_table *sgt, enum dma_data_direction dir); int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser); - int (*asic_dma_map_sg)(struct hl_device *hdev, - struct scatterlist *sgl, int nents, + int (*asic_dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); u32 (*get_dma_desc_list_size)(struct hl_device *hdev, struct sg_table *sgt); @@ -1275,22 +1388,12 @@ struct hl_asic_funcs { void (*update_eq_ci)(struct hl_device *hdev, u32 val); int (*context_switch)(struct hl_device *hdev, u32 asid); void (*restore_phase_topology)(struct hl_device *hdev); - int (*debugfs_read32)(struct hl_device *hdev, u64 addr, - bool user_address, u32 *val); - int (*debugfs_write32)(struct hl_device *hdev, u64 addr, - bool user_address, u32 val); - int (*debugfs_read64)(struct hl_device *hdev, u64 addr, - bool user_address, u64 *val); - int (*debugfs_write64)(struct hl_device *hdev, u64 addr, - bool user_address, u64 val); int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr); - void (*add_device_attr)(struct hl_device *hdev, - struct attribute_group *dev_attr_grp); + void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, + struct attribute_group *dev_vrm_attr_grp); void (*handle_eqe)(struct hl_device *hdev, struct hl_eq_entry *eq_entry); - void (*set_pll_profile)(struct hl_device *hdev, - enum hl_pll_frequency freq); void* (*get_events_stat)(struct hl_device *hdev, bool aggregate, u32 *size); u64 (*read_pte)(struct hl_device *hdev, u64 addr); @@ -1299,9 +1402,8 @@ struct hl_asic_funcs { u32 flags); int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, u32 flags, u32 asid, u64 va, u64 size); + int (*mmu_prefetch_cache_range)(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); - void (*set_clock_gating)(struct hl_device 
*hdev); - void (*disable_clock_gating)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data); bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, struct seq_file *s); @@ -1309,8 +1411,8 @@ struct hl_asic_funcs { void (*hw_queues_lock)(struct hl_device *hdev); void (*hw_queues_unlock)(struct hl_device *hdev); u32 (*get_pci_id)(struct hl_device *hdev); - int (*get_eeprom_data)(struct hl_device *hdev, void *data, - size_t max_size); + int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size); + int (*get_monitor_dump)(struct hl_device *hdev, void *data); int (*send_cpu_message)(struct hl_device *hdev, u32 *msg, u16 len, u32 timeout, u64 *result); int (*pci_bars_map)(struct hl_device *hdev); @@ -1320,7 +1422,6 @@ struct hl_asic_funcs { void (*halt_coresight)(struct hl_device *hdev, struct hl_ctx *ctx); int (*ctx_init)(struct hl_ctx *ctx); void (*ctx_fini)(struct hl_ctx *ctx); - int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx); int (*load_firmware_to_device)(struct hl_device *hdev); int (*load_boot_fit_to_device)(struct hl_device *hdev); @@ -1332,7 +1433,6 @@ struct hl_asic_funcs { struct hl_gen_wait_properties *prop); void (*reset_sob)(struct hl_device *hdev, void *data); void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group); - void (*set_dma_mask_from_fw)(struct hl_device *hdev); u64 (*get_device_time)(struct hl_device *hdev); int (*collective_wait_init_cs)(struct hl_cs *cs); int (*collective_wait_create_jobs)(struct hl_device *hdev, @@ -1355,6 +1455,13 @@ struct hl_asic_funcs { u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id); void (*set_pci_memory_regions)(struct hl_device *hdev); u32* (*get_stream_master_qid_arr)(void); + bool (*is_valid_dram_page_size)(u32 page_size); + int (*mmu_get_real_page_size)(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop, + u32 page_size, u32 *real_page_size, bool is_dram_addr); + void (*get_valid_dram_page_orders)(struct hl_info_dev_memalloc_page_sizes *info); + int (*access_dev_mem)(struct hl_device *hdev, struct pci_mem_region *region, + enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type); + u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); }; @@ -1742,6 +1849,8 @@ struct hl_vm_hw_block_list_node { * @pages: the physical page array. * @npages: num physical pages in the pack. * @total_size: total size of all the pages in this list. + * @node: used to attach to deletion list that is used when all the allocations are cleared + * at the teardown of the context. * @mapping_cnt: number of shared mappings. * @exporting_cnt: number of dma-buf exporting. * @asid: the context related to this list. @@ -1757,6 +1866,7 @@ struct hl_vm_phys_pg_pack { u64 *pages; u64 npages; u64 total_size; + struct list_head node; atomic_t mapping_cnt; u32 exporting_cnt; u32 asid; @@ -1822,6 +1932,18 @@ struct hl_debug_params { bool enable; }; +/** + * struct hl_notifier_event - holds the notifier data structure + * @eventfd: the event file descriptor to raise the notifications + * @lock: mutex lock to protect the notifier data flows + * @events_mask: indicates the bitmap events + */ +struct hl_notifier_event { + struct eventfd_ctx *eventfd; + struct mutex lock; + u64 events_mask; +}; + /* * FILE PRIVATE STRUCTURE */ @@ -1833,23 +1955,25 @@ struct hl_debug_params { * @taskpid: current process ID. * @ctx: current executing context. 
TODO: remove for multiple ctx per process * @ctx_mgr: context manager to handle multiple context for this FD. - * @cb_mgr: command buffer manager to handle multiple buffers for this FD. + * @mem_mgr: manager descriptor for memory exportable via mmap + * @notifier_event: notifier eventfd towards user process * @debugfs_list: list of relevant ASIC debugfs. * @dev_node: node in the device list of file private data * @refcount: number of related contexts. * @restore_phase_mutex: lock for context switch and restore phase. */ struct hl_fpriv { - struct hl_device *hdev; - struct file *filp; - struct pid *taskpid; - struct hl_ctx *ctx; - struct hl_ctx_mgr ctx_mgr; - struct hl_cb_mgr cb_mgr; - struct list_head debugfs_list; - struct list_head dev_node; - struct kref refcount; - struct mutex restore_phase_mutex; + struct hl_device *hdev; + struct file *filp; + struct pid *taskpid; + struct hl_ctx *ctx; + struct hl_ctx_mgr ctx_mgr; + struct hl_mem_mgr mem_mgr; + struct hl_notifier_event notifier_event; + struct list_head debugfs_list; + struct list_head dev_node; + struct kref refcount; + struct mutex restore_phase_mutex; }; @@ -1897,12 +2021,14 @@ struct hl_debugfs_entry { * @userptr_spinlock: protects userptr_list. * @ctx_mem_hash_list: list of available contexts with MMU mappings. * @ctx_mem_hash_spinlock: protects cb_list. - * @blob_desc: descriptor of blob + * @data_dma_blob_desc: data DMA descriptor of blob. + * @mon_dump_blob_desc: monitor dump descriptor of blob. * @state_dump: data of the system states in case of a bad cs. * @state_dump_sem: protects state_dump. * @addr: next address to read/write from/to in read/write32. * @mmu_addr: next virtual address to translate to physical address in mmu_show. * @userptr_lookup: the target user ptr to look up for on demand. + * @memory_scrub_val: the value to which the dram will be scrubbed to using cb scrub_device_dram * @mmu_asid: ASID to use while translating in mmu_show. * @state_dump_head: index of the latest state dump * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read. @@ -1926,12 +2052,14 @@ struct hl_dbg_device_entry { spinlock_t userptr_spinlock; struct list_head ctx_mem_hash_list; spinlock_t ctx_mem_hash_spinlock; - struct debugfs_blob_wrapper blob_desc; + struct debugfs_blob_wrapper data_dma_blob_desc; + struct debugfs_blob_wrapper mon_dump_blob_desc; char *state_dump[HL_STATE_DUMP_HIST_LEN]; struct rw_semaphore state_dump_sem; u64 addr; u64 mmu_addr; u64 userptr_lookup; + u64 memory_scrub_val; u32 mmu_asid; u32 state_dump_head; u8 i2c_bus; @@ -2347,6 +2475,24 @@ struct hl_mmu_funcs { }; /** + * struct hl_prefetch_work - prefetch work structure handler + * @pf_work: actual work struct. + * @ctx: compute context. + * @va: virtual address to pre-fetch. + * @size: pre-fetch size. + * @flags: operation flags. + * @asid: ASID for maintenance operation. + */ +struct hl_prefetch_work { + struct work_struct pf_work; + struct hl_ctx *ctx; + u64 va; + u64 size; + u32 flags; + u32 asid; +}; + +/* * number of user contexts allowed to call wait_for_multi_cs ioctl in * parallel */ @@ -2422,37 +2568,50 @@ struct hl_clk_throttle { }; /** - * struct last_error_session_info - info about last session in which CS timeout or - * razwi error occurred. - * @open_dev_timestamp: device open timestamp. - * @cs_timeout_timestamp: CS timeout timestamp. - * @razwi_timestamp: razwi timestamp. - * @cs_write_disable: if set writing to CS parameters in the structure is disabled so the - * first (root cause) CS timeout will not be overwritten. 
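The write_disable fields exist so that only the first (root-cause) CS timeout or razwi in a session is recorded. One robust way to honor that contract is an atomic 0 -> 1 compare-and-swap where only the winner writes; a sketch of the pattern these fields enable:

#include <linux/atomic.h>
#include <linux/ktime.h>
#include <linux/types.h>

struct cs_timeout_info_sketch {
	ktime_t timestamp;
	atomic_t write_disable;
	u64 seq;
};

static void record_cs_timeout(struct cs_timeout_info_sketch *info, u64 seq)
{
	/* returns the old value: non-zero means someone already won */
	if (atomic_cmpxchg(&info->write_disable, 0, 1))
		return;

	info->timestamp = ktime_get();
	info->seq = seq;
}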
- * @razwi_write_disable: if set writing to razwi parameters in the structure is disabled so the - * first (root cause) razwi will not be overwritten. - * @cs_timeout_seq: CS timeout sequence number. - * @razwi_addr: address that caused razwi. - * @razwi_engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does - * not have engine id it will be set to U16_MAX. - * @razwi_engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible - * engines which one them caused the razwi. In that case, it will contain the - * second possible engine id, otherwise it will be set to U16_MAX. - * @razwi_non_engine_initiator: in case the initiator of the razwi does not have engine id. - * @razwi_type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX. + * struct cs_timeout_info - info of last CS timeout occurred. + * @timestamp: CS timeout timestamp. + * @write_disable: if set writing to CS parameters in the structure is disabled so, + * the first (root cause) CS timeout will not be overwritten. + * @seq: CS timeout sequence number. + */ +struct cs_timeout_info { + ktime_t timestamp; + atomic_t write_disable; + u64 seq; +}; + +/** + * struct razwi_info - info about last razwi error occurred. + * @timestamp: razwi timestamp. + * @write_disable: if set writing to razwi parameters in the structure is disabled so the + * first (root cause) razwi will not be overwritten. + * @addr: address that caused razwi. + * @engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does + * not have engine id it will be set to U16_MAX. + * @engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible + * engines which one them caused the razwi. In that case, it will contain the + * second possible engine id, otherwise it will be set to U16_MAX. + * @non_engine_initiator: in case the initiator of the razwi does not have engine id. + * @type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX. + */ +struct razwi_info { + ktime_t timestamp; + atomic_t write_disable; + u64 addr; + u16 engine_id_1; + u16 engine_id_2; + u8 non_engine_initiator; + u8 type; +}; + +/** + * struct last_error_session_info - info about last session errors occurred. + * @cs_timeout: CS timeout error last information. + * @razwi: razwi last information. */ struct last_error_session_info { - ktime_t open_dev_timestamp; - ktime_t cs_timeout_timestamp; - ktime_t razwi_timestamp; - atomic_t cs_write_disable; - atomic_t razwi_write_disable; - u64 cs_timeout_seq; - u64 razwi_addr; - u16 razwi_engine_id_1; - u16 razwi_engine_id_2; - u8 razwi_non_engine_initiator; - u8 razwi_type; + struct cs_timeout_info cs_timeout; + struct razwi_info razwi; }; /** @@ -2518,12 +2677,13 @@ struct hl_reset_info { * @cq_wq: work queues of completion queues for executing work in process * context. * @eq_wq: work queue of event queue for executing work in process context. - * @sob_reset_wq: work queue for sob reset executions. + * @ts_free_obj_wq: work queue for timestamp registration objects release. + * @pf_wq: work queue for MMU pre-fetch operations. * @kernel_ctx: Kernel driver context structure. * @kernel_queues: array of hl_hw_queue. * @cs_mirror_list: CS mirror list for TDR. * @cs_mirror_lock: protects cs_mirror_list. - * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CBs. + * @kernel_mem_mgr: memory manager for memory buffers with lifespan of driver. 
* @event_queue: event queue for IRQ from CPU-CP. * @dma_pool: DMA pool for small allocations. * @cpu_accessible_dma_mem: Host <-> CPU-CP shared memory CPU address. @@ -2561,17 +2721,15 @@ struct hl_reset_info { * @state_dump_specs: constants and dictionaries needed to dump system state. * @multi_cs_completion: array of multi-CS completion. * @clk_throttling: holds information about current/previous clock throttling events - * @reset_info: holds current device reset information. * @last_error: holds information about last session in which CS timeout or razwi error occurred. + * @reset_info: holds current device reset information. * @stream_master_qid_arr: pointer to array with QIDs of master streams. + * @fw_major_version: major version of current loaded preboot * @dram_used_mem: current DRAM memory consumption. * @timeout_jiffies: device CS timeout value. * @max_power: the max power of the device, as configured by the sysadmin. This * value is saved so in case of hard-reset, the driver will restore * this value and update the F/W after the re-initialization - * @clock_gating_mask: is clock gating enabled. bitmask that represents the - * different engines. See debugfs-driver-habanalabs for - * details. * @boot_error_status_mask: contains a mask of the device boot error status. * Each bit represents a different error, according to * the defines in hl_boot_if.h. If the bit is cleared, @@ -2586,6 +2744,9 @@ struct hl_reset_info { * session. * @open_counter: number of successful device open operations. * @fw_poll_interval_usec: FW status poll interval in usec. + * used for CPU boot status + * @fw_comms_poll_interval_usec: FW comms/protocol poll interval in usec. + * used for COMMs protocols cmds(COMMS_STS_*) * @card_type: Various ASICs have several card types. This indicates the card * type of the current device. * @major: habanalabs kernel driver major. @@ -2594,6 +2755,7 @@ struct hl_reset_info { * @id_control: minor of the control device * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit * addresses. + * @is_in_dram_scrub: true if dram scrub operation is on going. * @disabled: is device disabled. * @late_init_done: is late init stage was done during initialization. * @hwmon_initialized: is H/W monitor sensors was initialized. @@ -2607,12 +2769,9 @@ struct hl_reset_info { * huge pages. * @init_done: is the initialization of the device done. * @device_cpu_disabled: is the device CPU disabled (due to timeouts) - * @dma_mask: the dma mask that was set for this device * @in_debug: whether the device is in a state where the profiling/tracing infrastructure * can be used. This indication is needed because in some ASICs we need to do * specific operations to enable that infrastructure. - * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant - * only to POWER9 machines. * @cdev_sysfs_created: were char devices and sysfs nodes created. * @stop_on_err: true if engines should stop on error. * @supports_sync_stream: is sync stream supported. @@ -2631,6 +2790,8 @@ struct hl_reset_info { * cases where Linux was not loaded to device CPU * @supports_wait_for_multi_cs: true if wait for multi CS is supported * @is_compute_ctx_active: Whether there is an active compute context executing. + * @compute_ctx_in_release: true if the current compute context is being released. + * @supports_mmu_prefetch: true if prefetch is supported, otherwise false. 
*/ struct hl_device { struct pci_dev *pdev; @@ -2651,12 +2812,13 @@ struct hl_device { struct hl_user_interrupt common_user_interrupt; struct workqueue_struct **cq_wq; struct workqueue_struct *eq_wq; - struct workqueue_struct *sob_reset_wq; + struct workqueue_struct *ts_free_obj_wq; + struct workqueue_struct *pf_wq; struct hl_ctx *kernel_ctx; struct hl_hw_queue *kernel_queues; struct list_head cs_mirror_list; spinlock_t cs_mirror_lock; - struct hl_cb_mgr kernel_cb_mgr; + struct hl_mem_mgr kernel_mem_mgr; struct hl_eq event_queue; struct dma_pool *dma_pool; void *cpu_accessible_dma_mem; @@ -2707,10 +2869,10 @@ struct hl_device { struct hl_reset_info reset_info; u32 *stream_master_qid_arr; + u32 fw_major_version; atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; - u64 clock_gating_mask; u64 boot_error_status_mask; u64 dram_pci_bar_start; u64 last_successful_open_jif; @@ -2718,12 +2880,15 @@ struct hl_device { u64 open_counter; u64 fw_poll_interval_usec; ktime_t last_successful_open_ktime; + u64 fw_comms_poll_interval_usec; + enum cpucp_card_types card_type; u32 major; u32 high_pll; u16 id; u16 id_control; u16 cpu_pci_msb_addr; + u8 is_in_dram_scrub; u8 disabled; u8 late_init_done; u8 hwmon_initialized; @@ -2734,9 +2899,7 @@ struct hl_device { u8 pmmu_huge_range; u8 init_done; u8 device_cpu_disabled; - u8 dma_mask; u8 in_debug; - u8 power9_64bit_dma_enable; u8 cdev_sysfs_created; u8 stop_on_err; u8 supports_sync_stream; @@ -2751,6 +2914,8 @@ struct hl_device { u8 supports_wait_for_multi_cs; u8 stream_master_qid_arr_size; u8 is_compute_ctx_active; + u8 compute_ctx_in_release; + u8 supports_mmu_prefetch; /* Parameters for bring-up */ u64 nic_ports_mask; @@ -2883,6 +3048,14 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size, return ((address <= range_end_address) && (range_start_address <= end_address)); } +uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr); +int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); +void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir); +int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val, + enum debugfs_access_type acc_type); +int hl_access_dev_mem(struct hl_device *hdev, struct pci_mem_region *region, + enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type); int hl_device_open(struct inode *inode, struct file *filp); int hl_device_open_ctrl(struct inode *inode, struct file *filp); bool hl_device_operational(struct hl_device *hdev, @@ -2925,7 +3098,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv); void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx); int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx); void hl_ctx_do_release(struct kref *ref); -void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx); +void hl_ctx_get(struct hl_ctx *ctx); int hl_ctx_put(struct hl_ctx *ctx); struct hl_ctx *hl_get_compute_ctx(struct hl_device *hdev); struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq); @@ -2946,23 +3119,21 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization); int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sensors_arr); +void hl_notifier_event_send_all(struct hl_device *hdev, u64 event); + int hl_sysfs_init(struct hl_device *hdev); void hl_sysfs_fini(struct hl_device *hdev); int hl_hwmon_init(struct hl_device *hdev); void hl_hwmon_fini(struct hl_device *hdev); -int 
hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, +int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg, struct hl_ctx *ctx, u32 cb_size, bool internal_cb, bool map_cb, u64 *handle); -int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle); -int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); +int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle); int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); -struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr, - u32 handle); +struct hl_cb *hl_cb_get(struct hl_mem_mgr *mmg, u64 handle); void hl_cb_put(struct hl_cb *cb); -void hl_cb_mgr_init(struct hl_cb_mgr *mgr); -void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr); struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, bool internal_cb); int hl_cb_pool_init(struct hl_device *hdev); @@ -2970,7 +3141,7 @@ int hl_cb_pool_fini(struct hl_device *hdev); int hl_cb_va_pool_init(struct hl_ctx *ctx); void hl_cb_va_pool_fini(struct hl_ctx *ctx); -void hl_cs_rollback_all(struct hl_device *hdev); +void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush); struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, enum hl_queue_type queue_type, bool is_kernel_allocated_cb); void hl_sob_reset_error(struct kref *ref); @@ -3016,6 +3187,8 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx); void hl_mmu_ctx_fini(struct hl_ctx *ctx); int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte); +int hl_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop, + u32 page_size, u32 *real_page_size, bool is_dram_addr); int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flush_pte); int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr, @@ -3024,6 +3197,10 @@ int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size); int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags); int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, u32 flags, u32 asid, u64 va, u64 size); +int hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size); +u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte); +u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop, + u8 hop_idx, u64 hop_addr, u64 virt_addr); void hl_mmu_swap_out(struct hl_ctx *ctx); void hl_mmu_swap_in(struct hl_ctx *ctx); int hl_mmu_if_set_funcs(struct hl_device *hdev); @@ -3058,6 +3235,7 @@ int hl_fw_cpucp_handshake(struct hl_device *hdev, u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg, u32 boot_err1_reg); int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); +int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data); int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters); int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, @@ -3094,39 +3272,26 @@ enum pci_region hl_get_pci_memory_region(struct hl_device *hdev, u64 addr); int hl_pci_init(struct hl_device *hdev); void hl_pci_fini(struct hl_device *hdev); -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, - bool curr); -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, - u64 freq); -int hl_get_temperature(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_set_temperature(struct hl_device *hdev, - int sensor_index, u32 attr, long value); -int 
hl_get_voltage(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_get_current(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_get_fan_speed(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_get_pwm_info(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, - long value); -u64 hl_get_max_power(struct hl_device *hdev); -void hl_set_max_power(struct hl_device *hdev); -int hl_set_voltage(struct hl_device *hdev, - int sensor_index, u32 attr, long value); -int hl_set_current(struct hl_device *hdev, - int sensor_index, u32 attr, long value); -int hl_set_power(struct hl_device *hdev, - int sensor_index, u32 attr, long value); -int hl_get_power(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_get_clk_rate(struct hl_device *hdev, - u32 *cur_clk, u32 *max_clk); -void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq); -void hl_add_device_attr(struct hl_device *hdev, - struct attribute_group *dev_attr_grp); +long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); +void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); +int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value); +int hl_set_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long value); +int hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long *value); +int hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr, long *value); +int hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr, long *value); +int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *value); +void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value); +long hl_fw_get_max_power(struct hl_device *hdev); +void hl_fw_set_max_power(struct hl_device *hdev); +int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value); +int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value); +int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value); +int hl_get_power(struct hl_device *hdev, int sensor_index, u32 attr, long *value); +int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); +void hl_fw_set_pll_profile(struct hl_device *hdev); +void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp); +void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *dev_vrm_attr_grp); + void hw_sob_get(struct hl_hw_sob *hw_sob); void hw_sob_put(struct hl_hw_sob *hw_sob); void hl_encaps_handle_do_release(struct kref *ref); @@ -3147,6 +3312,19 @@ __printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset, char *hl_format_as_binary(char *buf, size_t buf_len, u32 n); const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type); +void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg); +void hl_mem_mgr_fini(struct hl_mem_mgr *mmg); +int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma, + void *args); +struct hl_mmap_mem_buf *hl_mmap_mem_buf_get(struct hl_mem_mgr *mmg, + u64 handle); +int hl_mmap_mem_buf_put_handle(struct hl_mem_mgr *mmg, u64 handle); +int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf); +struct hl_mmap_mem_buf * +hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg, + struct hl_mmap_mem_buf_behavior 
*behavior, gfp_t gfp, + void *args); + #ifdef CONFIG_DEBUG_FS void hl_debugfs_init(void); diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 690b763c7a95..37edb69a7255 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -134,12 +134,14 @@ int hl_device_open(struct inode *inode, struct file *filp) hpriv->hdev = hdev; filp->private_data = hpriv; hpriv->filp = filp; + + mutex_init(&hpriv->notifier_event.lock); mutex_init(&hpriv->restore_phase_mutex); kref_init(&hpriv->refcount); nonseekable_open(inode, filp); - hl_cb_mgr_init(&hpriv->cb_mgr); hl_ctx_mgr_init(&hpriv->ctx_mgr); + hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr); hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); @@ -149,7 +151,28 @@ int hl_device_open(struct inode *inode, struct file *filp) dev_err_ratelimited(hdev->dev, "Can't open %s because it is %s\n", dev_name(hdev->dev), hdev->status[status]); - rc = -EPERM; + + if (status == HL_DEVICE_STATUS_IN_RESET) + rc = -EAGAIN; + else + rc = -EPERM; + + goto out_err; + } + + if (hdev->is_in_dram_scrub) { + dev_dbg_ratelimited(hdev->dev, + "Can't open %s during dram scrub\n", + dev_name(hdev->dev)); + rc = -EAGAIN; + goto out_err; + } + + if (hdev->compute_ctx_in_release) { + dev_dbg_ratelimited(hdev->dev, + "Can't open %s because another user is still releasing it\n", + dev_name(hdev->dev)); + rc = -EAGAIN; goto out_err; } @@ -172,8 +195,8 @@ int hl_device_open(struct inode *inode, struct file *filp) hl_debugfs_add_file(hpriv); - atomic_set(&hdev->last_error.cs_write_disable, 0); - atomic_set(&hdev->last_error.razwi_write_disable, 0); + atomic_set(&hdev->last_error.cs_timeout.write_disable, 0); + atomic_set(&hdev->last_error.razwi.write_disable, 0); hdev->open_counter++; hdev->last_successful_open_jif = jiffies; @@ -183,10 +206,11 @@ int hl_device_open(struct inode *inode, struct file *filp) out_err: mutex_unlock(&hdev->fpriv_list_lock); - hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); + hl_mem_mgr_fini(&hpriv->mem_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); filp->private_data = NULL; mutex_destroy(&hpriv->restore_phase_mutex); + mutex_destroy(&hpriv->notifier_event.lock); put_pid(hpriv->taskpid); kfree(hpriv); @@ -220,9 +244,11 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) hpriv->hdev = hdev; filp->private_data = hpriv; hpriv->filp = filp; + + mutex_init(&hpriv->notifier_event.lock); nonseekable_open(inode, filp); - hpriv->taskpid = find_get_pid(current->pid); + hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); mutex_lock(&hdev->fpriv_ctrl_list_lock); @@ -256,7 +282,6 @@ static void set_driver_behavior_per_device(struct hl_device *hdev) hdev->cpu_queues_enable = 1; hdev->heartbeat = 1; hdev->mmu_enable = 1; - hdev->clock_gating_mask = ULONG_MAX; hdev->sram_scrambler_enable = 1; hdev->dram_scrambler_enable = 1; hdev->bmc_enable = 1; @@ -287,6 +312,7 @@ static int fixup_device_params(struct hl_device *hdev) hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type); hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; + hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; hdev->stop_on_err = true; hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; @@ -295,9 +321,6 @@ static int fixup_device_params(struct hl_device *hdev) /* Enable only after the initialization of the device */ hdev->disabled = true; - /* Set default DMA mask to 32 bits */ - hdev->dma_mask = 32; - return 0; 
} diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 3ba3a8ffda3e..c7864d6bb0a1 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -76,6 +76,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) if (hw_ip.dram_size > PAGE_SIZE) hw_ip.dram_enabled = 1; hw_ip.dram_page_size = prop->dram_page_size; + hw_ip.device_mem_alloc_default_page_size = prop->device_mem_alloc_default_page_size; hw_ip.num_of_events = prop->num_of_events; memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version, @@ -92,8 +93,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od; hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor; - hw_ip.first_available_interrupt_id = - prop->first_available_user_msix_interrupt; + hw_ip.first_available_interrupt_id = prop->first_available_user_msix_interrupt; + hw_ip.number_of_user_interrupts = prop->user_interrupt_count; hw_ip.server_type = prop->server_type; return copy_to_user(out, &hw_ip, @@ -115,6 +116,23 @@ static int hw_events_info(struct hl_device *hdev, bool aggregate, return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0; } +static int events_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + u32 max_size = args->return_size; + u64 events_mask; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((max_size < sizeof(u64)) || (!out)) + return -EINVAL; + + mutex_lock(&hpriv->notifier_event.lock); + events_mask = hpriv->notifier_event.events_mask; + hpriv->notifier_event.events_mask = 0; + mutex_unlock(&hpriv->notifier_event.lock); + + return copy_to_user(out, &events_mask, sizeof(u64)) ? -EFAULT : 0; +} + static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { struct hl_device *hdev = hpriv->hdev; @@ -251,13 +269,12 @@ static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, - &clk_rate.max_clk_rate_mhz); + rc = hl_fw_get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, &clk_rate.max_clk_rate_mhz); if (rc) return rc; - return copy_to_user(out, &clk_rate, - min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0; + return copy_to_user(out, &clk_rate, min_t(size_t, max_size, sizeof(clk_rate))) + ? -EFAULT : 0; } static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args) @@ -498,6 +515,8 @@ static int open_stats_info(struct hl_fpriv *hpriv, struct hl_info_args *args) open_stats_info.last_open_period_ms = jiffies64_to_msecs( hdev->last_open_session_duration_jif); open_stats_info.open_counter = hdev->open_counter; + open_stats_info.is_compute_ctx_active = hdev->is_compute_ctx_active; + open_stats_info.compute_ctx_in_release = hdev->compute_ctx_in_release; return copy_to_user(out, &open_stats_info, min((size_t) max_size, sizeof(open_stats_info))) ? 
-EFAULT : 0;
@@ -550,7 +569,7 @@ static int last_err_open_dev_info(struct hl_fpriv *hpriv, struct hl_info_args *a
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	info.timestamp = ktime_to_ns(hdev->last_error.open_dev_timestamp);
+	info.timestamp = ktime_to_ns(hdev->last_successful_open_ktime);
 
 	return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
 }
@@ -565,8 +584,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	info.seq = hdev->last_error.cs_timeout_seq;
-	info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout_timestamp);
+	info.seq = hdev->last_error.cs_timeout.seq;
+	info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp);
 
 	return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
 }
@@ -581,16 +600,74 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	info.timestamp = ktime_to_ns(hdev->last_error.razwi_timestamp);
-	info.addr = hdev->last_error.razwi_addr;
-	info.engine_id_1 = hdev->last_error.razwi_engine_id_1;
-	info.engine_id_2 = hdev->last_error.razwi_engine_id_2;
-	info.no_engine_id = hdev->last_error.razwi_non_engine_initiator;
-	info.error_type = hdev->last_error.razwi_type;
+	info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp);
+	info.addr = hdev->last_error.razwi.addr;
+	info.engine_id_1 = hdev->last_error.razwi.engine_id_1;
+	info.engine_id_2 = hdev->last_error.razwi.engine_id_2;
+	info.no_engine_id = hdev->last_error.razwi.non_engine_initiator;
+	info.error_type = hdev->last_error.razwi.type;
+
+	return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
+}
+
+static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	struct hl_info_dev_memalloc_page_sizes info = {0};
+	struct hl_device *hdev = hpriv->hdev;
+	u32 max_size = args->return_size;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	/*
+	 * Future ASICs that will support multiple DRAM page sizes will support only "powers of 2"
+	 * pages (unlike some of the older ASICs, which also support non-power-of-2 page sizes).
+	 * For this reason, for all ASICs that do not support multiple page sizes, the function
+	 * will return an empty bitmask, indicating that multiple page sizes are not supported.
+	 */
+	hdev->asic_funcs->get_valid_dram_page_orders(&info);
+
+	return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
+}
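The cs_timeout/razwi handlers above read the per-error records introduced in the earlier habanalabs.h hunk; the write_disable atomic in each record is what keeps only the first (root cause) occurrence until it is re-armed on the next device open. A minimal sketch of the recording side under that assumption (the helper name capture_cs_timeout is illustrative, not a function from the patch):

	static void capture_cs_timeout(struct cs_timeout_info *info, u64 seq)
	{
		/* one-shot gate: only the first timeout since the last open wins */
		if (atomic_cmpxchg(&info->write_disable, 0, 1))
			return;

		info->timestamp = ktime_get();
		info->seq = seq;
	}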
+
+static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	int rc;
+
+	/* check if there is already an eventfd registered for this process */
+	mutex_lock(&hpriv->notifier_event.lock);
+	if (hpriv->notifier_event.eventfd) {
+		mutex_unlock(&hpriv->notifier_event.lock);
+		return -EINVAL;
+	}
+
+	hpriv->notifier_event.eventfd = eventfd_ctx_fdget(args->eventfd);
+	if (IS_ERR(hpriv->notifier_event.eventfd)) {
+		rc = PTR_ERR(hpriv->notifier_event.eventfd);
+		hpriv->notifier_event.eventfd = NULL;
+		mutex_unlock(&hpriv->notifier_event.lock);
+		return rc;
+	}
+
+	mutex_unlock(&hpriv->notifier_event.lock);
+	return 0;
+}
+
+static int eventfd_unregister(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	mutex_lock(&hpriv->notifier_event.lock);
+	if (!hpriv->notifier_event.eventfd) {
+		mutex_unlock(&hpriv->notifier_event.lock);
+		return -EINVAL;
+	}
+
+	eventfd_ctx_put(hpriv->notifier_event.eventfd);
+	hpriv->notifier_event.eventfd = NULL;
+	mutex_unlock(&hpriv->notifier_event.lock);
+	return 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -641,6 +718,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_RAZWI_EVENT:
 		return razwi_info(hpriv, args);
 
+	case HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES:
+		return dev_mem_alloc_page_sizes_info(hpriv, args);
+
+	case HL_INFO_GET_EVENTS:
+		return events_info(hpriv, args);
+
 	default:
 		break;
 	}
@@ -691,6 +774,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_DRAM_PENDING_ROWS:
 		return dram_pending_rows_info(hpriv, args);
 
+	case HL_INFO_REGISTER_EVENTFD:
+		return eventfd_register(hpriv, args);
+
+	case HL_INFO_UNREGISTER_EVENTFD:
+		return eventfd_unregister(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -EINVAL;
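Taken together with the HL_INFO_GET_EVENTS read-and-clear handler above, the intended userspace flow is: register an eventfd, poll it, and on each wakeup consume the counter and fetch the pending bits. A hypothetical userspace sketch with the ioctl plumbing elided (only the eventfd/poll calls are standard libc; dev_fd is a placeholder for the opened device):

	#include <stdint.h>
	#include <poll.h>
	#include <unistd.h>
	#include <sys/eventfd.h>

	int efd = eventfd(0, EFD_CLOEXEC);
	/* hand efd to the driver via an HL_INFO_REGISTER_EVENTFD ioctl */

	struct pollfd pfd = { .fd = efd, .events = POLLIN };
	while (poll(&pfd, 1, -1) > 0) {
		uint64_t cnt;

		read(efd, &cnt, sizeof(cnt));	/* consume the eventfd counter */
		/* an HL_INFO_GET_EVENTS ioctl now returns and clears the mask */
	}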
diff --git a/drivers/misc/habanalabs/common/hwmgr.c b/drivers/misc/habanalabs/common/hwmgr.c
deleted file mode 100644
index 5451019f143f..000000000000
--- a/drivers/misc/habanalabs/common/hwmgr.c
+++ /dev/null
@@ -1,117 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2019-2021 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
-{
-	hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
-			hdev->asic_prop.max_freq_value);
-}
-
-int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
-{
-	long value;
-
-	if (!hl_device_operational(hdev, NULL))
-		return -ENODEV;
-
-	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
-
-	if (value < 0) {
-		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
-			value);
-		return value;
-	}
-
-	*max_clk = (value / 1000 / 1000);
-
-	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
-
-	if (value < 0) {
-		dev_err(hdev->dev,
-			"Failed to retrieve device current clock %ld\n",
-			value);
-		return value;
-	}
-
-	*cur_clk = (value / 1000 / 1000);
-
-	return 0;
-}
-
-static ssize_t clk_max_freq_mhz_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct hl_device *hdev = dev_get_drvdata(dev);
-	long value;
-
-	if (!hl_device_operational(hdev, NULL))
-		return -ENODEV;
-
-	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
-
-	hdev->asic_prop.max_freq_value = value;
-
-	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
-}
-
-static ssize_t clk_max_freq_mhz_store(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
-{
-	struct hl_device *hdev = dev_get_drvdata(dev);
-	int rc;
-	u64 value;
-
-	if (!hl_device_operational(hdev, NULL)) {
-		count = -ENODEV;
-		goto fail;
-	}
-
-	rc = kstrtoull(buf, 0, &value);
-	if (rc) {
-		count = -EINVAL;
-		goto fail;
-	}
-
-	hdev->asic_prop.max_freq_value = value * 1000 * 1000;
-
-	hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
-			hdev->asic_prop.max_freq_value);
-
-fail:
-	return count;
-}
-
-static ssize_t clk_cur_freq_mhz_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct hl_device *hdev = dev_get_drvdata(dev);
-	long value;
-
-	if (!hl_device_operational(hdev, NULL))
-		return -ENODEV;
-
-	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
-
-	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
-}
-
-static DEVICE_ATTR_RW(clk_max_freq_mhz);
-static DEVICE_ATTR_RO(clk_cur_freq_mhz);
-
-static struct attribute *hl_dev_attrs[] = {
-	&dev_attr_clk_max_freq_mhz.attr,
-	&dev_attr_clk_cur_freq_mhz.attr,
-	NULL,
-};
-
-void hl_add_device_attr(struct hl_device *hdev,
-			struct attribute_group *dev_attr_grp)
-{
-	dev_attr_grp->attrs = hl_dev_attrs;
-}
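With hwmgr.c removed, the clk_*_freq_mhz attributes are evidently rewired through the hl_sysfs_add_dev_clk_attr() and hl_fw_get_frequency() prototypes declared earlier in this diff. A plausible shape of that replacement wiring, sketched purely from those prototypes (the attribute array name and the new file location are assumptions, not taken from the patch):

	static struct attribute *hl_dev_clk_attrs[] = {
		&dev_attr_clk_max_freq_mhz.attr,
		&dev_attr_clk_cur_freq_mhz.attr,
		NULL,
	};

	void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev,
					struct attribute_group *dev_clk_attr_grp)
	{
		/* same pattern as the removed hl_add_device_attr(), clock-only */
		dev_clk_attr_grp->attrs = hl_dev_clk_attrs;
	}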
diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
index 1b6bdc900c26..8500e15ef743 100644
--- a/drivers/misc/habanalabs/common/irq.c
+++ b/drivers/misc/habanalabs/common/irq.c
@@ -137,22 +137,137 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+/*
+ * hl_ts_free_objects - handler of the free objects workqueue.
+ * This function puts the refcount of the objects that the registration node
+ * took a refcount on.
+ * @work: workqueue object pointer
+ */
+static void hl_ts_free_objects(struct work_struct *work)
+{
+	struct timestamp_reg_work_obj *job =
+			container_of(work, struct timestamp_reg_work_obj, free_obj);
+	struct timestamp_reg_free_node *free_obj, *temp_free_obj;
+	struct list_head *free_list_head = job->free_obj_head;
+	struct hl_device *hdev = job->hdev;
+
+	list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
+		dev_dbg(hdev->dev, "About to put refcount to buf (%p) cq_cb(%p)\n",
+					free_obj->buf,
+					free_obj->cq_cb);
+
+		hl_mmap_mem_buf_put(free_obj->buf);
+		hl_cb_put(free_obj->cq_cb);
+		kfree(free_obj);
+	}
+
+	kfree(free_list_head);
+	kfree(job);
+}
+
+/*
+ * This function is called with the wait_list_lock spinlock taken. It sets the
+ * timestamp and deletes the registration node from the wait list.
+ * Because we are protected by the spinlock here, we cannot simply put the
+ * refcount of the objects at this point: the release function may be invoked,
+ * and it runs long logic (which might also sleep) that cannot be handled in
+ * irq context. So instead we fill a list with "put" job nodes and then send
+ * that list to a dedicated workqueue to do the actual put.
+ */
+static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
+						struct list_head **free_list)
+{
+	struct timestamp_reg_free_node *free_node;
+	u64 timestamp;
+
+	if (!(*free_list)) {
+		/* Alloc/Init the timestamp registration free objects list */
+		*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
+		if (!(*free_list))
+			return -ENOMEM;
+
+		INIT_LIST_HEAD(*free_list);
+	}
+
+	free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
+	if (!free_node)
+		return -ENOMEM;
+
+	timestamp = ktime_get_ns();
+
+	*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
+
+	dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
+			pend->ts_reg_info.timestamp_kernel_addr,
+			*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
+
+	list_del(&pend->wait_list_node);
+
+	/* Mark kernel CB node as free */
+	pend->ts_reg_info.in_use = 0;
+
+	/* Putting the refcount for ts_buff and cq_cb objects will be handled
+	 * in workqueue context, just add job to free_list.
+	 */
+	free_node->buf = pend->ts_reg_info.buf;
+	free_node->cq_cb = pend->ts_reg_info.cq_cb;
+	list_add(&free_node->free_objects_node, *free_list);
+
+	return 0;
+}
+
 static void handle_user_cq(struct hl_device *hdev,
 			struct hl_user_interrupt *user_cq)
 {
-	struct hl_user_pending_interrupt *pend;
+	struct hl_user_pending_interrupt *pend, *temp_pend;
+	struct list_head *ts_reg_free_list_head = NULL;
+	struct timestamp_reg_work_obj *job;
+	bool reg_node_handle_fail = false;
 	ktime_t now = ktime_get();
+	int rc;
+
+	/* For registration nodes:
+	 * As part of handling the registration nodes, we should put the
+	 * refcount of some objects. The problem is that we cannot do that
+	 * under the spinlock, or in irq handler context at all (since release
+	 * functions are long and might sleep), so we need to handle that part
+	 * in workqueue context.
+	 * To avoid handling a kmalloc failure, which would compel us to roll
+	 * back actions and move nodes hanging on the free list back to the
+	 * interrupt wait list, we always allocate the WQ job at the beginning.
+ */ + job = kmalloc(sizeof(*job), GFP_ATOMIC); + if (!job) + return; spin_lock(&user_cq->wait_list_lock); - list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) { - if ((pend->cq_kernel_addr && - *(pend->cq_kernel_addr) >= pend->cq_target_value) || + list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) { + if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) || !pend->cq_kernel_addr) { - pend->fence.timestamp = now; - complete_all(&pend->fence.completion); + if (pend->ts_reg_info.buf) { + if (!reg_node_handle_fail) { + rc = handle_registration_node(hdev, pend, + &ts_reg_free_list_head); + if (rc) + reg_node_handle_fail = true; + } + } else { + /* Handle wait target value node */ + pend->fence.timestamp = now; + complete_all(&pend->fence.completion); + } } } spin_unlock(&user_cq->wait_list_lock); + + if (ts_reg_free_list_head) { + INIT_WORK(&job->free_obj, hl_ts_free_objects); + job->free_obj_head = ts_reg_free_list_head; + job->hdev = hdev; + queue_work(hdev->ts_free_obj_wq, &job->free_obj); + } else { + kfree(job); + } } /** @@ -167,10 +282,6 @@ irqreturn_t hl_irq_handler_user_cq(int irq, void *arg) struct hl_user_interrupt *user_cq = arg; struct hl_device *hdev = user_cq->hdev; - dev_dbg(hdev->dev, - "got user completion interrupt id %u", - user_cq->interrupt_id); - /* Handle user cq interrupts registered on all interrupts */ handle_user_cq(hdev, &hdev->common_user_interrupt); diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index c1eefaebacb6..663dd7e589d4 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -11,6 +11,7 @@ #include <linux/uaccess.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <linux/pci-p2pdma.h> MODULE_IMPORT_NS(DMA_BUF); @@ -20,6 +21,34 @@ MODULE_IMPORT_NS(DMA_BUF); /* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */ #define DRAM_POOL_PAGE_SIZE SZ_8M +static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, + struct hl_mem_in *args, u64 *handle); + +static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u32 *page_size) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 psize; + + /* + * for ASIC that supports setting the allocation page size by user we will address + * user's choice only if it is not 0 (as 0 means taking the default page size) + */ + if (prop->supports_user_set_page_size && args->alloc.page_size) { + psize = args->alloc.page_size; + + if (!hdev->asic_funcs->is_valid_dram_page_size(psize)) { + dev_err(hdev->dev, "user page size (%#x) is not valid\n", psize); + return -EINVAL; + } + } else { + psize = prop->device_mem_alloc_default_page_size; + } + + *page_size = psize; + + return 0; +} + /* * The va ranges in context object contain a list with the available chunks of * device virtual memory. 
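set_alloc_page_size() above delegates validation to the new is_valid_dram_page_size() ASIC callback (declared in the hl_asic_funcs hunk earlier in this diff). For hardware whose MMU backs only a few fixed DRAM page sizes, a plausible callback is a simple whitelist; the sizes below are illustrative, not taken from any specific ASIC:

	static bool example_is_valid_dram_page_size(u32 page_size)
	{
		switch (page_size) {
		case SZ_4K:
		case SZ_2M:
		case SZ_1G:
			return true;
		}

		return false;
	}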
@@ -61,11 +90,15 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, struct hl_vm_phys_pg_pack *phys_pg_pack; u64 paddr = 0, total_size, num_pgs, i; u32 num_curr_pgs, page_size; - int handle, rc; bool contiguous; + int handle, rc; num_curr_pgs = 0; - page_size = hdev->asic_prop.dram_page_size; + + rc = set_alloc_page_size(hdev, args, &page_size); + if (rc) + return rc; + num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size); total_size = num_pgs * page_size; @@ -77,10 +110,14 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, contiguous = args->flags & HL_MEM_CONTIGUOUS; if (contiguous) { - paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size); + if (is_power_of_2(page_size)) + paddr = (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool, + total_size, NULL, page_size); + else + paddr = gen_pool_alloc(vm->dram_pg_pool, total_size); if (!paddr) { dev_err(hdev->dev, - "failed to allocate %llu contiguous pages with total size of %llu\n", + "Cannot allocate %llu contiguous pages with total size of %llu\n", num_pgs, total_size); return -ENOMEM; } @@ -111,12 +148,18 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, phys_pg_pack->pages[i] = paddr + i * page_size; } else { for (i = 0 ; i < num_pgs ; i++) { - phys_pg_pack->pages[i] = (u64) gen_pool_alloc( - vm->dram_pg_pool, - page_size); + if (is_power_of_2(page_size)) + phys_pg_pack->pages[i] = + (uintptr_t)gen_pool_dma_alloc_align(vm->dram_pg_pool, + page_size, NULL, + page_size); + else + phys_pg_pack->pages[i] = gen_pool_alloc(vm->dram_pg_pool, + page_size); + if (!phys_pg_pack->pages[i]) { dev_err(hdev->dev, - "Failed to allocate device memory (out of memory)\n"); + "Cannot allocate device memory (out of memory)\n"); rc = -ENOMEM; goto page_err; } @@ -195,19 +238,18 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, goto pin_err; } - rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, - userptr->sgt->nents, DMA_BIDIRECTIONAL); - if (rc) { - dev_err(hdev->dev, "failed to map sgt with DMA region\n"); - goto dma_map_err; - } - userptr->dma_mapped = true; userptr->dir = DMA_BIDIRECTIONAL; userptr->vm_type = VM_TYPE_USERPTR; *p_userptr = userptr; + rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL); + if (rc) { + dev_err(hdev->dev, "failed to map sgt with DMA region\n"); + goto dma_map_err; + } + return 0; dma_map_err: @@ -652,7 +694,7 @@ static u64 get_va_block(struct hl_device *hdev, continue; /* - * In case hint address is 0, and arc_hints_range_reservation + * In case hint address is 0, and hints_range_reservation * property enabled, then avoid allocating va blocks from the * range reserved for hint addresses */ @@ -858,7 +900,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, * consecutive block. 
*/ total_npages = 0; - for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { + for_each_sgtable_dma_sg(userptr->sgt, sg, i) { npages = hl_get_sg_info(sg, &dma_addr); total_npages += npages; @@ -887,7 +929,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, phys_pg_pack->total_size = total_npages * page_size; j = 0; - for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { + for_each_sgtable_dma_sg(userptr->sgt, sg, i) { npages = hl_get_sg_info(sg, &dma_addr); /* align down to physical page size and save the offset */ @@ -1060,21 +1102,24 @@ static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args, * map a device virtual block to this pages and return the start address of * this block. */ -static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, - u64 *device_addr) +static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device_addr) { - struct hl_device *hdev = ctx->hdev; - struct hl_vm *vm = &hdev->vm; struct hl_vm_phys_pg_pack *phys_pg_pack; + enum hl_va_range_type va_range_type = 0; + struct hl_device *hdev = ctx->hdev; struct hl_userptr *userptr = NULL; + u32 handle = 0, va_block_align; struct hl_vm_hash_node *hnode; + struct hl_vm *vm = &hdev->vm; struct hl_va_range *va_range; - enum vm_type *vm_type; + bool is_userptr, do_prefetch; u64 ret_vaddr, hint_addr; - u32 handle = 0, va_block_align; + enum vm_type *vm_type; int rc; - bool is_userptr = args->flags & HL_MEM_USERPTR; - enum hl_va_range_type va_range_type = 0; + + /* set map flags */ + is_userptr = args->flags & HL_MEM_USERPTR; + do_prefetch = hdev->supports_mmu_prefetch && (args->flags & HL_MEM_PREFETCH); /* Assume failure */ *device_addr = 0; @@ -1199,19 +1244,27 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); if (rc) { - mutex_unlock(&ctx->mmu_lock); - dev_err(hdev->dev, "mapping page pack failed for handle %u\n", - handle); + dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle); goto map_err; } rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV, ctx->asid, ret_vaddr, phys_pg_pack->total_size); + if (rc) + goto map_err; mutex_unlock(&ctx->mmu_lock); - if (rc) - goto map_err; + /* + * prefetch is done upon user's request. it is performed in WQ as and so can + * be outside the MMU lock. 
the operation itself is already protected by the mmu lock + */ + if (do_prefetch) { + rc = hl_mmu_prefetch_cache_range(ctx, *vm_type, ctx->asid, ret_vaddr, + phys_pg_pack->total_size); + if (rc) + goto map_err; + } ret_vaddr += phys_pg_pack->offset; @@ -1230,6 +1283,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, return rc; map_err: + mutex_unlock(&ctx->mmu_lock); + if (add_va_block(hdev, va_range, ret_vaddr, ret_vaddr + phys_pg_pack->total_size - 1)) dev_warn(hdev->dev, @@ -1467,7 +1522,7 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) vma->vm_ops = &hw_block_vm_ops; vma->vm_private_data = lnode; - hl_ctx_get(hdev, ctx); + hl_ctx_get(ctx); rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size); if (rc) { @@ -1777,7 +1832,7 @@ static int export_dmabuf_common(struct hl_ctx *ctx, } hl_dmabuf->ctx = ctx; - hl_ctx_get(hdev, hl_dmabuf->ctx); + hl_ctx_get(hl_dmabuf->ctx); *dmabuf_fd = fd; @@ -1967,16 +2022,15 @@ err_dec_exporting_cnt: static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) { struct hl_device *hdev = hpriv->hdev; - struct hl_ctx *ctx = hpriv->ctx; u64 block_handle, device_addr = 0; + struct hl_ctx *ctx = hpriv->ctx; u32 handle = 0, block_size; - int rc, dmabuf_fd = -EBADF; + int rc; switch (args->in.op) { case HL_MEM_OP_ALLOC: if (args->in.alloc.mem_size == 0) { - dev_err(hdev->dev, - "alloc size must be larger than 0\n"); + dev_err(hdev->dev, "alloc size must be larger than 0\n"); rc = -EINVAL; goto out; } @@ -1997,15 +2051,14 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) case HL_MEM_OP_MAP: if (args->in.flags & HL_MEM_USERPTR) { - device_addr = args->in.map_host.host_virt_addr; - rc = 0; + dev_err(hdev->dev, "Failed to map host memory when MMU is disabled\n"); + rc = -EPERM; } else { - rc = get_paddr_from_handle(ctx, &args->in, - &device_addr); + rc = get_paddr_from_handle(ctx, &args->in, &device_addr); + memset(args, 0, sizeof(*args)); + args->out.device_virt_addr = device_addr; } - memset(args, 0, sizeof(*args)); - args->out.device_virt_addr = device_addr; break; case HL_MEM_OP_UNMAP: @@ -2013,22 +2066,19 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) break; case HL_MEM_OP_MAP_BLOCK: - rc = map_block(hdev, args->in.map_block.block_addr, - &block_handle, &block_size); + rc = map_block(hdev, args->in.map_block.block_addr, &block_handle, &block_size); args->out.block_handle = block_handle; args->out.block_size = block_size; break; case HL_MEM_OP_EXPORT_DMABUF_FD: - rc = export_dmabuf_from_addr(ctx, - args->in.export_dmabuf_fd.handle, - args->in.export_dmabuf_fd.mem_size, - args->in.flags, - &dmabuf_fd); - memset(args, 0, sizeof(*args)); - args->out.fd = dmabuf_fd; + dev_err(hdev->dev, "Failed to export dma-buf object when MMU is disabled\n"); + rc = -EPERM; break; + case HL_MEM_OP_TS_ALLOC: + rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle); + break; default: dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); rc = -EINVAL; @@ -2039,6 +2089,106 @@ out: return rc; } +static void ts_buff_release(struct hl_mmap_mem_buf *buf) +{ + struct hl_ts_buff *ts_buff = buf->private; + + vfree(ts_buff->kernel_buff_address); + vfree(ts_buff->user_buff_address); + kfree(ts_buff); +} + +static int hl_ts_mmap(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args) +{ + struct hl_ts_buff *ts_buff = buf->private; + + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE; + return 
remap_vmalloc_range(vma, ts_buff->user_buff_address, 0);
+}
+
+static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
+{
+	struct hl_ts_buff *ts_buff = NULL;
+	u32 size, num_elements;
+	void *p;
+
+	num_elements = *(u32 *)args;
+
+	ts_buff = kzalloc(sizeof(*ts_buff), GFP_KERNEL);
+	if (!ts_buff)
+		return -ENOMEM;
+
+	/* Allocate the user buffer */
+	size = num_elements * sizeof(u64);
+	p = vmalloc_user(size);
+	if (!p)
+		goto free_mem;
+
+	ts_buff->user_buff_address = p;
+	buf->mappable_size = size;
+
+	/* Allocate the internal kernel buffer */
+	size = num_elements * sizeof(struct hl_user_pending_interrupt);
+	p = vmalloc(size);
+	if (!p)
+		goto free_user_buff;
+
+	ts_buff->kernel_buff_address = p;
+	ts_buff->kernel_buff_size = size;
+
+	buf->private = ts_buff;
+
+	return 0;
+
+free_user_buff:
+	vfree(ts_buff->user_buff_address);
+free_mem:
+	kfree(ts_buff);
+	return -ENOMEM;
+}
+
+static struct hl_mmap_mem_buf_behavior hl_ts_behavior = {
+	.topic = "TS",
+	.mem_id = HL_MMAP_TYPE_TS_BUFF,
+	.mmap = hl_ts_mmap,
+	.alloc = hl_ts_alloc_buf,
+	.release = ts_buff_release,
+};
+
+/**
+ * allocate_timestamps_buffers() - allocate timestamps buffers
+ * This function allocates a ts buffer that will later on be mapped to the user
+ * in order to be able to read the timestamp. In addition, it allocates an
+ * extra buffer for registration management.
+ * Since we cannot fail during registration due to an out-of-memory situation,
+ * we prepare a pool that is used as user interrupt nodes: instead of
+ * dynamically allocating nodes during registration, we pick a node from this
+ * pool. It also adds a node to the mapping hash, which is used to map the
+ * user ts buffer to the internal kernel ts buffer.
+ * @hpriv: pointer to the private data of the fd
+ * @args: ioctl input
+ * @handle: user timestamp buffer handle as an output
+ */
+static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle)
+{
+	struct hl_mem_mgr *mmg = &hpriv->mem_mgr;
+	struct hl_mmap_mem_buf *buf;
+
+	if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) {
+		dev_err(mmg->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
+				args->num_of_elements, TS_MAX_ELEMENTS_NUM);
+		return -EINVAL;
+	}
+
+	buf = hl_mmap_mem_buf_alloc(mmg, &hl_ts_behavior, GFP_KERNEL, &args->num_of_elements);
+	if (!buf)
+		return -ENOMEM;
+
+	*handle = buf->handle;
+
+	return 0;
+}
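Because the returned handle doubles as the mmap offset into the per-fd unified manager (see hl_mem_mgr_mmap() further below), a user process can map the timestamp array directly. A hypothetical userspace flow, with the ioctl plumbing elided (N, dev_fd and handle are placeholders):

	#include <stdint.h>
	#include <sys/mman.h>

	/* args.in.op = HL_MEM_OP_TS_ALLOC, args.in.num_of_elements = N;
	 * the driver returns the buffer handle in args.out.handle
	 */
	uint64_t *ts = mmap(NULL, N * sizeof(uint64_t), PROT_READ,
			    MAP_SHARED, dev_fd, (off_t)handle);
	/* slot i is written by the interrupt path (handle_registration_node()
	 * above) when the registered interrupt fires
	 */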
 int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 {
 	enum hl_device_status status;
@@ -2154,6 +2304,9 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 		args->out.fd = dmabuf_fd;
 		break;
 
+	case HL_MEM_OP_TS_ALLOC:
+		rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
+		break;
 	default:
 		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
 		rc = -EINVAL;
@@ -2295,9 +2448,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
 	hl_debugfs_remove_userptr(hdev, userptr);
 
 	if (userptr->dma_mapped)
-		hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
-							userptr->sgt->nents,
-							userptr->dir);
+		hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
 
 	unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
 	kvfree(userptr->pages);
@@ -2607,11 +2758,12 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
  */
 void hl_vm_ctx_fini(struct hl_ctx *ctx)
 {
+	struct hl_vm_phys_pg_pack *phys_pg_list, *tmp_phys_node;
 	struct hl_device *hdev = ctx->hdev;
-	struct hl_vm *vm = &hdev->vm;
-	struct hl_vm_phys_pg_pack *phys_pg_list;
 	struct hl_vm_hash_node *hnode;
+	struct hl_vm *vm = &hdev->vm;
 	struct hlist_node *tmp_node;
+	struct list_head free_list;
 	struct hl_mem_in args;
 	int i;
@@ -2644,19 +2796,24 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 
 	mutex_unlock(&ctx->mmu_lock);
 
+	INIT_LIST_HEAD(&free_list);
+
 	spin_lock(&vm->idr_lock);
 	idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
 		if (phys_pg_list->asid == ctx->asid) {
 			dev_dbg(hdev->dev,
 				"page list 0x%px of asid %d is still alive\n",
 				phys_pg_list, ctx->asid);
-			atomic64_sub(phys_pg_list->total_size,
-					&hdev->dram_used_mem);
-			free_phys_pg_pack(hdev, phys_pg_list);
+
+			atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem);
 			idr_remove(&vm->phys_pg_pack_handles, i);
+			list_add(&phys_pg_list->node, &free_list);
 		}
 	spin_unlock(&vm->idr_lock);
 
+	list_for_each_entry_safe(phys_pg_list, tmp_phys_node, &free_list, node)
+		free_phys_pg_pack(hdev, phys_pg_list);
+
 	va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]);
 	va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]);
 
diff --git a/drivers/misc/habanalabs/common/memory_mgr.c b/drivers/misc/habanalabs/common/memory_mgr.c
new file mode 100644
index 000000000000..ea5f2bd31b0a
--- /dev/null
+++ b/drivers/misc/habanalabs/common/memory_mgr.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2022 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+/**
+ * hl_mmap_mem_buf_get - increase the buffer refcount and return a pointer to
+ *                       the buffer descriptor.
+ *
+ * @mmg: parent unified memory manager
+ * @handle: requested buffer handle
+ *
+ * Find the buffer in the store and return a pointer to its descriptor.
+ * Increase buffer refcount. If not found - return NULL.
+ */
+struct hl_mmap_mem_buf *hl_mmap_mem_buf_get(struct hl_mem_mgr *mmg, u64 handle)
+{
+	struct hl_mmap_mem_buf *buf;
+
+	spin_lock(&mmg->lock);
+	buf = idr_find(&mmg->handles, lower_32_bits(handle >> PAGE_SHIFT));
+	if (!buf) {
+		spin_unlock(&mmg->lock);
+		dev_warn(mmg->dev,
+			 "Buff get failed, no match to handle %#llx\n", handle);
+		return NULL;
+	}
+	kref_get(&buf->refcount);
+	spin_unlock(&mmg->lock);
+	return buf;
+}
+
+/**
+ * hl_mmap_mem_buf_destroy - destroy the unused buffer
+ *
+ * @buf: memory manager buffer descriptor
+ *
+ * Internal function, used as a final step of buffer release. Shall be invoked
+ * only when the buffer is no longer in use (removed from idr). Will call the
+ * release callback (if applicable), and free the memory.
+ */
+static void hl_mmap_mem_buf_destroy(struct hl_mmap_mem_buf *buf)
+{
+	if (buf->behavior->release)
+		buf->behavior->release(buf);
+
+	kfree(buf);
+}
+
+/**
+ * hl_mmap_mem_buf_release - release buffer
+ *
+ * @kref: kref that reached 0.
+ *
+ * Internal function, used as a kref release callback, when the last user of
+ * the buffer is released. Shall not be called from an interrupt context.
+ */
+static void hl_mmap_mem_buf_release(struct kref *kref)
+{
+	struct hl_mmap_mem_buf *buf =
+		container_of(kref, struct hl_mmap_mem_buf, refcount);
+
+	spin_lock(&buf->mmg->lock);
+	idr_remove(&buf->mmg->handles, lower_32_bits(buf->handle >> PAGE_SHIFT));
+	spin_unlock(&buf->mmg->lock);
+
+	hl_mmap_mem_buf_destroy(buf);
+}
+
+/**
+ * hl_mmap_mem_buf_remove_idr_locked - remove handle from idr
+ *
+ * @kref: kref that reached 0.
+ *
+ * Internal function, used for kref put by handle. Assumes mmg lock is taken.
+ * Will remove the buffer from idr, without destroying it.
+ */
+static void hl_mmap_mem_buf_remove_idr_locked(struct kref *kref)
+{
+	struct hl_mmap_mem_buf *buf =
+		container_of(kref, struct hl_mmap_mem_buf, refcount);
+
+	idr_remove(&buf->mmg->handles, lower_32_bits(buf->handle >> PAGE_SHIFT));
+}
+
+/**
+ * hl_mmap_mem_buf_put - decrease the reference to the buffer
+ *
+ * @buf: memory manager buffer descriptor
+ *
+ * Decrease the reference to the buffer, and release it if it was the last one.
+ * Shall not be called from an interrupt context.
+ */
+int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf)
+{
+	return kref_put(&buf->refcount, hl_mmap_mem_buf_release);
+}
+
+/**
+ * hl_mmap_mem_buf_put_handle - decrease the reference to the buffer with the
+ *                              given handle.
+ *
+ * @mmg: parent unified memory manager
+ * @handle: requested buffer handle
+ *
+ * Decrease the reference to the buffer, and release it if it was the last one.
+ * Shall not be called from an interrupt context. Return -EINVAL if handle was
+ * not found, else return the put outcome (0 or 1).
+ */
+int hl_mmap_mem_buf_put_handle(struct hl_mem_mgr *mmg, u64 handle)
+{
+	struct hl_mmap_mem_buf *buf;
+
+	spin_lock(&mmg->lock);
+	buf = idr_find(&mmg->handles, lower_32_bits(handle >> PAGE_SHIFT));
+	if (!buf) {
+		spin_unlock(&mmg->lock);
+		dev_dbg(mmg->dev,
+			"Buff put failed, no match to handle %#llx\n", handle);
+		return -EINVAL;
+	}
+
+	if (kref_put(&buf->refcount, hl_mmap_mem_buf_remove_idr_locked)) {
+		spin_unlock(&mmg->lock);
+		hl_mmap_mem_buf_destroy(buf);
+		return 1;
+	}
+
+	spin_unlock(&mmg->lock);
+	return 0;
+}
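The store is a classic handle-to-refcounted-object pattern: every successful lookup takes a reference that the caller must drop. An illustrative kernel-side pairing of the helpers above (not a snippet from the patch):

	struct hl_mmap_mem_buf *buf;

	buf = hl_mmap_mem_buf_get(mmg, handle);	/* +1 ref, NULL for a stale handle */
	if (!buf)
		return -EINVAL;

	/* ... access buf->private, the behavior-specific payload ... */

	hl_mmap_mem_buf_put(buf);	/* may destroy buf if this was the last ref */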
+
+/**
+ * hl_mmap_mem_buf_alloc - allocate a new mappable buffer
+ *
+ * @mmg: parent unified memory manager
+ * @behavior: behavior object describing this buffer polymorphic behavior
+ * @gfp: gfp flags to use for the memory allocations
+ * @args: additional args passed to behavior->alloc
+ *
+ * Allocate and register a new memory buffer inside the given memory manager.
+ * Return the pointer to the new buffer on success or NULL on failure.
+ */
+struct hl_mmap_mem_buf *
+hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg,
+		      struct hl_mmap_mem_buf_behavior *behavior, gfp_t gfp,
+		      void *args)
+{
+	struct hl_mmap_mem_buf *buf;
+	int rc;
+
+	buf = kzalloc(sizeof(*buf), gfp);
+	if (!buf)
+		return NULL;
+
+	spin_lock(&mmg->lock);
+	rc = idr_alloc(&mmg->handles, buf, 1, 0, GFP_ATOMIC);
+	spin_unlock(&mmg->lock);
+	if (rc < 0) {
+		dev_err(mmg->dev,
+			"%s: Failed to allocate IDR for a new buffer, rc=%d\n",
+			behavior->topic, rc);
+		goto free_buf;
+	}
+
+	buf->mmg = mmg;
+	buf->behavior = behavior;
+	buf->handle = (((u64)rc | buf->behavior->mem_id) << PAGE_SHIFT);
+	kref_init(&buf->refcount);
+
+	rc = buf->behavior->alloc(buf, gfp, args);
+	if (rc) {
+		dev_err(mmg->dev, "%s: Failure in buffer alloc callback %d\n",
+			behavior->topic, rc);
+		goto remove_idr;
+	}
+
+	return buf;
+
+remove_idr:
+	spin_lock(&mmg->lock);
+	idr_remove(&mmg->handles, lower_32_bits(buf->handle >> PAGE_SHIFT));
+	spin_unlock(&mmg->lock);
+free_buf:
+	kfree(buf);
+	return NULL;
+}
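Adding a new mappable object type then boils down to supplying a behavior descriptor, exactly as hl_ts_behavior does for timestamp buffers earlier in this patch. A sketch with hypothetical callbacks (my_alloc/my_mmap/my_release and my_args are placeholders; a real mem_id must come from the driver's HL_MMAP_TYPE_* encoding):

	static struct hl_mmap_mem_buf_behavior my_behavior = {
		.topic = "EXAMPLE",		/* used only in log messages */
		.mem_id = HL_MMAP_TYPE_TS_BUFF,	/* placeholder type id */
		.alloc = my_alloc,
		.release = my_release,
		.mmap = my_mmap,
	};

	/* registers the buffer and hands back a handle via buf->handle */
	buf = hl_mmap_mem_buf_alloc(&hpriv->mem_mgr, &my_behavior, GFP_KERNEL, &my_args);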
+ */
+static void hl_mmap_mem_buf_vm_close(struct vm_area_struct *vma)
+{
+	struct hl_mmap_mem_buf *buf =
+		(struct hl_mmap_mem_buf *)vma->vm_private_data;
+	long new_mmap_size;
+
+	new_mmap_size = buf->real_mapped_size - (vma->vm_end - vma->vm_start);
+
+	if (new_mmap_size > 0) {
+		buf->real_mapped_size = new_mmap_size;
+		return;
+	}
+
+	atomic_set(&buf->mmap, 0);
+	hl_mmap_mem_buf_put(buf);
+	vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct hl_mmap_mem_buf_vm_ops = {
+	.close = hl_mmap_mem_buf_vm_close
+};
+
+/**
+ * hl_mem_mgr_mmap - map the given buffer to the user
+ *
+ * @mmg: unified memory manager
+ * @vma: the vma object to map the buffer to.
+ * @args: additional args passed to behavior->mmap
+ *
+ * Map the buffer specified by the vma->vm_pgoff to the given vma.
+ */
+int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
+		    void *args)
+{
+	struct hl_mmap_mem_buf *buf;
+	u64 user_mem_size;
+	u64 handle;
+	int rc;
+
+	/* We use the page offset to hold the idr handle and thus we need to
+	 * clear it before doing the mmap itself
+	 */
+	handle = vma->vm_pgoff << PAGE_SHIFT;
+	vma->vm_pgoff = 0;
+
+	/* Reference was taken here */
+	buf = hl_mmap_mem_buf_get(mmg, handle);
+	if (!buf) {
+		dev_err(mmg->dev,
+			"Memory mmap failed, no match to handle %#llx\n", handle);
+		return -EINVAL;
+	}
+
+	/* Validation check */
+	user_mem_size = vma->vm_end - vma->vm_start;
+	if (user_mem_size != ALIGN(buf->mappable_size, PAGE_SIZE)) {
+		dev_err(mmg->dev,
+			"%s: Memory mmap failed, mmap VM size 0x%llx != 0x%llx allocated physical mem size\n",
+			buf->behavior->topic, user_mem_size, buf->mappable_size);
+		rc = -EINVAL;
+		goto put_mem;
+	}
+
+#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
+	if (!access_ok(VERIFY_WRITE, (void __user *)(uintptr_t)vma->vm_start,
+		       user_mem_size)) {
+#else
+	if (!access_ok((void __user *)(uintptr_t)vma->vm_start,
+		       user_mem_size)) {
+#endif
+		dev_err(mmg->dev, "%s: User pointer is invalid - 0x%lx\n",
+			buf->behavior->topic, vma->vm_start);
+
+		rc = -EINVAL;
+		goto put_mem;
+	}
+
+	if (atomic_cmpxchg(&buf->mmap, 0, 1)) {
+		dev_err(mmg->dev,
+			"%s, Memory mmap failed, already mmapped to user\n",
+			buf->behavior->topic);
+		rc = -EINVAL;
+		goto put_mem;
+	}
+
+	vma->vm_ops = &hl_mmap_mem_buf_vm_ops;
+
+	/* Note: We're transferring the memory reference to vma->vm_private_data here. */
+
+	vma->vm_private_data = buf;
+
+	rc = buf->behavior->mmap(buf, vma, args);
+	if (rc) {
+		atomic_set(&buf->mmap, 0);
+		goto put_mem;
+	}
+
+	buf->real_mapped_size = buf->mappable_size;
+	vma->vm_pgoff = handle >> PAGE_SHIFT;
+
+	return 0;
+
+put_mem:
+	hl_mmap_mem_buf_put(buf);
+	return rc;
+}
+
+/**
+ * hl_mem_mgr_init - initialize unified memory manager
+ *
+ * @dev: owner device pointer
+ * @mmg: structure to initialize
+ *
+ * Initialize an instance of unified memory manager
+ */
+void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg)
+{
+	mmg->dev = dev;
+	spin_lock_init(&mmg->lock);
+	idr_init(&mmg->handles);
+}
+
+/**
+ * hl_mem_mgr_fini - release unified memory manager
+ *
+ * @mmg: parent unified memory manager
+ *
+ * Release the unified memory manager. Shall not be called from an interrupt
+ * context.
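+ *
+ * A minimal lifecycle sketch (owner here is a hypothetical structure that
+ * embeds a struct hl_mem_mgr, and behavior is some hl_mmap_mem_buf_behavior
+ * instance):
+ *
+ *   hl_mem_mgr_init(hdev->dev, &owner->mmg);
+ *   buf = hl_mmap_mem_buf_alloc(&owner->mmg, behavior, GFP_KERNEL, NULL);
+ *   ...
+ *   hl_mmap_mem_buf_put(buf);
+ *   hl_mem_mgr_fini(&owner->mmg);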
+ */
+void hl_mem_mgr_fini(struct hl_mem_mgr *mmg)
+{
+	struct hl_mmap_mem_buf *buf;
+	struct idr *idp;
+	const char *topic;
+	u32 id;
+
+	idp = &mmg->handles;
+
+	idr_for_each_entry(idp, buf, id) {
+		topic = buf->behavior->topic;
+		if (hl_mmap_mem_buf_put(buf) != 1)
+			dev_err(mmg->dev,
+				"%s: Buff handle %u for CTX is still alive\n",
+				topic, id);
+	}
+
+	/* TODO: can it happen that some buffer is still in use at this point? */
+
+	idr_destroy(&mmg->handles);
+}
diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index 9153a1f55175..f3734718d94f 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -9,6 +9,20 @@
 
 #include "../habanalabs.h"
 
+/**
+ * hl_mmu_get_funcs() - get MMU functions structure
+ * @hdev: habanalabs device structure.
+ * @pgt_residency: page table residency.
+ * @is_dram_addr: true if we need HMMU functions
+ *
+ * @return appropriate MMU functions structure
+ */
+static struct hl_mmu_funcs *hl_mmu_get_funcs(struct hl_device *hdev, int pgt_residency,
+						bool is_dram_addr)
+{
+	return &hdev->mmu_func[pgt_residency];
+}
+
 bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -122,6 +136,53 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
 }
 
 /*
+ * hl_mmu_get_real_page_size - get real page size to use in map/unmap operation
+ *
+ * @hdev: pointer to device data.
+ * @mmu_prop: MMU properties.
+ * @page_size: page size
+ * @real_page_size: set here the actual page size to use for the operation
+ * @is_dram_addr: true if DRAM address, otherwise false.
+ *
+ * @return 0 on success, otherwise a non-0 error code
+ *
+ * Note that this is a general implementation that can fit most MMU
+ * architectures. However, as it is used as an MMU function:
+ * 1. it shall not be called directly - only through the mmu_func structure instance
+ * 2. each MMU may modify the implementation internally
+ */
+int hl_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
+				u32 page_size, u32 *real_page_size, bool is_dram_addr)
+{
+	/*
+	 * The H/W handles mapping of specific page sizes. Hence if the page
+	 * size is bigger, we break it to sub-pages and map them separately.
+	 */
+	if ((page_size % mmu_prop->page_size) == 0) {
+		*real_page_size = mmu_prop->page_size;
+		return 0;
+	}
+
+	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
+		page_size, mmu_prop->page_size >> 10);
+
+	return -EFAULT;
+}
+
+static struct hl_mmu_properties *hl_mmu_get_prop(struct hl_device *hdev, u32 page_size,
+							bool is_dram_addr)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+	if (is_dram_addr)
+		return &prop->dmmu;
+	else if ((page_size % prop->pmmu_huge.page_size) == 0)
+		return &prop->pmmu_huge;
+
+	return &prop->pmmu;
+}
+
+/*
  * hl_mmu_unmap_page - unmaps a virtual addr
  *
  * @ctx: pointer to the context structure
@@ -142,60 +203,35 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
 * For optimization reasons PCI flush may be requested once after unmapping of
 * large area.
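 *
 * A worked example with illustrative numbers: unmapping a 2MB region through a
 * PMMU whose page size is 4KB is broken internally into 512 sub-page unmaps;
 * a caller tearing down a large area would typically pass flush_pte = false
 * for all calls but the last one, so the expensive flush is done only once.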
*/ -int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, - bool flush_pte) +int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flush_pte) { struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_mmu_properties *mmu_prop; - u64 real_virt_addr; + struct hl_mmu_funcs *mmu_funcs; + int i, pgt_residency, rc = 0; u32 real_page_size, npages; - int i, rc = 0, pgt_residency; + u64 real_virt_addr; bool is_dram_addr; if (!hdev->mmu_enable) return 0; is_dram_addr = hl_is_dram_va(hdev, virt_addr); - - if (is_dram_addr) - mmu_prop = &prop->dmmu; - else if ((page_size % prop->pmmu_huge.page_size) == 0) - mmu_prop = &prop->pmmu_huge; - else - mmu_prop = &prop->pmmu; + mmu_prop = hl_mmu_get_prop(hdev, page_size, is_dram_addr); pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; - /* - * The H/W handles mapping of specific page sizes. Hence if the page - * size is bigger, we break it to sub-pages and unmap them separately. - */ - if ((page_size % mmu_prop->page_size) == 0) { - real_page_size = mmu_prop->page_size; - } else { - /* - * MMU page size may differ from DRAM page size. - * In such case work with the DRAM page size and let the MMU - * scrambling routine to handle this mismatch when - * calculating the address to remove from the MMU page table - */ - if (is_dram_addr && ((page_size % prop->dram_page_size) == 0)) { - real_page_size = prop->dram_page_size; - } else { - dev_err(hdev->dev, - "page size of %u is not %uKB aligned, can't unmap\n", - page_size, mmu_prop->page_size >> 10); + mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr); - return -EFAULT; - } - } + rc = hdev->asic_funcs->mmu_get_real_page_size(hdev, mmu_prop, page_size, &real_page_size, + is_dram_addr); + if (rc) + return rc; npages = page_size / real_page_size; real_virt_addr = virt_addr; for (i = 0 ; i < npages ; i++) { - rc = hdev->mmu_func[pgt_residency].unmap(ctx, - real_virt_addr, is_dram_addr); + rc = mmu_funcs->unmap(ctx, real_virt_addr, is_dram_addr); if (rc) break; @@ -203,7 +239,7 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, } if (flush_pte) - hdev->mmu_func[pgt_residency].flush(ctx); + mmu_funcs->flush(ctx); return rc; } @@ -230,15 +266,15 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, * For optimization reasons PCI flush may be requested once after mapping of * large area. */ -int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, - u32 page_size, bool flush_pte) +int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, + bool flush_pte) { + int i, rc, pgt_residency, mapped_cnt = 0; struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_mmu_properties *mmu_prop; u64 real_virt_addr, real_phys_addr; + struct hl_mmu_funcs *mmu_funcs; u32 real_page_size, npages; - int i, rc, pgt_residency, mapped_cnt = 0; bool is_dram_addr; @@ -246,40 +282,15 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, return 0; is_dram_addr = hl_is_dram_va(hdev, virt_addr); - - if (is_dram_addr) - mmu_prop = &prop->dmmu; - else if ((page_size % prop->pmmu_huge.page_size) == 0) - mmu_prop = &prop->pmmu_huge; - else - mmu_prop = &prop->pmmu; + mmu_prop = hl_mmu_get_prop(hdev, page_size, is_dram_addr); pgt_residency = mmu_prop->host_resident ? 
MMU_HR_PGT : MMU_DR_PGT; + mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr); - /* - * The H/W handles mapping of specific page sizes. Hence if the page - * size is bigger, we break it to sub-pages and map them separately. - */ - if ((page_size % mmu_prop->page_size) == 0) { - real_page_size = mmu_prop->page_size; - } else if (is_dram_addr && ((page_size % prop->dram_page_size) == 0) && - (prop->dram_page_size < mmu_prop->page_size)) { - /* - * MMU page size may differ from DRAM page size. - * In such case work with the DRAM page size and let the MMU - * scrambling routine handle this mismatch when calculating - * the address to place in the MMU page table. (in that case - * also make sure that the dram_page_size smaller than the - * mmu page size) - */ - real_page_size = prop->dram_page_size; - } else { - dev_err(hdev->dev, - "page size of %u is not %uKB aligned, can't map\n", - page_size, mmu_prop->page_size >> 10); - - return -EFAULT; - } + rc = hdev->asic_funcs->mmu_get_real_page_size(hdev, mmu_prop, page_size, &real_page_size, + is_dram_addr); + if (rc) + return rc; /* * Verify that the phys and virt addresses are aligned with the @@ -302,9 +313,8 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, real_phys_addr = phys_addr; for (i = 0 ; i < npages ; i++) { - rc = hdev->mmu_func[pgt_residency].map(ctx, - real_virt_addr, real_phys_addr, - real_page_size, is_dram_addr); + rc = mmu_funcs->map(ctx, real_virt_addr, real_phys_addr, real_page_size, + is_dram_addr); if (rc) goto err; @@ -314,22 +324,21 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, } if (flush_pte) - hdev->mmu_func[pgt_residency].flush(ctx); + mmu_funcs->flush(ctx); return 0; err: real_virt_addr = virt_addr; for (i = 0 ; i < mapped_cnt ; i++) { - if (hdev->mmu_func[pgt_residency].unmap(ctx, - real_virt_addr, is_dram_addr)) + if (mmu_funcs->unmap(ctx, real_virt_addr, is_dram_addr)) dev_warn_ratelimited(hdev->dev, "failed to unmap va: 0x%llx\n", real_virt_addr); real_virt_addr += real_page_size; } - hdev->mmu_func[pgt_residency].flush(ctx); + mmu_funcs->flush(ctx); return rc; } @@ -480,11 +489,9 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops, u64 *phys_addr) { - struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; + struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; u64 offset_mask, addr_mask, hop_shift, tmp_phys_addr; - u32 hop0_shift_off; - void *p; + struct hl_mmu_properties *mmu_prop; /* last hop holds the phys address and flags */ if (hops->unscrambled_paddr) @@ -493,11 +500,11 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, tmp_phys_addr = hops->hop_info[hops->used_hops - 1].hop_pte_val; if (hops->range_type == HL_VA_RANGE_TYPE_HOST_HUGE) - p = &prop->pmmu_huge; + mmu_prop = &prop->pmmu_huge; else if (hops->range_type == HL_VA_RANGE_TYPE_HOST) - p = &prop->pmmu; + mmu_prop = &prop->pmmu; else /* HL_VA_RANGE_TYPE_DRAM */ - p = &prop->dmmu; + mmu_prop = &prop->dmmu; if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) && !is_power_of_2(prop->dram_page_size)) { @@ -508,7 +515,7 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, /* * Bit arithmetics cannot be used for non power of two page * sizes. In addition, since bit arithmetics is not used, - * we cannot ignore dram base. All that shall be considerd. + * we cannot ignore dram base. All that shall be considered. 
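+		 *
+		 * A worked example with illustrative numbers: for a
+		 * (non power of 2) DRAM page size of 48MB, the page offset
+		 * becomes (virt_addr - dram_base) mod 48MB and is added to the
+		 * page start taken from the PTE, instead of OR-ing masked
+		 * address bits as is done for power-of-2 page sizes.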
 */
 
		dram_page_size = prop->dram_page_size;
@@ -526,10 +533,7 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
		 * structure in order to determine the right masks
		 * for the page offset.
		 */
-		hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift);
-		p = (char *)p + hop0_shift_off;
-		p = (char *)p + ((hops->used_hops - 1) * sizeof(u64));
-		hop_shift = *(u64 *)p;
+		hop_shift = mmu_prop->hop_shifts[hops->used_hops - 1];
		offset_mask = (1ull << hop_shift) - 1;
		addr_mask = ~(offset_mask);
		*phys_addr = (tmp_phys_addr & addr_mask) |
@@ -557,40 +561,39 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
			struct hl_mmu_hop_info *hops)
 {
	struct hl_device *hdev = ctx->hdev;
-	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct asic_fixed_properties *prop;
	struct hl_mmu_properties *mmu_prop;
-	int rc;
+	struct hl_mmu_funcs *mmu_funcs;
+	int pgt_residency, rc;
	bool is_dram_addr;
 
	if (!hdev->mmu_enable)
		return -EOPNOTSUPP;
 
+	prop = &hdev->asic_prop;
	hops->scrambled_vaddr = virt_addr; /* assume no scrambling */
 
	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-						prop->dmmu.start_addr,
-						prop->dmmu.end_addr);
+							prop->dmmu.start_addr,
+							prop->dmmu.end_addr);
 
-	/* host-residency is the same in PMMU and HPMMU, use one of them */
+	/* host-residency is the same in PMMU and PMMU huge, no need to distinguish here */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+	pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
+	mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr);
 
	mutex_lock(&ctx->mmu_lock);
-
-	if (mmu_prop->host_resident)
-		rc = hdev->mmu_func[MMU_HR_PGT].get_tlb_info(ctx,
-							virt_addr, hops);
-	else
-		rc = hdev->mmu_func[MMU_DR_PGT].get_tlb_info(ctx,
-							virt_addr, hops);
-
+	rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops);
	mutex_unlock(&ctx->mmu_lock);
 
+	if (rc)
+		return rc;
+
	/* add page offset to physical address */
	if (hops->unscrambled_paddr)
-		hl_mmu_pa_page_with_offset(ctx, virt_addr, hops,
-					&hops->unscrambled_paddr);
+		hl_mmu_pa_page_with_offset(ctx, virt_addr, hops, &hops->unscrambled_paddr);
 
-	return rc;
+	return 0;
 }
 
 int hl_mmu_if_set_funcs(struct hl_device *hdev)
@@ -662,3 +665,83 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
 
	return rc;
 }
+static void hl_mmu_prefetch_work_function(struct work_struct *work)
+{
+	struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work);
+	struct hl_ctx *ctx = pfw->ctx;
+
+	if (!hl_device_operational(ctx->hdev, NULL))
+		goto put_ctx;
+
+	mutex_lock(&ctx->mmu_lock);
+
+	ctx->hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid,
+								pfw->va, pfw->size);
+
+	mutex_unlock(&ctx->mmu_lock);
+
+put_ctx:
+	/*
+	 * The context was taken in the common MMU prefetch function - see the
+	 * comment there about context handling.
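+	 *
+	 * In short: hl_mmu_prefetch_cache_range() takes a context reference
+	 * with hl_ctx_get() before queue_work(), and this work function drops
+	 * that reference on every exit path, so the context cannot go away
+	 * while a prefetch work item is still pending.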
+	 */
+	hl_ctx_put(ctx);
+	kfree(pfw);
+}
+
+int hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size)
+{
+	struct hl_prefetch_work *handle_pf_work;
+
+	handle_pf_work = kmalloc(sizeof(*handle_pf_work), GFP_KERNEL);
+	if (!handle_pf_work)
+		return -ENOMEM;
+
+	INIT_WORK(&handle_pf_work->pf_work, hl_mmu_prefetch_work_function);
+	handle_pf_work->ctx = ctx;
+	handle_pf_work->va = va;
+	handle_pf_work->size = size;
+	handle_pf_work->flags = flags;
+	handle_pf_work->asid = asid;
+
+	/*
+	 * As the actual prefetch is done in a WQ, we must get the context here
+	 * (and put it at the end of the work function)
+	 */
+	hl_ctx_get(ctx);
+	queue_work(ctx->hdev->pf_wq, &handle_pf_work->pf_work);
+
+	return 0;
+}
+
+u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
+{
+	return (curr_pte & PAGE_PRESENT_MASK) ? (curr_pte & HOP_PHYS_ADDR_MASK) : ULLONG_MAX;
+}
+
+/**
+ * hl_mmu_get_hop_pte_phys_addr() - extract PTE address from HOP
+ * @ctx: pointer to the context structure.
+ * @mmu_prop: MMU properties.
+ * @hop_idx: HOP index.
+ * @hop_addr: HOP address.
+ * @virt_addr: virtual address for the translation.
+ *
+ * @return the matching PTE physical address on success, otherwise U64_MAX.
+ */
+u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
+					u8 hop_idx, u64 hop_addr, u64 virt_addr)
+{
+	u64 mask, shift;
+
+	if (hop_idx >= mmu_prop->num_hops) {
+		dev_err_ratelimited(ctx->hdev->dev, "Invalid hop index %d\n", hop_idx);
+		return U64_MAX;
+	}
+
+	shift = mmu_prop->hop_shifts[hop_idx];
+	mask = mmu_prop->hop_masks[hop_idx];
+
+	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
+}
+
diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
index 6134b6ae7615..e2d91a69acc2 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
@@ -10,6 +10,8 @@
 
 #include <linux/slab.h>
 
+#define MMU_V1_MAX_HOPS	(MMU_HOP4 + 1)
+
 static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
 
 static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
@@ -170,65 +172,21 @@ static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
	return num_of_ptes_left;
 }
 
-static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-					u64 virt_addr, u64 mask, u64 shift)
-{
-	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-			((virt_addr & mask) >> shift);
-}
-
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
-					struct hl_mmu_properties *mmu_prop,
-					u64 hop_addr, u64 vaddr)
-{
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
-					mmu_prop->hop0_shift);
-}
-
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
-					struct hl_mmu_properties *mmu_prop,
-					u64 hop_addr, u64 vaddr)
-{
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
-					mmu_prop->hop1_shift);
-}
-
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
-					struct hl_mmu_properties *mmu_prop,
-					u64 hop_addr, u64 vaddr)
+static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
+					u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx)
 {
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
-					mmu_prop->hop2_shift);
-}
-
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
-					struct hl_mmu_properties *mmu_prop,
-					u64 hop_addr, u64 vaddr)
-{
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
-					mmu_prop->hop3_shift);
-}
-
-static inline
u64 get_hop4_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask, - mmu_prop->hop4_shift); -} + u64 mask, shift; -static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) -{ - if (curr_pte & PAGE_PRESENT_MASK) - return curr_pte & HOP_PHYS_ADDR_MASK; - else - return ULLONG_MAX; + mask = mmu_prop->hop_masks[hop_idx]; + shift = mmu_prop->hop_shifts[hop_idx]; + return hop_addr_arr[hop_idx] + + ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift); } static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop) { - u64 hop_addr = get_next_hop_addr(ctx, curr_pte); + u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); if (hop_addr == ULLONG_MAX) { hop_addr = alloc_hop(ctx); @@ -467,7 +425,7 @@ static void hl_mmu_v1_fini(struct hl_device *hdev) { /* MMU H/W fini was already done in device hw_fini() */ - if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) { + if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) { kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); @@ -524,74 +482,50 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx) } } -static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, +static int hl_mmu_v1_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) { + u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0; struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_mmu_properties *mmu_prop; - u64 hop0_addr = 0, hop0_pte_addr = 0, - hop1_addr = 0, hop1_pte_addr = 0, - hop2_addr = 0, hop2_pte_addr = 0, - hop3_addr = 0, hop3_pte_addr = 0, - hop4_addr = 0, hop4_pte_addr = 0, - curr_pte; bool is_huge, clear_hop3 = true; + int hop_idx; /* shifts and masks are the same in PMMU and HPMMU, use one of them */ mmu_prop = is_dram_addr ? 
&prop->dmmu : &prop->pmmu; - hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; - - hop1_addr = get_next_hop_addr(ctx, curr_pte); - - if (hop1_addr == ULLONG_MAX) - goto not_mapped; - - hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; - - hop2_addr = get_next_hop_addr(ctx, curr_pte); - - if (hop2_addr == ULLONG_MAX) - goto not_mapped; - - hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; - - hop3_addr = get_next_hop_addr(ctx, curr_pte); - - if (hop3_addr == ULLONG_MAX) - goto not_mapped; + for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) { + if (hop_idx == MMU_HOP0) { + hop_addr[hop_idx] = get_hop0_addr(ctx); + } else { + hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte); + if (hop_addr[hop_idx] == ULLONG_MAX) + goto not_mapped; + } - hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); + hop_pte_addr[hop_idx] = + get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx); - curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; + curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx]; + } is_huge = curr_pte & mmu_prop->last_mask; if (is_dram_addr && !is_huge) { - dev_err(hdev->dev, - "DRAM unmapping should use huge pages only\n"); + dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n"); return -EFAULT; } if (!is_huge) { - hop4_addr = get_next_hop_addr(ctx, curr_pte); - - if (hop4_addr == ULLONG_MAX) + hop_idx = MMU_HOP4; + hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte); + if (hop_addr[hop_idx] == ULLONG_MAX) goto not_mapped; - hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, - virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; - + hop_pte_addr[hop_idx] = + get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx); + curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx]; clear_hop3 = false; } @@ -613,39 +547,33 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, goto not_mapped; } - write_final_pte(ctx, hop3_pte_addr, default_pte); - put_pte(ctx, hop3_addr); + hop_idx = MMU_HOP3; + write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte); + put_pte(ctx, hop_addr[hop_idx]); } else { if (!(curr_pte & PAGE_PRESENT_MASK)) goto not_mapped; - if (hop4_addr) - clear_pte(ctx, hop4_pte_addr); + if (hop_addr[MMU_HOP4]) + clear_pte(ctx, hop_pte_addr[MMU_HOP4]); else - clear_pte(ctx, hop3_pte_addr); + clear_pte(ctx, hop_pte_addr[MMU_HOP3]); - if (hop4_addr && !put_pte(ctx, hop4_addr)) + if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4])) clear_hop3 = true; if (!clear_hop3) goto mapped; - clear_pte(ctx, hop3_pte_addr); - - if (put_pte(ctx, hop3_addr)) - goto mapped; - - clear_pte(ctx, hop2_pte_addr); - - if (put_pte(ctx, hop2_addr)) - goto mapped; - - clear_pte(ctx, hop1_pte_addr); + for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) { + clear_pte(ctx, hop_pte_addr[hop_idx]); - if (put_pte(ctx, hop1_addr)) - goto mapped; + if (hop_idx == MMU_HOP0) + break; - clear_pte(ctx, hop0_pte_addr); + if (put_pte(ctx, hop_addr[hop_idx])) + goto mapped; + } } mapped: @@ -658,21 +586,15 @@ not_mapped: return -EINVAL; } -static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, +static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, bool is_dram_addr) { + u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, 
curr_pte = 0; struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_mmu_properties *mmu_prop; - u64 hop0_addr = 0, hop0_pte_addr = 0, - hop1_addr = 0, hop1_pte_addr = 0, - hop2_addr = 0, hop2_pte_addr = 0, - hop3_addr = 0, hop3_pte_addr = 0, - hop4_addr = 0, hop4_pte_addr = 0, - curr_pte = 0; - bool hop1_new = false, hop2_new = false, hop3_new = false, - hop4_new = false, is_huge; - int rc = -ENOMEM; + bool is_huge, hop_new[MMU_V1_MAX_HOPS] = {false}; + int num_hops, hop_idx, prev_hop, rc = -ENOMEM; /* * This mapping function can map a page or a huge page. For huge page @@ -692,39 +614,21 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, is_huge = false; } - hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; - - hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new); - if (hop1_addr == ULLONG_MAX) - goto err; - - hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; - - hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new); - if (hop2_addr == ULLONG_MAX) - goto err; - - hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; + num_hops = is_huge ? (MMU_V1_MAX_HOPS - 1) : MMU_V1_MAX_HOPS; - hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new); - if (hop3_addr == ULLONG_MAX) - goto err; - - hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; - - if (!is_huge) { - hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new); - if (hop4_addr == ULLONG_MAX) - goto err; + for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) { + if (hop_idx == MMU_HOP0) { + hop_addr[hop_idx] = get_hop0_addr(ctx); + } else { + hop_addr[hop_idx] = + get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]); + if (hop_addr[hop_idx] == ULLONG_MAX) + goto err; + } - hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, - virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; + hop_pte_addr[hop_idx] = + get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx); + curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx]; } if (hdev->dram_default_page_mapping && is_dram_addr) { @@ -740,30 +644,22 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, goto err; } - if (hop1_new || hop2_new || hop3_new || hop4_new) { - dev_err(hdev->dev, - "DRAM mapping should not allocate more hops\n"); - rc = -EFAULT; - goto err; + for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) { + if (hop_new[hop_idx]) { + dev_err(hdev->dev, "DRAM mapping should not allocate more hops\n"); + rc = -EFAULT; + goto err; + } } } else if (curr_pte & PAGE_PRESENT_MASK) { dev_err(hdev->dev, "mapping already exists for virt_addr 0x%llx\n", virt_addr); - dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr); - dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr); - dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr); - dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr); - - if (!is_huge) - dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop4_pte_addr, - hop4_pte_addr); + for (hop_idx = 
MMU_HOP0; hop_idx < num_hops; hop_idx++)
+			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n", hop_idx,
+				*(u64 *) (uintptr_t) hop_pte_addr[hop_idx],
+				hop_pte_addr[hop_idx]);
 
		rc = -EINVAL;
		goto err;
@@ -772,53 +668,28 @@
	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask |
			PAGE_PRESENT_MASK;
 
-	if (is_huge)
-		write_final_pte(ctx, hop3_pte_addr, curr_pte);
-	else
-		write_final_pte(ctx, hop4_pte_addr, curr_pte);
+	write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);
 
-	if (hop1_new) {
-		curr_pte =
-			(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-		write_pte(ctx, hop0_pte_addr, curr_pte);
-	}
-	if (hop2_new) {
-		curr_pte =
-			(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-		write_pte(ctx, hop1_pte_addr, curr_pte);
-		get_pte(ctx, hop1_addr);
-	}
-	if (hop3_new) {
-		curr_pte =
-			(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-		write_pte(ctx, hop2_pte_addr, curr_pte);
-		get_pte(ctx, hop2_addr);
-	}
+	for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
+		prev_hop = hop_idx - 1;
 
-	if (!is_huge) {
-		if (hop4_new) {
-			curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
-					PAGE_PRESENT_MASK;
-			write_pte(ctx, hop3_pte_addr, curr_pte);
-			get_pte(ctx, hop3_addr);
+		if (hop_new[hop_idx]) {
+			curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+			write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
+			if (hop_idx != MMU_HOP1)
+				get_pte(ctx, hop_addr[prev_hop]);
		}
-
-		get_pte(ctx, hop4_addr);
-	} else {
-		get_pte(ctx, hop3_addr);
	}
 
+	get_pte(ctx, hop_addr[num_hops - 1]);
+
	return 0;
 
 err:
-	if (hop4_new)
-		free_hop(ctx, hop4_addr);
-	if (hop3_new)
-		free_hop(ctx, hop3_addr);
-	if (hop2_new)
-		free_hop(ctx, hop2_addr);
-	if (hop1_new)
-		free_hop(ctx, hop1_addr);
+	for (hop_idx = num_hops - 1; hop_idx > MMU_HOP0; hop_idx--) {
+		if (hop_new[hop_idx])
+			free_hop(ctx, hop_addr[hop_idx]);
+	}
 
	return rc;
 }
@@ -845,27 +716,6 @@ static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
 
 }
 
-static inline u64 get_hop_pte_addr(struct hl_ctx *ctx,
-					struct hl_mmu_properties *mmu_prop,
-					int hop_num, u64 hop_addr, u64 virt_addr)
-{
-	switch (hop_num) {
-	case 0:
-		return get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-	case 1:
-		return get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-	case 2:
-		return get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-	case 3:
-		return get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-	case 4:
-		return get_hop4_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-	default:
-		break;
-	}
-	return U64_MAX;
-}
-
 static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
				struct hl_mmu_hop_info *hops)
 {
@@ -906,7 +756,7 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
	hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
	hops->hop_info[0].hop_pte_addr =
-			get_hop_pte_addr(ctx, mmu_prop, 0,
+			hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
					hops->hop_info[0].hop_addr, virt_addr);
	hops->hop_info[0].hop_pte_val =
		hdev->asic_funcs->read_pte(hdev,
@@ -914,13 +764,13 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 
	for (i = 1 ; i < used_hops ; i++) {
		hops->hop_info[i].hop_addr =
-			get_next_hop_addr(ctx,
+			hl_mmu_get_next_hop_addr(ctx,
					hops->hop_info[i - 1].hop_pte_val);
		if (hops->hop_info[i].hop_addr == ULLONG_MAX)
			return -EFAULT;
 
		hops->hop_info[i].hop_pte_addr =
-			get_hop_pte_addr(ctx, mmu_prop, i,
+			hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
					hops->hop_info[i].hop_addr, virt_addr);
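 
		/* Each loop iteration descends one hop: the PTE value read at
		 * hop i - 1 holds the physical base of the hop i table, from
		 * which this hop's PTE address is computed and then read below.
		 */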
hops->hop_info[i].hop_pte_val = @@ -957,8 +807,8 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu) mmu->fini = hl_mmu_v1_fini; mmu->ctx_init = hl_mmu_v1_ctx_init; mmu->ctx_fini = hl_mmu_v1_ctx_fini; - mmu->map = _hl_mmu_v1_map; - mmu->unmap = _hl_mmu_v1_unmap; + mmu->map = hl_mmu_v1_map; + mmu->unmap = hl_mmu_v1_unmap; mmu->flush = flush; mmu->swap_out = hl_mmu_v1_swap_out; mmu->swap_in = hl_mmu_v1_swap_in; diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c index 0b5366cc84fd..610acd4a8057 100644 --- a/drivers/misc/habanalabs/common/pci/pci.c +++ b/drivers/misc/habanalabs/common/pci/pci.c @@ -338,10 +338,7 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, lower_32_bits(outbound_region_end_address)); rc |= hl_pci_iatu_write(hdev, 0x014, 0); - if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64)) - rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000); - else - rc |= hl_pci_iatu_write(hdev, 0x018, 0); + rc |= hl_pci_iatu_write(hdev, 0x018, 0); rc |= hl_pci_iatu_write(hdev, 0x020, upper_32_bits(outbound_region_end_address)); @@ -395,6 +392,7 @@ enum pci_region hl_get_pci_memory_region(struct hl_device *hdev, u64 addr) */ int hl_pci_init(struct hl_device *hdev) { + struct asic_fixed_properties *prop = &hdev->asic_prop; struct pci_dev *pdev = hdev->pdev; int rc; @@ -411,28 +409,25 @@ int hl_pci_init(struct hl_device *hdev) rc = hdev->asic_funcs->pci_bars_map(hdev); if (rc) { - dev_err(hdev->dev, "Failed to initialize PCI BARs\n"); + dev_err(hdev->dev, "Failed to map PCI BAR addresses\n"); goto disable_device; } rc = hdev->asic_funcs->init_iatu(hdev); if (rc) { - dev_err(hdev->dev, "Failed to initialize iATU\n"); + dev_err(hdev->dev, "PCI controller was not initialized successfully\n"); goto unmap_pci_bars; } /* Driver must sleep in order for FW to finish the iATU configuration */ - if (hdev->asic_prop.iatu_done_by_fw) { + if (hdev->asic_prop.iatu_done_by_fw) usleep_range(2000, 3000); - hdev->asic_funcs->set_dma_mask_from_fw(hdev); - } - rc = dma_set_mask_and_coherent(&pdev->dev, - DMA_BIT_MASK(hdev->dma_mask)); + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(prop->dma_mask)); if (rc) { dev_err(hdev->dev, "Failed to set dma mask to %d bits, error %d\n", - hdev->dma_mask, rc); + prop->dma_mask, rc); goto unmap_pci_bars; } diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 45c715325e2a..9ebeb18ab85e 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. 
 */
 
@@ -9,105 +9,91 @@
 
 #include <linux/pci.h>
 
-long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
+static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct cpucp_packet pkt;
-	u32 used_pll_idx;
-	u64 result;
-	int rc;
-
-	rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
-	if (rc)
-		return rc;
-
-	memset(&pkt, 0, sizeof(pkt));
+	struct hl_device *hdev = dev_get_drvdata(dev);
+	long value;
 
-	if (curr)
-		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET <<
-						CPUCP_PKT_CTL_OPCODE_SHIFT);
-	else
-		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET <<
-						CPUCP_PKT_CTL_OPCODE_SHIFT);
-	pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
+	if (!hl_device_operational(hdev, NULL))
+		return -ENODEV;
 
-	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						0, &result);
+	value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
+	if (value < 0)
+		return value;
 
-	if (rc) {
-		dev_err(hdev->dev,
-			"Failed to get frequency of PLL %d, error %d\n",
-			used_pll_idx, rc);
-		return rc;
-	}
+	hdev->asic_prop.max_freq_value = value;
 
-	return (long) result;
+	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
 }
 
-void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
+static ssize_t clk_max_freq_mhz_store(struct device *dev, struct device_attribute *attr,
					const char *buf, size_t count)
 {
-	struct cpucp_packet pkt;
-	u32 used_pll_idx;
+	struct hl_device *hdev = dev_get_drvdata(dev);
	int rc;
+	u64 value;
 
-	rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
-	if (rc)
-		return;
+	if (!hl_device_operational(hdev, NULL)) {
+		count = -ENODEV;
+		goto fail;
+	}
 
-	memset(&pkt, 0, sizeof(pkt));
+	rc = kstrtoull(buf, 0, &value);
+	if (rc) {
+		count = -EINVAL;
+		goto fail;
+	}
 
-	pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET <<
-				CPUCP_PKT_CTL_OPCODE_SHIFT);
-	pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
-	pkt.value = cpu_to_le64(freq);
+	hdev->asic_prop.max_freq_value = value * 1000 * 1000;
 
-	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						0, NULL);
+	hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index, hdev->asic_prop.max_freq_value);
 
-	if (rc)
-		dev_err(hdev->dev,
-			"Failed to set frequency to PLL %d, error %d\n",
-			used_pll_idx, rc);
+fail:
+	return count;
 }
 
-u64 hl_get_max_power(struct hl_device *hdev)
+static ssize_t clk_cur_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct cpucp_packet pkt;
-	u64 result;
-	int rc;
+	struct hl_device *hdev = dev_get_drvdata(dev);
+	long value;
 
-	memset(&pkt, 0, sizeof(pkt));
+	if (!hl_device_operational(hdev, NULL))
+		return -ENODEV;
 
-	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET <<
-				CPUCP_PKT_CTL_OPCODE_SHIFT);
+	value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
+	if (value < 0)
+		return value;
 
-	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						0, &result);
+	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
+}
 
-	if (rc) {
-		dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
-		return (u64) rc;
-	}
+static DEVICE_ATTR_RW(clk_max_freq_mhz);
+static DEVICE_ATTR_RO(clk_cur_freq_mhz);
 
-	return result;
-}
+static struct attribute *hl_dev_clk_attrs[] = {
+	&dev_attr_clk_max_freq_mhz.attr,
+	&dev_attr_clk_cur_freq_mhz.attr,
+	NULL,
+};
 
-void hl_set_max_power(struct hl_device *hdev)
+static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct cpucp_packet pkt;
-	int rc;
+	struct hl_device
*hdev = dev_get_drvdata(dev);
+	struct cpucp_info *cpucp_info;
 
-	memset(&pkt, 0, sizeof(pkt));
+	cpucp_info = &hdev->asic_prop.cpucp_info;
 
-	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET <<
-				CPUCP_PKT_CTL_OPCODE_SHIFT);
-	pkt.value = cpu_to_le64(hdev->max_power);
+	if (cpucp_info->infineon_second_stage_version)
+		return sprintf(buf, "%#04x %#04x\n", le32_to_cpu(cpucp_info->infineon_version),
+				le32_to_cpu(cpucp_info->infineon_second_stage_version));
+	else
+		return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
+}
 
-	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						0, NULL);
+static DEVICE_ATTR_RO(vrm_ver);
 
-	if (rc)
-		dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
-}
+static struct attribute *hl_dev_vrm_attrs[] = {
+	&dev_attr_vrm_ver.attr,
+	NULL,
+};
 
 static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr,
				char *buf)
@@ -158,20 +144,6 @@ static ssize_t cpucp_ver_show(struct device *dev, struct device_attribute *attr,
	return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version);
 }
 
-static ssize_t infineon_ver_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct hl_device *hdev = dev_get_drvdata(dev);
-
-	if (hdev->asic_prop.cpucp_info.infineon_second_stage_version)
-		return sprintf(buf, "%#04x %#04x\n",
-			le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version),
-			le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_second_stage_version));
-	else
-		return sprintf(buf, "%#04x\n",
-			le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version));
-}
-
 static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
				char *buf)
 {
@@ -188,6 +160,14 @@ static ssize_t thermal_ver_show(struct device *dev,
	return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.thermal_version);
 }
 
+static ssize_t fw_os_ver_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.fw_os_version);
+}
+
 static ssize_t preboot_btl_ver_show(struct device *dev,
				struct device_attribute *attr, char *buf)
 {
@@ -323,7 +303,9 @@ static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
	if (!hl_device_operational(hdev, NULL))
		return -ENODEV;
 
-	val = hl_get_max_power(hdev);
+	val = hl_fw_get_max_power(hdev);
+	if (val < 0)
+		return val;
 
	return sprintf(buf, "%lu\n", val);
 }
@@ -348,7 +330,7 @@ static ssize_t max_power_store(struct device *dev,
	}
 
	hdev->max_power = value;
-	hl_set_max_power(hdev);
+	hl_fw_set_max_power(hdev);
 
 out:
	return count;
@@ -394,7 +376,6 @@ static DEVICE_ATTR_RO(device_type);
 static DEVICE_ATTR_RO(fuse_ver);
 static DEVICE_ATTR_WO(hard_reset);
 static DEVICE_ATTR_RO(hard_reset_cnt);
-static DEVICE_ATTR_RO(infineon_ver);
 static DEVICE_ATTR_RW(max_power);
 static DEVICE_ATTR_RO(pci_addr);
 static DEVICE_ATTR_RO(preboot_btl_ver);
@@ -403,6 +384,7 @@ static DEVICE_ATTR_RO(soft_reset_cnt);
 static DEVICE_ATTR_RO(status);
 static DEVICE_ATTR_RO(thermal_ver);
 static DEVICE_ATTR_RO(uboot_ver);
+static DEVICE_ATTR_RO(fw_os_ver);
 
 static struct bin_attribute bin_attr_eeprom = {
	.attr = {.name = "eeprom", .mode = (0444)},
@@ -420,13 +402,13 @@ static struct attribute *hl_dev_attrs[] = {
	&dev_attr_fuse_ver.attr,
	&dev_attr_hard_reset.attr,
	&dev_attr_hard_reset_cnt.attr,
-	&dev_attr_infineon_ver.attr,
	&dev_attr_max_power.attr,
	&dev_attr_pci_addr.attr,
	&dev_attr_preboot_btl_ver.attr,
	&dev_attr_status.attr,
	&dev_attr_thermal_ver.attr,
	&dev_attr_uboot_ver.attr,
+
&dev_attr_fw_os_ver.attr, NULL, }; @@ -441,10 +423,12 @@ static struct attribute_group hl_dev_attr_group = { }; static struct attribute_group hl_dev_clks_attr_group; +static struct attribute_group hl_dev_vrm_attr_group; static const struct attribute_group *hl_dev_attr_groups[] = { &hl_dev_attr_group, &hl_dev_clks_attr_group, + &hl_dev_vrm_attr_group, NULL, }; @@ -463,13 +447,23 @@ static const struct attribute_group *hl_dev_inference_attr_groups[] = { NULL, }; +void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp) +{ + dev_clk_attr_grp->attrs = hl_dev_clk_attrs; +} + +void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *dev_vrm_attr_grp) +{ + dev_vrm_attr_grp->attrs = hl_dev_vrm_attrs; +} + int hl_sysfs_init(struct hl_device *hdev) { int rc; hdev->max_power = hdev->asic_prop.max_power_default; - hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group); + hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group, &hl_dev_vrm_attr_group); rc = device_add_groups(hdev->dev, hl_dev_attr_groups); if (rc) { diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 013c6da2e3ca..fba322241096 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -95,7 +95,7 @@ #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3 -#define GAUDI_ARB_WDT_TIMEOUT 0x1000000 +#define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */ #define GAUDI_CLK_GATE_DEBUGFS_MASK (\ BIT(GAUDI_ENGINE_ID_MME_0) |\ @@ -458,7 +458,6 @@ struct ecc_info_extract_params { u64 block_address; u32 num_memories; bool derr; - bool disable_clock_gating; }; static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, @@ -558,6 +557,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) } prop->device_dma_offset_for_host_access = HOST_PHYS_BASE; + prop->host_base_address = HOST_PHYS_BASE; + prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE; prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; prop->collective_first_sob = 0; prop->collective_first_mon = 0; @@ -596,24 +597,28 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; + prop->device_mem_alloc_default_page_size = prop->dram_page_size; prop->dram_supports_virtual_memory = false; - prop->pmmu.hop0_shift = MMU_V1_1_HOP0_SHIFT; - prop->pmmu.hop1_shift = MMU_V1_1_HOP1_SHIFT; - prop->pmmu.hop2_shift = MMU_V1_1_HOP2_SHIFT; - prop->pmmu.hop3_shift = MMU_V1_1_HOP3_SHIFT; - prop->pmmu.hop4_shift = MMU_V1_1_HOP4_SHIFT; - prop->pmmu.hop0_mask = MMU_V1_1_HOP0_MASK; - prop->pmmu.hop1_mask = MMU_V1_1_HOP1_MASK; - prop->pmmu.hop2_mask = MMU_V1_1_HOP2_MASK; - prop->pmmu.hop3_mask = MMU_V1_1_HOP3_MASK; - prop->pmmu.hop4_mask = MMU_V1_1_HOP4_MASK; + prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT; + prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT; + prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT; + prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT; + prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT; + prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK; + prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK; + prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK; + 
prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK; + prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK; prop->pmmu.start_addr = VA_HOST_SPACE_START; prop->pmmu.end_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1; prop->pmmu.page_size = PAGE_SIZE_4KB; prop->pmmu.num_hops = MMU_ARCH_5_HOPS; prop->pmmu.last_mask = LAST_MASK; + /* TODO: will be duplicated until implementing per-MMU props */ + prop->pmmu.hop_table_size = prop->mmu_hop_table_size; + prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; /* PMMU and HPMMU are the same except of page size */ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); @@ -667,6 +672,12 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->use_get_power_for_reset_history = true; + prop->configurable_stop_on_err = true; + + prop->set_max_power_on_device_init = true; + + prop->dma_mask = 48; + return 0; } @@ -748,8 +759,6 @@ static int gaudi_init_iatu(struct hl_device *hdev) if (rc) goto done; - hdev->asic_funcs->set_dma_mask_from_fw(hdev); - /* Outbound Region 0 - Point to Host */ outbound_region.addr = HOST_PHYS_BASE; outbound_region.size = HOST_PHYS_SIZE; @@ -1002,7 +1011,7 @@ free_job: release_cb: hl_cb_put(cb); - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); return rc; } @@ -1464,7 +1473,7 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev, job->patched_cb = NULL; job->job_cb_size = job->user_cb_size; - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); /* increment refcount as for external queues we get completion */ if (hw_queue_prop->type == QUEUE_TYPE_EXT) @@ -1636,7 +1645,7 @@ static int gaudi_late_init(struct hl_device *hdev) */ gaudi_mmu_prepare(hdev, 1); - hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST); + hl_fw_set_pll_profile(hdev); return 0; @@ -1896,7 +1905,6 @@ static int gaudi_sw_init(struct hl_device *hdev) goto free_cpu_accessible_dma_pool; spin_lock_init(&gaudi->hw_queues_lock); - mutex_init(&gaudi->clk_gate_mutex); hdev->supports_sync_stream = true; hdev->supports_coresight = true; @@ -1946,8 +1954,6 @@ static int gaudi_sw_fini(struct hl_device *hdev) dma_pool_destroy(hdev->dma_pool); - mutex_destroy(&gaudi->clk_gate_mutex); - kfree(gaudi); return 0; @@ -2805,9 +2811,8 @@ static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, QM_ARB_ERR_MSG_EN_MASK); - /* Increase ARB WDT to support streams architecture */ - WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, - GAUDI_ARB_WDT_TIMEOUT); + /* Set timeout to maximum */ + WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, QMAN_EXTERNAL_MAKE_TRUSTED); @@ -2984,9 +2989,8 @@ static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, QM_ARB_ERR_MSG_EN_MASK); - /* Increase ARB WDT to support streams architecture */ - WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, - GAUDI_ARB_WDT_TIMEOUT); + /* Set timeout to maximum */ + WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, @@ -3121,9 +3125,8 @@ static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, QM_ARB_ERR_MSG_EN_MASK); - /* Increase ARB WDT to 
support streams architecture */ - WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, - GAUDI_ARB_WDT_TIMEOUT); + /* Set timeout to maximum */ + WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT); WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); WREG32(mmMME0_QM_GLBL_PROT + mme_offset, @@ -3255,9 +3258,8 @@ static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, QM_ARB_ERR_MSG_EN_MASK); - /* Increase ARB WDT to support streams architecture */ - WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, - GAUDI_ARB_WDT_TIMEOUT); + /* Set timeout to maximum */ + WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT); WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, @@ -3406,9 +3408,8 @@ static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset, WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset, QM_ARB_ERR_MSG_EN_MASK); - /* Increase ARB WDT to support streams architecture */ - WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, - GAUDI_ARB_WDT_TIMEOUT); + /* Set timeout to maximum */ + WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT); WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0); WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset, @@ -3738,76 +3739,8 @@ static void gaudi_tpc_stall(struct hl_device *hdev) WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); } -static void gaudi_set_clock_gating(struct hl_device *hdev) -{ - struct gaudi_device *gaudi = hdev->asic_specific; - u32 qman_offset; - bool enable; - int i; - - /* In case we are during debug session, don't enable the clock gate - * as it may interfere - */ - if (hdev->in_debug) - return; - - if (hdev->asic_prop.fw_security_enabled) - return; - - for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { - enable = !!(hdev->clock_gating_mask & - (BIT_ULL(gaudi_dma_assignment[i]))); - - qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; - WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, - enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmDMA0_QM_CGM_CFG + qman_offset, - enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0); - } - - for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) { - enable = !!(hdev->clock_gating_mask & - (BIT_ULL(gaudi_dma_assignment[i]))); - - /* GC sends work to DMA engine through Upper CP in DMA5 so - * we need to not enable clock gating in that DMA - */ - if (i == GAUDI_HBM_DMA_4) - enable = 0; - - qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; - WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, - enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmDMA0_QM_CGM_CFG + qman_offset, - enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); - } - - enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))); - WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); - - enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))); - WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); - - for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { - enable = !!(hdev->clock_gating_mask & - (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i))); - - WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, - enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmTPC0_QM_CGM_CFG + qman_offset, - enable ? 
QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); - - qman_offset += TPC_QMAN_OFFSET; - } - - gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE; -} - static void gaudi_disable_clock_gating(struct hl_device *hdev) { - struct gaudi_device *gaudi = hdev->asic_specific; u32 qman_offset; int i; @@ -3832,8 +3765,6 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev) qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); } - - gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE); } static void gaudi_enable_timestamp(struct hl_device *hdev) @@ -3859,9 +3790,6 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_ { u32 wait_timeout_ms; - dev_info(hdev->dev, - "Halting compute engines and disabling interrupts\n"); - if (hdev->pldm) wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; else @@ -3876,8 +3804,6 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_ gaudi_stop_hbm_dma_qmans(hdev); gaudi_stop_pci_dma_qmans(hdev); - hdev->asic_funcs->disable_clock_gating(hdev); - msleep(wait_timeout_ms); gaudi_pci_dma_stall(hdev); @@ -3931,7 +3857,7 @@ static int gaudi_mmu_init(struct hl_device *hdev) /* mem cache invalidation */ WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0); + hl_mmu_invalidate_cache(hdev, true, 0); WREG32(mmMMU_UP_MMU_ENABLE, 1); WREG32(mmMMU_UP_SPI_MASK, 0xF); @@ -4203,10 +4129,8 @@ static int gaudi_hw_init(struct hl_device *hdev) /* In case the clock gating was enabled in preboot we need to disable * it here before touching the MME/TPC registers. - * There is no need to take clk gating mutex because when this function - * runs, no other relevant code can run */ - hdev->asic_funcs->disable_clock_gating(hdev); + gaudi_disable_clock_gating(hdev); /* SRAM scrambler must be initialized after CPU is running from HBM */ gaudi_init_scrambler_sram(hdev); @@ -4232,8 +4156,6 @@ static int gaudi_hw_init(struct hl_device *hdev) gaudi_init_nic_qmans(hdev); - hdev->asic_funcs->set_clock_gating(hdev); - gaudi_enable_timestamp(hdev); /* MSI must be enabled before CPU queues and NIC are initialized */ @@ -4285,7 +4207,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset } if (fw_reset) { - dev_info(hdev->dev, + dev_dbg(hdev->dev, "Firmware performs HARD reset, going to wait %dms\n", reset_timeout_ms); @@ -4377,11 +4299,11 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); - dev_info(hdev->dev, + dev_dbg(hdev->dev, "Issued HARD reset command, going to wait %dms\n", reset_timeout_ms); } else { - dev_info(hdev->dev, + dev_dbg(hdev->dev, "Firmware performs HARD reset, going to wait %dms\n", reset_timeout_ms); } @@ -4400,14 +4322,11 @@ skip_reset: status); if (gaudi) { - gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | - HW_CAP_HBM | HW_CAP_PCI_DMA | - HW_CAP_MME | HW_CAP_TPC_MASK | - HW_CAP_HBM_DMA | HW_CAP_PLL | - HW_CAP_NIC_MASK | HW_CAP_MMU | - HW_CAP_SRAM_SCRAMBLER | - HW_CAP_HBM_SCRAMBLER | - HW_CAP_CLK_GATE); + gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | + HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | + HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | + HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | + HW_CAP_HBM_SCRAMBLER); memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); @@ -4821,12 +4740,11 @@ static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, 
fixed_dma_handle); } -static int gaudi_hbm_scrubbing(struct hl_device *hdev) +static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) { struct asic_fixed_properties *prop = &hdev->asic_prop; u64 cur_addr = DRAM_BASE_ADDR_USER; - u32 val; - u32 chunk_size; + u32 chunk_size, busy; int rc, dma_id; while (cur_addr < prop->dram_end_address) { @@ -4840,8 +4758,10 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev) "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n", cur_addr, cur_addr + chunk_size); - WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf); - WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf); + WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, + lower_32_bits(val)); + WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, + upper_32_bits(val)); WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(cur_addr)); WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, @@ -4864,8 +4784,8 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev) rc = hl_poll_timeout( hdev, mmDMA0_CORE_STS0 + dma_offset, - val, - ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), + busy, + ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0), 1000, HBM_SCRUBBING_TIMEOUT_US); @@ -4884,7 +4804,6 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev) static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; int rc = 0; u64 val = 0; @@ -4919,17 +4838,11 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size) return rc; } - mutex_lock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->disable_clock_gating(hdev); - /* Scrub HBM using all DMA channels in parallel */ - rc = gaudi_hbm_scrubbing(hdev); + rc = gaudi_scrub_device_dram(hdev, 0xdeadbeaf); if (rc) dev_err(hdev->dev, "Failed to clear HBM in mem scrub all\n"); - - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); } return rc; @@ -5121,37 +5034,7 @@ static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); } -static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir)) - return -ENOMEM; - - /* Shift to the device's base physical address of host memory */ - for_each_sg(sgl, sg, nents, i) - sg->dma_address += HOST_PHYS_BASE; - - return 0; -} - -static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - /* Cancel the device's base physical address of host memory */ - for_each_sg(sgl, sg, nents, i) - sg->dma_address -= HOST_PHYS_BASE; - - dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir); -} - -static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, - struct sg_table *sgt) +static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) { struct scatterlist *sg, *sg_next_iter; u32 count, dma_desc_cnt; @@ -5160,8 +5043,7 @@ static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, dma_desc_cnt = 0; - for_each_sg(sgt->sgl, sg, sgt->nents, count) { - + for_each_sgtable_dma_sg(sgt, sg, count) { len = sg_dma_len(sg); addr = sg_dma_address(sg); @@ -5215,8 +5097,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev, list_add_tail(&userptr->job_node, parser->job_userptr_list); - rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, 
- userptr->sgt->nents, dir); + rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); if (rc) { dev_err(hdev->dev, "failed to map sgt with DMA region\n"); goto unpin_memory; @@ -5491,7 +5372,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev, sgt = userptr->sgt; dma_desc_cnt = 0; - for_each_sg(sgt->sgl, sg, sgt->nents, count) { + for_each_sgtable_dma_sg(sgt, sg, count) { len = sg_dma_len(sg); dma_addr = sg_dma_address(sg); @@ -5645,7 +5526,7 @@ static int gaudi_patch_cb(struct hl_device *hdev, static int gaudi_parse_cb_mmu(struct hl_device *hdev, struct hl_cs_parser *parser) { - u64 patched_cb_handle; + u64 handle; u32 patched_cb_size; struct hl_cb *user_cb; int rc; @@ -5661,9 +5542,9 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev, else parser->patched_cb_size = parser->user_cb_size; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, + rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, parser->patched_cb_size, false, false, - &patched_cb_handle); + &handle); if (rc) { dev_err(hdev->dev, @@ -5672,13 +5553,10 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev, return rc; } - patched_cb_handle >>= PAGE_SHIFT; - parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, - (u32) patched_cb_handle); + parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); /* hl_cb_get should never fail */ if (!parser->patched_cb) { - dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n", - (u32) patched_cb_handle); + dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); rc = -EFAULT; goto out; } @@ -5718,8 +5596,7 @@ out: * cb_put will release it, but here we want to remove it from the * idr */ - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, - patched_cb_handle << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, handle); return rc; } @@ -5727,7 +5604,7 @@ out: static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, struct hl_cs_parser *parser) { - u64 patched_cb_handle; + u64 handle; int rc; rc = gaudi_validate_cb(hdev, parser, false); @@ -5735,22 +5612,19 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, if (rc) goto free_userptr; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, + rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, parser->patched_cb_size, false, false, - &patched_cb_handle); + &handle); if (rc) { dev_err(hdev->dev, "Failed to allocate patched CB for DMA CS %d\n", rc); goto free_userptr; } - patched_cb_handle >>= PAGE_SHIFT; - parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, - (u32) patched_cb_handle); + parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); /* hl_cb_get should never fail here */ if (!parser->patched_cb) { - dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n", - (u32) patched_cb_handle); + dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); rc = -EFAULT; goto out; } @@ -5767,8 +5641,7 @@ out: * cb_put will release it, but here we want to remove it from the * idr */ - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, - patched_cb_handle << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, handle); free_userptr: if (rc) @@ -5881,7 +5754,6 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, struct hl_cs_job *job; u32 cb_size, ctl, err_cause; struct hl_cb *cb; - u64 id; int rc; cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); @@ -5948,9 +5820,8 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, } release_cb: - id = cb->id; hl_cb_put(cb); - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << 
PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); return rc; } @@ -6013,7 +5884,7 @@ static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, release_cb: hl_cb_put(cb); - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); return rc; } @@ -6184,217 +6055,6 @@ static void gaudi_restore_phase_topology(struct hl_device *hdev) } -static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, - bool user_address, u32 *val) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; - u64 hbm_bar_addr, host_phys_end; - int rc = 0; - - host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; - - if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - - if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && - (hdev->clock_gating_mask & - GAUDI_CLK_GATE_DEBUGFS_MASK)) { - - dev_err_ratelimited(hdev->dev, - "Can't read register - clock gating is enabled!\n"); - rc = -EFAULT; - } else { - *val = RREG32(addr - CFG_BASE); - } - - } else if ((addr >= SRAM_BASE_ADDR) && - (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { - *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + - (addr - SRAM_BASE_ADDR)); - } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); - - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - if (hbm_bar_addr != U64_MAX) { - *val = readl(hdev->pcie_bar[HBM_BAR_ID] + - (addr - bar_base_addr)); - - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, - hbm_bar_addr); - } - if (hbm_bar_addr == U64_MAX) - rc = -EIO; - } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && - user_address && !iommu_present(&pci_bus_type)) { - *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE); - } else { - rc = -EFAULT; - } - - return rc; -} - -static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, - bool user_address, u32 val) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; - u64 hbm_bar_addr, host_phys_end; - int rc = 0; - - host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; - - if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - - if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && - (hdev->clock_gating_mask & - GAUDI_CLK_GATE_DEBUGFS_MASK)) { - - dev_err_ratelimited(hdev->dev, - "Can't write register - clock gating is enabled!\n"); - rc = -EFAULT; - } else { - WREG32(addr - CFG_BASE, val); - } - - } else if ((addr >= SRAM_BASE_ADDR) && - (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { - writel(val, hdev->pcie_bar[SRAM_BAR_ID] + - (addr - SRAM_BASE_ADDR)); - } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); - - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - if (hbm_bar_addr != U64_MAX) { - writel(val, hdev->pcie_bar[HBM_BAR_ID] + - (addr - bar_base_addr)); - - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, - hbm_bar_addr); - } - if (hbm_bar_addr == U64_MAX) - rc = -EIO; - } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && - user_address && !iommu_present(&pci_bus_type)) { - *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; - } else { - rc = -EFAULT; - } - - return rc; -} - -static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, - bool user_address, u64 *val) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = 
hdev->asic_specific; - u64 hbm_bar_addr, host_phys_end; - int rc = 0; - - host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; - - if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - - if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && - (hdev->clock_gating_mask & - GAUDI_CLK_GATE_DEBUGFS_MASK)) { - - dev_err_ratelimited(hdev->dev, - "Can't read register - clock gating is enabled!\n"); - rc = -EFAULT; - } else { - u32 val_l = RREG32(addr - CFG_BASE); - u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); - - *val = (((u64) val_h) << 32) | val_l; - } - - } else if ((addr >= SRAM_BASE_ADDR) && - (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { - *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + - (addr - SRAM_BASE_ADDR)); - } else if (addr <= - DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); - - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - if (hbm_bar_addr != U64_MAX) { - *val = readq(hdev->pcie_bar[HBM_BAR_ID] + - (addr - bar_base_addr)); - - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, - hbm_bar_addr); - } - if (hbm_bar_addr == U64_MAX) - rc = -EIO; - } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && - user_address && !iommu_present(&pci_bus_type)) { - *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); - } else { - rc = -EFAULT; - } - - return rc; -} - -static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, - bool user_address, u64 val) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; - u64 hbm_bar_addr, host_phys_end; - int rc = 0; - - host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; - - if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - - if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && - (hdev->clock_gating_mask & - GAUDI_CLK_GATE_DEBUGFS_MASK)) { - - dev_err_ratelimited(hdev->dev, - "Can't write register - clock gating is enabled!\n"); - rc = -EFAULT; - } else { - WREG32(addr - CFG_BASE, lower_32_bits(val)); - WREG32(addr + sizeof(u32) - CFG_BASE, - upper_32_bits(val)); - } - - } else if ((addr >= SRAM_BASE_ADDR) && - (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { - writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + - (addr - SRAM_BASE_ADDR)); - } else if (addr <= - DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); - - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - if (hbm_bar_addr != U64_MAX) { - writeq(val, hdev->pcie_bar[HBM_BAR_ID] + - (addr - bar_base_addr)); - - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, - hbm_bar_addr); - } - if (hbm_bar_addr == U64_MAX) - rc = -EIO; - } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && - user_address && !iommu_present(&pci_bus_type)) { - *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; - } else { - rc = -EFAULT; - } - - return rc; -} - static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, u32 size_to_dma, dma_addr_t dma_addr) { @@ -6446,7 +6106,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr) { u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; - struct gaudi_device *gaudi = hdev->asic_specific; u32 qm_glbl_sts0, qm_cgm_sts; u64 dma_offset, qm_offset; dma_addr_t dma_addr; @@ -6462,10 +6121,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, if 
(!kernel_addr) return -ENOMEM; - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - hdev->asic_funcs->hw_queues_lock(hdev); dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; @@ -6550,10 +6205,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, out: hdev->asic_funcs->hw_queues_unlock(hdev); - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr); @@ -6601,10 +6252,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) return; } - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); @@ -6882,10 +6529,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); - - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); } static int gaudi_send_job_on_qman0(struct hl_device *hdev, @@ -7266,10 +6909,8 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev, struct ecc_info_extract_params *params, u64 *ecc_address, u64 *ecc_syndrom, u8 *memory_wrapper_idx) { - struct gaudi_device *gaudi = hdev->asic_specific; u32 i, num_mem_regs, reg, err_bit; u64 err_addr, err_word = 0; - int rc = 0; num_mem_regs = params->num_memories / 32 + ((params->num_memories % 32) ? 1 : 0); @@ -7282,11 +6923,6 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev, else err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; - if (params->disable_clock_gating) { - mutex_lock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->disable_clock_gating(hdev); - } - /* Set invalid wrapper index */ *memory_wrapper_idx = 0xFF; @@ -7303,8 +6939,7 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev, if (*memory_wrapper_idx == 0xFF) { dev_err(hdev->dev, "ECC error information cannot be found\n"); - rc = -EINVAL; - goto enable_clk_gate; + return -EINVAL; } WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, @@ -7324,14 +6959,7 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev, WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); -enable_clk_gate: - if (params->disable_clock_gating) { - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); - } - - return rc; + return 0; } /* @@ -7589,7 +7217,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; params.num_memories = 90; params.derr = false; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: @@ -7598,7 +7225,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; params.num_memories = 90; params.derr = true; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_MME0_ACC_SERR: @@ -7609,7 +7235,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; params.num_memories = 128; params.derr = false; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_MME0_ACC_DERR: @@ -7620,7 +7245,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; params.num_memories = 128; params.derr = true; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_MME0_SBAB_SERR: @@ -7632,7 +7256,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; params.num_memories = 33; params.derr = false; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_MME0_SBAB_DERR: @@ -7644,7 +7267,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; params.num_memories = 33; params.derr = true; - params.disable_clock_gating = true; extract_info_from_fw = false; break; default: @@ -7782,19 +7404,18 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type); /* In case it's the first razwi, save its parameters*/ - rc = atomic_cmpxchg(&hdev->last_error.razwi_write_disable, 0, 1); + rc = atomic_cmpxchg(&hdev->last_error.razwi.write_disable, 0, 1); if (!rc) { - hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime; - hdev->last_error.razwi_timestamp = ktime_get(); - hdev->last_error.razwi_addr = razwi_addr; - hdev->last_error.razwi_engine_id_1 = engine_id_1; - hdev->last_error.razwi_engine_id_2 = engine_id_2; + hdev->last_error.razwi.timestamp = ktime_get(); + hdev->last_error.razwi.addr = razwi_addr; + hdev->last_error.razwi.engine_id_1 = engine_id_1; + hdev->last_error.razwi.engine_id_2 = engine_id_2; /* * If first engine id holds non valid value the razwi initiator * does not have engine id */ - hdev->last_error.razwi_non_engine_initiator = (engine_id_1 == U16_MAX); - hdev->last_error.razwi_type = razwi_type; + hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX); + hdev->last_error.razwi.type = razwi_type; } } @@ -7819,6 +7440,48 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev, fw_alive->thread_id, fw_alive->uptime_seconds); } +static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, + void *data) +{ + char desc[64] = "", *type; + struct eq_nic_sei_event *eq_nic_sei = data; + u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; + + switch (eq_nic_sei->axi_error_cause) { + case RXB: + type = "RXB"; + break; + case RXE: + type = "RXE"; + break; + case TXS: + type = "TXS"; + break; + case TXE: + type = "TXE"; + break; + case QPC_RESP: + type = "QPC_RESP"; + break; + case NON_AXI_ERR: + type = "NON_AXI_ERR"; + break; + case TMR: + type = "TMR"; + break; + default: + dev_err(hdev->dev, "unknown NIC AXI cause %d\n", + eq_nic_sei->axi_error_cause); + type = "N/A"; + break; + } + + snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, 
type, + eq_nic_sei->id); + dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", + event_type, desc); +} + static int gaudi_non_hard_reset_late_init(struct hl_device *hdev) { /* GAUDI doesn't support any reset except hard-reset */ @@ -7966,19 +7629,9 @@ static int gaudi_hbm_event_to_dev(u16 hbm_event_type) static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, char *interrupt_name) { - struct gaudi_device *gaudi = hdev->asic_specific; u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i; bool soft_reset_required = false; - /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock - * gating, and thus cannot be done in CPU-CP and should be done instead - * by the driver. - */ - - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) & TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK; @@ -7996,10 +7649,6 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, /* Clear interrupts */ WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); - return soft_reset_required; } @@ -8066,6 +7715,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) { struct gaudi_device *gaudi = hdev->asic_specific; + u64 data = le64_to_cpu(eq_entry->data[0]); u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); u32 fw_fatal_err_flag = 0; u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) @@ -8102,6 +7752,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_PSOC_MEM_DERR: case GAUDI_EVENT_PSOC_CORESIGHT_DERR: case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: + case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: case GAUDI_EVENT_MMU_DERR: @@ -8202,6 +7853,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_PSOC_MEM_SERR: case GAUDI_EVENT_PSOC_CORESIGHT_SERR: case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: + case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR: case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: fallthrough; @@ -8226,7 +7878,6 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_MMU_PAGE_FAULT: case GAUDI_EVENT_MMU_WR_PERM: case GAUDI_EVENT_RAZWI_OR_ADC: - case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: fallthrough; @@ -8246,6 +7897,19 @@ static void gaudi_handle_eqe(struct hl_device *hdev, hl_fw_unmask_irq(hdev, event_type); break; + case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: + gaudi_print_irq_info(hdev, event_type, true); + gaudi_handle_qman_err(hdev, event_type); + hl_fw_unmask_irq(hdev, event_type); + + /* On a TPC QM event, notify on TPC assertion. While there isn't + * a specific event for assertion yet, the FW generates a QM event. + * The SW upper layer will inspect an internal mapped area to indicate + * if the event is a tpc assertion or tpc QM.
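The reworked event handling above folds whole ID ranges into single handlers using the case-range syntax (case LOW ... HIGH:), a GCC/Clang extension that gaudi_handle_eqe() relies on throughout. A minimal standalone sketch of the same demux technique follows; the event numbers are illustrative, not the real firmware-defined GAUDI_EVENT_* values.

/* Standalone sketch of case-range event demux (GCC/Clang extension).
 * Event IDs here are illustrative, not the real GAUDI_EVENT_* values. */
#include <stdio.h>

enum demo_event {
        DEMO_EVENT_TPC0_QM = 100,       /* hypothetical */
        DEMO_EVENT_TPC7_QM = 107,       /* hypothetical */
        DEMO_EVENT_NIC_SEI_0 = 200,     /* hypothetical */
        DEMO_EVENT_NIC_SEI_4 = 204,     /* hypothetical */
};

static void handle_event(int event_type)
{
        switch (event_type) {
        case DEMO_EVENT_TPC0_QM ... DEMO_EVENT_TPC7_QM:
                /* one handler for all eight TPC queue managers */
                printf("TPC%d QM event\n", event_type - DEMO_EVENT_TPC0_QM);
                break;
        case DEMO_EVENT_NIC_SEI_0 ... DEMO_EVENT_NIC_SEI_4:
                printf("NIC%d SEI event\n", event_type - DEMO_EVENT_NIC_SEI_0);
                break;
        default:
                printf("unhandled event %d\n", event_type);
                break;
        }
}

int main(void)
{
        handle_event(103);      /* prints "TPC3 QM event" */
        handle_event(202);      /* prints "NIC2 SEI event" */
        handle_event(999);      /* falls through to default */
        return 0;
}

Subtracting the range base inside the handler recovers the engine index, which is exactly how the driver derives the TPC/NIC instance from the raw event type.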
+ */ + hl_notifier_event_send_all(hdev, HL_NOTIFIER_EVENT_TPC_ASSERT); + break; + case GAUDI_EVENT_RAZWI_OR_ADC_SW: gaudi_print_irq_info(hdev, event_type, true); goto reset_device; @@ -8263,6 +7927,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev, hl_fw_unmask_irq(hdev, event_type); break; + case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4: + gaudi_print_nic_axi_irq_info(hdev, event_type, &data); + hl_fw_unmask_irq(hdev, event_type); + break; + case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: gaudi_print_irq_info(hdev, event_type, false); gaudi_print_sm_sei_info(hdev, event_type, @@ -8274,6 +7943,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev, hl_fw_unmask_irq(hdev, event_type); break; + case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: + break; + case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: gaudi_print_clk_change_info(hdev, event_type); hl_fw_unmask_irq(hdev, event_type); @@ -8314,7 +7986,7 @@ reset_device: | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag); else if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag); + hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag); else hl_fw_unmask_irq(hdev, event_type); } @@ -8443,8 +8115,6 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev) set_default_power_values(hdev); - hdev->max_power = prop->max_power_default; - return 0; } @@ -8461,10 +8131,6 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u64 offset; int i, dma_id, port; - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - if (s) seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" @@ -8585,10 +8251,6 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (s) seq_puts(s, "\n"); - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); - return is_idle; } @@ -8624,14 +8286,22 @@ static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, return hl_fw_get_eeprom_data(hdev, data, max_size); } +static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) + return 0; + + return hl_fw_get_monitor_dump(hdev, data); +} + /* * this function should be used only during initialization and/or after reset, * when there are no active users. 
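The TPC kernel-launch path below waits on status registers via hl_poll_timeout(), the same poll-until-clear-or-deadline pattern the DRAM scrub uses for the DMA busy bit. A userspace analogue of that pattern, with the register read simulated (the driver would use RREG32()); the mask and timing values are illustrative:

/* Userspace analogue of the hl_poll_timeout() pattern: poll a status
 * word until a busy bit clears or a deadline passes. */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define DEMO_BUSY_MASK 0x1u     /* stands in for DMA0_CORE_STS0_BUSY_MASK */

static uint32_t read_status(void)
{
        static int calls;
        /* pretend the hardware deasserts busy after a few polls */
        return (++calls < 5) ? DEMO_BUSY_MASK : 0;
}

static int poll_timeout(uint32_t mask, long timeout_ms)
{
        struct timespec start, now;

        clock_gettime(CLOCK_MONOTONIC, &start);
        for (;;) {
                if (!(read_status() & mask))
                        return 0;       /* condition met */
                clock_gettime(CLOCK_MONOTONIC, &now);
                if ((now.tv_sec - start.tv_sec) * 1000L +
                    (now.tv_nsec - start.tv_nsec) / 1000000L > timeout_ms)
                        return -1;      /* the driver returns -ETIMEDOUT */
        }
}

int main(void)
{
        printf("poll result: %d\n", poll_timeout(DEMO_BUSY_MASK, 100));
        return 0;
}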
*/ -static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, - u32 tpc_id) +static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) { - struct gaudi_device *gaudi = hdev->asic_specific; u64 kernel_timeout; u32 status, offset; int rc; @@ -8643,10 +8313,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, else kernel_timeout = HL_DEVICE_TIMEOUT_USEC; - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, lower_32_bits(tpc_kernel)); WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, @@ -8686,8 +8352,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, dev_err(hdev->dev, "Timeout while waiting for TPC%d icache prefetch\n", tpc_id); - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); return -EIO; } @@ -8711,8 +8375,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, dev_err(hdev->dev, "Timeout while waiting for TPC%d vector pipe\n", tpc_id); - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); return -EIO; } @@ -8724,9 +8386,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, 1000, kernel_timeout); - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); - if (rc) { dev_err(hdev->dev, "Timeout while waiting for TPC%d kernel to execute\n", @@ -8791,7 +8450,7 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev, hdev->internal_cb_pool_dma_addr, HOST_SPACE_INTERNAL_CB_SZ); - hdev->asic_funcs->mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); + hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); mutex_unlock(&ctx->mmu_lock); if (rc) @@ -8826,7 +8485,7 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, HOST_SPACE_INTERNAL_CB_SZ); hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); + hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); mutex_unlock(&ctx->mmu_lock); gen_pool_destroy(hdev->internal_cb_pool); @@ -9202,18 +8861,6 @@ static void gaudi_reset_sob(struct hl_device *hdev, void *data) kref_init(&hw_sob->kref); } -static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev) -{ - if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) == - HL_POWER9_HOST_MAGIC) { - hdev->power9_64bit_dma_enable = 1; - hdev->dma_mask = 64; - } else { - hdev->power9_64bit_dma_enable = 0; - hdev->dma_mask = 48; - } -} - static u64 gaudi_get_device_time(struct hl_device *hdev) { u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32; @@ -9275,7 +8922,7 @@ static int gaudi_add_sync_to_engine_map_entry( */ if (reg_value == 0 || reg_value == 0xffffffff) return 0; - reg_value -= (u32)CFG_BASE; + reg_value -= lower_32_bits(CFG_BASE); /* create a new hash entry */ entry = kzalloc(sizeof(*entry), GFP_KERNEL); @@ -9293,23 +8940,15 @@ static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map) { struct hl_state_dump_specs *sds = &hdev->state_dump_specs; - struct gaudi_device *gaudi = hdev->asic_specific; int i, j, rc; u32 reg_value; /* Iterate over TPC engines */ for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) { - /* TPC registered must be accessed with clock gating disabled */ - mutex_lock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->disable_clock_gating(hdev); reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] + 
sds->props[SP_NEXT_TPC] * i); - /* We can reenable clock_gating */ - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); - rc = gaudi_add_sync_to_engine_map_entry(map, reg_value, ENGINE_TPC, i); if (rc) @@ -9319,20 +8958,11 @@ static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev, /* Iterate over MME engines */ for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) { for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) { - /* MME registered must be accessed with clock gating - * disabled - */ - mutex_lock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->disable_clock_gating(hdev); reg_value = RREG32(sds->props[SP_MME_CFG_SO] + sds->props[SP_NEXT_MME] * i + j * sizeof(u32)); - /* We can reenable clock_gating */ - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); - rc = gaudi_add_sync_to_engine_map_entry( map, reg_value, ENGINE_MME, i * sds->props[SP_SUB_MME_ENG_NUM] + j); @@ -9537,6 +9167,35 @@ static u32 *gaudi_get_stream_master_qid_arr(void) return gaudi_stream_master; } +static void gaudi_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_sizes *info) +{ + /* set 0 since multiple pages are not supported */ + info->page_order_bitmask = 0; +} + +static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + struct cpucp_info *cpucp_info; + + cpucp_info = &hdev->asic_prop.cpucp_info; + + return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version)); +} + +static DEVICE_ATTR_RO(infineon_ver); + +static struct attribute *gaudi_vrm_dev_attrs[] = { + &dev_attr_infineon_ver.attr, +}; + +static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, + struct attribute_group *dev_vrm_attr_grp) +{ + hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp); + dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs; +} + static const struct hl_asic_funcs gaudi_funcs = { .early_init = gaudi_early_init, .early_fini = gaudi_early_fini, @@ -9555,36 +9214,31 @@ static const struct hl_asic_funcs gaudi_funcs = { .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent, .asic_dma_free_coherent = gaudi_dma_free_coherent, .scrub_device_mem = gaudi_scrub_device_mem, + .scrub_device_dram = gaudi_scrub_device_dram, .get_int_queue_base = gaudi_get_int_queue_base, .test_queues = gaudi_test_queues, .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc, .asic_dma_pool_free = gaudi_dma_pool_free, .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc, .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free, - .hl_dma_unmap_sg = gaudi_dma_unmap_sg, + .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, .cs_parser = gaudi_cs_parser, - .asic_dma_map_sg = gaudi_dma_map_sg, + .asic_dma_map_sgtable = hl_dma_map_sgtable, .get_dma_desc_list_size = gaudi_get_dma_desc_list_size, .add_end_of_cb_packets = gaudi_add_end_of_cb_packets, .update_eq_ci = gaudi_update_eq_ci, .context_switch = gaudi_context_switch, .restore_phase_topology = gaudi_restore_phase_topology, - .debugfs_read32 = gaudi_debugfs_read32, - .debugfs_write32 = gaudi_debugfs_write32, - .debugfs_read64 = gaudi_debugfs_read64, - .debugfs_write64 = gaudi_debugfs_write64, .debugfs_read_dma = gaudi_debugfs_read_dma, - .add_device_attr = hl_add_device_attr, + .add_device_attr = gaudi_add_device_attr, .handle_eqe = gaudi_handle_eqe, - .set_pll_profile = hl_set_pll_profile, .get_events_stat = gaudi_get_events_stat, .read_pte = gaudi_read_pte, .write_pte = gaudi_write_pte, 
.mmu_invalidate_cache = gaudi_mmu_invalidate_cache, .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range, + .mmu_prefetch_cache_range = NULL, .send_heartbeat = gaudi_send_heartbeat, - .set_clock_gating = gaudi_set_clock_gating, - .disable_clock_gating = gaudi_disable_clock_gating, .debug_coresight = gaudi_debug_coresight, .is_device_idle = gaudi_is_device_idle, .non_hard_reset_late_init = gaudi_non_hard_reset_late_init, @@ -9592,6 +9246,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .hw_queues_unlock = gaudi_hw_queues_unlock, .get_pci_id = gaudi_get_pci_id, .get_eeprom_data = gaudi_get_eeprom_data, + .get_monitor_dump = gaudi_get_monitor_dump, .send_cpu_message = gaudi_send_cpu_message, .pci_bars_map = gaudi_pci_bars_map, .init_iatu = gaudi_init_iatu, @@ -9600,7 +9255,6 @@ static const struct hl_asic_funcs gaudi_funcs = { .halt_coresight = gaudi_halt_coresight, .ctx_init = gaudi_ctx_init, .ctx_fini = gaudi_ctx_fini, - .get_clk_rate = hl_get_clk_rate, .get_queue_id_for_cq = gaudi_get_queue_id_for_cq, .load_firmware_to_device = gaudi_load_firmware_to_device, .load_boot_fit_to_device = gaudi_load_boot_fit_to_device, @@ -9610,7 +9264,6 @@ static const struct hl_asic_funcs gaudi_funcs = { .gen_wait_cb = gaudi_gen_wait_cb, .reset_sob = gaudi_reset_sob, .reset_sob_group = gaudi_reset_sob_group, - .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw, .get_device_time = gaudi_get_device_time, .collective_wait_init_cs = gaudi_collective_wait_init_cs, .collective_wait_create_jobs = gaudi_collective_wait_create_jobs, @@ -9626,7 +9279,12 @@ static const struct hl_asic_funcs gaudi_funcs = { .state_dump_init = gaudi_state_dump_init, .get_sob_addr = gaudi_get_sob_addr, .set_pci_memory_regions = gaudi_set_pci_memory_regions, - .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr + .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr, + .is_valid_dram_page_size = NULL, + .mmu_get_real_page_size = hl_mmu_get_real_page_size, + .get_valid_dram_page_orders = gaudi_get_valid_dram_page_orders, + .access_dev_mem = hl_access_dev_mem, + .set_dram_bar_base = gaudi_set_hbm_bar_base, }; /** diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index 8ac16a9b7d15..4fbcf3f0afe5 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2019-2020 HabanaLabs, Ltd. + * Copyright 2019-2022 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -148,14 +148,14 @@ #define MME_QMAN_LENGTH 1024 #define MME_QMAN_SIZE_IN_BYTES (MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) -#define HBM_DMA_QMAN_LENGTH 1024 +#define HBM_DMA_QMAN_LENGTH 4096 #define HBM_DMA_QMAN_SIZE_IN_BYTES \ (HBM_DMA_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) #define TPC_QMAN_LENGTH 1024 #define TPC_QMAN_SIZE_IN_BYTES (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) -#define NIC_QMAN_LENGTH 1024 +#define NIC_QMAN_LENGTH 4096 #define NIC_QMAN_SIZE_IN_BYTES (NIC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) @@ -177,7 +177,6 @@ #define HW_CAP_MSI BIT(6) #define HW_CAP_CPU_Q BIT(7) #define HW_CAP_HBM_DMA BIT(8) -#define HW_CAP_CLK_GATE BIT(9) #define HW_CAP_SRAM_SCRAMBLER BIT(10) #define HW_CAP_HBM_SCRAMBLER BIT(11) @@ -313,8 +312,6 @@ struct gaudi_internal_qman_info { * struct gaudi_device - ASIC specific manage structure. * @cpucp_info_get: get information on device from CPU-CP * @hw_queues_lock: protects the H/W queues from concurrent access. 
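The gaudi_funcs table above is the driver's ASIC-dispatch mechanism: one const struct of function pointers per ASIC, with NULL entries (e.g. .mmu_prefetch_cache_range, .is_valid_dram_page_size) marking operations the ASIC does not support, which common code must check before calling. A minimal sketch of the pattern with illustrative names:

/* Minimal sketch of the asic_funcs dispatch pattern: one ops struct per
 * ASIC, NULL entries for unsupported operations. Names are illustrative. */
#include <stdio.h>

struct demo_device;

struct demo_asic_funcs {
        int (*scrub_dram)(struct demo_device *dev, unsigned long long val);
        int (*get_monitor_dump)(struct demo_device *dev, void *data);
};

struct demo_device {
        const char *name;
        const struct demo_asic_funcs *funcs;
};

static int demo_scrub(struct demo_device *dev, unsigned long long val)
{
        printf("%s: scrubbing DRAM with pattern 0x%llx\n", dev->name, val);
        return 0;
}

static const struct demo_asic_funcs demo_funcs = {
        .scrub_dram = demo_scrub,
        .get_monitor_dump = NULL,       /* not supported on this ASIC */
};

int main(void)
{
        struct demo_device dev = { .name = "demo", .funcs = &demo_funcs };

        /* mirrors gaudi_scrub_device_dram(hdev, 0xdeadbeaf) above */
        dev.funcs->scrub_dram(&dev, 0xdeadbeafULL);
        if (!dev.funcs->get_monitor_dump)
                printf("%s: monitor dump not supported\n", dev.name);
        return 0;
}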
- * @clk_gate_mutex: protects code areas that require clock gating to be disabled - * temporarily * @internal_qmans: Internal QMANs information. The array size is larger than * the actual number of internal queues because they are not in * consecutive order. @@ -337,7 +334,6 @@ struct gaudi_device { /* TODO: remove hw_queues_lock after moving to scheduler code */ spinlock_t hw_queues_lock; - struct mutex clk_gate_mutex; struct gaudi_internal_qman_info internal_qmans[GAUDI_QUEUE_ID_SIZE]; @@ -355,8 +351,6 @@ struct gaudi_device { void gaudi_init_security(struct hl_device *hdev); void gaudi_ack_protection_bits_errors(struct hl_device *hdev); -void gaudi_add_device_attr(struct hl_device *hdev, - struct attribute_group *dev_attr_grp); int gaudi_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data); void gaudi_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx); void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index fbcc7bbf44b3..4cde505a7416 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -390,6 +390,8 @@ int goya_set_fixed_properties(struct hl_device *hdev) } prop->device_dma_offset_for_host_access = HOST_PHYS_BASE; + prop->host_base_address = HOST_PHYS_BASE; + prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE; prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; prop->dram_base_address = DRAM_PHYS_BASE; @@ -413,23 +415,27 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; + prop->device_mem_alloc_default_page_size = prop->dram_page_size; prop->dram_supports_virtual_memory = true; - prop->dmmu.hop0_shift = MMU_V1_0_HOP0_SHIFT; - prop->dmmu.hop1_shift = MMU_V1_0_HOP1_SHIFT; - prop->dmmu.hop2_shift = MMU_V1_0_HOP2_SHIFT; - prop->dmmu.hop3_shift = MMU_V1_0_HOP3_SHIFT; - prop->dmmu.hop4_shift = MMU_V1_0_HOP4_SHIFT; - prop->dmmu.hop0_mask = MMU_V1_0_HOP0_MASK; - prop->dmmu.hop1_mask = MMU_V1_0_HOP1_MASK; - prop->dmmu.hop2_mask = MMU_V1_0_HOP2_MASK; - prop->dmmu.hop3_mask = MMU_V1_0_HOP3_MASK; - prop->dmmu.hop4_mask = MMU_V1_0_HOP4_MASK; + prop->dmmu.hop_shifts[MMU_HOP0] = MMU_V1_0_HOP0_SHIFT; + prop->dmmu.hop_shifts[MMU_HOP1] = MMU_V1_0_HOP1_SHIFT; + prop->dmmu.hop_shifts[MMU_HOP2] = MMU_V1_0_HOP2_SHIFT; + prop->dmmu.hop_shifts[MMU_HOP3] = MMU_V1_0_HOP3_SHIFT; + prop->dmmu.hop_shifts[MMU_HOP4] = MMU_V1_0_HOP4_SHIFT; + prop->dmmu.hop_masks[MMU_HOP0] = MMU_V1_0_HOP0_MASK; + prop->dmmu.hop_masks[MMU_HOP1] = MMU_V1_0_HOP1_MASK; + prop->dmmu.hop_masks[MMU_HOP2] = MMU_V1_0_HOP2_MASK; + prop->dmmu.hop_masks[MMU_HOP3] = MMU_V1_0_HOP3_MASK; + prop->dmmu.hop_masks[MMU_HOP4] = MMU_V1_0_HOP4_MASK; prop->dmmu.start_addr = VA_DDR_SPACE_START; prop->dmmu.end_addr = VA_DDR_SPACE_END; prop->dmmu.page_size = PAGE_SIZE_2MB; prop->dmmu.num_hops = MMU_ARCH_5_HOPS; prop->dmmu.last_mask = LAST_MASK; + /* TODO: will be duplicated until implementing per-MMU props */ + prop->dmmu.hop_table_size = prop->mmu_hop_table_size; + prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; /* shifts and masks are the same in PMMU and DMMU */ memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); @@ -438,6 
+444,9 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->pmmu.page_size = PAGE_SIZE_4KB; prop->pmmu.num_hops = MMU_ARCH_5_HOPS; prop->pmmu.last_mask = LAST_MASK; + /* TODO: will be duplicated until implementing per-MMU props */ + prop->pmmu.hop_table_size = prop->mmu_hop_table_size; + prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; /* PMMU and HPMMU are the same except of page size */ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); @@ -477,6 +486,12 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->use_get_power_for_reset_history = true; + prop->configurable_stop_on_err = true; + + prop->set_max_power_on_device_init = true; + + prop->dma_mask = 48; + return 0; } @@ -564,8 +579,6 @@ static int goya_init_iatu(struct hl_device *hdev) if (rc) goto done; - hdev->asic_funcs->set_dma_mask_from_fw(hdev); - /* Outbound Region 0 - Point to Host */ outbound_region.addr = HOST_PHYS_BASE; outbound_region.size = HOST_PHYS_SIZE; @@ -893,7 +906,7 @@ int goya_late_init(struct hl_device *hdev) goya->pm_mng_profile = PM_AUTO; - hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW); + goya_set_pll_profile(hdev, PLL_LOW); schedule_delayed_work(&goya->goya_work->work_freq, usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); @@ -2469,9 +2482,6 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_r { u32 wait_timeout_ms; - dev_info(hdev->dev, - "Halting compute engines and disabling interrupts\n"); - if (hdev->pldm) wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC; else @@ -2700,8 +2710,7 @@ int goya_mmu_init(struct hl_device *hdev) WREG32_AND(mmSTLB_STLB_FEATURE_EN, (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK)); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, - MMU_OP_USERPTR | MMU_OP_PHYS_PACK); + hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR | MMU_OP_PHYS_PACK); WREG32(mmMMU_MMU_ENABLE, 1); WREG32(mmMMU_SPI_MASK, 0xF); @@ -2816,12 +2825,12 @@ static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) goya_set_pll_refclk(hdev); WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL); - dev_info(hdev->dev, + dev_dbg(hdev->dev, "Issued HARD reset command, going to wait %dms\n", reset_timeout_ms); } else { WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET); - dev_info(hdev->dev, + dev_dbg(hdev->dev, "Issued SOFT reset command, going to wait %dms\n", reset_timeout_ms); } @@ -3302,35 +3311,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); } -static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir)) - return -ENOMEM; - - /* Shift to the device's base physical address of host memory */ - for_each_sg(sgl, sg, nents, i) - sg->dma_address += HOST_PHYS_BASE; - - return 0; -} - -static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - /* Cancel the device's base physical address of host memory */ - for_each_sg(sgl, sg, nents, i) - sg->dma_address -= HOST_PHYS_BASE; - - dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir); -} - u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) { struct scatterlist *sg, *sg_next_iter; @@ -3340,8 +3320,7 @@ u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) dma_desc_cnt = 0; - for_each_sg(sgt->sgl, 
sg, sgt->nents, count) { - + for_each_sgtable_dma_sg(sgt, sg, count) { len = sg_dma_len(sg); addr = sg_dma_address(sg); @@ -3395,8 +3374,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev, list_add_tail(&userptr->job_node, parser->job_userptr_list); - rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, - userptr->sgt->nents, dir); + rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); if (rc) { dev_err(hdev->dev, "failed to map sgt with DMA region\n"); goto unpin_memory; @@ -3860,7 +3838,7 @@ static int goya_patch_dma_packet(struct hl_device *hdev, sgt = userptr->sgt; dma_desc_cnt = 0; - for_each_sg(sgt->sgl, sg, sgt->nents, count) { + for_each_sgtable_dma_sg(sgt, sg, count) { len = sg_dma_len(sg); dma_addr = sg_dma_address(sg); @@ -4023,7 +4001,7 @@ static int goya_patch_cb(struct hl_device *hdev, static int goya_parse_cb_mmu(struct hl_device *hdev, struct hl_cs_parser *parser) { - u64 patched_cb_handle; + u64 handle; u32 patched_cb_size; struct hl_cb *user_cb; int rc; @@ -4036,9 +4014,9 @@ static int goya_parse_cb_mmu(struct hl_device *hdev, parser->patched_cb_size = parser->user_cb_size + sizeof(struct packet_msg_prot) * 2; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, + rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, parser->patched_cb_size, false, false, - &patched_cb_handle); + &handle); if (rc) { dev_err(hdev->dev, @@ -4047,13 +4025,10 @@ static int goya_parse_cb_mmu(struct hl_device *hdev, return rc; } - patched_cb_handle >>= PAGE_SHIFT; - parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, - (u32) patched_cb_handle); + parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); /* hl_cb_get should never fail here */ if (!parser->patched_cb) { - dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n", - (u32) patched_cb_handle); + dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); rc = -EFAULT; goto out; } @@ -4093,8 +4068,7 @@ out: * cb_put will release it, but here we want to remove it from the * idr */ - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, - patched_cb_handle << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, handle); return rc; } @@ -4102,7 +4076,7 @@ out: static int goya_parse_cb_no_mmu(struct hl_device *hdev, struct hl_cs_parser *parser) { - u64 patched_cb_handle; + u64 handle; int rc; rc = goya_validate_cb(hdev, parser, false); @@ -4110,22 +4084,19 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev, if (rc) goto free_userptr; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, + rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, parser->patched_cb_size, false, false, - &patched_cb_handle); + &handle); if (rc) { dev_err(hdev->dev, "Failed to allocate patched CB for DMA CS %d\n", rc); goto free_userptr; } - patched_cb_handle >>= PAGE_SHIFT; - parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, - (u32) patched_cb_handle); + parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); /* hl_cb_get should never fail here */ if (!parser->patched_cb) { - dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n", - (u32) patched_cb_handle); + dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); rc = -EFAULT; goto out; } @@ -4142,8 +4113,7 @@ out: * cb_put will release it, but here we want to remove it from the * idr */ - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, - patched_cb_handle << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, handle); free_userptr: if (rc) @@ -4250,224 +4220,7 @@ static void goya_clear_sm_regs(struct 
hl_device *hdev) i = RREG32(mmSYNC_MNGR_SOB_OBJ_0); } -/* - * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped - * address. - * - * @hdev: pointer to hl_device structure - * @addr: device or host mapped address - * @val: returned value - * - * In case of DDR address that is not mapped into the default aperture that - * the DDR bar exposes, the function will configure the iATU so that the DDR - * bar will be positioned at a base address that allows reading from the - * required address. Configuring the iATU during normal operation can - * lead to undefined behavior and therefore, should be done with extreme care - * - */ -static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, - bool user_address, u32 *val) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 ddr_bar_addr, host_phys_end; - int rc = 0; - - host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; - - if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - *val = RREG32(addr - CFG_BASE); - - } else if ((addr >= SRAM_BASE_ADDR) && - (addr < SRAM_BASE_ADDR + SRAM_SIZE)) { - - *val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] + - (addr - SRAM_BASE_ADDR)); - - } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { - - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); - - ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr); - if (ddr_bar_addr != U64_MAX) { - *val = readl(hdev->pcie_bar[DDR_BAR_ID] + - (addr - bar_base_addr)); - - ddr_bar_addr = goya_set_ddr_bar_base(hdev, - ddr_bar_addr); - } - if (ddr_bar_addr == U64_MAX) - rc = -EIO; - - } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && - user_address && !iommu_present(&pci_bus_type)) { - *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE); - - } else { - rc = -EFAULT; - } - - return rc; -} - -/* - * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped - * address. - * - * @hdev: pointer to hl_device structure - * @addr: device or host mapped address - * @val: returned value - * - * In case of DDR address that is not mapped into the default aperture that - * the DDR bar exposes, the function will configure the iATU so that the DDR - * bar will be positioned at a base address that allows writing to the - * required address. 
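The removed goya_debugfs_* helpers all share one addressing trick: the target address is masked down to a BAR-size-aligned window base (the BAR size is a power of two), the BAR is re-pointed there via goya_set_ddr_bar_base() or gaudi_set_hbm_bar_base(), and the access happens at the offset inside the window; the driver additionally offsets the base by DRAM_PHYS_BASE. The masking arithmetic in isolation, with illustrative sizes:

/* The BAR-windowing arithmetic from the removed helpers, standalone:
 * mask the target address down to a window-size-aligned base, then
 * access at the offset inside that window. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t bar_size = 1ULL << 28;         /* 256 MiB window, power of two */
        uint64_t addr = 0x1234abcdULL;          /* illustrative device address */
        uint64_t bar_base = addr & ~(bar_size - 1);     /* aligned window base */

        printf("window base: 0x%llx\n", (unsigned long long)bar_base);
        printf("offset:      0x%llx\n",
               (unsigned long long)(addr - bar_base));
        return 0;
}

Because bar_size is a power of two, ~(bar_size - 1) is a contiguous high-bit mask, so any address within DRAM lands in exactly one window and the offset is guaranteed to fit inside the BAR.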
Configuring the iATU during normal operation can - * lead to undefined behavior and therefore, should be done with extreme care - * - */ -static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, - bool user_address, u32 val) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 ddr_bar_addr, host_phys_end; - int rc = 0; - - host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; - - if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - WREG32(addr - CFG_BASE, val); - - } else if ((addr >= SRAM_BASE_ADDR) && - (addr < SRAM_BASE_ADDR + SRAM_SIZE)) { - - writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] + - (addr - SRAM_BASE_ADDR)); - - } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { - - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); - - ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr); - if (ddr_bar_addr != U64_MAX) { - writel(val, hdev->pcie_bar[DDR_BAR_ID] + - (addr - bar_base_addr)); - - ddr_bar_addr = goya_set_ddr_bar_base(hdev, - ddr_bar_addr); - } - if (ddr_bar_addr == U64_MAX) - rc = -EIO; - - } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && - user_address && !iommu_present(&pci_bus_type)) { - *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; - - } else { - rc = -EFAULT; - } - - return rc; -} - -static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, - bool user_address, u64 *val) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 ddr_bar_addr, host_phys_end; - int rc = 0; - - host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; - - if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - u32 val_l = RREG32(addr - CFG_BASE); - u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); - - *val = (((u64) val_h) << 32) | val_l; - - } else if ((addr >= SRAM_BASE_ADDR) && - (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) { - - *val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] + - (addr - SRAM_BASE_ADDR)); - - } else if (addr <= - DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { - - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); - - ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr); - if (ddr_bar_addr != U64_MAX) { - *val = readq(hdev->pcie_bar[DDR_BAR_ID] + - (addr - bar_base_addr)); - - ddr_bar_addr = goya_set_ddr_bar_base(hdev, - ddr_bar_addr); - } - if (ddr_bar_addr == U64_MAX) - rc = -EIO; - - } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && - user_address && !iommu_present(&pci_bus_type)) { - *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); - - } else { - rc = -EFAULT; - } - - return rc; -} - -static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, - bool user_address, u64 val) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 ddr_bar_addr, host_phys_end; - int rc = 0; - - host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; - - if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - WREG32(addr - CFG_BASE, lower_32_bits(val)); - WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val)); - - } else if ((addr >= SRAM_BASE_ADDR) && - (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) { - - writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] + - (addr - SRAM_BASE_ADDR)); - - } else if (addr <= - DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { - - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); - - ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr); - if (ddr_bar_addr != U64_MAX) { - writeq(val, 
hdev->pcie_bar[DDR_BAR_ID] + - (addr - bar_base_addr)); - - ddr_bar_addr = goya_set_ddr_bar_base(hdev, - ddr_bar_addr); - } - if (ddr_bar_addr == U64_MAX) - rc = -EIO; - - } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && - user_address && !iommu_present(&pci_bus_type)) { - *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; - - } else { - rc = -EFAULT; - } - - return rc; -} - -static int goya_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, - void *blob_addr) +static int goya_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr) { dev_err(hdev->dev, "Reading via DMA is unimplemented yet\n"); return -EPERM; @@ -5092,7 +4845,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, release_cb: hl_cb_put(cb); - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); return rc; } @@ -5341,7 +5094,7 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev, /* Treat as invalidate all because there is no range invalidation * in Goya */ - return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); + return hl_mmu_invalidate_cache(hdev, is_hard, flags); } int goya_send_heartbeat(struct hl_device *hdev) @@ -5391,16 +5144,6 @@ int goya_cpucp_info_get(struct hl_device *hdev) return 0; } -static void goya_set_clock_gating(struct hl_device *hdev) -{ - /* clock gating not supported in Goya */ -} - -static void goya_disable_clock_gating(struct hl_device *hdev) -{ - /* clock gating not supported in Goya */ -} - static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, struct seq_file *s) { @@ -5562,20 +5305,6 @@ static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group) } -static void goya_set_dma_mask_from_fw(struct hl_device *hdev) -{ - if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) == - HL_POWER9_HOST_MAGIC) { - dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n"); - hdev->power9_64bit_dma_enable = 1; - hdev->dma_mask = 64; - } else { - dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n"); - hdev->power9_64bit_dma_enable = 0; - hdev->dma_mask = 48; - } -} - u64 goya_get_device_time(struct hl_device *hdev) { u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32; @@ -5688,6 +5417,22 @@ static u32 *goya_get_stream_master_qid_arr(void) return NULL; } +static void goya_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_sizes *info) +{ + /* set 0 since multiple pages are not supported */ + info->page_order_bitmask = 0; +} + +static int goya_get_monitor_dump(struct hl_device *hdev, void *data) +{ + return -EOPNOTSUPP; +} + +static int goya_scrub_device_dram(struct hl_device *hdev, u64 val) +{ + return -EOPNOTSUPP; +} + static const struct hl_asic_funcs goya_funcs = { .early_init = goya_early_init, .early_fini = goya_early_fini, @@ -5706,36 +5451,31 @@ static const struct hl_asic_funcs goya_funcs = { .asic_dma_alloc_coherent = goya_dma_alloc_coherent, .asic_dma_free_coherent = goya_dma_free_coherent, .scrub_device_mem = goya_scrub_device_mem, + .scrub_device_dram = goya_scrub_device_dram, .get_int_queue_base = goya_get_int_queue_base, .test_queues = goya_test_queues, .asic_dma_pool_zalloc = goya_dma_pool_zalloc, .asic_dma_pool_free = goya_dma_pool_free, .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc, .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free, - .hl_dma_unmap_sg = goya_dma_unmap_sg, + .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, .cs_parser = 
goya_cs_parser, - .asic_dma_map_sg = goya_dma_map_sg, + .asic_dma_map_sgtable = hl_dma_map_sgtable, .get_dma_desc_list_size = goya_get_dma_desc_list_size, .add_end_of_cb_packets = goya_add_end_of_cb_packets, .update_eq_ci = goya_update_eq_ci, .context_switch = goya_context_switch, .restore_phase_topology = goya_restore_phase_topology, - .debugfs_read32 = goya_debugfs_read32, - .debugfs_write32 = goya_debugfs_write32, - .debugfs_read64 = goya_debugfs_read64, - .debugfs_write64 = goya_debugfs_write64, .debugfs_read_dma = goya_debugfs_read_dma, .add_device_attr = goya_add_device_attr, .handle_eqe = goya_handle_eqe, - .set_pll_profile = goya_set_pll_profile, .get_events_stat = goya_get_events_stat, .read_pte = goya_read_pte, .write_pte = goya_write_pte, .mmu_invalidate_cache = goya_mmu_invalidate_cache, .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range, + .mmu_prefetch_cache_range = NULL, .send_heartbeat = goya_send_heartbeat, - .set_clock_gating = goya_set_clock_gating, - .disable_clock_gating = goya_disable_clock_gating, .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle, .non_hard_reset_late_init = goya_non_hard_reset_late_init, @@ -5743,6 +5483,7 @@ static const struct hl_asic_funcs goya_funcs = { .hw_queues_unlock = goya_hw_queues_unlock, .get_pci_id = goya_get_pci_id, .get_eeprom_data = goya_get_eeprom_data, + .get_monitor_dump = goya_get_monitor_dump, .send_cpu_message = goya_send_cpu_message, .pci_bars_map = goya_pci_bars_map, .init_iatu = goya_init_iatu, @@ -5751,7 +5492,6 @@ static const struct hl_asic_funcs goya_funcs = { .halt_coresight = goya_halt_coresight, .ctx_init = goya_ctx_init, .ctx_fini = goya_ctx_fini, - .get_clk_rate = hl_get_clk_rate, .get_queue_id_for_cq = goya_get_queue_id_for_cq, .load_firmware_to_device = goya_load_firmware_to_device, .load_boot_fit_to_device = goya_load_boot_fit_to_device, @@ -5761,7 +5501,6 @@ static const struct hl_asic_funcs goya_funcs = { .gen_wait_cb = goya_gen_wait_cb, .reset_sob = goya_reset_sob, .reset_sob_group = goya_reset_sob_group, - .set_dma_mask_from_fw = goya_set_dma_mask_from_fw, .get_device_time = goya_get_device_time, .collective_wait_init_cs = goya_collective_wait_init_cs, .collective_wait_create_jobs = goya_collective_wait_create_jobs, @@ -5778,6 +5517,11 @@ static const struct hl_asic_funcs goya_funcs = { .get_sob_addr = &goya_get_sob_addr, .set_pci_memory_regions = goya_set_pci_memory_regions, .get_stream_master_qid_arr = goya_get_stream_master_qid_arr, + .is_valid_dram_page_size = NULL, + .mmu_get_real_page_size = hl_mmu_get_real_page_size, + .get_valid_dram_page_orders = goya_get_valid_dram_page_orders, + .access_dev_mem = hl_access_dev_mem, + .set_dram_bar_base = goya_set_ddr_bar_base, }; /* diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 3740fd25bf84..647f57402616 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. 
* */ @@ -217,8 +217,8 @@ u64 goya_get_max_power(struct hl_device *hdev); void goya_set_max_power(struct hl_device *hdev, u64 value); void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq); -void goya_add_device_attr(struct hl_device *hdev, - struct attribute_group *dev_attr_grp); +void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, + struct attribute_group *dev_vrm_attr_grp); int goya_cpucp_info_get(struct hl_device *hdev); int goya_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data); void goya_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx); diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c index 76b47749affe..6580fc6a486a 100644 --- a/drivers/misc/habanalabs/goya/goya_hwmgr.c +++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -11,21 +11,24 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq) { struct goya_device *goya = hdev->asic_specific; + if (!hdev->pdev) + return; + switch (freq) { case PLL_HIGH: - hl_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll); - hl_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll); - hl_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll); + hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll); + hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll); + hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll); break; case PLL_LOW: - hl_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW); - hl_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW); - hl_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW); + hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW); + hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW); + hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW); break; case PLL_LAST: - hl_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk); - hl_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk); - hl_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk); + hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk); + hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk); + hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk); break; default: dev_err(hdev->dev, "unknown frequency setting\n"); @@ -41,7 +44,7 @@ static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false); + value = hl_fw_get_frequency(hdev, HL_GOYA_MME_PLL, false); if (value < 0) return value; @@ -74,7 +77,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - hl_set_frequency(hdev, HL_GOYA_MME_PLL, value); + hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, value); goya->mme_clk = value; fail: @@ -90,7 +93,7 @@ static ssize_t tpc_clk_show(struct device *dev, struct device_attribute *attr, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, false); + value = hl_fw_get_frequency(hdev, HL_GOYA_TPC_PLL, false); if (value < 0) return value; @@ -123,7 +126,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - hl_set_frequency(hdev, HL_GOYA_TPC_PLL, value); + hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, value); goya->tpc_clk = value; fail: @@ -139,7 
+142,7 @@ static ssize_t ic_clk_show(struct device *dev, struct device_attribute *attr, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, false); + value = hl_fw_get_frequency(hdev, HL_GOYA_IC_PLL, false); if (value < 0) return value; @@ -172,7 +175,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - hl_set_frequency(hdev, HL_GOYA_IC_PLL, value); + hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, value); goya->ic_clk = value; fail: @@ -188,7 +191,7 @@ static ssize_t mme_clk_curr_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true); + value = hl_fw_get_frequency(hdev, HL_GOYA_MME_PLL, true); if (value < 0) return value; @@ -205,7 +208,7 @@ static ssize_t tpc_clk_curr_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, true); + value = hl_fw_get_frequency(hdev, HL_GOYA_TPC_PLL, true); if (value < 0) return value; @@ -222,7 +225,7 @@ static ssize_t ic_clk_curr_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, true); + value = hl_fw_get_frequency(hdev, HL_GOYA_IC_PLL, true); if (value < 0) return value; @@ -347,7 +350,7 @@ static DEVICE_ATTR_RW(pm_mng_profile); static DEVICE_ATTR_RW(tpc_clk); static DEVICE_ATTR_RO(tpc_clk_curr); -static struct attribute *goya_dev_attrs[] = { +static struct attribute *goya_clk_dev_attrs[] = { &dev_attr_high_pll.attr, &dev_attr_ic_clk.attr, &dev_attr_ic_clk_curr.attr, @@ -356,11 +359,27 @@ static struct attribute *goya_dev_attrs[] = { &dev_attr_pm_mng_profile.attr, &dev_attr_tpc_clk.attr, &dev_attr_tpc_clk_curr.attr, - NULL, }; -void goya_add_device_attr(struct hl_device *hdev, - struct attribute_group *dev_attr_grp) +static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + struct cpucp_info *cpucp_info; + + cpucp_info = &hdev->asic_prop.cpucp_info; + + return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version)); +} + +static DEVICE_ATTR_RO(infineon_ver); + +static struct attribute *goya_vrm_dev_attrs[] = { + &dev_attr_infineon_ver.attr, +}; + +void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, + struct attribute_group *dev_vrm_attr_grp) { - dev_attr_grp->attrs = goya_dev_attrs; + dev_clk_attr_grp->attrs = goya_clk_dev_attrs; + dev_vrm_attr_grp->attrs = goya_vrm_dev_attrs; } diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index 737c39f33f05..38e44b6cf581 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -389,6 +389,14 @@ enum pq_init_status { * * CPUCP_PACKET_ENGINE_CORE_ASID_SET - * Packet to perform engine core ASID configuration + * + * CPUCP_PACKET_MONITOR_DUMP_GET - + * Get monitors registers dump from the CpuCP kernel. + * The CPU will put the registers dump in a buffer allocated by the driver + * whose address is passed via the CpuCp packet. In addition, the host's driver + * passes the max size it allows the CpuCP to write to the structure, to prevent + * data corruption in case of mismatched driver/FW versions. + * Relevant only to Gaudi.
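The goya_hwmgr.c hunk above splits the former single sysfs attribute array into separate clock and VRM groups that the common habanalabs code can register on its own. As a rough sketch of the underlying sysfs pattern (generic names, not the driver's actual registration path):

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t foo_show(struct device *dev, struct device_attribute *attr, char *buf)
{
        return sprintf(buf, "%d\n", 42);        /* placeholder value */
}
static DEVICE_ATTR_RO(foo);     /* generates dev_attr_foo, wired to foo_show() */

static struct attribute *clk_attrs[] = {
        &dev_attr_foo.attr,
        NULL,   /* sysfs attribute arrays are conventionally NULL-terminated */
};

/* Fill a caller-provided group, mirroring goya_add_device_attr() above;
 * the common code would then register it, e.g. with device_add_group(). */
static void fill_clk_group(struct attribute_group *grp)
{
        grp->attrs = clk_attrs;
}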
*/ enum cpucp_packet_id { @@ -439,6 +447,11 @@ enum cpucp_packet_id { CPUCP_PACKET_POWER_SET, /* internal */ CPUCP_PACKET_RESERVED, /* not used */ CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */ + CPUCP_PACKET_RESERVED2, /* not used */ + CPUCP_PACKET_RESERVED3, /* not used */ + CPUCP_PACKET_RESERVED4, /* not used */ + CPUCP_PACKET_RESERVED5, /* not used */ + CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ }; #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 @@ -540,19 +553,25 @@ struct cpucp_packet { struct cpucp_unmask_irq_arr_packet { struct cpucp_packet cpucp_pkt; __le32 length; - __le32 irqs[0]; + __le32 irqs[]; }; struct cpucp_nic_status_packet { struct cpucp_packet cpucp_pkt; __le32 length; - __le32 data[0]; + __le32 data[]; }; struct cpucp_array_data_packet { struct cpucp_packet cpucp_pkt; __le32 length; - __le32 data[0]; + __le32 data[]; +}; + +enum cpucp_led_index { + CPUCP_LED0_INDEX = 0, + CPUCP_LED1_INDEX, + CPUCP_LED2_INDEX }; enum cpucp_packet_rc { @@ -576,7 +595,10 @@ enum cpucp_temp_type { cpucp_temp_offset = 19, cpucp_temp_lowest = 21, cpucp_temp_highest = 22, - cpucp_temp_reset_history = 23 + cpucp_temp_reset_history = 23, + cpucp_temp_warn = 24, + cpucp_temp_max_crit = 25, + cpucp_temp_max_warn = 26, }; enum cpucp_in_attributes { @@ -686,6 +708,7 @@ enum pll_index { enum rl_index { TPC_RL = 0, MME_RL, + EDMA_RL, }; enum pvt_index { @@ -780,6 +803,7 @@ struct cpucp_security_info { * (0 = functional 1 = binned) * @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance * (0 = functional 1 = binned) + * @fw_os_version: Firmware OS Version */ struct cpucp_info { struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; @@ -807,6 +831,7 @@ struct cpucp_info { __le32 reserved6; __u8 pll_map[PLL_MAP_LEN]; __le64 mme_binning_mask; + __u8 fw_os_version[VERSION_MAX_LEN]; }; struct cpucp_mac_addr { @@ -818,6 +843,7 @@ enum cpucp_serdes_type { TYPE_2_SERDES_TYPE, HLS1_SERDES_TYPE, HLS1H_SERDES_TYPE, + HLS2_SERDES_TYPE, UNKNOWN_SERDES_TYPE, MAX_NUM_SERDES_TYPE = UNKNOWN_SERDES_TYPE }; @@ -831,9 +857,28 @@ struct cpucp_nic_info { __u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN]; __le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN]; __le16 serdes_type; /* enum cpucp_serdes_type */ + __le16 tx_swap_map[CPUCP_MAX_NICS]; __u8 reserved[6]; }; +#define PAGE_DISCARD_MAX 64 + +struct page_discard_info { + __u8 num_entries; + __u8 reserved[7]; + __le32 mmu_page_idx[PAGE_DISCARD_MAX]; +}; + +/* + * struct ser_val - the SER (symbol error rate) value is represented by "integer * 10 ^ -exp". + * @integer: the integer part of the SER value; + * @exp: the exponent part of the SER value. + */ +struct ser_val { + __le16 integer; + __le16 exp; +}; + /* * struct cpucp_nic_status - describes the status of a NIC port. * @port: NIC port index. @@ -887,4 +932,29 @@ struct cpucp_hbm_row_replaced_rows_info { struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX]; }; +/* + * struct dcore_monitor_regs_data - DCORE monitor regs data. + * the structure follows sync manager block layout. relevant only to Gaudi. + * @mon_pay_addrl: array of payload address low bits. + * @mon_pay_addrh: array of payload address high bits. + * @mon_pay_data: array of payload data. + * @mon_arm: array of monitor arm. + * @mon_status: array of monitor status. 
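The cpucp_if.h hunks above also convert the old GCC zero-length arrays (data[0]) to C99 flexible array members (data[]), which removes the zero-length-array ambiguity and lets compiler bounds checking understand the trailing buffer. A minimal sketch of how such a packet is typically sized and allocated (demo names, not habanalabs code):

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_packet {
        __le32 length;
        __le32 data[];          /* flexible array member: must be last */
};

static struct demo_packet *demo_packet_alloc(u32 nelems)
{
        struct demo_packet *pkt;

        /* struct_size() = sizeof(*pkt) + nelems * sizeof(pkt->data[0]),
         * with integer-overflow checking built in. */
        pkt = kzalloc(struct_size(pkt, data, nelems), GFP_KERNEL);
        if (pkt)
                pkt->length = cpu_to_le32(nelems);
        return pkt;
}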
+ */ +struct dcore_monitor_regs_data { + __le32 mon_pay_addrl[512]; + __le32 mon_pay_addrh[512]; + __le32 mon_pay_data[512]; + __le32 mon_arm[512]; + __le32 mon_status[512]; +}; + +/* contains SM data for each SYNC_MNGR (relevant only to Gaudi) */ +struct cpucp_monitor_dump { + struct dcore_monitor_regs_data sync_mngr_w_s; + struct dcore_monitor_regs_data sync_mngr_e_s; + struct dcore_monitor_regs_data sync_mngr_w_n; + struct dcore_monitor_regs_data sync_mngr_e_n; +}; + #endif /* CPUCP_IF_H */ diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index 135e21d6edc9..15f91ae9de6e 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -33,6 +33,7 @@ enum cpu_boot_err { CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18, CPU_BOOT_ERR_BINNING_FAIL = 19, CPU_BOOT_ERR_TPM_FAIL = 20, + CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21, CPU_BOOT_ERR_ENABLED = 31, CPU_BOOT_ERR_SCND_EN = 63, CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ @@ -111,6 +112,9 @@ enum cpu_boot_err { * * CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed. * + * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for temperature + * sensor. + * * CPU_BOOT_ERR0_ENABLED Error registers enabled. * This is a main indication that the * running FW populates the error @@ -134,6 +138,7 @@ enum cpu_boot_err { #define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR) #define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) #define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) +#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL) #define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) #define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h index d966bd4dfea6..c07ed4ed304c 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h @@ -311,6 +311,16 @@ enum gaudi_async_event_id { GAUDI_EVENT_FW_ALIVE_S = 645, GAUDI_EVENT_DEV_RESET_REQ = 646, GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647, + GAUDI_EVENT_STATUS_NIC0_ENG0 = 648, + GAUDI_EVENT_STATUS_NIC0_ENG1 = 649, + GAUDI_EVENT_STATUS_NIC1_ENG0 = 650, + GAUDI_EVENT_STATUS_NIC1_ENG1 = 651, + GAUDI_EVENT_STATUS_NIC2_ENG0 = 652, + GAUDI_EVENT_STATUS_NIC2_ENG1 = 653, + GAUDI_EVENT_STATUS_NIC3_ENG0 = 654, + GAUDI_EVENT_STATUS_NIC3_ENG1 = 655, + GAUDI_EVENT_STATUS_NIC4_ENG0 = 656, + GAUDI_EVENT_STATUS_NIC4_ENG1 = 657, GAUDI_EVENT_FIX_POWER_ENV_S = 658, GAUDI_EVENT_FIX_POWER_ENV_E = 659, GAUDI_EVENT_FIX_THERMAL_ENV_S = 660, diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h b/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h index 6e097ace2e96..66fc083a7c6a 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h @@ -54,7 +54,7 @@ struct gaudi_packet { /* The rest of the packet data follows.
Use the corresponding * packet_XXX struct to dereference the data, based on packet type */ - u8 contents[0]; + u8 contents[]; }; struct packet_nop { @@ -75,7 +75,7 @@ struct packet_wreg32 { struct packet_wreg_bulk { __le32 size64; __le32 ctl; - __le64 values[0]; /* data starts here */ + __le64 values[]; /* data starts here */ }; #define GAUDI_PKT_LONG_CTL_OP_SHIFT 20 diff --git a/drivers/misc/habanalabs/include/goya/goya_packets.h b/drivers/misc/habanalabs/include/goya/goya_packets.h index ef54bad20509..50ce5175b63a 100644 --- a/drivers/misc/habanalabs/include/goya/goya_packets.h +++ b/drivers/misc/habanalabs/include/goya/goya_packets.h @@ -62,7 +62,7 @@ struct goya_packet { /* The rest of the packet data follows. Use the corresponding * packet_XXX struct to dereference the data, based on packet type */ - u8 contents[0]; + u8 contents[]; }; struct packet_nop { @@ -86,7 +86,7 @@ struct packet_wreg32 { struct packet_wreg_bulk { __le32 size64; __le32 ctl; - __le64 values[0]; /* data starts here */ + __le64 values[]; /* data starts here */ }; struct packet_msg_long { diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h index 758f246627f8..cae8ac8bc5b1 100644 --- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h +++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h @@ -34,4 +34,14 @@ #define MMU_CONFIG_TIMEOUT_USEC 2000 /* 2 ms */ +enum mmu_hop_num { + MMU_HOP0, + MMU_HOP1, + MMU_HOP2, + MMU_HOP3, + MMU_HOP4, + MMU_HOP5, + MMU_HOP_MAX, +}; + #endif /* INCLUDE_MMU_GENERAL_H_ */ diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c index 1c763796cf1f..caa3de37698b 100644 --- a/drivers/misc/hi6421v600-irq.c +++ b/drivers/misc/hi6421v600-irq.c @@ -117,8 +117,8 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv) * If both powerkey down and up IRQs are received, * handle them in the right order */ - generic_handle_irq(priv->irqs[POWERKEY_DOWN]); - generic_handle_irq(priv->irqs[POWERKEY_UP]); + generic_handle_irq_safe(priv->irqs[POWERKEY_DOWN]); + generic_handle_irq_safe(priv->irqs[POWERKEY_UP]); pending &= ~HISI_IRQ_POWERKEY_UP_DOWN; } @@ -126,7 +126,7 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv) continue; for_each_set_bit(offset, &pending, BITS_PER_BYTE) { - generic_handle_irq(priv->irqs[offset + i * BITS_PER_BYTE]); + generic_handle_irq_safe(priv->irqs[offset + i * BITS_PER_BYTE]); } } diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 67c5b452dd35..88b91ad8e541 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -1070,10 +1070,10 @@ static int kgdbts_option_setup(char *opt) { if (strlen(opt) >= MAX_CONFIG_LEN) { printk(KERN_ERR "kgdbts: config string too long\n"); - return -ENOSPC; + return 1; } strcpy(config, opt); - return 0; + return 1; } __setup("kgdbts=", kgdbts_option_setup); diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c index 98828030b5a4..bac4df2e5231 100644 --- a/drivers/misc/lattice-ecp3-config.c +++ b/drivers/misc/lattice-ecp3-config.c @@ -211,13 +211,11 @@ static int lattice_ecp3_probe(struct spi_device *spi) return 0; } -static int lattice_ecp3_remove(struct spi_device *spi) +static void lattice_ecp3_remove(struct spi_device *spi) { struct fpga_data *data = spi_get_drvdata(spi); wait_for_completion(&data->fw_loaded); - - return 0; } static const struct spi_device_id lattice_ecp3_id[] = { diff --git a/drivers/misc/lis3lv02d/lis3lv02d_spi.c
b/drivers/misc/lis3lv02d/lis3lv02d_spi.c index 9e40dfb60742..203a108b8883 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d_spi.c +++ b/drivers/misc/lis3lv02d/lis3lv02d_spi.c @@ -96,15 +96,13 @@ static int lis302dl_spi_probe(struct spi_device *spi) return lis3lv02d_init_device(&lis3_dev); } -static int lis302dl_spi_remove(struct spi_device *spi) +static void lis302dl_spi_remove(struct spi_device *spi) { struct lis3lv02d *lis3 = spi_get_drvdata(spi); lis3lv02d_joystick_disable(lis3); lis3lv02d_poweroff(lis3); lis3lv02d_remove_fs(&lis3_dev); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c index f21854ac5cc2..009239ad1d8a 100644 --- a/drivers/misc/lkdtm/bugs.c +++ b/drivers/misc/lkdtm/bugs.c @@ -68,40 +68,40 @@ void __init lkdtm_bugs_init(int *recur_param) recur_count = *recur_param; } -void lkdtm_PANIC(void) +static void lkdtm_PANIC(void) { panic("dumptest"); } -void lkdtm_BUG(void) +static void lkdtm_BUG(void) { BUG(); } static int warn_counter; -void lkdtm_WARNING(void) +static void lkdtm_WARNING(void) { WARN_ON(++warn_counter); } -void lkdtm_WARNING_MESSAGE(void) +static void lkdtm_WARNING_MESSAGE(void) { WARN(1, "Warning message trigger count: %d\n", ++warn_counter); } -void lkdtm_EXCEPTION(void) +static void lkdtm_EXCEPTION(void) { *((volatile int *) 0) = 0; } -void lkdtm_LOOP(void) +static void lkdtm_LOOP(void) { for (;;) ; } -void lkdtm_EXHAUST_STACK(void) +static void lkdtm_EXHAUST_STACK(void) { pr_info("Calling function with %lu frame size to depth %d ...\n", REC_STACK_SIZE, recur_count); @@ -115,7 +115,7 @@ static noinline void __lkdtm_CORRUPT_STACK(void *stack) } /* This should trip the stack canary, not corrupt the return address. */ -noinline void lkdtm_CORRUPT_STACK(void) +static noinline void lkdtm_CORRUPT_STACK(void) { /* Use default char array length that triggers stack protection. */ char data[8] __aligned(sizeof(void *)); @@ -125,7 +125,7 @@ noinline void lkdtm_CORRUPT_STACK(void) } /* Same as above but will only get a canary with -fstack-protector-strong */ -noinline void lkdtm_CORRUPT_STACK_STRONG(void) +static noinline void lkdtm_CORRUPT_STACK_STRONG(void) { union { unsigned short shorts[4]; @@ -139,7 +139,7 @@ noinline void lkdtm_CORRUPT_STACK_STRONG(void) static pid_t stack_pid; static unsigned long stack_addr; -void lkdtm_REPORT_STACK(void) +static void lkdtm_REPORT_STACK(void) { volatile uintptr_t magic; pid_t pid = task_pid_nr(current); @@ -222,7 +222,7 @@ static noinline void __lkdtm_REPORT_STACK_CANARY(void *stack) } } -void lkdtm_REPORT_STACK_CANARY(void) +static void lkdtm_REPORT_STACK_CANARY(void) { /* Use default char array length that triggers stack protection. */ char data[8] __aligned(sizeof(void *)) = { }; @@ -230,7 +230,7 @@ void lkdtm_REPORT_STACK_CANARY(void) __lkdtm_REPORT_STACK_CANARY((void *)&data); } -void lkdtm_UNALIGNED_LOAD_STORE_WRITE(void) +static void lkdtm_UNALIGNED_LOAD_STORE_WRITE(void) { static u8 data[5] __attribute__((aligned(4))) = {1, 2, 3, 4, 5}; u32 *p; @@ -245,21 +245,21 @@ void lkdtm_UNALIGNED_LOAD_STORE_WRITE(void) pr_err("XFAIL: arch has CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS\n"); } -void lkdtm_SOFTLOCKUP(void) +static void lkdtm_SOFTLOCKUP(void) { preempt_disable(); for (;;) cpu_relax(); } -void lkdtm_HARDLOCKUP(void) +static void lkdtm_HARDLOCKUP(void) { local_irq_disable(); for (;;) cpu_relax(); } -void lkdtm_SPINLOCKUP(void) +static void lkdtm_SPINLOCKUP(void) { /* Must be called twice to trigger. 
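The lattice-ecp3 and lis3lv02d conversions here (like the ad525x one earlier in the series) belong to the subsystem-wide change that makes spi_driver::remove() return void, since the SPI core always ignored the returned value. The shape of a converted driver, with hypothetical names:

#include <linux/spi/spi.h>

struct mydev { int dummy; };                     /* hypothetical driver state */
static void mydev_power_off(struct mydev *p) { } /* hypothetical helper */

static void mydev_remove(struct spi_device *spi) /* was: static int */
{
        struct mydev *priv = spi_get_drvdata(spi);

        mydev_power_off(priv);
        /* no "return 0;" - there is nothing useful to report here */
}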
*/ spin_lock(&lock_me_up); @@ -267,7 +267,7 @@ void lkdtm_SPINLOCKUP(void) __release(&lock_me_up); } -void lkdtm_HUNG_TASK(void) +static void lkdtm_HUNG_TASK(void) { set_current_state(TASK_UNINTERRUPTIBLE); schedule(); @@ -276,7 +276,7 @@ void lkdtm_HUNG_TASK(void) volatile unsigned int huge = INT_MAX - 2; volatile unsigned int ignored; -void lkdtm_OVERFLOW_SIGNED(void) +static void lkdtm_OVERFLOW_SIGNED(void) { int value; @@ -291,7 +291,7 @@ void lkdtm_OVERFLOW_SIGNED(void) } -void lkdtm_OVERFLOW_UNSIGNED(void) +static void lkdtm_OVERFLOW_UNSIGNED(void) { unsigned int value; @@ -319,7 +319,7 @@ struct array_bounds { int three; }; -void lkdtm_ARRAY_BOUNDS(void) +static void lkdtm_ARRAY_BOUNDS(void) { struct array_bounds_flex_array *not_checked; struct array_bounds *checked; @@ -327,6 +327,11 @@ void lkdtm_ARRAY_BOUNDS(void) not_checked = kmalloc(sizeof(*not_checked) * 2, GFP_KERNEL); checked = kmalloc(sizeof(*checked) * 2, GFP_KERNEL); + if (!not_checked || !checked) { + kfree(not_checked); + kfree(checked); + return; + } pr_info("Array access within bounds ...\n"); /* For both, touch all bytes in the actual member size. */ @@ -346,10 +351,13 @@ void lkdtm_ARRAY_BOUNDS(void) kfree(not_checked); kfree(checked); pr_err("FAIL: survived array bounds overflow!\n"); - pr_expected_config(CONFIG_UBSAN_BOUNDS); + if (IS_ENABLED(CONFIG_UBSAN_BOUNDS)) + pr_expected_config(CONFIG_UBSAN_TRAP); + else + pr_expected_config(CONFIG_UBSAN_BOUNDS); } -void lkdtm_CORRUPT_LIST_ADD(void) +static void lkdtm_CORRUPT_LIST_ADD(void) { /* * Initially, an empty list via LIST_HEAD: @@ -389,7 +397,7 @@ void lkdtm_CORRUPT_LIST_ADD(void) } } -void lkdtm_CORRUPT_LIST_DEL(void) +static void lkdtm_CORRUPT_LIST_DEL(void) { LIST_HEAD(test_head); struct lkdtm_list item; @@ -417,7 +425,7 @@ void lkdtm_CORRUPT_LIST_DEL(void) } /* Test that VMAP_STACK is actually allocating with a leading guard page */ -void lkdtm_STACK_GUARD_PAGE_LEADING(void) +static void lkdtm_STACK_GUARD_PAGE_LEADING(void) { const unsigned char *stack = task_stack_page(current); const unsigned char *ptr = stack - 1; @@ -431,7 +439,7 @@ void lkdtm_STACK_GUARD_PAGE_LEADING(void) } /* Test that VMAP_STACK is actually allocating with a trailing guard page */ -void lkdtm_STACK_GUARD_PAGE_TRAILING(void) +static void lkdtm_STACK_GUARD_PAGE_TRAILING(void) { const unsigned char *stack = task_stack_page(current); const unsigned char *ptr = stack + THREAD_SIZE; @@ -444,7 +452,7 @@ void lkdtm_STACK_GUARD_PAGE_TRAILING(void) pr_err("FAIL: accessed page after stack! 
(byte: %x)\n", byte); } -void lkdtm_UNSET_SMEP(void) +static void lkdtm_UNSET_SMEP(void) { #if IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_UML) #define MOV_CR4_DEPTH 64 @@ -510,7 +518,7 @@ void lkdtm_UNSET_SMEP(void) #endif } -void lkdtm_DOUBLE_FAULT(void) +static void lkdtm_DOUBLE_FAULT(void) { #if IS_ENABLED(CONFIG_X86_32) && !IS_ENABLED(CONFIG_UML) /* @@ -558,7 +566,7 @@ static noinline void change_pac_parameters(void) } #endif -noinline void lkdtm_CORRUPT_PAC(void) +static noinline void lkdtm_CORRUPT_PAC(void) { #ifdef CONFIG_ARM64 #define CORRUPT_PAC_ITERATE 10 @@ -586,3 +594,37 @@ noinline void lkdtm_CORRUPT_PAC(void) pr_err("XFAIL: this test is arm64-only\n"); #endif } + +static struct crashtype crashtypes[] = { + CRASHTYPE(PANIC), + CRASHTYPE(BUG), + CRASHTYPE(WARNING), + CRASHTYPE(WARNING_MESSAGE), + CRASHTYPE(EXCEPTION), + CRASHTYPE(LOOP), + CRASHTYPE(EXHAUST_STACK), + CRASHTYPE(CORRUPT_STACK), + CRASHTYPE(CORRUPT_STACK_STRONG), + CRASHTYPE(REPORT_STACK), + CRASHTYPE(REPORT_STACK_CANARY), + CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE), + CRASHTYPE(SOFTLOCKUP), + CRASHTYPE(HARDLOCKUP), + CRASHTYPE(SPINLOCKUP), + CRASHTYPE(HUNG_TASK), + CRASHTYPE(OVERFLOW_SIGNED), + CRASHTYPE(OVERFLOW_UNSIGNED), + CRASHTYPE(ARRAY_BOUNDS), + CRASHTYPE(CORRUPT_LIST_ADD), + CRASHTYPE(CORRUPT_LIST_DEL), + CRASHTYPE(STACK_GUARD_PAGE_LEADING), + CRASHTYPE(STACK_GUARD_PAGE_TRAILING), + CRASHTYPE(UNSET_SMEP), + CRASHTYPE(DOUBLE_FAULT), + CRASHTYPE(CORRUPT_PAC), +}; + +struct crashtype_category bugs_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/cfi.c b/drivers/misc/lkdtm/cfi.c index c9aeddef1044..666a7f4bc137 100644 --- a/drivers/misc/lkdtm/cfi.c +++ b/drivers/misc/lkdtm/cfi.c @@ -3,6 +3,7 @@ * This is for all the tests relating directly to Control Flow Integrity. */ #include "lkdtm.h" +#include <asm/page.h> static int called_count; @@ -22,7 +23,7 @@ static noinline int lkdtm_increment_int(int *counter) /* * This tries to call an indirect function with a mismatched prototype. */ -void lkdtm_CFI_FORWARD_PROTO(void) +static void lkdtm_CFI_FORWARD_PROTO(void) { /* * Matches lkdtm_increment_void()'s prototype, but not @@ -41,3 +42,145 @@ void lkdtm_CFI_FORWARD_PROTO(void) pr_err("FAIL: survived mismatched prototype function call!\n"); pr_expected_config(CONFIG_CFI_CLANG); } + +/* + * This can stay local to LKDTM, as there should not be a production reason + * to disable PAC && SCS. + */ +#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL +# ifdef CONFIG_ARM64_BTI_KERNEL +# define __no_pac "branch-protection=bti" +# else +# define __no_pac "branch-protection=none" +# endif +# define __no_ret_protection __noscs __attribute__((__target__(__no_pac))) +#else +# define __no_ret_protection __noscs +#endif + +#define no_pac_addr(addr) \ + ((__force __typeof__(addr))((uintptr_t)(addr) | PAGE_OFFSET)) + +/* The ultimate ROP gadget. */ +static noinline __no_ret_protection +void set_return_addr_unchecked(unsigned long *expected, unsigned long *addr) +{ + /* Use of volatile is to make sure final write isn't seen as a dead store. */ + unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1; + + /* Make sure we've found the right place on the stack before writing it. */ + if (no_pac_addr(*ret_addr) == expected) + *ret_addr = (addr); + else + /* Check architecture, stack layout, or compiler behavior... */ + pr_warn("Eek: return address mismatch! 
%px != %px\n", + *ret_addr, addr); +} + +static noinline +void set_return_addr(unsigned long *expected, unsigned long *addr) +{ + /* Use of volatile is to make sure final write isn't seen as a dead store. */ + unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1; + + /* Make sure we've found the right place on the stack before writing it. */ + if (no_pac_addr(*ret_addr) == expected) + *ret_addr = (addr); + else + /* Check architecture, stack layout, or compiler behavior... */ + pr_warn("Eek: return address mismatch! %px != %px\n", + *ret_addr, addr); +} + +static volatile int force_check; + +static void lkdtm_CFI_BACKWARD(void) +{ + /* Use calculated gotos to keep labels addressable. */ + void *labels[] = {0, &&normal, &&redirected, &&check_normal, &&check_redirected}; + + pr_info("Attempting unchecked stack return address redirection ...\n"); + + /* Always false */ + if (force_check) { + /* + * Prepare to call with NULLs to avoid parameters being treated as + * constants in -O2. + */ + set_return_addr_unchecked(NULL, NULL); + set_return_addr(NULL, NULL); + if (force_check) + goto *labels[1]; + if (force_check) + goto *labels[2]; + if (force_check) + goto *labels[3]; + if (force_check) + goto *labels[4]; + return; + } + + /* + * Use fallthrough switch case to keep basic block ordering between + * set_return_addr*() and the label after it. + */ + switch (force_check) { + case 0: + set_return_addr_unchecked(&&normal, &&redirected); + fallthrough; + case 1: +normal: + /* Always true */ + if (!force_check) { + pr_err("FAIL: stack return address manipulation failed!\n"); + /* If we can't redirect "normally", we can't test mitigations. */ + return; + } + break; + default: +redirected: + pr_info("ok: redirected stack return address.\n"); + break; + } + + pr_info("Attempting checked stack return address redirection ...\n"); + + switch (force_check) { + case 0: + set_return_addr(&&check_normal, &&check_redirected); + fallthrough; + case 1: +check_normal: + /* Always true */ + if (!force_check) { + pr_info("ok: control flow unchanged.\n"); + return; + } + +check_redirected: + pr_err("FAIL: stack return address was redirected!\n"); + break; + } + + if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) { + pr_expected_config(CONFIG_ARM64_PTR_AUTH_KERNEL); + return; + } + if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK)) { + pr_expected_config(CONFIG_SHADOW_CALL_STACK); + return; + } + pr_warn("This is probably expected, since this %s was built *without* %s=y nor %s=y\n", + lkdtm_kernel_info, + "CONFIG_ARM64_PTR_AUTH_KERNEL", "CONFIG_SHADOW_CALL_STACK"); +} + +static struct crashtype crashtypes[] = { + CRASHTYPE(CFI_FORWARD_PROTO), + CRASHTYPE(CFI_BACKWARD), +}; + +struct crashtype_category cfi_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index f69b964b9952..b4712ff196b4 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -86,108 +86,21 @@ static struct crashpoint crashpoints[] = { #endif }; - -/* Crash types. */ -struct crashtype { - const char *name; - void (*func)(void); -}; - -#define CRASHTYPE(_name) \ - { \ - .name = __stringify(_name), \ - .func = lkdtm_ ## _name, \ - } - -/* Define the possible types of crashes that can be triggered.
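The CFI_BACKWARD test above depends on finding the saved return address one machine word above the frame pointer. A stripped-down sketch of that frame-walking assumption (valid on common frame-pointer ABIs such as x86-64; not guaranteed everywhere):

/* Read this function's own saved return address off the stack. */
static noinline unsigned long read_own_return_addr(void)
{
        unsigned long * volatile *slot =
                (unsigned long **)__builtin_frame_address(0) + 1;

        return (unsigned long)*slot;    /* may carry a PAC signature on arm64 */
}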
*/ -static const struct crashtype crashtypes[] = { - CRASHTYPE(PANIC), - CRASHTYPE(BUG), - CRASHTYPE(WARNING), - CRASHTYPE(WARNING_MESSAGE), - CRASHTYPE(EXCEPTION), - CRASHTYPE(LOOP), - CRASHTYPE(EXHAUST_STACK), - CRASHTYPE(CORRUPT_STACK), - CRASHTYPE(CORRUPT_STACK_STRONG), - CRASHTYPE(REPORT_STACK), - CRASHTYPE(REPORT_STACK_CANARY), - CRASHTYPE(CORRUPT_LIST_ADD), - CRASHTYPE(CORRUPT_LIST_DEL), - CRASHTYPE(STACK_GUARD_PAGE_LEADING), - CRASHTYPE(STACK_GUARD_PAGE_TRAILING), - CRASHTYPE(UNSET_SMEP), - CRASHTYPE(CORRUPT_PAC), - CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE), - CRASHTYPE(SLAB_LINEAR_OVERFLOW), - CRASHTYPE(VMALLOC_LINEAR_OVERFLOW), - CRASHTYPE(WRITE_AFTER_FREE), - CRASHTYPE(READ_AFTER_FREE), - CRASHTYPE(WRITE_BUDDY_AFTER_FREE), - CRASHTYPE(READ_BUDDY_AFTER_FREE), - CRASHTYPE(SLAB_INIT_ON_ALLOC), - CRASHTYPE(BUDDY_INIT_ON_ALLOC), - CRASHTYPE(SLAB_FREE_DOUBLE), - CRASHTYPE(SLAB_FREE_CROSS), - CRASHTYPE(SLAB_FREE_PAGE), - CRASHTYPE(SOFTLOCKUP), - CRASHTYPE(HARDLOCKUP), - CRASHTYPE(SPINLOCKUP), - CRASHTYPE(HUNG_TASK), - CRASHTYPE(OVERFLOW_SIGNED), - CRASHTYPE(OVERFLOW_UNSIGNED), - CRASHTYPE(ARRAY_BOUNDS), - CRASHTYPE(EXEC_DATA), - CRASHTYPE(EXEC_STACK), - CRASHTYPE(EXEC_KMALLOC), - CRASHTYPE(EXEC_VMALLOC), - CRASHTYPE(EXEC_RODATA), - CRASHTYPE(EXEC_USERSPACE), - CRASHTYPE(EXEC_NULL), - CRASHTYPE(ACCESS_USERSPACE), - CRASHTYPE(ACCESS_NULL), - CRASHTYPE(WRITE_RO), - CRASHTYPE(WRITE_RO_AFTER_INIT), - CRASHTYPE(WRITE_KERN), - CRASHTYPE(REFCOUNT_INC_OVERFLOW), - CRASHTYPE(REFCOUNT_ADD_OVERFLOW), - CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW), - CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_OVERFLOW), - CRASHTYPE(REFCOUNT_DEC_ZERO), - CRASHTYPE(REFCOUNT_DEC_NEGATIVE), - CRASHTYPE(REFCOUNT_DEC_AND_TEST_NEGATIVE), - CRASHTYPE(REFCOUNT_SUB_AND_TEST_NEGATIVE), - CRASHTYPE(REFCOUNT_INC_ZERO), - CRASHTYPE(REFCOUNT_ADD_ZERO), - CRASHTYPE(REFCOUNT_INC_SATURATED), - CRASHTYPE(REFCOUNT_DEC_SATURATED), - CRASHTYPE(REFCOUNT_ADD_SATURATED), - CRASHTYPE(REFCOUNT_INC_NOT_ZERO_SATURATED), - CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_SATURATED), - CRASHTYPE(REFCOUNT_DEC_AND_TEST_SATURATED), - CRASHTYPE(REFCOUNT_SUB_AND_TEST_SATURATED), - CRASHTYPE(REFCOUNT_TIMING), - CRASHTYPE(ATOMIC_TIMING), - CRASHTYPE(USERCOPY_HEAP_SIZE_TO), - CRASHTYPE(USERCOPY_HEAP_SIZE_FROM), - CRASHTYPE(USERCOPY_HEAP_WHITELIST_TO), - CRASHTYPE(USERCOPY_HEAP_WHITELIST_FROM), - CRASHTYPE(USERCOPY_STACK_FRAME_TO), - CRASHTYPE(USERCOPY_STACK_FRAME_FROM), - CRASHTYPE(USERCOPY_STACK_BEYOND), - CRASHTYPE(USERCOPY_KERNEL), - CRASHTYPE(STACKLEAK_ERASING), - CRASHTYPE(CFI_FORWARD_PROTO), - CRASHTYPE(FORTIFIED_OBJECT), - CRASHTYPE(FORTIFIED_SUBOBJECT), - CRASHTYPE(FORTIFIED_STRSCPY), - CRASHTYPE(DOUBLE_FAULT), +/* List of possible types for crashes that can be triggered. */ +static const struct crashtype_category *crashtype_categories[] = { + &bugs_crashtypes, + &heap_crashtypes, + &perms_crashtypes, + &refcount_crashtypes, + &usercopy_crashtypes, + &stackleak_crashtypes, + &cfi_crashtypes, + &fortify_crashtypes, #ifdef CONFIG_PPC_64S_HASH_MMU - CRASHTYPE(PPC_SLB_MULTIHIT), + &powerpc_crashtypes, #endif }; - /* Global kprobe entry and crashtype. 
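With the flat crashtypes[] array gone, each lkdtm source file now owns its tests and exports a single crashtype_category. Adding a new category would look roughly like this (MY_NEW_TEST and the category name are hypothetical):

/* in the new file: define the tests and the category */
static struct crashtype crashtypes[] = {
        CRASHTYPE(MY_NEW_TEST),         /* requires a static lkdtm_MY_NEW_TEST() */
};

struct crashtype_category mynew_crashtypes = {
        .crashtypes = crashtypes,
        .len        = ARRAY_SIZE(crashtypes),
};

/* in lkdtm.h:  extern struct crashtype_category mynew_crashtypes;
 * in core.c:   add &mynew_crashtypes to crashtype_categories[] */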
*/ static struct kprobe *lkdtm_kprobe; static struct crashpoint *lkdtm_crashpoint; @@ -222,11 +135,16 @@ char *lkdtm_kernel_info; /* Return the crashtype number or NULL if the name is invalid */ static const struct crashtype *find_crashtype(const char *name) { - int i; + int cat, idx; + + for (cat = 0; cat < ARRAY_SIZE(crashtype_categories); cat++) { + for (idx = 0; idx < crashtype_categories[cat]->len; idx++) { + struct crashtype *crashtype; - for (i = 0; i < ARRAY_SIZE(crashtypes); i++) { - if (!strcmp(name, crashtypes[i].name)) - return &crashtypes[i]; + crashtype = &crashtype_categories[cat]->crashtypes[idx]; + if (!strcmp(name, crashtype->name)) + return crashtype; + } } return NULL; @@ -346,17 +264,24 @@ static ssize_t lkdtm_debugfs_entry(struct file *f, static ssize_t lkdtm_debugfs_read(struct file *f, char __user *user_buf, size_t count, loff_t *off) { + int n, cat, idx; + ssize_t out; char *buf; - int i, n, out; buf = (char *)__get_free_page(GFP_KERNEL); if (buf == NULL) return -ENOMEM; n = scnprintf(buf, PAGE_SIZE, "Available crash types:\n"); - for (i = 0; i < ARRAY_SIZE(crashtypes); i++) { - n += scnprintf(buf + n, PAGE_SIZE - n, "%s\n", - crashtypes[i].name); + + for (cat = 0; cat < ARRAY_SIZE(crashtype_categories); cat++) { + for (idx = 0; idx < crashtype_categories[cat]->len; idx++) { + struct crashtype *crashtype; + + crashtype = &crashtype_categories[cat]->crashtypes[idx]; + n += scnprintf(buf + n, PAGE_SIZE - n, "%s\n", + crashtype->name); + } } buf[n] = '\0'; diff --git a/drivers/misc/lkdtm/fortify.c b/drivers/misc/lkdtm/fortify.c index d06458a4858e..080293fa3c52 100644 --- a/drivers/misc/lkdtm/fortify.c +++ b/drivers/misc/lkdtm/fortify.c @@ -10,7 +10,7 @@ static volatile int fortify_scratch_space; -void lkdtm_FORTIFIED_OBJECT(void) +static void lkdtm_FORTIFIED_OBJECT(void) { struct target { char a[10]; @@ -31,7 +31,7 @@ void lkdtm_FORTIFIED_OBJECT(void) pr_expected_config(CONFIG_FORTIFY_SOURCE); } -void lkdtm_FORTIFIED_SUBOBJECT(void) +static void lkdtm_FORTIFIED_SUBOBJECT(void) { struct target { char a[10]; @@ -44,14 +44,14 @@ void lkdtm_FORTIFIED_SUBOBJECT(void) strscpy(src, "over ten bytes", size); size = strlen(src) + 1; - pr_info("trying to strcpy past the end of a member of a struct\n"); + pr_info("trying to strncpy past the end of a member of a struct\n"); /* - * memcpy(target.a, src, 20); will hit a compile error because the + * strncpy(target.a, src, 20); will hit a compile error because the * compiler knows at build time that target.a < 20 bytes. Use a * volatile to force a runtime error. */ - memcpy(target.a, src, size); + strncpy(target.a, src, size); /* Store result to global to prevent the code from being eliminated */ fortify_scratch_space = target.a[3]; @@ -67,7 +67,7 @@ void lkdtm_FORTIFIED_SUBOBJECT(void) * strscpy and generate a panic because there is a write overflow (i.e. src * length is greater than dst length). 
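The FORTIFIED_SUBOBJECT change above swaps memcpy() for strncpy() so the overflowing write goes through a string helper that CONFIG_FORTIFY_SOURCE instruments. Roughly, the mechanism (a sketch, not the lkdtm code):

#include <linux/string.h>

struct target {
        char a[10];
        int b;
};

static void fortify_demo(struct target *t, const char *src, volatile size_t len)
{
        /* With a constant len > 10 the fortified strncpy() is rejected at
         * compile time, because __builtin_object_size(t->a, 1) == 10.
         * A volatile length defers the same bounds check to run time. */
        strncpy(t->a, src, len);
}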
*/ -void lkdtm_FORTIFIED_STRSCPY(void) +static void lkdtm_FORTIFIED_STRSCPY(void) { char *src; char dst[5]; @@ -134,3 +134,14 @@ void lkdtm_FORTIFIED_STRSCPY(void) kfree(src); } + +static struct crashtype crashtypes[] = { + CRASHTYPE(FORTIFIED_OBJECT), + CRASHTYPE(FORTIFIED_SUBOBJECT), + CRASHTYPE(FORTIFIED_STRSCPY), +}; + +struct crashtype_category fortify_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/heap.c b/drivers/misc/lkdtm/heap.c index 8a92f5a800fa..62516078a619 100644 --- a/drivers/misc/lkdtm/heap.c +++ b/drivers/misc/lkdtm/heap.c @@ -22,8 +22,11 @@ static volatile int __offset = 1; /* * If there aren't guard pages, it's likely that a consecutive allocation will * let us overflow into the second allocation without overwriting something real. + * + * This should always be caught because there is an unconditional unmapped + * page after vmap allocations. */ -void lkdtm_VMALLOC_LINEAR_OVERFLOW(void) +static void lkdtm_VMALLOC_LINEAR_OVERFLOW(void) { char *one, *two; @@ -41,8 +44,11 @@ void lkdtm_VMALLOC_LINEAR_OVERFLOW(void) * This tries to stay within the next largest power-of-2 kmalloc cache * to avoid actually overwriting anything important if it's not detected * correctly. + * + * This should get caught by either memory tagging, KASan, or by using + * CONFIG_SLUB_DEBUG=y and slub_debug=ZF (or CONFIG_SLUB_DEBUG_ON=y). */ -void lkdtm_SLAB_LINEAR_OVERFLOW(void) +static void lkdtm_SLAB_LINEAR_OVERFLOW(void) { size_t len = 1020; u32 *data = kmalloc(len, GFP_KERNEL); @@ -50,11 +56,12 @@ void lkdtm_SLAB_LINEAR_OVERFLOW(void) return; pr_info("Attempting slab linear overflow ...\n"); + OPTIMIZER_HIDE_VAR(data); data[1024 / sizeof(u32)] = 0x12345678; kfree(data); } -void lkdtm_WRITE_AFTER_FREE(void) +static void lkdtm_WRITE_AFTER_FREE(void) { int *base, *again; size_t len = 1024; @@ -80,7 +87,7 @@ void lkdtm_WRITE_AFTER_FREE(void) pr_info("Hmm, didn't get the same memory range.\n"); } -void lkdtm_READ_AFTER_FREE(void) +static void lkdtm_READ_AFTER_FREE(void) { int *base, *val, saw; size_t len = 1024; @@ -124,7 +131,7 @@ void lkdtm_READ_AFTER_FREE(void) kfree(val); } -void lkdtm_WRITE_BUDDY_AFTER_FREE(void) +static void lkdtm_WRITE_BUDDY_AFTER_FREE(void) { unsigned long p = __get_free_page(GFP_KERNEL); if (!p) { @@ -144,7 +151,7 @@ void lkdtm_WRITE_BUDDY_AFTER_FREE(void) schedule(); } -void lkdtm_READ_BUDDY_AFTER_FREE(void) +static void lkdtm_READ_BUDDY_AFTER_FREE(void) { unsigned long p = __get_free_page(GFP_KERNEL); int saw, *val; @@ -181,7 +188,7 @@ void lkdtm_READ_BUDDY_AFTER_FREE(void) kfree(val); } -void lkdtm_SLAB_INIT_ON_ALLOC(void) +static void lkdtm_SLAB_INIT_ON_ALLOC(void) { u8 *first; u8 *val; @@ -213,7 +220,7 @@ void lkdtm_SLAB_INIT_ON_ALLOC(void) kfree(val); } -void lkdtm_BUDDY_INIT_ON_ALLOC(void) +static void lkdtm_BUDDY_INIT_ON_ALLOC(void) { u8 *first; u8 *val; @@ -246,7 +253,7 @@ void lkdtm_BUDDY_INIT_ON_ALLOC(void) free_page((unsigned long)val); } -void lkdtm_SLAB_FREE_DOUBLE(void) +static void lkdtm_SLAB_FREE_DOUBLE(void) { int *val; @@ -263,7 +270,7 @@ void lkdtm_SLAB_FREE_DOUBLE(void) kmem_cache_free(double_free_cache, val); } -void lkdtm_SLAB_FREE_CROSS(void) +static void lkdtm_SLAB_FREE_CROSS(void) { int *val; @@ -279,7 +286,7 @@ void lkdtm_SLAB_FREE_CROSS(void) kmem_cache_free(b_cache, val); } -void lkdtm_SLAB_FREE_PAGE(void) +static void lkdtm_SLAB_FREE_PAGE(void) { unsigned long p = __get_free_page(GFP_KERNEL); @@ -313,3 +320,22 @@ void __exit lkdtm_heap_exit(void) kmem_cache_destroy(a_cache); 
kmem_cache_destroy(b_cache); } + +static struct crashtype crashtypes[] = { + CRASHTYPE(SLAB_LINEAR_OVERFLOW), + CRASHTYPE(VMALLOC_LINEAR_OVERFLOW), + CRASHTYPE(WRITE_AFTER_FREE), + CRASHTYPE(READ_AFTER_FREE), + CRASHTYPE(WRITE_BUDDY_AFTER_FREE), + CRASHTYPE(READ_BUDDY_AFTER_FREE), + CRASHTYPE(SLAB_INIT_ON_ALLOC), + CRASHTYPE(BUDDY_INIT_ON_ALLOC), + CRASHTYPE(SLAB_FREE_DOUBLE), + CRASHTYPE(SLAB_FREE_CROSS), + CRASHTYPE(SLAB_FREE_PAGE), +}; + +struct crashtype_category heap_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index d6137c70ebbe..015e0484026b 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -9,19 +9,19 @@ extern char *lkdtm_kernel_info; #define pr_expected_config(kconfig) \ -{ \ +do { \ if (IS_ENABLED(kconfig)) \ pr_err("Unexpected! This %s was built with " #kconfig "=y\n", \ lkdtm_kernel_info); \ else \ pr_warn("This is probably expected, since this %s was built *without* " #kconfig "=y\n", \ lkdtm_kernel_info); \ -} +} while (0) #ifndef MODULE int lkdtm_check_bool_cmdline(const char *param); #define pr_expected_config_param(kconfig, param) \ -{ \ +do { \ if (IS_ENABLED(kconfig)) { \ switch (lkdtm_check_bool_cmdline(param)) { \ case 0: \ @@ -52,118 +52,49 @@ int lkdtm_check_bool_cmdline(const char *param); break; \ } \ } \ -} +} while (0) #else #define pr_expected_config_param(kconfig, param) pr_expected_config(kconfig) #endif -/* bugs.c */ -void __init lkdtm_bugs_init(int *recur_param); -void lkdtm_PANIC(void); -void lkdtm_BUG(void); -void lkdtm_WARNING(void); -void lkdtm_WARNING_MESSAGE(void); -void lkdtm_EXCEPTION(void); -void lkdtm_LOOP(void); -void lkdtm_EXHAUST_STACK(void); -void lkdtm_CORRUPT_STACK(void); -void lkdtm_CORRUPT_STACK_STRONG(void); -void lkdtm_REPORT_STACK(void); -void lkdtm_REPORT_STACK_CANARY(void); -void lkdtm_UNALIGNED_LOAD_STORE_WRITE(void); -void lkdtm_SOFTLOCKUP(void); -void lkdtm_HARDLOCKUP(void); -void lkdtm_SPINLOCKUP(void); -void lkdtm_HUNG_TASK(void); -void lkdtm_OVERFLOW_SIGNED(void); -void lkdtm_OVERFLOW_UNSIGNED(void); -void lkdtm_ARRAY_BOUNDS(void); -void lkdtm_CORRUPT_LIST_ADD(void); -void lkdtm_CORRUPT_LIST_DEL(void); -void lkdtm_STACK_GUARD_PAGE_LEADING(void); -void lkdtm_STACK_GUARD_PAGE_TRAILING(void); -void lkdtm_UNSET_SMEP(void); -void lkdtm_DOUBLE_FAULT(void); -void lkdtm_CORRUPT_PAC(void); +/* Crash types. 
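The lkdtm.h hunk above wraps the pr_expected_config() macros in do { ... } while (0) instead of bare braces. The classic reason, shown with a toy macro (not from the tree):

#include <linux/printk.h>

#define REPORT_BAD()    { pr_err("one\n"); pr_err("two\n"); }
#define REPORT_GOOD()   do { pr_err("one\n"); pr_err("two\n"); } while (0)

static void demo(int cond)
{
        if (cond)
                REPORT_GOOD();  /* expands to one statement, so the */
        else                    /* 'else' still binds to the 'if'.  */
                pr_info("ok\n");

        /*
         * With REPORT_BAD() the ';' after the closing brace would end
         * the 'if' statement early and orphan the 'else': a build error.
         */
}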
*/ +struct crashtype { + const char *name; + void (*func)(void); +}; -/* heap.c */ -void __init lkdtm_heap_init(void); -void __exit lkdtm_heap_exit(void); -void lkdtm_VMALLOC_LINEAR_OVERFLOW(void); -void lkdtm_SLAB_LINEAR_OVERFLOW(void); -void lkdtm_WRITE_AFTER_FREE(void); -void lkdtm_READ_AFTER_FREE(void); -void lkdtm_WRITE_BUDDY_AFTER_FREE(void); -void lkdtm_READ_BUDDY_AFTER_FREE(void); -void lkdtm_SLAB_INIT_ON_ALLOC(void); -void lkdtm_BUDDY_INIT_ON_ALLOC(void); -void lkdtm_SLAB_FREE_DOUBLE(void); -void lkdtm_SLAB_FREE_CROSS(void); -void lkdtm_SLAB_FREE_PAGE(void); +#define CRASHTYPE(_name) \ + { \ + .name = __stringify(_name), \ + .func = lkdtm_ ## _name, \ + } -/* perms.c */ -void __init lkdtm_perms_init(void); -void lkdtm_WRITE_RO(void); -void lkdtm_WRITE_RO_AFTER_INIT(void); -void lkdtm_WRITE_KERN(void); -void lkdtm_EXEC_DATA(void); -void lkdtm_EXEC_STACK(void); -void lkdtm_EXEC_KMALLOC(void); -void lkdtm_EXEC_VMALLOC(void); -void lkdtm_EXEC_RODATA(void); -void lkdtm_EXEC_USERSPACE(void); -void lkdtm_EXEC_NULL(void); -void lkdtm_ACCESS_USERSPACE(void); -void lkdtm_ACCESS_NULL(void); +/* Category's collection of crashtypes. */ +struct crashtype_category { + struct crashtype *crashtypes; + size_t len; +}; -/* refcount.c */ -void lkdtm_REFCOUNT_INC_OVERFLOW(void); -void lkdtm_REFCOUNT_ADD_OVERFLOW(void); -void lkdtm_REFCOUNT_INC_NOT_ZERO_OVERFLOW(void); -void lkdtm_REFCOUNT_ADD_NOT_ZERO_OVERFLOW(void); -void lkdtm_REFCOUNT_DEC_ZERO(void); -void lkdtm_REFCOUNT_DEC_NEGATIVE(void); -void lkdtm_REFCOUNT_DEC_AND_TEST_NEGATIVE(void); -void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void); -void lkdtm_REFCOUNT_INC_ZERO(void); -void lkdtm_REFCOUNT_ADD_ZERO(void); -void lkdtm_REFCOUNT_INC_SATURATED(void); -void lkdtm_REFCOUNT_DEC_SATURATED(void); -void lkdtm_REFCOUNT_ADD_SATURATED(void); -void lkdtm_REFCOUNT_INC_NOT_ZERO_SATURATED(void); -void lkdtm_REFCOUNT_ADD_NOT_ZERO_SATURATED(void); -void lkdtm_REFCOUNT_DEC_AND_TEST_SATURATED(void); -void lkdtm_REFCOUNT_SUB_AND_TEST_SATURATED(void); -void lkdtm_REFCOUNT_TIMING(void); -void lkdtm_ATOMIC_TIMING(void); +/* Each category's crashtypes list. */ +extern struct crashtype_category bugs_crashtypes; +extern struct crashtype_category heap_crashtypes; +extern struct crashtype_category perms_crashtypes; +extern struct crashtype_category refcount_crashtypes; +extern struct crashtype_category usercopy_crashtypes; +extern struct crashtype_category stackleak_crashtypes; +extern struct crashtype_category cfi_crashtypes; +extern struct crashtype_category fortify_crashtypes; +extern struct crashtype_category powerpc_crashtypes; -/* rodata.c */ -void lkdtm_rodata_do_nothing(void); - -/* usercopy.c */ +/* Each category's init/exit routines. 
*/ +void __init lkdtm_bugs_init(int *recur_param); +void __init lkdtm_heap_init(void); +void __exit lkdtm_heap_exit(void); +void __init lkdtm_perms_init(void); void __init lkdtm_usercopy_init(void); void __exit lkdtm_usercopy_exit(void); -void lkdtm_USERCOPY_HEAP_SIZE_TO(void); -void lkdtm_USERCOPY_HEAP_SIZE_FROM(void); -void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void); -void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void); -void lkdtm_USERCOPY_STACK_FRAME_TO(void); -void lkdtm_USERCOPY_STACK_FRAME_FROM(void); -void lkdtm_USERCOPY_STACK_BEYOND(void); -void lkdtm_USERCOPY_KERNEL(void); - -/* stackleak.c */ -void lkdtm_STACKLEAK_ERASING(void); -/* cfi.c */ -void lkdtm_CFI_FORWARD_PROTO(void); - -/* fortify.c */ -void lkdtm_FORTIFIED_OBJECT(void); -void lkdtm_FORTIFIED_SUBOBJECT(void); -void lkdtm_FORTIFIED_STRSCPY(void); - -/* powerpc.c */ -void lkdtm_PPC_SLB_MULTIHIT(void); +/* Special declaration for function-in-rodata. */ +void lkdtm_rodata_do_nothing(void); #endif diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 2dede2ef658f..b93404d65650 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -10,6 +10,7 @@ #include <linux/mman.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> +#include <asm/sections.h> /* Whether or not to fill the target memory area with do_nothing(). */ #define CODE_WRITE true @@ -21,7 +22,7 @@ /* This is non-const, so it will end up in the .data section. */ static u8 data_area[EXEC_SIZE]; -/* This is cost, so it will end up in the .rodata section. */ +/* This is const, so it will end up in the .rodata section. */ static const unsigned long rodata = 0xAA55AA55; /* This is marked __ro_after_init, so it should ultimately be .rodata. */ @@ -31,31 +32,51 @@ static unsigned long ro_after_init __ro_after_init = 0x55AA5500; * This just returns to the caller. It is designed to be copied into * non-executable memory regions. */ -static void do_nothing(void) +static noinline void do_nothing(void) { return; } /* Must immediately follow do_nothing for size calculations to work out. */ -static void do_overwritten(void) +static noinline void do_overwritten(void) { pr_info("do_overwritten wasn't overwritten!\n"); return; } +static noinline void do_almost_nothing(void) +{ + pr_info("do_nothing was hijacked!\n"); +} + +static void *setup_function_descriptor(func_desc_t *fdesc, void *dst) +{ + if (!have_function_descriptors()) + return dst; + + memcpy(fdesc, do_nothing, sizeof(*fdesc)); + fdesc->addr = (unsigned long)dst; + barrier(); + + return fdesc; +} + static noinline void execute_location(void *dst, bool write) { - void (*func)(void) = dst; + void (*func)(void); + func_desc_t fdesc; + void *do_nothing_text = dereference_function_descriptor(do_nothing); - pr_info("attempting ok execution at %px\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing_text); do_nothing(); if (write == CODE_WRITE) { - memcpy(dst, do_nothing, EXEC_SIZE); + memcpy(dst, do_nothing_text, EXEC_SIZE); flush_icache_range((unsigned long)dst, (unsigned long)dst + EXEC_SIZE); } - pr_info("attempting bad execution at %px\n", func); + pr_info("attempting bad execution at %px\n", dst); + func = setup_function_descriptor(&fdesc, dst); func(); pr_err("FAIL: func returned\n"); } @@ -65,21 +86,24 @@ static void execute_user_location(void *dst) { int copied; /* Intentionally crossing kernel/user memory boundary.
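The perms.c changes above make the execution tests work on ABIs that use function descriptors (e.g. ppc64 ELFv1), where a C function pointer addresses an {entry, TOC} descriptor rather than the code itself. The essential translation step, as a sketch:

#include <asm/sections.h>

/* Map a C function pointer to the address of its actual .text. */
static void *text_address_of(void *func)
{
        if (!have_function_descriptors())
                return func;    /* pointer already addresses code */

        return dereference_function_descriptor(func);
}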
*/ - void (*func)(void) = dst; + void (*func)(void); + func_desc_t fdesc; + void *do_nothing_text = dereference_function_descriptor(do_nothing); - pr_info("attempting ok execution at %px\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing_text); do_nothing(); - copied = access_process_vm(current, (unsigned long)dst, do_nothing, + copied = access_process_vm(current, (unsigned long)dst, do_nothing_text, EXEC_SIZE, FOLL_WRITE); if (copied < EXEC_SIZE) return; - pr_info("attempting bad execution at %px\n", func); + pr_info("attempting bad execution at %px\n", dst); + func = setup_function_descriptor(&fdesc, dst); func(); pr_err("FAIL: func returned\n"); } -void lkdtm_WRITE_RO(void) +static void lkdtm_WRITE_RO(void) { /* Explicitly cast away "const" for the test and make volatile. */ volatile unsigned long *ptr = (unsigned long *)&rodata; @@ -89,7 +113,7 @@ void lkdtm_WRITE_RO(void) pr_err("FAIL: survived bad write\n"); } -void lkdtm_WRITE_RO_AFTER_INIT(void) +static void lkdtm_WRITE_RO_AFTER_INIT(void) { volatile unsigned long *ptr = &ro_after_init; @@ -108,13 +132,14 @@ void lkdtm_WRITE_RO_AFTER_INIT(void) pr_err("FAIL: survived bad write\n"); } -void lkdtm_WRITE_KERN(void) +static void lkdtm_WRITE_KERN(void) { size_t size; volatile unsigned char *ptr; - size = (unsigned long)do_overwritten - (unsigned long)do_nothing; - ptr = (unsigned char *)do_overwritten; + size = (unsigned long)dereference_function_descriptor(do_overwritten) - + (unsigned long)dereference_function_descriptor(do_nothing); + ptr = dereference_function_descriptor(do_overwritten); pr_info("attempting bad %zu byte write at %px\n", size, ptr); memcpy((void *)ptr, (unsigned char *)do_nothing, size); @@ -124,37 +149,55 @@ void lkdtm_WRITE_KERN(void) do_overwritten(); } -void lkdtm_EXEC_DATA(void) +static void lkdtm_WRITE_OPD(void) +{ + size_t size = sizeof(func_desc_t); + void (*func)(void) = do_nothing; + + if (!have_function_descriptors()) { + pr_info("XFAIL: Platform doesn't use function descriptors.\n"); + return; + } + pr_info("attempting bad %zu bytes write at %px\n", size, do_nothing); + memcpy(do_nothing, do_almost_nothing, size); + pr_err("FAIL: survived bad write\n"); + + asm("" : "=m"(func)); + func(); +} + +static void lkdtm_EXEC_DATA(void) { execute_location(data_area, CODE_WRITE); } -void lkdtm_EXEC_STACK(void) +static void lkdtm_EXEC_STACK(void) { u8 stack_area[EXEC_SIZE]; execute_location(stack_area, CODE_WRITE); } -void lkdtm_EXEC_KMALLOC(void) +static void lkdtm_EXEC_KMALLOC(void) { u32 *kmalloc_area = kmalloc(EXEC_SIZE, GFP_KERNEL); execute_location(kmalloc_area, CODE_WRITE); kfree(kmalloc_area); } -void lkdtm_EXEC_VMALLOC(void) +static void lkdtm_EXEC_VMALLOC(void) { u32 *vmalloc_area = vmalloc(EXEC_SIZE); execute_location(vmalloc_area, CODE_WRITE); vfree(vmalloc_area); } -void lkdtm_EXEC_RODATA(void) +static void lkdtm_EXEC_RODATA(void) { - execute_location(lkdtm_rodata_do_nothing, CODE_AS_IS); + execute_location(dereference_function_descriptor(lkdtm_rodata_do_nothing), + CODE_AS_IS); } -void lkdtm_EXEC_USERSPACE(void) +static void lkdtm_EXEC_USERSPACE(void) { unsigned long user_addr; @@ -169,12 +212,12 @@ void lkdtm_EXEC_USERSPACE(void) vm_munmap(user_addr, PAGE_SIZE); } -void lkdtm_EXEC_NULL(void) +static void lkdtm_EXEC_NULL(void) { execute_location(NULL, CODE_AS_IS); } -void lkdtm_ACCESS_USERSPACE(void) +static void lkdtm_ACCESS_USERSPACE(void) { unsigned long user_addr, tmp = 0; unsigned long *ptr; @@ -207,7 +250,7 @@ void lkdtm_ACCESS_USERSPACE(void) vm_munmap(user_addr, 
PAGE_SIZE); } -void lkdtm_ACCESS_NULL(void) +static void lkdtm_ACCESS_NULL(void) { unsigned long tmp; volatile unsigned long *ptr = (unsigned long *)NULL; @@ -227,3 +270,24 @@ void __init lkdtm_perms_init(void) /* Make sure we can write to __ro_after_init values during __init */ ro_after_init |= 0xAA; } + +static struct crashtype crashtypes[] = { + CRASHTYPE(WRITE_RO), + CRASHTYPE(WRITE_RO_AFTER_INIT), + CRASHTYPE(WRITE_KERN), + CRASHTYPE(WRITE_OPD), + CRASHTYPE(EXEC_DATA), + CRASHTYPE(EXEC_STACK), + CRASHTYPE(EXEC_KMALLOC), + CRASHTYPE(EXEC_VMALLOC), + CRASHTYPE(EXEC_RODATA), + CRASHTYPE(EXEC_USERSPACE), + CRASHTYPE(EXEC_NULL), + CRASHTYPE(ACCESS_USERSPACE), + CRASHTYPE(ACCESS_NULL), +}; + +struct crashtype_category perms_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/powerpc.c b/drivers/misc/lkdtm/powerpc.c index 077c9f9ed8d0..be385449911a 100644 --- a/drivers/misc/lkdtm/powerpc.c +++ b/drivers/misc/lkdtm/powerpc.c @@ -100,7 +100,7 @@ static void insert_dup_slb_entry_0(void) preempt_enable(); } -void lkdtm_PPC_SLB_MULTIHIT(void) +static void lkdtm_PPC_SLB_MULTIHIT(void) { if (!radix_enabled()) { pr_info("Injecting SLB multihit errors\n"); @@ -118,3 +118,12 @@ void lkdtm_PPC_SLB_MULTIHIT(void) pr_err("XFAIL: This test is for ppc64 and with hash mode MMU only\n"); } } + +static struct crashtype crashtypes[] = { + CRASHTYPE(PPC_SLB_MULTIHIT), +}; + +struct crashtype_category powerpc_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c index de7c5ab528d9..5cd488f54cfa 100644 --- a/drivers/misc/lkdtm/refcount.c +++ b/drivers/misc/lkdtm/refcount.c @@ -24,7 +24,7 @@ static void overflow_check(refcount_t *ref) * A refcount_inc() above the maximum value of the refcount implementation, * should at least saturate, and at most also WARN. */ -void lkdtm_REFCOUNT_INC_OVERFLOW(void) +static void lkdtm_REFCOUNT_INC_OVERFLOW(void) { refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX - 1); @@ -40,7 +40,7 @@ void lkdtm_REFCOUNT_INC_OVERFLOW(void) } /* refcount_add() should behave just like refcount_inc() above. */ -void lkdtm_REFCOUNT_ADD_OVERFLOW(void) +static void lkdtm_REFCOUNT_ADD_OVERFLOW(void) { refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX - 1); @@ -58,7 +58,7 @@ void lkdtm_REFCOUNT_ADD_OVERFLOW(void) } /* refcount_inc_not_zero() should behave just like refcount_inc() above. */ -void lkdtm_REFCOUNT_INC_NOT_ZERO_OVERFLOW(void) +static void lkdtm_REFCOUNT_INC_NOT_ZERO_OVERFLOW(void) { refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX); @@ -70,7 +70,7 @@ void lkdtm_REFCOUNT_INC_NOT_ZERO_OVERFLOW(void) } /* refcount_add_not_zero() should behave just like refcount_inc() above. */ -void lkdtm_REFCOUNT_ADD_NOT_ZERO_OVERFLOW(void) +static void lkdtm_REFCOUNT_ADD_NOT_ZERO_OVERFLOW(void) { refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX); @@ -103,7 +103,7 @@ static void check_zero(refcount_t *ref) * zero it should either saturate (when inc-from-zero isn't protected) * or stay at zero (when inc-from-zero is protected) and should WARN for both. */ -void lkdtm_REFCOUNT_DEC_ZERO(void) +static void lkdtm_REFCOUNT_DEC_ZERO(void) { refcount_t zero = REFCOUNT_INIT(2); @@ -142,7 +142,7 @@ static void check_negative(refcount_t *ref, int start) } /* A refcount_dec() going negative should saturate and may WARN. 
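The refcount tests here probe the saturation semantics that make refcount_t safer than a raw atomic_t counter. For contrast, the API's intended usage pattern (generic object names):

#include <linux/refcount.h>
#include <linux/slab.h>

struct obj {
        refcount_t ref;
};

static struct obj *obj_get(struct obj *o)
{
        /* Will not revive an object whose count already reached zero. */
        return refcount_inc_not_zero(&o->ref) ? o : NULL;
}

static void obj_put(struct obj *o)
{
        if (refcount_dec_and_test(&o->ref))
                kfree(o);
        /* An extra put saturates the counter and WARNs rather than
         * wrapping around and enabling a use-after-free. */
}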
*/ -void lkdtm_REFCOUNT_DEC_NEGATIVE(void) +static void lkdtm_REFCOUNT_DEC_NEGATIVE(void) { refcount_t neg = REFCOUNT_INIT(0); @@ -156,7 +156,7 @@ void lkdtm_REFCOUNT_DEC_NEGATIVE(void) * A refcount_dec_and_test() should act like refcount_dec() above when * going negative. */ -void lkdtm_REFCOUNT_DEC_AND_TEST_NEGATIVE(void) +static void lkdtm_REFCOUNT_DEC_AND_TEST_NEGATIVE(void) { refcount_t neg = REFCOUNT_INIT(0); @@ -171,7 +171,7 @@ void lkdtm_REFCOUNT_DEC_AND_TEST_NEGATIVE(void) * A refcount_sub_and_test() should act like refcount_dec_and_test() * above when going negative. */ -void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void) +static void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void) { refcount_t neg = REFCOUNT_INIT(3); @@ -203,7 +203,7 @@ static void check_from_zero(refcount_t *ref) /* * A refcount_inc() from zero should pin to zero or saturate and may WARN. */ -void lkdtm_REFCOUNT_INC_ZERO(void) +static void lkdtm_REFCOUNT_INC_ZERO(void) { refcount_t zero = REFCOUNT_INIT(0); @@ -228,7 +228,7 @@ void lkdtm_REFCOUNT_INC_ZERO(void) * A refcount_add() should act like refcount_inc() above when starting * at zero. */ -void lkdtm_REFCOUNT_ADD_ZERO(void) +static void lkdtm_REFCOUNT_ADD_ZERO(void) { refcount_t zero = REFCOUNT_INIT(0); @@ -267,7 +267,7 @@ static void check_saturated(refcount_t *ref) * A refcount_inc() from a saturated value should at most warn about * being saturated already. */ -void lkdtm_REFCOUNT_INC_SATURATED(void) +static void lkdtm_REFCOUNT_INC_SATURATED(void) { refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); @@ -278,7 +278,7 @@ void lkdtm_REFCOUNT_INC_SATURATED(void) } /* Should act like refcount_inc() above from saturated. */ -void lkdtm_REFCOUNT_DEC_SATURATED(void) +static void lkdtm_REFCOUNT_DEC_SATURATED(void) { refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); @@ -289,7 +289,7 @@ void lkdtm_REFCOUNT_DEC_SATURATED(void) } /* Should act like refcount_inc() above from saturated. */ -void lkdtm_REFCOUNT_ADD_SATURATED(void) +static void lkdtm_REFCOUNT_ADD_SATURATED(void) { refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); @@ -300,7 +300,7 @@ void lkdtm_REFCOUNT_ADD_SATURATED(void) } /* Should act like refcount_inc() above from saturated. */ -void lkdtm_REFCOUNT_INC_NOT_ZERO_SATURATED(void) +static void lkdtm_REFCOUNT_INC_NOT_ZERO_SATURATED(void) { refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); @@ -312,7 +312,7 @@ void lkdtm_REFCOUNT_INC_NOT_ZERO_SATURATED(void) } /* Should act like refcount_inc() above from saturated. */ -void lkdtm_REFCOUNT_ADD_NOT_ZERO_SATURATED(void) +static void lkdtm_REFCOUNT_ADD_NOT_ZERO_SATURATED(void) { refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); @@ -324,7 +324,7 @@ void lkdtm_REFCOUNT_ADD_NOT_ZERO_SATURATED(void) } /* Should act like refcount_inc() above from saturated. */ -void lkdtm_REFCOUNT_DEC_AND_TEST_SATURATED(void) +static void lkdtm_REFCOUNT_DEC_AND_TEST_SATURATED(void) { refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); @@ -336,7 +336,7 @@ void lkdtm_REFCOUNT_DEC_AND_TEST_SATURATED(void) } /* Should act like refcount_inc() above from saturated. 
*/ -void lkdtm_REFCOUNT_SUB_AND_TEST_SATURATED(void) +static void lkdtm_REFCOUNT_SUB_AND_TEST_SATURATED(void) { refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); @@ -348,7 +348,7 @@ void lkdtm_REFCOUNT_SUB_AND_TEST_SATURATED(void) } /* Used to time the existing atomic_t when used for reference counting */ -void lkdtm_ATOMIC_TIMING(void) +static void lkdtm_ATOMIC_TIMING(void) { unsigned int i; atomic_t count = ATOMIC_INIT(1); @@ -373,7 +373,7 @@ void lkdtm_ATOMIC_TIMING(void) * cd /sys/kernel/debug/provoke-crash * perf stat -B -- cat <(echo REFCOUNT_TIMING) > DIRECT */ -void lkdtm_REFCOUNT_TIMING(void) +static void lkdtm_REFCOUNT_TIMING(void) { unsigned int i; refcount_t count = REFCOUNT_INIT(1); @@ -390,3 +390,30 @@ void lkdtm_REFCOUNT_TIMING(void) else pr_info("refcount timing: done\n"); } + +static struct crashtype crashtypes[] = { + CRASHTYPE(REFCOUNT_INC_OVERFLOW), + CRASHTYPE(REFCOUNT_ADD_OVERFLOW), + CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW), + CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_OVERFLOW), + CRASHTYPE(REFCOUNT_DEC_ZERO), + CRASHTYPE(REFCOUNT_DEC_NEGATIVE), + CRASHTYPE(REFCOUNT_DEC_AND_TEST_NEGATIVE), + CRASHTYPE(REFCOUNT_SUB_AND_TEST_NEGATIVE), + CRASHTYPE(REFCOUNT_INC_ZERO), + CRASHTYPE(REFCOUNT_ADD_ZERO), + CRASHTYPE(REFCOUNT_INC_SATURATED), + CRASHTYPE(REFCOUNT_DEC_SATURATED), + CRASHTYPE(REFCOUNT_ADD_SATURATED), + CRASHTYPE(REFCOUNT_INC_NOT_ZERO_SATURATED), + CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_SATURATED), + CRASHTYPE(REFCOUNT_DEC_AND_TEST_SATURATED), + CRASHTYPE(REFCOUNT_SUB_AND_TEST_SATURATED), + CRASHTYPE(ATOMIC_TIMING), + CRASHTYPE(REFCOUNT_TIMING), +}; + +struct crashtype_category refcount_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c index 00db21ff115e..025b133297a6 100644 --- a/drivers/misc/lkdtm/stackleak.c +++ b/drivers/misc/lkdtm/stackleak.c @@ -11,72 +11,134 @@ #include "lkdtm.h" #include <linux/stackleak.h> -void lkdtm_STACKLEAK_ERASING(void) +#if defined(CONFIG_GCC_PLUGIN_STACKLEAK) +/* + * Check that stackleak tracks the lowest stack pointer and erases the stack + * below this as expected. + * + * To prevent the lowest stack pointer changing during the test, IRQs are + * masked and instrumentation of this function is disabled. We assume that the + * compiler will create a fixed-size stack frame for this function. + * + * Any non-inlined function may make further use of the stack, altering the + * lowest stack pointer and/or clobbering poison values. To avoid spurious + * failures we must avoid printing until the end of the test or have already + * encountered a failure condition. + */ +static void noinstr check_stackleak_irqoff(void) { - unsigned long *sp, left, found, i; - const unsigned long check_depth = - STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long); + const unsigned long task_stack_base = (unsigned long)task_stack_page(current); + const unsigned long task_stack_low = stackleak_task_low_bound(current); + const unsigned long task_stack_high = stackleak_task_high_bound(current); + const unsigned long current_sp = current_stack_pointer; + const unsigned long lowest_sp = current->lowest_stack; + unsigned long untracked_high; + unsigned long poison_high, poison_low; bool test_failed = false; /* - * For the details about the alignment of the poison values, see - * the comment in stackleak_track_stack(). + * Check that the current and lowest recorded stack pointer values fall + * within the expected task stack boundaries. 
These tests should never + fail unless the boundaries are incorrect or we're clobbering the + STACK_END_MAGIC, and in either case something is seriously wrong. */ - sp = PTR_ALIGN(&i, sizeof(unsigned long)); - - left = ((unsigned long)sp & (THREAD_SIZE - 1)) / sizeof(unsigned long); - sp--; + if (current_sp < task_stack_low || current_sp >= task_stack_high) { + pr_err("FAIL: current_stack_pointer (0x%lx) outside of task stack bounds [0x%lx..0x%lx]\n", + current_sp, task_stack_low, task_stack_high - 1); + test_failed = true; + goto out; + } + if (lowest_sp < task_stack_low || lowest_sp >= task_stack_high) { + pr_err("FAIL: current->lowest_stack (0x%lx) outside of task stack bounds [0x%lx..0x%lx]\n", + lowest_sp, task_stack_low, task_stack_high - 1); + test_failed = true; + goto out; + } /* - * One 'long int' at the bottom of the thread stack is reserved - * and not poisoned. + * Depending on what has run prior to this test, the lowest recorded + * stack pointer could be above or below the current stack pointer. + * Start from the lowest of the two. + * + * Poison values are naturally-aligned unsigned longs. As the current + * stack pointer might not be sufficiently aligned, we must align + * downwards to find the lowest known stack pointer value. This is the + * high boundary for a portion of the stack which may have been used + * without being tracked, and has to be scanned for poison. */ - if (left > 1) { - left--; - } else { - pr_err("FAIL: not enough stack space for the test\n"); - test_failed = true; - goto end; - } + untracked_high = min(current_sp, lowest_sp); + untracked_high = ALIGN_DOWN(untracked_high, sizeof(unsigned long)); - pr_info("checking unused part of the thread stack (%lu bytes)...\n", - left * sizeof(unsigned long)); + /* + * Find the top of the poison in the same way as the erasing code. + */ + poison_high = stackleak_find_top_of_poison(task_stack_low, untracked_high); /* - * Search for 'check_depth' poison values in a row (just like - * stackleak_erase() does). + * Check whether the poisoned portion of the stack (if any) consists + * entirely of poison. This verifies the entries that + * stackleak_find_top_of_poison() should have checked. 
*/ - for (i = 0, found = 0; i < left && found <= check_depth; i++) { - if (*(sp - i) == STACKLEAK_POISON) - found++; - else - found = 0; - } + poison_low = poison_high; + while (poison_low > task_stack_low) { + poison_low -= sizeof(unsigned long); + + if (*(unsigned long *)poison_low == STACKLEAK_POISON) + continue; - if (found <= check_depth) { - pr_err("FAIL: the erased part is not found (checked %lu bytes)\n", - i * sizeof(unsigned long)); + pr_err("FAIL: non-poison value %lu bytes below poison boundary: 0x%lx\n", + poison_high - poison_low, *(unsigned long *)poison_low); test_failed = true; - goto end; } - pr_info("the erased part begins after %lu not poisoned bytes\n", - (i - found) * sizeof(unsigned long)); - - /* The rest of thread stack should be erased */ - for (; i < left; i++) { - if (*(sp - i) != STACKLEAK_POISON) { - pr_err("FAIL: bad value number %lu in the erased part: 0x%lx\n", - i, *(sp - i)); - test_failed = true; - } - } + pr_info("stackleak stack usage:\n" + " high offset: %lu bytes\n" + " current: %lu bytes\n" + " lowest: %lu bytes\n" + " tracked: %lu bytes\n" + " untracked: %lu bytes\n" + " poisoned: %lu bytes\n" + " low offset: %lu bytes\n", + task_stack_base + THREAD_SIZE - task_stack_high, + task_stack_high - current_sp, + task_stack_high - lowest_sp, + task_stack_high - untracked_high, + untracked_high - poison_high, + poison_high - task_stack_low, + task_stack_low - task_stack_base); -end: +out: if (test_failed) { pr_err("FAIL: the thread stack is NOT properly erased!\n"); - pr_expected_config(CONFIG_GCC_PLUGIN_STACKLEAK); } else { pr_info("OK: the rest of the thread stack is properly erased\n"); } } + +static void lkdtm_STACKLEAK_ERASING(void) +{ + unsigned long flags; + + local_irq_save(flags); + check_stackleak_irqoff(); + local_irq_restore(flags); +} +#else /* defined(CONFIG_GCC_PLUGIN_STACKLEAK) */ +static void lkdtm_STACKLEAK_ERASING(void) +{ + if (IS_ENABLED(CONFIG_HAVE_ARCH_STACKLEAK)) { + pr_err("XFAIL: stackleak is not enabled (CONFIG_GCC_PLUGIN_STACKLEAK=n)\n"); + } else { + pr_err("XFAIL: stackleak is not supported on this arch (HAVE_ARCH_STACKLEAK=n)\n"); + } +} +#endif /* defined(CONFIG_GCC_PLUGIN_STACKLEAK) */ + +static struct crashtype crashtypes[] = { + CRASHTYPE(STACKLEAK_ERASING), +}; + +struct crashtype_category stackleak_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c index 9161ce7ed47a..6215ec995cd3 100644 --- a/drivers/misc/lkdtm/usercopy.c +++ b/drivers/misc/lkdtm/usercopy.c @@ -5,6 +5,7 @@ */ #include "lkdtm.h" #include <linux/slab.h> +#include <linux/highmem.h> #include <linux/vmalloc.h> #include <linux/sched/task_stack.h> #include <linux/mman.h> @@ -30,12 +31,12 @@ static const unsigned char test_text[] = "This is a test.\n"; */ static noinline unsigned char *trick_compiler(unsigned char *stack) { - return stack + 0; + return stack + unconst; } static noinline unsigned char *do_usercopy_stack_callee(int value) { - unsigned char buf[32]; + unsigned char buf[128]; int i; /* Exercise stack to avoid everything living in registers. */ @@ -43,7 +44,12 @@ static noinline unsigned char *do_usercopy_stack_callee(int value) buf[i] = value & 0xff; } - return trick_compiler(buf); + /* + * Put the target buffer in the middle of stack allocation + * so that we don't step on future stack users regardless + * of stack growth direction. 
+ */ + return trick_compiler(&buf[(128/2)-32]); } static noinline void do_usercopy_stack(bool to_user, bool bad_frame) @@ -66,6 +72,12 @@ static noinline void do_usercopy_stack(bool to_user, bool bad_frame) bad_stack -= sizeof(unsigned long); } +#ifdef ARCH_HAS_CURRENT_STACK_POINTER + pr_info("stack : %px\n", (void *)current_stack_pointer); +#endif + pr_info("good_stack: %px-%px\n", good_stack, good_stack + sizeof(good_stack)); + pr_info("bad_stack : %px-%px\n", bad_stack, bad_stack + sizeof(good_stack)); + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, 0); @@ -119,7 +131,7 @@ free_user: * This checks for whole-object size validation with hardened usercopy, * with or without usercopy whitelisting. */ -static void do_usercopy_heap_size(bool to_user) +static void do_usercopy_slab_size(bool to_user) { unsigned long user_addr; unsigned char *one, *two; @@ -185,9 +197,9 @@ free_kernel: /* * This checks for the specific whitelist window within an object. If this - * test passes, then do_usercopy_heap_size() tests will pass too. + * test passes, then do_usercopy_slab_size() tests will pass too. */ -static void do_usercopy_heap_whitelist(bool to_user) +static void do_usercopy_slab_whitelist(bool to_user) { unsigned long user_alloc; unsigned char *buf = NULL; @@ -261,42 +273,42 @@ free_alloc: } /* Callable tests. */ -void lkdtm_USERCOPY_HEAP_SIZE_TO(void) +static void lkdtm_USERCOPY_SLAB_SIZE_TO(void) { - do_usercopy_heap_size(true); + do_usercopy_slab_size(true); } -void lkdtm_USERCOPY_HEAP_SIZE_FROM(void) +static void lkdtm_USERCOPY_SLAB_SIZE_FROM(void) { - do_usercopy_heap_size(false); + do_usercopy_slab_size(false); } -void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void) +static void lkdtm_USERCOPY_SLAB_WHITELIST_TO(void) { - do_usercopy_heap_whitelist(true); + do_usercopy_slab_whitelist(true); } -void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void) +static void lkdtm_USERCOPY_SLAB_WHITELIST_FROM(void) { - do_usercopy_heap_whitelist(false); + do_usercopy_slab_whitelist(false); } -void lkdtm_USERCOPY_STACK_FRAME_TO(void) +static void lkdtm_USERCOPY_STACK_FRAME_TO(void) { do_usercopy_stack(true, true); } -void lkdtm_USERCOPY_STACK_FRAME_FROM(void) +static void lkdtm_USERCOPY_STACK_FRAME_FROM(void) { do_usercopy_stack(false, true); } -void lkdtm_USERCOPY_STACK_BEYOND(void) +static void lkdtm_USERCOPY_STACK_BEYOND(void) { do_usercopy_stack(true, false); } -void lkdtm_USERCOPY_KERNEL(void) +static void lkdtm_USERCOPY_KERNEL(void) { unsigned long user_addr; @@ -330,6 +342,86 @@ free_user: vm_munmap(user_addr, PAGE_SIZE); } +/* + * This expects "kaddr" to point to a PAGE_SIZE allocation, which means + * a more complete test that would include copy_from_user() would risk + * memory corruption. Just test copy_to_user() here, as that exercises + * almost exactly the same code paths. + */ +static void do_usercopy_page_span(const char *name, void *kaddr) +{ + unsigned long uaddr; + + uaddr = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (uaddr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + return; + } + + /* Initialize contents. */ + memset(kaddr, 0xAA, PAGE_SIZE); + + /* Bump the kaddr forward to detect a page-spanning overflow. 
*/ + kaddr += PAGE_SIZE / 2; + + pr_info("attempting good copy_to_user() from kernel %s: %px\n", + name, kaddr); + if (copy_to_user((void __user *)uaddr, kaddr, + unconst + (PAGE_SIZE / 2))) { + pr_err("copy_to_user() failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_to_user() from kernel %s: %px\n", + name, kaddr); + if (copy_to_user((void __user *)uaddr, kaddr, unconst + PAGE_SIZE)) { + pr_warn("Good, copy_to_user() failed, but lacked Oops(?!)\n"); + goto free_user; + } + + pr_err("FAIL: bad copy_to_user() not detected!\n"); + pr_expected_config_param(CONFIG_HARDENED_USERCOPY, "hardened_usercopy"); + +free_user: + vm_munmap(uaddr, PAGE_SIZE); +} + +static void lkdtm_USERCOPY_VMALLOC(void) +{ + void *addr; + + addr = vmalloc(PAGE_SIZE); + if (!addr) { + pr_err("vmalloc() failed!?\n"); + return; + } + do_usercopy_page_span("vmalloc", addr); + vfree(addr); +} + +static void lkdtm_USERCOPY_FOLIO(void) +{ + struct folio *folio; + void *addr; + + /* + * FIXME: Folio checking currently misses 0-order allocations, so + * allocate and bump forward to the last page. + */ + folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, 1); + if (!folio) { + pr_err("folio_alloc() failed!?\n"); + return; + } + addr = folio_address(folio); + if (addr) + do_usercopy_page_span("folio", addr + PAGE_SIZE); + else + pr_err("folio_address() failed?!\n"); + folio_put(folio); +} + void __init lkdtm_usercopy_init(void) { /* Prepare cache that lacks SLAB_USERCOPY flag. */ @@ -345,3 +437,21 @@ void __exit lkdtm_usercopy_exit(void) { kmem_cache_destroy(whitelist_cache); } + +static struct crashtype crashtypes[] = { + CRASHTYPE(USERCOPY_SLAB_SIZE_TO), + CRASHTYPE(USERCOPY_SLAB_SIZE_FROM), + CRASHTYPE(USERCOPY_SLAB_WHITELIST_TO), + CRASHTYPE(USERCOPY_SLAB_WHITELIST_FROM), + CRASHTYPE(USERCOPY_STACK_FRAME_TO), + CRASHTYPE(USERCOPY_STACK_FRAME_FROM), + CRASHTYPE(USERCOPY_STACK_BEYOND), + CRASHTYPE(USERCOPY_VMALLOC), + CRASHTYPE(USERCOPY_FOLIO), + CRASHTYPE(USERCOPY_KERNEL), +}; + +struct crashtype_category usercopy_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig index 0e0bcd0da852..d21486d69df2 100644 --- a/drivers/misc/mei/Kconfig +++ b/drivers/misc/mei/Kconfig @@ -46,6 +46,20 @@ config INTEL_MEI_TXE Supported SoCs: Intel Bay Trail +config INTEL_MEI_GSC + tristate "Intel MEI GSC embedded device" + depends on INTEL_MEI + depends on INTEL_MEI_ME + depends on X86 && PCI + depends on DRM_I915 + help + Intel auxiliary driver for GSC devices embedded in Intel graphics devices. + + An MEI device here called GSC can be embedded in an + Intel graphics device to support a range of chassis + tasks such as graphics card firmware update and security + tasks. 
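For orientation before the driver sources below: INTEL_MEI_GSC binds through the auxiliary bus rather than PCI. i915 creates child devices named "<parent modname>.<device>", and the MEI module matches them by name. The following is a minimal sketch of that binding, condensing the registration that gsc-me.c performs later in this diff; the example_* identifiers are illustrative and the probe/remove bodies are elided.

#include <linux/auxiliary_bus.h>
#include <linux/module.h>

/* Device names come from the parent: i915 registers "mei-gsc"/"mei-gscfi". */
static const struct auxiliary_device_id example_gsc_ids[] = {
	{ .name = "i915.mei-gsc" },
	{ .name = "i915.mei-gscfi" },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(auxiliary, example_gsc_ids);

static int example_gsc_probe(struct auxiliary_device *aux_dev,
			     const struct auxiliary_device_id *id)
{
	/* gsc-me.c maps the BAR, requests the IRQ and starts MEI here. */
	dev_info(&aux_dev->dev, "bound to %s\n", id->name);
	return 0;
}

static void example_gsc_remove(struct auxiliary_device *aux_dev)
{
	/* Teardown mirrors probe; see mei_gsc_remove() below. */
}

static struct auxiliary_driver example_gsc_driver = {
	.probe = example_gsc_probe,
	.remove = example_gsc_remove,
	.id_table = example_gsc_ids,
};
module_auxiliary_driver(example_gsc_driver);
MODULE_LICENSE("GPL");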
+ source "drivers/misc/mei/hdcp/Kconfig" source "drivers/misc/mei/pxp/Kconfig" diff --git a/drivers/misc/mei/Makefile b/drivers/misc/mei/Makefile index d8e5165917f2..fb740d754900 100644 --- a/drivers/misc/mei/Makefile +++ b/drivers/misc/mei/Makefile @@ -18,6 +18,9 @@ obj-$(CONFIG_INTEL_MEI_ME) += mei-me.o mei-me-objs := pci-me.o mei-me-objs += hw-me.o +obj-$(CONFIG_INTEL_MEI_GSC) += mei-gsc.o +mei-gsc-objs := gsc-me.o + obj-$(CONFIG_INTEL_MEI_TXE) += mei-txe.o mei-txe-objs := pci-txe.o mei-txe-objs += hw-txe.o diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 67844089db21..59506ba6fc48 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -30,6 +30,12 @@ static const uuid_le mei_nfc_info_guid = MEI_UUID_NFC_INFO; #define MEI_UUID_MKHIF_FIX UUID_LE(0x55213584, 0x9a29, 0x4916, \ 0xba, 0xdf, 0xf, 0xb7, 0xed, 0x68, 0x2a, 0xeb) +#define MEI_UUID_IGSC_MKHI UUID_LE(0xE2C2AFA2, 0x3817, 0x4D19, \ + 0x9D, 0x95, 0x06, 0xB1, 0x6B, 0x58, 0x8A, 0x5D) + +#define MEI_UUID_IGSC_MKHI_FIX UUID_LE(0x46E0C1FB, 0xA546, 0x414F, \ + 0x91, 0x70, 0xB7, 0xF4, 0x6D, 0x57, 0xB4, 0xAD) + #define MEI_UUID_HDCP UUID_LE(0xB638AB7E, 0x94E2, 0x4EA2, \ 0xA5, 0x52, 0xD1, 0xC5, 0x4B, 0x62, 0x7F, 0x04) @@ -241,6 +247,23 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev) mei_cldev_disable(cldev); } +static void mei_gsc_mkhi_ver(struct mei_cl_device *cldev) +{ + int ret; + + /* No need to enable the client if nothing is needed from it */ + if (!cldev->bus->fw_f_fw_ver_supported) + return; + + ret = mei_cldev_enable(cldev); + if (ret) + return; + + ret = mei_fwver(cldev); + if (ret < 0) + dev_err(&cldev->dev, "FW version command failed %d\n", ret); + mei_cldev_disable(cldev); +} /** * mei_wd - wd client on the bus, change protocol version * as the API has changed. @@ -492,6 +515,8 @@ static struct mei_fixup { MEI_FIXUP(MEI_UUID_NFC_HCI, mei_nfc), MEI_FIXUP(MEI_UUID_WD, mei_wd), MEI_FIXUP(MEI_UUID_MKHIF_FIX, mei_mkhi_fix), + MEI_FIXUP(MEI_UUID_IGSC_MKHI, mei_gsc_mkhi_ver), + MEI_FIXUP(MEI_UUID_IGSC_MKHI_FIX, mei_gsc_mkhi_ver), MEI_FIXUP(MEI_UUID_HDCP, whitelist), MEI_FIXUP(MEI_UUID_ANY, vt_support), MEI_FIXUP(MEI_UUID_PAVP, whitelist), diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 06734670a732..31264ab2eb13 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -2148,6 +2148,7 @@ void mei_cl_all_disconnect(struct mei_device *dev) list_for_each_entry(cl, &dev->file_list, link) mei_cl_set_disconnected(cl); } +EXPORT_SYMBOL_GPL(mei_cl_all_disconnect); static struct mei_cl *mei_cl_dma_map_find(struct mei_device *dev, u8 buffer_id) { diff --git a/drivers/misc/mei/gsc-me.c b/drivers/misc/mei/gsc-me.c new file mode 100644 index 000000000000..c8145e9b62b6 --- /dev/null +++ b/drivers/misc/mei/gsc-me.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright(c) 2019-2022, Intel Corporation. All rights reserved. 
+ * + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include <linux/module.h> +#include <linux/mei_aux.h> +#include <linux/device.h> +#include <linux/irqreturn.h> +#include <linux/jiffies.h> +#include <linux/ktime.h> +#include <linux/delay.h> +#include <linux/pm_runtime.h> + +#include "mei_dev.h" +#include "hw-me.h" +#include "hw-me-regs.h" + +#include "mei-trace.h" + +#define MEI_GSC_RPM_TIMEOUT 500 + +static int mei_gsc_read_hfs(const struct mei_device *dev, int where, u32 *val) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + *val = ioread32(hw->mem_addr + where + 0xC00); + + return 0; +} + +static int mei_gsc_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *aux_dev_id) +{ + struct mei_aux_device *adev = auxiliary_dev_to_mei_aux_dev(aux_dev); + struct mei_device *dev; + struct mei_me_hw *hw; + struct device *device; + const struct mei_cfg *cfg; + int ret; + + cfg = mei_me_get_cfg(aux_dev_id->driver_data); + if (!cfg) + return -ENODEV; + + device = &aux_dev->dev; + + dev = mei_me_dev_init(device, cfg); + if (!dev) { + ret = -ENOMEM; + goto err; + } + + hw = to_me_hw(dev); + hw->mem_addr = devm_ioremap_resource(device, &adev->bar); + if (IS_ERR(hw->mem_addr)) { + dev_err(device, "mmio not mapped\n"); + ret = PTR_ERR(hw->mem_addr); + goto err; + } + + hw->irq = adev->irq; + hw->read_fws = mei_gsc_read_hfs; + + dev_set_drvdata(device, dev); + + ret = devm_request_threaded_irq(device, hw->irq, + mei_me_irq_quick_handler, + mei_me_irq_thread_handler, + IRQF_ONESHOT, KBUILD_MODNAME, dev); + if (ret) { + dev_err(device, "irq register failed %d\n", ret); + goto err; + } + + pm_runtime_get_noresume(device); + pm_runtime_set_active(device); + pm_runtime_enable(device); + + /* Continue to char device setup in spite of firmware handshake failure, + * in order to provide user space access to the firmware status registers + * via sysfs. 
+ */ + if (mei_start(dev)) + dev_warn(device, "init hw failure.\n"); + + pm_runtime_set_autosuspend_delay(device, MEI_GSC_RPM_TIMEOUT); + pm_runtime_use_autosuspend(device); + + ret = mei_register(dev, device); + if (ret) + goto register_err; + + pm_runtime_put_noidle(device); + return 0; + +register_err: + mei_stop(dev); + devm_free_irq(device, hw->irq, dev); + +err: + dev_err(device, "probe failed: %d\n", ret); + dev_set_drvdata(device, NULL); + return ret; +} + +static void mei_gsc_remove(struct auxiliary_device *aux_dev) +{ + struct mei_device *dev; + struct mei_me_hw *hw; + + dev = dev_get_drvdata(&aux_dev->dev); + if (!dev) + return; + + hw = to_me_hw(dev); + + mei_stop(dev); + + mei_deregister(dev); + + pm_runtime_disable(&aux_dev->dev); + + mei_disable_interrupts(dev); + devm_free_irq(&aux_dev->dev, hw->irq, dev); +} + +static int __maybe_unused mei_gsc_pm_suspend(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + + if (!dev) + return -ENODEV; + + mei_stop(dev); + + mei_disable_interrupts(dev); + + return 0; +} + +static int __maybe_unused mei_gsc_pm_resume(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + int err; + + if (!dev) + return -ENODEV; + + err = mei_restart(dev); + if (err) + return err; + + /* Start timer if stopped in suspend */ + schedule_delayed_work(&dev->timer_work, HZ); + + return 0; +} + +static int __maybe_unused mei_gsc_pm_runtime_idle(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + + if (!dev) + return -ENODEV; + if (mei_write_is_idle(dev)) + pm_runtime_autosuspend(device); + + return -EBUSY; +} + +static int __maybe_unused mei_gsc_pm_runtime_suspend(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct mei_me_hw *hw; + int ret; + + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + if (mei_write_is_idle(dev)) { + hw = to_me_hw(dev); + hw->pg_state = MEI_PG_ON; + ret = 0; + } else { + ret = -EAGAIN; + } + + mutex_unlock(&dev->device_lock); + + return ret; +} + +static int __maybe_unused mei_gsc_pm_runtime_resume(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct mei_me_hw *hw; + irqreturn_t irq_ret; + + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + hw = to_me_hw(dev); + hw->pg_state = MEI_PG_OFF; + + mutex_unlock(&dev->device_lock); + + irq_ret = mei_me_irq_thread_handler(1, dev); + if (irq_ret != IRQ_HANDLED) + dev_err(dev->dev, "thread handler fail %d\n", irq_ret); + + return 0; +} + +static const struct dev_pm_ops mei_gsc_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(mei_gsc_pm_suspend, + mei_gsc_pm_resume) + SET_RUNTIME_PM_OPS(mei_gsc_pm_runtime_suspend, + mei_gsc_pm_runtime_resume, + mei_gsc_pm_runtime_idle) +}; + +static const struct auxiliary_device_id mei_gsc_id_table[] = { + { + .name = "i915.mei-gsc", + .driver_data = MEI_ME_GSC_CFG, + + }, + { + .name = "i915.mei-gscfi", + .driver_data = MEI_ME_GSCFI_CFG, + }, + { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(auxiliary, mei_gsc_id_table); + +static struct auxiliary_driver mei_gsc_driver = { + .probe = mei_gsc_probe, + .remove = mei_gsc_remove, + .driver = { + /* auxiliary_driver_register() sets .name to be the modname */ + .pm = &mei_gsc_pm_ops, + }, + .id_table = mei_gsc_id_table +}; +module_auxiliary_driver(mei_gsc_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_ALIAS("auxiliary:i915.mei-gsc"); +MODULE_ALIAS("auxiliary:i915.mei-gscfi"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/mei/hbm.c 
b/drivers/misc/mei/hbm.c index cebcca6d6d3e..cf2b8261da14 100644 --- a/drivers/misc/mei/hbm.c +++ b/drivers/misc/mei/hbm.c @@ -1351,7 +1351,8 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr) if (dev->dev_state != MEI_DEV_INIT_CLIENTS || dev->hbm_state != MEI_HBM_CAP_SETUP) { - if (dev->dev_state == MEI_DEV_POWER_DOWN) { + if (dev->dev_state == MEI_DEV_POWER_DOWN || + dev->dev_state == MEI_DEV_POWERING_DOWN) { dev_dbg(dev->dev, "hbm: capabilities response: on shutdown, ignoring\n"); return 0; } diff --git a/drivers/misc/mei/hdcp/mei_hdcp.c b/drivers/misc/mei/hdcp/mei_hdcp.c index ec2a4fce8581..e889a8bd7ac8 100644 --- a/drivers/misc/mei/hdcp/mei_hdcp.c +++ b/drivers/misc/mei/hdcp/mei_hdcp.c @@ -784,7 +784,7 @@ static int mei_hdcp_component_match(struct device *dev, int subcomponent, { struct device *base = data; - if (strcmp(dev->driver->name, "i915") || + if (!dev->driver || strcmp(dev->driver->name, "i915") || subcomponent != I915_COMPONENT_HDCP) return 0; diff --git a/drivers/misc/mei/hdcp/mei_hdcp.h b/drivers/misc/mei/hdcp/mei_hdcp.h index 834757f5e072..ca09c8f83d6b 100644 --- a/drivers/misc/mei/hdcp/mei_hdcp.h +++ b/drivers/misc/mei/hdcp/mei_hdcp.h @@ -9,7 +9,7 @@ #ifndef __MEI_HDCP_H__ #define __MEI_HDCP_H__ -#include <drm/drm_hdcp.h> +#include <drm/display/drm_hdcp.h> /* me_hdcp_status: Enumeration of all HDCP Status Codes */ enum me_hdcp_status { diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 67bb6a25fd0a..15e8e2b322b1 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -107,6 +107,9 @@ #define MEI_DEV_ID_ADP_S 0x7AE8 /* Alder Lake Point S */ #define MEI_DEV_ID_ADP_LP 0x7A60 /* Alder Lake Point LP */ #define MEI_DEV_ID_ADP_P 0x51E0 /* Alder Lake Point P */ +#define MEI_DEV_ID_ADP_N 0x54E0 /* Alder Lake Point N */ + +#define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */ /* * MEI HW Section @@ -120,6 +123,7 @@ #define PCI_CFG_HFS_2 0x48 #define PCI_CFG_HFS_3 0x60 # define PCI_CFG_HFS_3_FW_SKU_MSK 0x00000070 +# define PCI_CFG_HFS_3_FW_SKU_IGN 0x00000000 # define PCI_CFG_HFS_3_FW_SKU_SPS 0x00000060 #define PCI_CFG_HFS_4 0x64 #define PCI_CFG_HFS_5 0x68 diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index d3a6c0728645..befa491e3344 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1154,6 +1154,8 @@ static int mei_me_hw_reset(struct mei_device *dev, bool intr_enable) ret = mei_me_d0i3_exit_sync(dev); if (ret) return ret; + } else { + hw->pg_state = MEI_PG_OFF; } } @@ -1226,6 +1228,7 @@ irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id) me_intr_disable(dev, hcsr); return IRQ_WAKE_THREAD; } +EXPORT_SYMBOL_GPL(mei_me_irq_quick_handler); /** * mei_me_irq_thread_handler - function called after ISR to handle the interrupt @@ -1257,7 +1260,11 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) /* check if ME wants a reset */ if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING) { dev_warn(dev->dev, "FW not ready: resetting.\n"); - schedule_work(&dev->reset_work); + if (dev->dev_state == MEI_DEV_POWERING_DOWN || + dev->dev_state == MEI_DEV_POWER_DOWN) + mei_cl_all_disconnect(dev); + else if (dev->dev_state != MEI_DEV_DISABLED) + schedule_work(&dev->reset_work); goto end; } @@ -1289,12 +1296,14 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) if (rets == -ENODATA) break; - if (rets && - (dev->dev_state != MEI_DEV_RESETTING && - dev->dev_state != MEI_DEV_POWER_DOWN)) { - dev_err(dev->dev, "mei_irq_read_handler ret = 
%d.\n", - rets); - schedule_work(&dev->reset_work); + if (rets) { + dev_err(dev->dev, "mei_irq_read_handler ret = %d, state = %d.\n", + rets, dev->dev_state); + if (dev->dev_state != MEI_DEV_RESETTING && + dev->dev_state != MEI_DEV_DISABLED && + dev->dev_state != MEI_DEV_POWERING_DOWN && + dev->dev_state != MEI_DEV_POWER_DOWN) + schedule_work(&dev->reset_work); goto end; } } @@ -1320,6 +1329,7 @@ end: mutex_unlock(&dev->device_lock); return IRQ_HANDLED; } +EXPORT_SYMBOL_GPL(mei_me_irq_thread_handler); static const struct mei_hw_ops mei_me_hw_ops = { @@ -1405,16 +1415,16 @@ static bool mei_me_fw_type_sps_4(const struct pci_dev *pdev) .quirk_probe = mei_me_fw_type_sps_4 /** - * mei_me_fw_type_sps() - check for sps sku + * mei_me_fw_type_sps_ign() - check for sps or ign sku * - * Read ME FW Status register to check for SPS Firmware. - * The SPS FW is only signaled in pci function 0 + * Read ME FW Status register to check for SPS or IGN Firmware. + * The SPS/IGN FW is only signaled in pci function 0 * * @pdev: pci device * - * Return: true in case of SPS firmware + * Return: true in case of SPS/IGN firmware */ -static bool mei_me_fw_type_sps(const struct pci_dev *pdev) +static bool mei_me_fw_type_sps_ign(const struct pci_dev *pdev) { u32 reg; u32 fw_type; @@ -1427,14 +1437,21 @@ static bool mei_me_fw_type_sps(const struct pci_dev *pdev) dev_dbg(&pdev->dev, "fw type is %d\n", fw_type); - return fw_type == PCI_CFG_HFS_3_FW_SKU_SPS; + return fw_type == PCI_CFG_HFS_3_FW_SKU_IGN || + fw_type == PCI_CFG_HFS_3_FW_SKU_SPS; } #define MEI_CFG_KIND_ITOUCH \ .kind = "itouch" -#define MEI_CFG_FW_SPS \ - .quirk_probe = mei_me_fw_type_sps +#define MEI_CFG_TYPE_GSC \ + .kind = "gsc" + +#define MEI_CFG_TYPE_GSCFI \ + .kind = "gscfi" + +#define MEI_CFG_FW_SPS_IGN \ + .quirk_probe = mei_me_fw_type_sps_ign #define MEI_CFG_FW_VER_SUPP \ .fw_ver_supported = 1 @@ -1535,7 +1552,7 @@ static const struct mei_cfg mei_me_pch12_sps_cfg = { MEI_CFG_PCH8_HFS, MEI_CFG_FW_VER_SUPP, MEI_CFG_DMA_128, - MEI_CFG_FW_SPS, + MEI_CFG_FW_SPS_IGN, }; /* Cannon Lake itouch with quirk for SPS 5.0 and newer Firmware exclusion @@ -1545,7 +1562,7 @@ static const struct mei_cfg mei_me_pch12_itouch_sps_cfg = { MEI_CFG_KIND_ITOUCH, MEI_CFG_PCH8_HFS, MEI_CFG_FW_VER_SUPP, - MEI_CFG_FW_SPS, + MEI_CFG_FW_SPS_IGN, }; /* Tiger Lake and newer devices */ @@ -1562,7 +1579,21 @@ static const struct mei_cfg mei_me_pch15_sps_cfg = { MEI_CFG_FW_VER_SUPP, MEI_CFG_DMA_128, MEI_CFG_TRC, - MEI_CFG_FW_SPS, + MEI_CFG_FW_SPS_IGN, +}; + +/* Graphics System Controller */ +static const struct mei_cfg mei_me_gsc_cfg = { + MEI_CFG_TYPE_GSC, + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, +}; + +/* Graphics System Controller Firmware Interface */ +static const struct mei_cfg mei_me_gscfi_cfg = { + MEI_CFG_TYPE_GSCFI, + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, }; /* @@ -1585,6 +1616,8 @@ static const struct mei_cfg *const mei_cfg_list[] = { [MEI_ME_PCH12_SPS_ITOUCH_CFG] = &mei_me_pch12_itouch_sps_cfg, [MEI_ME_PCH15_CFG] = &mei_me_pch15_cfg, [MEI_ME_PCH15_SPS_CFG] = &mei_me_pch15_sps_cfg, + [MEI_ME_GSC_CFG] = &mei_me_gsc_cfg, + [MEI_ME_GSCFI_CFG] = &mei_me_gscfi_cfg, }; const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) @@ -1595,7 +1628,8 @@ const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) return NULL; return mei_cfg_list[idx]; -}; +} +EXPORT_SYMBOL_GPL(mei_me_get_cfg); /** * mei_me_dev_init - allocates and initializes the mei device structure @@ -1630,4 +1664,4 @@ struct mei_device *mei_me_dev_init(struct device *parent, return dev; } - 
+EXPORT_SYMBOL_GPL(mei_me_dev_init); diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h index 00a7132ac7a2..a071c645e905 100644 --- a/drivers/misc/mei/hw-me.h +++ b/drivers/misc/mei/hw-me.h @@ -112,6 +112,8 @@ enum mei_cfg_idx { MEI_ME_PCH12_SPS_ITOUCH_CFG, MEI_ME_PCH15_CFG, MEI_ME_PCH15_SPS_CFG, + MEI_ME_GSC_CFG, + MEI_ME_GSCFI_CFG, MEI_ME_NUM_CFG, }; diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index f79076c67256..eb052005ca86 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -161,6 +161,11 @@ int mei_reset(struct mei_device *dev) return ret; } + if (dev->dev_state != MEI_DEV_RESETTING) { + dev_dbg(dev->dev, "wrong state = %d on link start\n", dev->dev_state); + return 0; + } + dev_dbg(dev->dev, "link is established start sending messages.\n"); mei_set_devstate(dev, MEI_DEV_INIT_CLIENTS); diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c index a67f4f2d33a9..0706322154cb 100644 --- a/drivers/misc/mei/interrupt.c +++ b/drivers/misc/mei/interrupt.c @@ -424,31 +424,26 @@ int mei_irq_read_handler(struct mei_device *dev, list_for_each_entry(cl, &dev->file_list, link) { if (mei_cl_hbm_equal(cl, mei_hdr)) { cl_dbg(dev, cl, "got a message\n"); - break; + ret = mei_cl_irq_read_msg(cl, mei_hdr, meta_hdr, cmpl_list); + goto reset_slots; } } /* if no recipient cl was found we assume corrupted header */ - if (&cl->link == &dev->file_list) { - /* A message for not connected fixed address clients - * should be silently discarded - * On power down client may be force cleaned, - * silently discard such messages - */ - if (hdr_is_fixed(mei_hdr) || - dev->dev_state == MEI_DEV_POWER_DOWN) { - mei_irq_discard_msg(dev, mei_hdr, mei_hdr->length); - ret = 0; - goto reset_slots; - } - dev_err(dev->dev, "no destination client found 0x%08X\n", - dev->rd_msg_hdr[0]); - ret = -EBADMSG; - goto end; + /* A message for not connected fixed address clients + * should be silently discarded + * On power down client may be force cleaned, + * silently discard such messages + */ + if (hdr_is_fixed(mei_hdr) || + dev->dev_state == MEI_DEV_POWER_DOWN) { + mei_irq_discard_msg(dev, mei_hdr, mei_hdr->length); + ret = 0; + goto reset_slots; } - - ret = mei_cl_irq_read_msg(cl, mei_hdr, meta_hdr, cmpl_list); - + dev_err(dev->dev, "no destination client found 0x%08X\n", dev->rd_msg_hdr[0]); + ret = -EBADMSG; + goto end; reset_slots: /* reset the number of slots and header */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 3a45aaf002ac..5435604327a7 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -10,6 +10,7 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/pci.h> +#include <linux/dma-mapping.h> #include <linux/sched.h> #include <linux/interrupt.h> @@ -113,6 +114,9 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_S, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_LP, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } @@ -192,14 +196,7 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto end; } - if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) || - dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) { - - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); - if (err) - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(32)); - } + err = 
dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) { dev_err(&pdev->dev, "No usable DMA configuration, aborting\n"); goto end; diff --git a/drivers/misc/mei/pxp/mei_pxp.c b/drivers/misc/mei/pxp/mei_pxp.c index f7380d387bab..5c39457e3f53 100644 --- a/drivers/misc/mei/pxp/mei_pxp.c +++ b/drivers/misc/mei/pxp/mei_pxp.c @@ -131,7 +131,7 @@ static int mei_pxp_component_match(struct device *dev, int subcomponent, { struct device *base = data; - if (strcmp(dev->driver->name, "i915") || + if (!dev->driver || strcmp(dev->driver->name, "i915") || subcomponent != I915_COMPONENT_PXP) return 0; diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c index ecdcfae025b7..a06920b7e049 100644 --- a/drivers/misc/ocxl/afu_irq.c +++ b/drivers/misc/ocxl/afu_irq.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ // Copyright 2017 IBM Corp. #include <linux/interrupt.h> +#include <linux/irqdomain.h> #include <asm/pnv-ocxl.h> #include <asm/xive.h> #include "ocxl_internal.h" diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index d881f5e40ad9..6777c419a8da 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -556,7 +556,9 @@ int ocxl_file_register_afu(struct ocxl_afu *afu) err_unregister: ocxl_sysfs_unregister_afu(info); // safe to call even if register failed + free_minor(info); device_unregister(&info->dev); + return rc; err_put: ocxl_afu_put(afu); free_minor(info); diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index ab039c115381..4cf4c55a5f00 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -6,6 +6,7 @@ #include <linux/mm_types.h> #include <linux/mmu_context.h> #include <linux/mmu_notifier.h> +#include <linux/irqdomain.h> #include <asm/copro.h> #include <asm/pnv-ocxl.h> #include <asm/xive.h> @@ -94,7 +95,7 @@ struct ocxl_link { struct spa *spa; void *platform_data; }; -static struct list_head links_list = LIST_HEAD_INIT(links_list); +static LIST_HEAD(links_list); static DEFINE_MUTEX(links_list_lock); enum xsl_response { diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c new file mode 100644 index 000000000000..c61be3404c6f --- /dev/null +++ b/drivers/misc/open-dice.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 - Google LLC + * Author: David Brazdil <dbrazdil@google.com> + * + * Driver for Open Profile for DICE. + * + * This driver takes ownership of a reserved memory region containing data + * generated by the Open Profile for DICE measured boot protocol. The memory + * contents are not interpreted by the kernel but can be mapped into a userspace + * process via a misc device. Userspace can also request a wipe of the memory. 
+ * + * Userspace can access the data with (w/o error handling): + * + * fd = open("/dev/open-dice0", O_RDWR); + * read(fd, &size, sizeof(unsigned long)); + * data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + * write(fd, NULL, 0); // wipe + * close(fd); + */ + +#include <linux/io.h> +#include <linux/miscdevice.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/of_reserved_mem.h> +#include <linux/platform_device.h> + +#define DRIVER_NAME "open-dice" + +struct open_dice_drvdata { + struct mutex lock; + char name[16]; + struct reserved_mem *rmem; + struct miscdevice misc; +}; + +static inline struct open_dice_drvdata *to_open_dice_drvdata(struct file *filp) +{ + return container_of(filp->private_data, struct open_dice_drvdata, misc); +} + +static int open_dice_wipe(struct open_dice_drvdata *drvdata) +{ + void *kaddr; + + mutex_lock(&drvdata->lock); + kaddr = devm_memremap(drvdata->misc.this_device, drvdata->rmem->base, + drvdata->rmem->size, MEMREMAP_WC); + if (IS_ERR(kaddr)) { + mutex_unlock(&drvdata->lock); + return PTR_ERR(kaddr); + } + + memset(kaddr, 0, drvdata->rmem->size); + devm_memunmap(drvdata->misc.this_device, kaddr); + mutex_unlock(&drvdata->lock); + return 0; +} + +/* + * Copies the size of the reserved memory region to the user-provided buffer. + */ +static ssize_t open_dice_read(struct file *filp, char __user *ptr, size_t len, + loff_t *off) +{ + unsigned long val = to_open_dice_drvdata(filp)->rmem->size; + + return simple_read_from_buffer(ptr, len, off, &val, sizeof(val)); +} + +/* + * Triggers a wipe of the reserved memory region. The user-provided pointer + * is never dereferenced. + */ +static ssize_t open_dice_write(struct file *filp, const char __user *ptr, + size_t len, loff_t *off) +{ + if (open_dice_wipe(to_open_dice_drvdata(filp))) + return -EIO; + + /* Consume the input buffer. */ + return len; +} + +/* + * Creates a mapping of the reserved memory region in user address space. + */ +static int open_dice_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct open_dice_drvdata *drvdata = to_open_dice_drvdata(filp); + + /* Do not allow userspace to modify the underlying data. */ + if ((vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_SHARED)) + return -EPERM; + + /* Ensure userspace cannot acquire VM_WRITE + VM_SHARED later. */ + if (vma->vm_flags & VM_WRITE) + vma->vm_flags &= ~VM_MAYSHARE; + else if (vma->vm_flags & VM_SHARED) + vma->vm_flags &= ~VM_MAYWRITE; + + /* Create write-combine mapping so all clients observe a wipe. 
*/ + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP; + return vm_iomap_memory(vma, drvdata->rmem->base, drvdata->rmem->size); +} + +static const struct file_operations open_dice_fops = { + .owner = THIS_MODULE, + .read = open_dice_read, + .write = open_dice_write, + .mmap = open_dice_mmap, +}; + +static int __init open_dice_probe(struct platform_device *pdev) +{ + static unsigned int dev_idx; + struct device *dev = &pdev->dev; + struct reserved_mem *rmem; + struct open_dice_drvdata *drvdata; + int ret; + + rmem = of_reserved_mem_lookup(dev->of_node); + if (!rmem) { + dev_err(dev, "failed to lookup reserved memory\n"); + return -EINVAL; + } + + if (!rmem->size || (rmem->size > ULONG_MAX)) { + dev_err(dev, "invalid memory region size\n"); + return -EINVAL; + } + + if (!PAGE_ALIGNED(rmem->base) || !PAGE_ALIGNED(rmem->size)) { + dev_err(dev, "memory region must be page-aligned\n"); + return -EINVAL; + } + + drvdata = devm_kmalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + + *drvdata = (struct open_dice_drvdata){ + .lock = __MUTEX_INITIALIZER(drvdata->lock), + .rmem = rmem, + .misc = (struct miscdevice){ + .parent = dev, + .name = drvdata->name, + .minor = MISC_DYNAMIC_MINOR, + .fops = &open_dice_fops, + .mode = 0600, + }, + }; + + /* Index overflow check not needed, misc_register() will fail. */ + snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++); + + ret = misc_register(&drvdata->misc); + if (ret) { + dev_err(dev, "failed to register misc device '%s': %d\n", + drvdata->name, ret); + return ret; + } + + platform_set_drvdata(pdev, drvdata); + return 0; +} + +static int open_dice_remove(struct platform_device *pdev) +{ + struct open_dice_drvdata *drvdata = platform_get_drvdata(pdev); + + misc_deregister(&drvdata->misc); + return 0; +} + +static const struct of_device_id open_dice_of_match[] = { + { .compatible = "google,open-dice" }, + {}, +}; + +static struct platform_driver open_dice_driver = { + .remove = open_dice_remove, + .driver = { + .name = DRIVER_NAME, + .of_match_table = open_dice_of_match, + }, +}; + +static int __init open_dice_init(void) +{ + int ret = platform_driver_probe(&open_dice_driver, open_dice_probe); + + /* DICE regions are optional. Succeed even with zero instances. */ + return (ret == -ENODEV) ? 0 : ret; +} + +static void __exit open_dice_exit(void) +{ + platform_driver_unregister(&open_dice_driver); +} + +module_init(open_dice_init); +module_exit(open_dice_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("David Brazdil <dbrazdil@google.com>"); diff --git a/drivers/misc/pvpanic/pvpanic.c b/drivers/misc/pvpanic/pvpanic.c index 4b8f1c7d726d..049a12006348 100644 --- a/drivers/misc/pvpanic/pvpanic.c +++ b/drivers/misc/pvpanic/pvpanic.c @@ -34,7 +34,9 @@ pvpanic_send_event(unsigned int event) { struct pvpanic_instance *pi_cur; - spin_lock(&pvpanic_lock); + if (!spin_trylock(&pvpanic_lock)) + return; + list_for_each_entry(pi_cur, &pvpanic_list, list) { if (event & pi_cur->capability & pi_cur->events) iowrite8(event, pi_cur->base); @@ -55,9 +57,13 @@ pvpanic_panic_notify(struct notifier_block *nb, unsigned long code, void *unused return NOTIFY_DONE; } +/* + * Call our notifier very early on panic, deferring the + * action taken to the hypervisor. 
+ */ static struct notifier_block pvpanic_panic_nb = { .notifier_call = pvpanic_panic_notify, - .priority = 1, /* let this called before broken drm_fb_helper() */ + .priority = INT_MAX, }; static void pvpanic_remove(void *param) diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index 0ea923fe6371..19dbdad8ad8a 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -1016,7 +1016,7 @@ static int quicktest1(unsigned long arg) break; } if (ret != MQE_QUEUE_FULL || i != 4) { - printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n", + printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n", smp_processor_id(), ret, i); goto done; } diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index e4c067c61251..5efc869fe59a 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -530,12 +530,6 @@ struct gru_blade_state { for ((i) = (k)*GRU_CBR_AU_SIZE; \ (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++) -/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */ -#define for_each_dsr_in_allocation_map(i, map, k) \ - for_each_set_bit((k), (const unsigned long *)(map), GRU_DSR_AU) \ - for ((i) = (k) * GRU_DSR_AU_CL; \ - (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++) - #define gseg_physical_address(gru, ctxnum) \ ((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE) #define gseg_virtual_address(gru, ctxnum) \ diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c index dab7b92db790..50644f83e78c 100644 --- a/drivers/misc/sgi-xp/xpnet.c +++ b/drivers/misc/sgi-xp/xpnet.c @@ -247,7 +247,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg) xpnet_device->stats.rx_packets++; xpnet_device->stats.rx_bytes += skb->len + ETH_HLEN; - netif_rx_ni(skb); + netif_rx(skb); xpc_received(partid, channel, (void *)msg); } diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index f1d8ba6d4857..086ce77d9074 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -1452,10 +1452,10 @@ static void vmballoon_reset(struct vmballoon *b) error = vmballoon_vmci_init(b); if (error) - pr_err("failed to initialize vmci doorbell\n"); + pr_err_once("failed to initialize vmci doorbell\n"); if (vmballoon_send_guest_id(b)) - pr_err("failed to send guest ID to the host\n"); + pr_err_once("failed to send guest ID to the host\n"); unlock: up_write(&b->conf_sem); diff --git a/drivers/misc/vmw_vmci/Kconfig b/drivers/misc/vmw_vmci/Kconfig index 605794aadf11..b6d4d7fd686a 100644 --- a/drivers/misc/vmw_vmci/Kconfig +++ b/drivers/misc/vmw_vmci/Kconfig @@ -5,7 +5,7 @@ config VMWARE_VMCI tristate "VMware VMCI Driver" - depends on X86 && PCI + depends on (X86 || ARM64) && !CPU_BIG_ENDIAN && PCI help This is VMware's Virtual Machine Communication Interface. 
It enables high-speed communication between host and guest in a virtual diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c index 6cf3e21c7604..172696abce31 100644 --- a/drivers/misc/vmw_vmci/vmci_context.c +++ b/drivers/misc/vmw_vmci/vmci_context.c @@ -665,9 +665,8 @@ int vmci_ctx_add_notification(u32 context_id, u32 remote_cid) int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid) { struct vmci_ctx *context; - struct vmci_handle_list *notifier, *tmp; + struct vmci_handle_list *notifier = NULL, *iter, *tmp; struct vmci_handle handle; - bool found = false; context = vmci_ctx_get(context_id); if (!context) @@ -676,23 +675,23 @@ int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid) handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER); spin_lock(&context->lock); - list_for_each_entry_safe(notifier, tmp, + list_for_each_entry_safe(iter, tmp, &context->notifier_list, node) { - if (vmci_handle_is_equal(notifier->handle, handle)) { - list_del_rcu(¬ifier->node); + if (vmci_handle_is_equal(iter->handle, handle)) { + list_del_rcu(&iter->node); context->n_notifiers--; - found = true; + notifier = iter; break; } } spin_unlock(&context->lock); - if (found) + if (notifier) kvfree_rcu(notifier); vmci_ctx_put(context); - return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND; + return notifier ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND; } static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context, diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c index 1018dc77269d..aa7b05de97dd 100644 --- a/drivers/misc/vmw_vmci/vmci_guest.c +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/processor.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/init.h> @@ -31,6 +32,12 @@ #define VMCI_UTIL_NUM_RESOURCES 1 +/* + * Datagram buffers for DMA send/receive must accommodate at least + * a maximum sized datagram and the header. 
+ */ +#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE) + static bool vmci_disable_msi; module_param_named(disable_msi, vmci_disable_msi, bool, 0); MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)"); @@ -45,13 +52,18 @@ static u32 vm_context_id = VMCI_INVALID_ID; struct vmci_guest_device { struct device *dev; /* PCI device we are attached to */ void __iomem *iobase; + void __iomem *mmio_base; bool exclusive_vectors; struct tasklet_struct datagram_tasklet; struct tasklet_struct bm_tasklet; + struct wait_queue_head inout_wq; void *data_buffer; + dma_addr_t data_buffer_base; + void *tx_buffer; + dma_addr_t tx_buffer_base; void *notification_bitmap; dma_addr_t notification_base; }; @@ -89,6 +101,92 @@ u32 vmci_get_vm_context_id(void) return vm_context_id; } +static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg) +{ + if (dev->mmio_base != NULL) + return readl(dev->mmio_base + reg); + return ioread32(dev->iobase + reg); +} + +static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg) +{ + if (dev->mmio_base != NULL) + writel(val, dev->mmio_base + reg); + else + iowrite32(val, dev->iobase + reg); +} + +static void vmci_read_data(struct vmci_guest_device *vmci_dev, + void *dest, size_t size) +{ + if (vmci_dev->mmio_base == NULL) + ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR, + dest, size); + else { + /* + * For DMA datagrams, the data_buffer will contain the header on the + * first page, followed by the incoming datagram(s) on the following + * pages. The header uses an S/G element immediately following the + * header on the first page to point to the data area. + */ + struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer; + struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1); + size_t buffer_offset = dest - vmci_dev->data_buffer; + + buffer_header->opcode = 1; + buffer_header->size = 1; + buffer_header->busy = 0; + sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset; + sg_array[0].size = size; + + vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base), + VMCI_DATA_IN_LOW_ADDR); + + wait_event(vmci_dev->inout_wq, buffer_header->busy == 1); + } +} + +static int vmci_write_data(struct vmci_guest_device *dev, + struct vmci_datagram *dg) +{ + int result; + + if (dev->mmio_base != NULL) { + struct vmci_data_in_out_header *buffer_header = dev->tx_buffer; + u8 *dg_out_buffer = (u8 *)(buffer_header + 1); + + if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) + return VMCI_ERROR_INVALID_ARGS; + + /* + * Initialize send buffer with outgoing datagram + * and set up header for inline data. Device will + * not access buffer asynchronously - only after + * the write to VMCI_DATA_OUT_LOW_ADDR. + */ + memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg)); + buffer_header->opcode = 0; + buffer_header->size = VMCI_DG_SIZE(dg); + buffer_header->busy = 1; + + vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base), + VMCI_DATA_OUT_LOW_ADDR); + + /* Caller holds a spinlock, so cannot block. */ + spin_until_cond(buffer_header->busy == 0); + + result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); + if (result == VMCI_SUCCESS) + result = (int)buffer_header->result; + } else { + iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR, + dg, VMCI_DG_SIZE(dg)); + result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); + } + + return result; +} + /* * VM to hypervisor call mechanism. We use the standard VMware naming * convention since shared code is calling this function as well. 
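To make the send path concrete: a caller fills in a struct vmci_datagram (dst/src handles plus payload size) and hands it to vmci_send_datagram(), which now funnels through vmci_write_data() and therefore works over both the legacy IO port and the new MMIO/DMA transport. A minimal sketch of an empty datagram send, assuming the driver-internal declarations are in scope; the destination IDs are placeholders:

#include <linux/slab.h>
#include <linux/vmw_vmci_defs.h>	/* struct vmci_datagram, vmci_make_handle() */

static int example_send_empty_dg(u32 dst_cid, u32 dst_resource)
{
	struct vmci_datagram *dg;
	int result;

	dg = kzalloc(sizeof(*dg), GFP_KERNEL);	/* header only, no payload */
	if (!dg)
		return VMCI_ERROR_NO_MEM;

	dg->dst = vmci_make_handle(dst_cid, dst_resource);
	dg->src = vmci_make_handle(VMCI_INVALID_ID, VMCI_INVALID_ID); /* anonymous */
	dg->payload_size = 0;

	/* Ends up in vmci_write_data() above: DMA buffer or IO port. */
	result = vmci_send_datagram(dg);

	kfree(dg);
	return result;
}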
@@ -114,9 +212,8 @@ int vmci_send_datagram(struct vmci_datagram *dg) spin_lock_irqsave(&vmci_dev_spinlock, flags); if (vmci_dev_g) { - iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR, - dg, VMCI_DG_SIZE(dg)); - result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR); + vmci_write_data(vmci_dev_g, dg); + result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); } else { result = VMCI_ERROR_UNAVAILABLE; } @@ -156,9 +253,9 @@ static void vmci_guest_cid_update(u32 sub_id, /* * Verify that the host supports the hypercalls we need. If it does not, - * try to find fallback hypercalls and use those instead. Returns - * true if required hypercalls (or fallback hypercalls) are - * supported by the host, false otherwise. + * try to find fallback hypercalls and use those instead. Returns 0 if + * required hypercalls (or fallback hypercalls) are supported by the host, + * an error code otherwise. */ static int vmci_check_host_caps(struct pci_dev *pdev) { @@ -195,15 +292,17 @@ static int vmci_check_host_caps(struct pci_dev *pdev) } /* - * Reads datagrams from the data in port and dispatches them. We - * always start reading datagrams into only the first page of the - * datagram buffer. If the datagrams don't fit into one page, we - * use the maximum datagram buffer size for the remainder of the - * invocation. This is a simple heuristic for not penalizing - * small datagrams. + * Reads datagrams from the device and dispatches them. For IO port + * based access to the device, we always start reading datagrams into + * only the first page of the datagram buffer. If the datagrams don't + * fit into one page, we use the maximum datagram buffer size for the + * remainder of the invocation. This is a simple heuristic for not + * penalizing small datagrams. For DMA-based datagrams, we always + * use the maximum datagram buffer size, since there is no performance + * penalty for doing so. * * This function assumes that it has exclusive access to the data - * in port for the duration of the call. + * in register(s) for the duration of the call. */ static void vmci_dispatch_dgs(unsigned long data) { @@ -211,23 +310,41 @@ static void vmci_dispatch_dgs(unsigned long data) u8 *dg_in_buffer = vmci_dev->data_buffer; struct vmci_datagram *dg; size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE; - size_t current_dg_in_buffer_size = PAGE_SIZE; + size_t current_dg_in_buffer_size; size_t remaining_bytes; + bool is_io_port = vmci_dev->mmio_base == NULL; BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE); - ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR, - vmci_dev->data_buffer, current_dg_in_buffer_size); + if (!is_io_port) { + /* For mmio, the first page is used for the header. */ + dg_in_buffer += PAGE_SIZE; + + /* + * For DMA-based datagram operations, there is no performance + * penalty for reading the maximum buffer size. + */ + current_dg_in_buffer_size = VMCI_MAX_DG_SIZE; + } else { + current_dg_in_buffer_size = PAGE_SIZE; + } + vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size); dg = (struct vmci_datagram *)dg_in_buffer; remaining_bytes = current_dg_in_buffer_size; + /* + * Read through the buffer until an invalid datagram header is + * encountered. The exit condition for datagrams read through + * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram + * can start on any page boundary in the buffer. 
+ */ while (dg->dst.resource != VMCI_INVALID_ID || - remaining_bytes > PAGE_SIZE) { + (is_io_port && remaining_bytes > PAGE_SIZE)) { unsigned dg_in_size; /* - * When the input buffer spans multiple pages, a datagram can - * start on any page boundary in the buffer. + * If using VMCI_DATA_IN_ADDR, skip to the next page + * as a datagram can start on any page boundary. */ if (dg->dst.resource == VMCI_INVALID_ID) { dg = (struct vmci_datagram *)roundup( @@ -277,11 +394,10 @@ static void vmci_dispatch_dgs(unsigned long data) current_dg_in_buffer_size = dg_in_buffer_size; - ioread8_rep(vmci_dev->iobase + - VMCI_DATA_IN_ADDR, - vmci_dev->data_buffer + + vmci_read_data(vmci_dev, + dg_in_buffer + remaining_bytes, - current_dg_in_buffer_size - + current_dg_in_buffer_size - remaining_bytes); } @@ -319,10 +435,8 @@ static void vmci_dispatch_dgs(unsigned long data) current_dg_in_buffer_size = dg_in_buffer_size; for (;;) { - ioread8_rep(vmci_dev->iobase + - VMCI_DATA_IN_ADDR, - vmci_dev->data_buffer, - current_dg_in_buffer_size); + vmci_read_data(vmci_dev, dg_in_buffer, + current_dg_in_buffer_size); if (bytes_to_skip <= current_dg_in_buffer_size) break; @@ -339,8 +453,7 @@ static void vmci_dispatch_dgs(unsigned long data) if (remaining_bytes < VMCI_DG_HEADERSIZE) { /* Get the next batch of datagrams. */ - ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR, - vmci_dev->data_buffer, + vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size); dg = (struct vmci_datagram *)dg_in_buffer; remaining_bytes = current_dg_in_buffer_size; @@ -384,7 +497,7 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev) unsigned int icr; /* Acknowledge interrupt and determine what needs doing. */ - icr = ioread32(dev->iobase + VMCI_ICR_ADDR); + icr = vmci_read_reg(dev, VMCI_ICR_ADDR); if (icr == 0 || icr == ~0) return IRQ_NONE; @@ -398,6 +511,12 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev) icr &= ~VMCI_ICR_NOTIFICATION; } + + if (icr & VMCI_ICR_DMA_DATAGRAM) { + wake_up_all(&dev->inout_wq); + icr &= ~VMCI_ICR_DMA_DATAGRAM; + } + if (icr != 0) dev_warn(dev->dev, "Ignoring unknown interrupt cause (%d)\n", @@ -423,13 +542,47 @@ static irqreturn_t vmci_interrupt_bm(int irq, void *_dev) } /* + * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM, + * which is for the completion of a DMA datagram send or receive operation. + * Will only get called if we are using MSI-X with exclusive vectors. + */ +static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev) +{ + struct vmci_guest_device *dev = _dev; + + wake_up_all(&dev->inout_wq); + + return IRQ_HANDLED; +} + +static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev) +{ + if (vmci_dev->mmio_base != NULL) { + if (vmci_dev->tx_buffer != NULL) + dma_free_coherent(vmci_dev->dev, + VMCI_DMA_DG_BUFFER_SIZE, + vmci_dev->tx_buffer, + vmci_dev->tx_buffer_base); + if (vmci_dev->data_buffer != NULL) + dma_free_coherent(vmci_dev->dev, + VMCI_DMA_DG_BUFFER_SIZE, + vmci_dev->data_buffer, + vmci_dev->data_buffer_base); + } else { + vfree(vmci_dev->data_buffer); + } +} + +/* * Most of the initialization at module load time is done here. 
*/ static int vmci_guest_probe_device(struct pci_dev *pdev, const struct pci_device_id *id) { struct vmci_guest_device *vmci_dev; - void __iomem *iobase; + void __iomem *iobase = NULL; + void __iomem *mmio_base = NULL; + unsigned int num_irq_vectors; unsigned int capabilities; unsigned int caps_in_use; unsigned long cmd; @@ -445,16 +598,33 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, return error; } - error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME); - if (error) { - dev_err(&pdev->dev, "Failed to reserve/map IO regions\n"); - return error; - } + /* + * The VMCI device with mmio access to registers requests 256KB + * for BAR1. If present, driver will use new VMCI device + * functionality for register access and datagram send/recv. + */ - iobase = pcim_iomap_table(pdev)[0]; + if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) { + dev_info(&pdev->dev, "MMIO register access is available\n"); + mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET, + VMCI_MMIO_ACCESS_SIZE); + /* If the map fails, we fall back to IOIO access. */ + if (!mmio_base) + dev_warn(&pdev->dev, "Failed to map MMIO register access\n"); + } - dev_info(&pdev->dev, "Found VMCI PCI device at %#lx, irq %u\n", - (unsigned long)iobase, pdev->irq); + if (!mmio_base) { + if (IS_ENABLED(CONFIG_ARM64)) { + dev_err(&pdev->dev, "MMIO base is invalid\n"); + return -ENXIO; + } + error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME); + if (error) { + dev_err(&pdev->dev, "Failed to reserve/map IO regions\n"); + return error; + } + iobase = pcim_iomap_table(pdev)[0]; + } vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL); if (!vmci_dev) { @@ -466,17 +636,35 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, vmci_dev->dev = &pdev->dev; vmci_dev->exclusive_vectors = false; vmci_dev->iobase = iobase; + vmci_dev->mmio_base = mmio_base; tasklet_init(&vmci_dev->datagram_tasklet, vmci_dispatch_dgs, (unsigned long)vmci_dev); tasklet_init(&vmci_dev->bm_tasklet, vmci_process_bitmap, (unsigned long)vmci_dev); + init_waitqueue_head(&vmci_dev->inout_wq); - vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE); + if (mmio_base != NULL) { + vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE, + &vmci_dev->tx_buffer_base, + GFP_KERNEL); + if (!vmci_dev->tx_buffer) { + dev_err(&pdev->dev, + "Can't allocate memory for datagram tx buffer\n"); + return -ENOMEM; + } + + vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE, + &vmci_dev->data_buffer_base, + GFP_KERNEL); + } else { + vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE); + } if (!vmci_dev->data_buffer) { dev_err(&pdev->dev, "Can't allocate memory for datagram buffer\n"); - return -ENOMEM; + error = -ENOMEM; + goto err_free_data_buffers; } pci_set_master(pdev); /* To enable queue_pair functionality. */ @@ -490,11 +678,11 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, * * Right now, we need datagrams. There are no fallbacks. 
-	capabilities = ioread32(vmci_dev->iobase + VMCI_CAPS_ADDR);
+	capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
 	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
 		dev_err(&pdev->dev, "Device does not support datagrams\n");
 		error = -ENXIO;
-		goto err_free_data_buffer;
+		goto err_free_data_buffers;
 	}
 	caps_in_use = VMCI_CAPS_DATAGRAM;
@@ -522,19 +710,39 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 		vmci_dev->notification_bitmap = dma_alloc_coherent(
 			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
 			GFP_KERNEL);
-		if (!vmci_dev->notification_bitmap) {
+		if (!vmci_dev->notification_bitmap)
 			dev_warn(&pdev->dev,
 				 "Unable to allocate notification bitmap\n");
-		} else {
-			memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE);
+		else
 			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
+	}
+
+	if (mmio_base != NULL) {
+		if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
+			caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
+		} else {
+			dev_err(&pdev->dev,
+				"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
+			error = -ENXIO;
+			goto err_free_notification_bitmap;
 		}
 	}
 
 	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);
 
 	/* Let the host know which capabilities we intend to use. */
-	iowrite32(caps_in_use, vmci_dev->iobase + VMCI_CAPS_ADDR);
+	vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);
+
+	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
+		/* Let the device know the size for pages passed down. */
+		vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);
+
+		/* Configure the high order parts of the data in/out buffers. */
+		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
+			       VMCI_DATA_IN_HIGH_ADDR);
+		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
+			       VMCI_DATA_OUT_HIGH_ADDR);
+	}
 
 	/* Set up global device so that we can start sending datagrams */
 	spin_lock_irq(&vmci_dev_spinlock);
@@ -561,7 +769,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 	/* Check host capabilities. */
 	error = vmci_check_host_caps(pdev);
 	if (error)
-		goto err_remove_bitmap;
+		goto err_remove_vmci_dev_g;
 
 	/* Enable device. */
@@ -581,13 +789,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 	 * Enable interrupts. Try MSI-X first, then MSI, and then fallback on
 	 * legacy interrupts.
 	 */
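Note that the DMA-datagram setup above programs only the upper 32 bits of each buffer address at probe time; the low halves are presumably supplied per transfer. The general idiom for handing a 64-bit dma_addr_t to a device through 32-bit registers looks like the sketch below — the register offsets and names are hypothetical, and the driver itself goes through vmci_write_reg() rather than raw writel():

	#include <linux/io.h>
	#include <linux/kernel.h>

	#define MY_BUF_ADDR_LO	0x10	/* hypothetical register offsets */
	#define MY_BUF_ADDR_HI	0x14

	static void my_program_buffer(void __iomem *regs, dma_addr_t addr)
	{
		/* Split the 64-bit bus address across two 32-bit registers. */
		writel(lower_32_bits(addr), regs + MY_BUF_ADDR_LO);
		writel(upper_32_bits(addr), regs + MY_BUF_ADDR_HI);
	}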
-	error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS,
-				      PCI_IRQ_MSIX);
+	if (vmci_dev->mmio_base != NULL)
+		num_irq_vectors = VMCI_MAX_INTRS;
+	else
+		num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
+	error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
+				      PCI_IRQ_MSIX);
 	if (error < 0) {
 		error = pci_alloc_irq_vectors(pdev, 1, 1,
 				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
 		if (error < 0)
-			goto err_remove_bitmap;
+			goto err_unsubscribe_event;
 	} else {
 		vmci_dev->exclusive_vectors = true;
 	}
@@ -620,6 +832,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 				pci_irq_vector(pdev, 1), error);
 			goto err_free_irq;
 		}
+		if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
+			error = request_irq(pci_irq_vector(pdev, 2),
+					    vmci_interrupt_dma_datagram,
+					    0, KBUILD_MODNAME, vmci_dev);
+			if (error) {
+				dev_err(&pdev->dev,
+					"Failed to allocate irq %u: %d\n",
+					pci_irq_vector(pdev, 2), error);
+				goto err_free_bm_irq;
+			}
+		}
 	}
 
 	dev_dbg(&pdev->dev, "Registered device\n");
@@ -630,17 +853,22 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 	cmd = VMCI_IMR_DATAGRAM;
 	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
 		cmd |= VMCI_IMR_NOTIFICATION;
-	iowrite32(cmd, vmci_dev->iobase + VMCI_IMR_ADDR);
+	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
+		cmd |= VMCI_IMR_DMA_DATAGRAM;
+	vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);
 
 	/* Enable interrupts. */
-	iowrite32(VMCI_CONTROL_INT_ENABLE,
-		  vmci_dev->iobase + VMCI_CONTROL_ADDR);
+	vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);
 
 	pci_set_drvdata(pdev, vmci_dev);
 
 	vmci_call_vsock_callback(false);
 	return 0;
 
+err_free_bm_irq:
+	if (vmci_dev->exclusive_vectors)
+		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
+
 err_free_irq:
 	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
 	tasklet_kill(&vmci_dev->datagram_tasklet);
@@ -649,29 +877,29 @@ err_free_irq:
 
 err_disable_msi:
 	pci_free_irq_vectors(pdev);
 
+err_unsubscribe_event:
 	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
 	if (vmci_err < VMCI_SUCCESS)
 		dev_warn(&pdev->dev,
 			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
 			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
 
-err_remove_bitmap:
-	if (vmci_dev->notification_bitmap) {
-		iowrite32(VMCI_CONTROL_RESET,
-			  vmci_dev->iobase + VMCI_CONTROL_ADDR);
-		dma_free_coherent(&pdev->dev, PAGE_SIZE,
-				  vmci_dev->notification_bitmap,
-				  vmci_dev->notification_base);
-	}
-
 err_remove_vmci_dev_g:
 	spin_lock_irq(&vmci_dev_spinlock);
 	vmci_pdev = NULL;
 	vmci_dev_g = NULL;
 	spin_unlock_irq(&vmci_dev_spinlock);
 
-err_free_data_buffer:
-	vfree(vmci_dev->data_buffer);
+err_free_notification_bitmap:
+	if (vmci_dev->notification_bitmap) {
+		vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
+		dma_free_coherent(&pdev->dev, PAGE_SIZE,
+				  vmci_dev->notification_bitmap,
+				  vmci_dev->notification_base);
+	}
+
+err_free_data_buffers:
+	vmci_free_dg_buffers(vmci_dev);
 
 	/* The rest are managed resources and will be freed by PCI core */
 	return error;
@@ -700,15 +928,18 @@ static void vmci_guest_remove_device(struct pci_dev *pdev)
 	spin_unlock_irq(&vmci_dev_spinlock);
 
 	dev_dbg(&pdev->dev, "Resetting vmci device\n");
-	iowrite32(VMCI_CONTROL_RESET, vmci_dev->iobase + VMCI_CONTROL_ADDR);
+	vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
 
 	/*
 	 * Free IRQ and then disable MSI/MSI-X as appropriate. For
 	 * MSI-X, we might have multiple vectors, each with their own
 	 * IRQ, which we must free too.
 	 */
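The hunks above extend a common probe-time pattern: request the full per-source MSI-X vector count first, fall back to one shared vector, and only then request_irq() each vector that was actually granted, unwinding in reverse order on failure (hence the new err_free_bm_irq label). A condensed sketch of that shape — the vector count and names are illustrative, not the driver's:

	#include <linux/interrupt.h>
	#include <linux/pci.h>

	#define MY_NUM_VECTORS 3

	static int my_setup_irqs(struct pci_dev *pdev, irq_handler_t handler,
				 void *data)
	{
		int err;

		/* Prefer one MSI-X vector per interrupt source... */
		err = pci_alloc_irq_vectors(pdev, MY_NUM_VECTORS, MY_NUM_VECTORS,
					    PCI_IRQ_MSIX);
		if (err < 0) {
			/* ...else fall back to a single shared vector. */
			err = pci_alloc_irq_vectors(pdev, 1, 1,
					PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
			if (err < 0)
				return err;
		}

		/* Vector 0 always exists; extras each get their own handler. */
		err = request_irq(pci_irq_vector(pdev, 0), handler, 0,
				  "my_dev", data);
		if (err)
			pci_free_irq_vectors(pdev);	/* undo on failure */
		return err;
	}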
-	if (vmci_dev->exclusive_vectors)
+	if (vmci_dev->exclusive_vectors) {
 		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
+		if (vmci_dev->mmio_base != NULL)
+			free_irq(pci_irq_vector(pdev, 2), vmci_dev);
+	}
 	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
 	pci_free_irq_vectors(pdev);
 
@@ -726,7 +957,10 @@ static void vmci_guest_remove_device(struct pci_dev *pdev)
 			vmci_dev->notification_base);
 	}
 
-	vfree(vmci_dev->data_buffer);
+	vmci_free_dg_buffers(vmci_dev);
+
+	if (vmci_dev->mmio_base != NULL)
+		pci_iounmap(pdev, vmci_dev->mmio_base);
 
 	/* The rest are managed resources and will be freed by PCI core */
 }
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index 94ebf7f3fd58..8f2de1893245 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -2577,6 +2577,12 @@ static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
 	if (result < VMCI_SUCCESS)
 		return result;
 
+	/*
+	 * This virt_wmb() ensures that data written to the queue
+	 * is observable before the new producer_tail is.
+	 */
+	virt_wmb();
+
 	vmci_q_header_add_producer_tail(produce_q->q_header, written,
 					produce_q_size);
 	return written;
@@ -2620,6 +2626,12 @@ static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q,
 	if (buf_ready < VMCI_SUCCESS)
 		return (ssize_t) buf_ready;
 
+	/*
+	 * This virt_rmb() ensures that data from the queue will be read
+	 * after we have determined how much is ready to be consumed.
+	 */
+	virt_rmb();
+
 	read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready);
 	head = vmci_q_header_consumer_head(produce_q->q_header);
 	if (likely(head + read < consume_q_size)) {
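The two queue-pair hunks close a real ordering hole: without the barriers, the peer could observe the new producer tail before the payload bytes it describes, or read payload before knowing how much is actually ready. virt_wmb()/virt_rmb() are the guest/hypervisor flavor of the classic producer/consumer pairing, sketched below with an illustrative ring — the real VMCI queue-pair layout and helpers differ:

	#include <asm/barrier.h>
	#include <linux/compiler.h>
	#include <linux/string.h>
	#include <linux/types.h>

	struct my_ring {
		u64 tail;		/* producer publishes, consumer polls */
		u8  data[4096];
	};

	static void my_produce(struct my_ring *r, const u8 *buf, size_t len)
	{
		memcpy(&r->data[r->tail], buf, len);
		virt_wmb();			/* payload visible before tail */
		WRITE_ONCE(r->tail, r->tail + len);
	}

	static size_t my_consume(struct my_ring *r, u64 head, u8 *buf)
	{
		u64 tail = READ_ONCE(r->tail);

		virt_rmb();			/* read tail before payload */
		memcpy(buf, &r->data[head], tail - head);
		return tail - head;
	}

The write barrier in the producer pairs with the read barrier in the consumer; one without the other provides no guarantee.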
