diff options
Diffstat (limited to 'drivers/misc')
43 files changed, 1694 insertions, 353 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 7f0d48f406e3..99e151475d8f 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -480,4 +480,5 @@ source "drivers/misc/cxl/Kconfig" source "drivers/misc/ocxl/Kconfig" source "drivers/misc/cardreader/Kconfig" source "drivers/misc/habanalabs/Kconfig" +source "drivers/misc/uacce/Kconfig" endmenu diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index c1860d35dc7e..9abf2923d831 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -56,4 +56,5 @@ obj-$(CONFIG_OCXL) += ocxl/ obj-y += cardreader/ obj-$(CONFIG_PVPANIC) += pvpanic.o obj-$(CONFIG_HABANA_AI) += habanalabs/ +obj-$(CONFIG_UACCE) += uacce/ obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c index 4feed296a327..3a9467aaa435 100644 --- a/drivers/misc/cardreader/rts5227.c +++ b/drivers/misc/cardreader/rts5227.c @@ -394,7 +394,8 @@ static const struct pcr_ops rts522a_pcr_ops = { void rts522a_init_params(struct rtsx_pcr *pcr) { rts5227_init_params(pcr); - + pcr->ops = &rts522a_pcr_ops; + pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 20, 11); pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3; pcr->option.ocp_en = 1; diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c index db936e4d6e56..1a81cda948c1 100644 --- a/drivers/misc/cardreader/rts5249.c +++ b/drivers/misc/cardreader/rts5249.c @@ -618,6 +618,7 @@ static const struct pcr_ops rts524a_pcr_ops = { void rts524a_init_params(struct rtsx_pcr *pcr) { rts5249_init_params(pcr); + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11); pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; pcr->option.ltr_l1off_snooze_sspwrgate = LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; @@ -733,6 +734,7 @@ static const struct pcr_ops rts525a_pcr_ops = { void rts525a_init_params(struct rtsx_pcr *pcr) { rts5249_init_params(pcr); + pcr->tx_initial_phase = SET_CLOCK_PHASE(25, 29, 11); pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; pcr->option.ltr_l1off_snooze_sspwrgate = LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c index 4214f02a17fd..711054ebad74 100644 --- a/drivers/misc/cardreader/rts5260.c +++ b/drivers/misc/cardreader/rts5260.c @@ -662,7 +662,7 @@ void rts5260_init_params(struct rtsx_pcr *pcr) pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; pcr->aspm_en = ASPM_L1_EN; - pcr->tx_initial_phase = SET_CLOCK_PHASE(1, 29, 16); + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11); pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); pcr->ic_version = rts5260_get_ic_version(pcr); diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c index bc4967a6efa1..78c3b1d424c3 100644 --- a/drivers/misc/cardreader/rts5261.c +++ b/drivers/misc/cardreader/rts5261.c @@ -764,7 +764,7 @@ void rts5261_init_params(struct rtsx_pcr *pcr) pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; pcr->aspm_en = ASPM_L1_EN; - pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 27, 16); + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 11); pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); pcr->ic_version = rts5261_get_ic_version(pcr); diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 031eb64549af..9ff18d4961ce 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -227,6 +227,7 @@ MODULE_DEVICE_TABLE(of, at24_of_match); static const struct acpi_device_id at24_acpi_ids[] = { { "INT3499", (kernel_ulong_t)&at24_data_INT3499 }, + { "TPF0001", (kernel_ulong_t)&at24_data_24c1024 }, { /* END OF LIST */ } }; MODULE_DEVICE_TABLE(acpi, at24_acpi_ids); @@ -712,13 +713,14 @@ static int at24_probe(struct i2c_client *client) * chip is functional. */ err = at24_read(at24, 0, &test_byte, 1); - pm_runtime_idle(dev); if (err) { pm_runtime_disable(dev); regulator_disable(at24->vcc_reg); return -ENODEV; } + pm_runtime_idle(dev); + if (writable) dev_info(dev, "%u byte %s EEPROM, writable, %u bytes/write\n", byte_len, client->name, at24->write_max); diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index 0bf08678431b..409276b6374d 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -129,6 +129,8 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) spin_unlock(&job->user_cb->lock); hl_cb_put(job->user_cb); job->user_cb = NULL; + } else if (!rc) { + job->job_cb_size = job->user_cb_size; } return rc; @@ -507,7 +509,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, struct hl_cb *cb; bool int_queues_only = true; u32 size_to_copy; - int rc, i, parse_cnt; + int rc, i; *cs_seq = ULLONG_MAX; @@ -547,7 +549,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, hl_debugfs_add_cs(cs); /* Validate ALL the CS chunks before submitting the CS */ - for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) { + for (i = 0 ; i < num_chunks ; i++) { struct hl_cs_chunk *chunk = &cs_chunk_array[i]; enum hl_queue_type queue_type; bool is_kernel_allocated_cb; @@ -585,10 +587,6 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, job->cs = cs; job->user_cb = cb; job->user_cb_size = chunk->cb_size; - if (is_kernel_allocated_cb) - job->job_cb_size = cb->size; - else - job->job_cb_size = chunk->cb_size; job->hw_queue_id = chunk->queue_index; cs->jobs_in_queue_cnt[job->hw_queue_id]++; @@ -659,8 +657,8 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) struct hl_device *hdev = hpriv->hdev; union hl_cs_args *args = data; struct hl_ctx *ctx = hpriv->ctx; - void __user *chunks; - u32 num_chunks; + void __user *chunks_execute, *chunks_restore; + u32 num_chunks_execute, num_chunks_restore; u64 cs_seq = ULONG_MAX; int rc, do_ctx_switch; bool need_soft_reset = false; @@ -673,13 +671,25 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) goto out; } + chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute; + num_chunks_execute = args->in.num_chunks_execute; + + if (!num_chunks_execute) { + dev_err(hdev->dev, + "Got execute CS with 0 chunks, context %d\n", + ctx->asid); + rc = -EINVAL; + goto out; + } + do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { long ret; - chunks = (void __user *)(uintptr_t)args->in.chunks_restore; - num_chunks = args->in.num_chunks_restore; + chunks_restore = + (void __user *) (uintptr_t) args->in.chunks_restore; + num_chunks_restore = args->in.num_chunks_restore; mutex_lock(&hpriv->restore_phase_mutex); @@ -707,13 +717,13 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) hdev->asic_funcs->restore_phase_topology(hdev); - if (num_chunks == 0) { + if (!num_chunks_restore) { dev_dbg(hdev->dev, "Need to run restore phase but restore CS is empty\n"); rc = 0; } else { - rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, - &cs_seq); + rc = _hl_cs_ioctl(hpriv, chunks_restore, + num_chunks_restore, &cs_seq); } mutex_unlock(&hpriv->restore_phase_mutex); @@ -726,7 +736,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) } /* Need to wait for restore completion before execution phase */ - if (num_chunks > 0) { + if (num_chunks_restore) { ret = _hl_cs_wait_ioctl(hdev, ctx, jiffies_to_usecs(hdev->timeout_jiffies), cs_seq); @@ -754,18 +764,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) } } - chunks = (void __user *)(uintptr_t)args->in.chunks_execute; - num_chunks = args->in.num_chunks_execute; - - if (num_chunks == 0) { - dev_err(hdev->dev, - "Got execute CS with 0 chunks, context %d\n", - ctx->asid); - rc = -EINVAL; - goto out; - } - - rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, &cs_seq); + rc = _hl_cs_ioctl(hpriv, chunks_execute, num_chunks_execute, &cs_seq); out: if (rc != -EAGAIN) { diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 20413e350343..756d36ed5d95 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data) } is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + prop->dmmu.start_addr, + prop->dmmu.end_addr); + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; mutex_lock(&ctx->mmu_lock); @@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr) goto out; if (hdev->dram_supports_virtual_memory && - addr >= prop->va_space_dram_start_address && - addr < prop->va_space_dram_end_address) + (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr)) return true; - if (addr >= prop->va_space_host_start_address && - addr < prop->va_space_host_end_address) + if (addr >= prop->pmmu.start_addr && + addr < prop->pmmu.end_addr) + return true; + + if (addr >= prop->pmmu_huge.start_addr && + addr < prop->pmmu_huge.end_addr) return true; out: return false; @@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, } is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + prop->dmmu.start_addr, + prop->dmmu.end_addr); + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; mutex_lock(&ctx->mmu_lock); @@ -705,6 +710,65 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, return count; } +static ssize_t hl_data_read64(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[32]; + u64 addr = entry->addr; + u64 val; + ssize_t rc; + + if (*ppos) + return 0; + + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); + if (rc) + return rc; + } + + rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val); + if (rc) { + dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); + return rc; + } + + sprintf(tmp_buf, "0x%016llx\n", val); + return simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); +} + +static ssize_t hl_data_write64(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u64 addr = entry->addr; + u64 value; + ssize_t rc; + + rc = kstrtoull_from_user(buf, count, 16, &value); + if (rc) + return rc; + + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); + if (rc) + return rc; + } + + rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value); + if (rc) { + dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n", + value, addr); + return rc; + } + + return count; +} + static ssize_t hl_get_power_state(struct file *f, char __user *buf, size_t count, loff_t *ppos) { @@ -912,6 +976,12 @@ static const struct file_operations hl_data32b_fops = { .write = hl_data_write32 }; +static const struct file_operations hl_data64b_fops = { + .owner = THIS_MODULE, + .read = hl_data_read64, + .write = hl_data_write64 +}; + static const struct file_operations hl_i2c_data_fops = { .owner = THIS_MODULE, .read = hl_i2c_data_read, @@ -1025,6 +1095,12 @@ void hl_debugfs_add_device(struct hl_device *hdev) dev_entry, &hl_data32b_fops); + debugfs_create_file("data64", + 0644, + dev_entry->root, + dev_entry, + &hl_data64b_fops); + debugfs_create_file("set_power_state", 0200, dev_entry->root, diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index b680b0caa69b..aef4de36b7aa 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -36,7 +36,7 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) status = HL_DEVICE_STATUS_OPERATIONAL; return status; -}; +} static void hpriv_release(struct kref *ref) { diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index b8a8de24aaf7..68f065607544 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -324,7 +324,11 @@ static u32 goya_all_events[] = { GOYA_ASYNC_EVENT_ID_DMA_BM_CH1, GOYA_ASYNC_EVENT_ID_DMA_BM_CH2, GOYA_ASYNC_EVENT_ID_DMA_BM_CH3, - GOYA_ASYNC_EVENT_ID_DMA_BM_CH4 + GOYA_ASYNC_EVENT_ID_DMA_BM_CH4, + GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S, + GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E, + GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S, + GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E }; static int goya_mmu_clear_pgt_range(struct hl_device *hdev); @@ -393,19 +397,21 @@ void goya_get_fixed_properties(struct hl_device *hdev) prop->dmmu.hop2_mask = HOP2_MASK; prop->dmmu.hop3_mask = HOP3_MASK; prop->dmmu.hop4_mask = HOP4_MASK; - prop->dmmu.huge_page_size = PAGE_SIZE_2MB; + prop->dmmu.start_addr = VA_DDR_SPACE_START; + prop->dmmu.end_addr = VA_DDR_SPACE_END; + prop->dmmu.page_size = PAGE_SIZE_2MB; - /* No difference between PMMU and DMMU except of page size */ + /* shifts and masks are the same in PMMU and DMMU */ memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); - prop->dmmu.page_size = PAGE_SIZE_2MB; + prop->pmmu.start_addr = VA_HOST_SPACE_START; + prop->pmmu.end_addr = VA_HOST_SPACE_END; prop->pmmu.page_size = PAGE_SIZE_4KB; - prop->va_space_host_start_address = VA_HOST_SPACE_START; - prop->va_space_host_end_address = VA_HOST_SPACE_END; - prop->va_space_dram_start_address = VA_DDR_SPACE_START; - prop->va_space_dram_end_address = VA_DDR_SPACE_END; - prop->dram_size_for_default_page_mapping = - prop->va_space_dram_end_address; + /* PMMU and HPMMU are the same except of page size */ + memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); + prop->pmmu_huge.page_size = PAGE_SIZE_2MB; + + prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END; prop->cfg_size = CFG_SIZE; prop->max_asid = MAX_ASID; prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE; @@ -2573,8 +2579,7 @@ static int goya_hw_init(struct hl_device *hdev) * After CPU initialization is finished, change DDR bar mapping inside * iATU to point to the start address of the MMU page tables */ - if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE + - (MMU_PAGE_TABLES_ADDR & + if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR & ~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) { dev_err(hdev->dev, "failed to map DDR bar to MMU page tables\n"); @@ -3443,12 +3448,13 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev, /* * WA for HW-23. * We can't allow user to read from Host using QMANs other than 1. + * PMMU and HPMMU addresses are equal, check only one of them. */ if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 && hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr), le32_to_cpu(user_dma_pkt->tsize), - hdev->asic_prop.va_space_host_start_address, - hdev->asic_prop.va_space_host_end_address)) { + hdev->asic_prop.pmmu.start_addr, + hdev->asic_prop.pmmu.end_addr)) { dev_err(hdev->dev, "Can't DMA from host on queue other then 1\n"); return -EFAULT; @@ -4178,6 +4184,96 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) return rc; } +static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 ddr_bar_addr; + int rc = 0; + + if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { + u32 val_l = RREG32(addr - CFG_BASE); + u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); + + *val = (((u64) val_h) << 32) | val_l; + + } else if ((addr >= SRAM_BASE_ADDR) && + (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) { + + *val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] + + (addr - SRAM_BASE_ADDR)); + + } else if ((addr >= DRAM_PHYS_BASE) && + (addr <= + DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) { + + u64 bar_base_addr = DRAM_PHYS_BASE + + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + + ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr); + if (ddr_bar_addr != U64_MAX) { + *val = readq(hdev->pcie_bar[DDR_BAR_ID] + + (addr - bar_base_addr)); + + ddr_bar_addr = goya_set_ddr_bar_base(hdev, + ddr_bar_addr); + } + if (ddr_bar_addr == U64_MAX) + rc = -EIO; + + } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { + *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); + + } else { + rc = -EFAULT; + } + + return rc; +} + +static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 ddr_bar_addr; + int rc = 0; + + if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { + WREG32(addr - CFG_BASE, lower_32_bits(val)); + WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val)); + + } else if ((addr >= SRAM_BASE_ADDR) && + (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) { + + writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] + + (addr - SRAM_BASE_ADDR)); + + } else if ((addr >= DRAM_PHYS_BASE) && + (addr <= + DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) { + + u64 bar_base_addr = DRAM_PHYS_BASE + + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + + ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr); + if (ddr_bar_addr != U64_MAX) { + writeq(val, hdev->pcie_bar[DDR_BAR_ID] + + (addr - bar_base_addr)); + + ddr_bar_addr = goya_set_ddr_bar_base(hdev, + ddr_bar_addr); + } + if (ddr_bar_addr == U64_MAX) + rc = -EIO; + + } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { + *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; + + } else { + rc = -EFAULT; + } + + return rc; +} + static u64 goya_read_pte(struct hl_device *hdev, u64 addr) { struct goya_device *goya = hdev->asic_specific; @@ -4297,6 +4393,14 @@ static const char *_goya_get_event_desc(u16 event_type) return "TPC%d_bmon_spmu"; case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4: return "DMA_bm_ch%d"; + case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S: + return "POWER_ENV_S"; + case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E: + return "POWER_ENV_E"; + case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S: + return "THERMAL_ENV_S"; + case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E: + return "THERMAL_ENV_E"; default: return "N/A"; } @@ -4388,22 +4492,22 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size) static void goya_print_razwi_info(struct hl_device *hdev) { if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) { - dev_err(hdev->dev, "Illegal write to LBW\n"); + dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n"); WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0); } if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) { - dev_err(hdev->dev, "Illegal read from LBW\n"); + dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n"); WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0); } if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) { - dev_err(hdev->dev, "Illegal write to HBW\n"); + dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n"); WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0); } if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) { - dev_err(hdev->dev, "Illegal read from HBW\n"); + dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n"); WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0); } } @@ -4423,7 +4527,8 @@ static void goya_print_mmu_error_info(struct hl_device *hdev) addr <<= 32; addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA); - dev_err(hdev->dev, "MMU page fault on va 0x%llx\n", addr); + dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", + addr); WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0); } @@ -4435,7 +4540,7 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type, char desc[20] = ""; goya_get_event_desc(event_type, desc, sizeof(desc)); - dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", + dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", event_type, desc); if (razwi) { @@ -4526,6 +4631,33 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) return rc; } +static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type) +{ + switch (event_type) { + case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S: + dev_info_ratelimited(hdev->dev, + "Clock throttling due to power consumption\n"); + break; + case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E: + dev_info_ratelimited(hdev->dev, + "Power envelop is safe, back to optimal clock\n"); + break; + case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S: + dev_info_ratelimited(hdev->dev, + "Clock throttling due to overheating\n"); + break; + case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E: + dev_info_ratelimited(hdev->dev, + "Thermal envelop is safe, back to optimal clock\n"); + break; + + default: + dev_err(hdev->dev, "Received invalid clock change event %d\n", + event_type); + break; + } +} + void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) { u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); @@ -4609,6 +4741,14 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) goya_unmask_irq(hdev, event_type); break; + case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S: + case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E: + case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S: + case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E: + goya_print_clk_change_info(hdev, event_type); + goya_unmask_irq(hdev, event_type); + break; + default: dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", event_type); @@ -4776,7 +4916,8 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev) for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) { rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off, - prop->dram_base_address + off, PAGE_SIZE_2MB); + prop->dram_base_address + off, PAGE_SIZE_2MB, + (off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE); if (rc) { dev_err(hdev->dev, "Map failed for address 0x%llx\n", prop->dram_base_address + off); @@ -4786,7 +4927,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev) if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) { rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR, - hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB); + hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true); if (rc) { dev_err(hdev->dev, @@ -4799,7 +4940,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev) rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off, hdev->cpu_accessible_dma_address + cpu_off, - PAGE_SIZE_4KB); + PAGE_SIZE_4KB, true); if (rc) { dev_err(hdev->dev, "Map failed for CPU accessible memory\n"); @@ -4825,14 +4966,15 @@ unmap_cpu: for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB) if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off, - PAGE_SIZE_4KB)) + PAGE_SIZE_4KB, true)) dev_warn_ratelimited(hdev->dev, "failed to unmap address 0x%llx\n", VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off); unmap: for (; off >= 0 ; off -= PAGE_SIZE_2MB) if (hl_mmu_unmap(hdev->kernel_ctx, - prop->dram_base_address + off, PAGE_SIZE_2MB)) + prop->dram_base_address + off, PAGE_SIZE_2MB, + true)) dev_warn_ratelimited(hdev->dev, "failed to unmap address 0x%llx\n", prop->dram_base_address + off); @@ -4857,14 +4999,15 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev) if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) { if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR, - PAGE_SIZE_2MB)) + PAGE_SIZE_2MB, true)) dev_warn(hdev->dev, "Failed to unmap CPU accessible memory\n"); } else { for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off, - PAGE_SIZE_4KB)) + PAGE_SIZE_4KB, + (cpu_off + PAGE_SIZE_4KB) >= SZ_2M)) dev_warn_ratelimited(hdev->dev, "failed to unmap address 0x%llx\n", VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off); @@ -4872,7 +5015,8 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev) for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) if (hl_mmu_unmap(hdev->kernel_ctx, - prop->dram_base_address + off, PAGE_SIZE_2MB)) + prop->dram_base_address + off, PAGE_SIZE_2MB, + (off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE)) dev_warn_ratelimited(hdev->dev, "Failed to unmap address 0x%llx\n", prop->dram_base_address + off); @@ -5113,6 +5257,7 @@ static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask, } static void goya_hw_queues_lock(struct hl_device *hdev) + __acquires(&goya->hw_queues_lock) { struct goya_device *goya = hdev->asic_specific; @@ -5120,6 +5265,7 @@ static void goya_hw_queues_lock(struct hl_device *hdev) } static void goya_hw_queues_unlock(struct hl_device *hdev) + __releases(&goya->hw_queues_lock) { struct goya_device *goya = hdev->asic_specific; @@ -5180,6 +5326,8 @@ static const struct hl_asic_funcs goya_funcs = { .restore_phase_topology = goya_restore_phase_topology, .debugfs_read32 = goya_debugfs_read32, .debugfs_write32 = goya_debugfs_write32, + .debugfs_read64 = goya_debugfs_read64, + .debugfs_write64 = goya_debugfs_write64, .add_device_attr = goya_add_device_attr, .handle_eqe = goya_handle_eqe, .set_pll_profile = goya_set_pll_profile, diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c index c1ee6e2b5dff..a1bc930d904f 100644 --- a/drivers/misc/habanalabs/goya/goya_coresight.c +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -364,8 +364,8 @@ static int goya_etr_validate_address(struct hl_device *hdev, u64 addr, u64 range_start, range_end; if (hdev->mmu_enable) { - range_start = prop->va_space_dram_start_address; - range_end = prop->va_space_dram_end_address; + range_start = prop->dmmu.start_addr; + range_end = prop->dmmu.end_addr; } else { range_start = prop->dram_user_base_address; range_end = prop->dram_end_address; diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c index b2ebc01e27f4..cdd4903e48fa 100644 --- a/drivers/misc/habanalabs/goya/goya_hwmgr.c +++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c @@ -298,8 +298,8 @@ static ssize_t pm_mng_profile_store(struct device *dev, /* Make sure we are in LOW PLL when changing modes */ if (hdev->pm_mng_profile == PM_MANUAL) { hdev->curr_pll_profile = PLL_HIGH; - hl_device_set_frequency(hdev, PLL_LOW); hdev->pm_mng_profile = PM_AUTO; + hl_device_set_frequency(hdev, PLL_LOW); } } else if (strncmp("manual", buf, strlen("manual")) == 0) { if (hdev->pm_mng_profile == PM_AUTO) { diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 00c949f4ccd1..31ebcf9458fe 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -132,6 +132,8 @@ enum hl_device_hw_state { /** * struct hl_mmu_properties - ASIC specific MMU address translation properties. + * @start_addr: virtual start address of the memory region. + * @end_addr: virtual end address of the memory region. * @hop0_shift: shift of hop 0 mask. * @hop1_shift: shift of hop 1 mask. * @hop2_shift: shift of hop 2 mask. @@ -143,9 +145,10 @@ enum hl_device_hw_state { * @hop3_mask: mask to get the PTE address in hop 3. * @hop4_mask: mask to get the PTE address in hop 4. * @page_size: default page size used to allocate memory. - * @huge_page_size: page size used to allocate memory with huge pages. */ struct hl_mmu_properties { + u64 start_addr; + u64 end_addr; u64 hop0_shift; u64 hop1_shift; u64 hop2_shift; @@ -157,7 +160,6 @@ struct hl_mmu_properties { u64 hop3_mask; u64 hop4_mask; u32 page_size; - u32 huge_page_size; }; /** @@ -169,6 +171,8 @@ struct hl_mmu_properties { * @preboot_ver: F/W Preboot version. * @dmmu: DRAM MMU address translation properties. * @pmmu: PCI (host) MMU address translation properties. + * @pmmu_huge: PCI (host) MMU address translation properties for memory + * allocated with huge pages. * @sram_base_address: SRAM physical start address. * @sram_end_address: SRAM physical end address. * @sram_user_base_address - SRAM physical start address for user access. @@ -178,14 +182,6 @@ struct hl_mmu_properties { * @dram_size: DRAM total size. * @dram_pci_bar_size: size of PCI bar towards DRAM. * @max_power_default: max power of the device after reset - * @va_space_host_start_address: base address of virtual memory range for - * mapping host memory. - * @va_space_host_end_address: end address of virtual memory range for - * mapping host memory. - * @va_space_dram_start_address: base address of virtual memory range for - * mapping DRAM memory. - * @va_space_dram_end_address: end address of virtual memory range for - * mapping DRAM memory. * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page * fault. * @pcie_dbi_base_address: Base address of the PCIE_DBI block. @@ -218,6 +214,7 @@ struct asic_fixed_properties { char preboot_ver[VERSION_MAX_LEN]; struct hl_mmu_properties dmmu; struct hl_mmu_properties pmmu; + struct hl_mmu_properties pmmu_huge; u64 sram_base_address; u64 sram_end_address; u64 sram_user_base_address; @@ -227,10 +224,6 @@ struct asic_fixed_properties { u64 dram_size; u64 dram_pci_bar_size; u64 max_power_default; - u64 va_space_host_start_address; - u64 va_space_host_end_address; - u64 va_space_dram_start_address; - u64 va_space_dram_end_address; u64 dram_size_for_default_page_mapping; u64 pcie_dbi_base_address; u64 pcie_aux_dbi_reg_addr; @@ -431,10 +424,12 @@ struct hl_eq { * enum hl_asic_type - supported ASIC types. * @ASIC_INVALID: Invalid ASIC type. * @ASIC_GOYA: Goya device. + * @ASIC_GAUDI: Gaudi device. */ enum hl_asic_type { ASIC_INVALID, - ASIC_GOYA + ASIC_GOYA, + ASIC_GAUDI }; struct hl_cs_parser; @@ -589,6 +584,8 @@ struct hl_asic_funcs { void (*restore_phase_topology)(struct hl_device *hdev); int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val); int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val); + int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val); + int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val); void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_attr_grp); void (*handle_eqe)(struct hl_device *hdev, @@ -658,6 +655,8 @@ struct hl_va_range { * this hits 0l. It is incremented on CS and CS_WAIT. * @cs_pending: array of DMA fence objects representing pending CS. * @host_va_range: holds available virtual addresses for host mappings. + * @host_huge_va_range: holds available virtual addresses for host mappings + * with huge pages. * @dram_va_range: holds available virtual addresses for DRAM mappings. * @mem_hash_lock: protects the mem_hash. * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifing the @@ -688,8 +687,9 @@ struct hl_ctx { struct hl_device *hdev; struct kref refcount; struct dma_fence *cs_pending[HL_MAX_PENDING_CS]; - struct hl_va_range host_va_range; - struct hl_va_range dram_va_range; + struct hl_va_range *host_va_range; + struct hl_va_range *host_huge_va_range; + struct hl_va_range *dram_va_range; struct mutex mem_hash_lock; struct mutex mmu_lock; struct list_head debugfs_list; @@ -763,7 +763,7 @@ struct hl_userptr { * @aborted: true if CS was aborted due to some device error. */ struct hl_cs { - u8 jobs_in_queue_cnt[HL_MAX_QUEUES]; + u16 jobs_in_queue_cnt[HL_MAX_QUEUES]; struct hl_ctx *ctx; struct list_head job_list; spinlock_t job_lock; @@ -1291,6 +1291,8 @@ struct hl_device_idle_busy_ts { * otherwise. * @dram_supports_virtual_memory: is MMU enabled towards DRAM. * @dram_default_page_mapping: is DRAM default page mapping enabled. + * @pmmu_huge_range: is a different virtual addresses range used for PMMU with + * huge pages. * @init_done: is the initialization of the device done. * @mmu_enable: is MMU enabled. * @device_cpu_disabled: is the device CPU disabled (due to timeouts) @@ -1372,6 +1374,7 @@ struct hl_device { u8 reset_on_lockup; u8 dram_supports_virtual_memory; u8 dram_default_page_mapping; + u8 pmmu_huge_range; u8 init_done; u8 device_cpu_disabled; u8 dma_mask; @@ -1573,8 +1576,10 @@ int hl_mmu_init(struct hl_device *hdev); void hl_mmu_fini(struct hl_device *hdev); int hl_mmu_ctx_init(struct hl_ctx *ctx); void hl_mmu_ctx_fini(struct hl_ctx *ctx); -int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size); -int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size); +int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, + u32 page_size, bool flush_pte); +int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, + bool flush_pte); void hl_mmu_swap_out(struct hl_ctx *ctx); void hl_mmu_swap_in(struct hl_ctx *ctx); @@ -1606,11 +1611,18 @@ int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask); long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); -long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr); -long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr); -long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr); -long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr); -long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr); +int hl_get_temperature(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +int hl_set_temperature(struct hl_device *hdev, + int sensor_index, u32 attr, long value); +int hl_get_voltage(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +int hl_get_current(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +int hl_get_fan_speed(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +int hl_get_pwm_info(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value); u64 hl_get_max_power(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c index 8c342fb499ca..b670859c677a 100644 --- a/drivers/misc/habanalabs/habanalabs_drv.c +++ b/drivers/misc/habanalabs/habanalabs_drv.c @@ -40,12 +40,13 @@ MODULE_PARM_DESC(reset_on_lockup, #define PCI_VENDOR_ID_HABANALABS 0x1da3 #define PCI_IDS_GOYA 0x0001 +#define PCI_IDS_GAUDI 0x1000 static const struct pci_device_id ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, + { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, { 0, } }; -MODULE_DEVICE_TABLE(pci, ids); /* * get_asic_type - translate device id to asic type @@ -63,6 +64,9 @@ static enum hl_asic_type get_asic_type(u16 device) case PCI_IDS_GOYA: asic_type = ASIC_GOYA; break; + case PCI_IDS_GAUDI: + asic_type = ASIC_GAUDI; + break; default: asic_type = ASIC_INVALID; break; @@ -263,6 +267,11 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, dev_err(&pdev->dev, "Unsupported ASIC\n"); rc = -ENODEV; goto free_hdev; + } else if (hdev->asic_type == ASIC_GAUDI) { + dev_err(&pdev->dev, + "GAUDI is not supported by the current kernel\n"); + rc = -ENODEV; + goto free_hdev; } } else { hdev->asic_type = asic_type; diff --git a/drivers/misc/habanalabs/hwmon.c b/drivers/misc/habanalabs/hwmon.c index 7be4bace9b4f..a21a26e07c3b 100644 --- a/drivers/misc/habanalabs/hwmon.c +++ b/drivers/misc/habanalabs/hwmon.c @@ -113,6 +113,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { struct hl_device *hdev = dev_get_drvdata(dev); + int rc; if (hl_device_disabled_or_in_reset(hdev)) return -ENODEV; @@ -125,36 +126,40 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, case hwmon_temp_crit: case hwmon_temp_max_hyst: case hwmon_temp_crit_hyst: + case hwmon_temp_offset: + case hwmon_temp_highest: break; default: return -EINVAL; } - *val = hl_get_temperature(hdev, channel, attr); + rc = hl_get_temperature(hdev, channel, attr, val); break; case hwmon_in: switch (attr) { case hwmon_in_input: case hwmon_in_min: case hwmon_in_max: + case hwmon_in_highest: break; default: return -EINVAL; } - *val = hl_get_voltage(hdev, channel, attr); + rc = hl_get_voltage(hdev, channel, attr, val); break; case hwmon_curr: switch (attr) { case hwmon_curr_input: case hwmon_curr_min: case hwmon_curr_max: + case hwmon_curr_highest: break; default: return -EINVAL; } - *val = hl_get_current(hdev, channel, attr); + rc = hl_get_current(hdev, channel, attr, val); break; case hwmon_fan: switch (attr) { @@ -165,7 +170,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, default: return -EINVAL; } - *val = hl_get_fan_speed(hdev, channel, attr); + rc = hl_get_fan_speed(hdev, channel, attr, val); break; case hwmon_pwm: switch (attr) { @@ -175,12 +180,12 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, default: return -EINVAL; } - *val = hl_get_pwm_info(hdev, channel, attr); + rc = hl_get_pwm_info(hdev, channel, attr, val); break; default: return -EINVAL; } - return 0; + return rc; } static int hl_write(struct device *dev, enum hwmon_sensor_types type, @@ -192,6 +197,15 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type, return -ENODEV; switch (type) { + case hwmon_temp: + switch (attr) { + case hwmon_temp_offset: + break; + default: + return -EINVAL; + } + hl_set_temperature(hdev, channel, attr, val); + break; case hwmon_pwm: switch (attr) { case hwmon_pwm_input: @@ -219,7 +233,10 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type, case hwmon_temp_max_hyst: case hwmon_temp_crit: case hwmon_temp_crit_hyst: + case hwmon_temp_highest: return 0444; + case hwmon_temp_offset: + return 0644; } break; case hwmon_in: @@ -227,6 +244,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type, case hwmon_in_input: case hwmon_in_min: case hwmon_in_max: + case hwmon_in_highest: return 0444; } break; @@ -235,6 +253,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type, case hwmon_curr_input: case hwmon_curr_min: case hwmon_curr_max: + case hwmon_curr_highest: return 0444; } break; @@ -265,10 +284,10 @@ static const struct hwmon_ops hl_hwmon_ops = { .write = hl_write }; -long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr) +int hl_get_temperature(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) { struct armcp_packet pkt; - long result; int rc; memset(&pkt, 0, sizeof(pkt)); @@ -279,22 +298,47 @@ long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr) pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, &result); + SENSORS_PKT_TIMEOUT, value); if (rc) { dev_err(hdev->dev, "Failed to get temperature from sensor %d, error %d\n", sensor_index, rc); - result = 0; + *value = 0; } - return result; + return rc; } -long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr) +int hl_set_temperature(struct hl_device *hdev, + int sensor_index, u32 attr, long value) +{ + struct armcp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + pkt.value = __cpu_to_le64(value); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SENSORS_PKT_TIMEOUT, NULL); + + if (rc) + dev_err(hdev->dev, + "Failed to set temperature of sensor %d, error %d\n", + sensor_index, rc); + + return rc; +} + +int hl_get_voltage(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) { struct armcp_packet pkt; - long result; int rc; memset(&pkt, 0, sizeof(pkt)); @@ -305,22 +349,22 @@ long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr) pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, &result); + SENSORS_PKT_TIMEOUT, value); if (rc) { dev_err(hdev->dev, "Failed to get voltage from sensor %d, error %d\n", sensor_index, rc); - result = 0; + *value = 0; } - return result; + return rc; } -long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr) +int hl_get_current(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) { struct armcp_packet pkt; - long result; int rc; memset(&pkt, 0, sizeof(pkt)); @@ -331,22 +375,22 @@ long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr) pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, &result); + SENSORS_PKT_TIMEOUT, value); if (rc) { dev_err(hdev->dev, "Failed to get current from sensor %d, error %d\n", sensor_index, rc); - result = 0; + *value = 0; } - return result; + return rc; } -long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr) +int hl_get_fan_speed(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) { struct armcp_packet pkt; - long result; int rc; memset(&pkt, 0, sizeof(pkt)); @@ -357,22 +401,22 @@ long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr) pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, &result); + SENSORS_PKT_TIMEOUT, value); if (rc) { dev_err(hdev->dev, "Failed to get fan speed from sensor %d, error %d\n", sensor_index, rc); - result = 0; + *value = 0; } - return result; + return rc; } -long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr) +int hl_get_pwm_info(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) { struct armcp_packet pkt; - long result; int rc; memset(&pkt, 0, sizeof(pkt)); @@ -383,16 +427,16 @@ long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr) pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, &result); + SENSORS_PKT_TIMEOUT, value); if (rc) { dev_err(hdev->dev, "Failed to get pwm info from sensor %d, error %d\n", sensor_index, rc); - result = 0; + *value = 0; } - return result; + return rc; } void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h index e4c6699a1868..bdd0a4c3a9cf 100644 --- a/drivers/misc/habanalabs/include/armcp_if.h +++ b/drivers/misc/habanalabs/include/armcp_if.h @@ -189,6 +189,10 @@ enum pq_init_status { * ArmCP to write to the structure, to prevent data corruption in case of * mismatched driver/FW versions. * + * ARMCP_PACKET_TEMPERATURE_SET - + * Set the value of the offset property of a specified thermal sensor. + * The packet's arguments specify the desired sensor and the field to + * set. */ enum armcp_packet_id { @@ -214,6 +218,8 @@ enum armcp_packet_id { ARMCP_PACKET_MAX_POWER_GET, /* sysfs */ ARMCP_PACKET_MAX_POWER_SET, /* sysfs */ ARMCP_PACKET_EEPROM_DATA_GET, /* sysfs */ + ARMCP_RESERVED, + ARMCP_PACKET_TEMPERATURE_SET, /* sysfs */ }; #define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5 @@ -271,24 +277,32 @@ enum armcp_packet_rc { armcp_packet_fault }; +/* + * armcp_temp_type should adhere to hwmon_temp_attributes + * defined in Linux kernel hwmon.h file + */ enum armcp_temp_type { armcp_temp_input, armcp_temp_max = 6, armcp_temp_max_hyst, armcp_temp_crit, - armcp_temp_crit_hyst + armcp_temp_crit_hyst, + armcp_temp_offset = 19, + armcp_temp_highest = 22 }; enum armcp_in_attributes { armcp_in_input, armcp_in_min, - armcp_in_max + armcp_in_max, + armcp_in_highest = 7 }; enum armcp_curr_attributes { armcp_curr_input, armcp_curr_min, - armcp_curr_max + armcp_curr_max, + armcp_curr_highest = 7 }; enum armcp_fan_attributes { diff --git a/drivers/misc/habanalabs/include/goya/goya_async_events.h b/drivers/misc/habanalabs/include/goya/goya_async_events.h index bb7a1aa3279e..5fb92362fc5f 100644 --- a/drivers/misc/habanalabs/include/goya/goya_async_events.h +++ b/drivers/misc/habanalabs/include/goya/goya_async_events.h @@ -188,6 +188,10 @@ enum goya_async_event_id { GOYA_ASYNC_EVENT_ID_HALT_MACHINE = 485, GOYA_ASYNC_EVENT_ID_INTS_REGISTER = 486, GOYA_ASYNC_EVENT_ID_SOFT_RESET = 487, + GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S = 507, + GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E = 508, + GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S = 509, + GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E = 510, GOYA_ASYNC_EVENT_ID_LAST_VALID_ID = 1023, GOYA_ASYNC_EVENT_ID_SIZE }; diff --git a/drivers/misc/habanalabs/include/goya/goya_reg_map.h b/drivers/misc/habanalabs/include/goya/goya_reg_map.h index cd89723c7f61..08061282cd9c 100644 --- a/drivers/misc/habanalabs/include/goya/goya_reg_map.h +++ b/drivers/misc/habanalabs/include/goya/goya_reg_map.h @@ -11,24 +11,27 @@ /* * PSOC scratch-pad registers */ -#define mmCPU_PQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_0 -#define mmCPU_PQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_1 -#define mmCPU_EQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_2 -#define mmCPU_EQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_3 -#define mmCPU_EQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_4 -#define mmCPU_PQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_5 -#define mmCPU_EQ_CI mmPSOC_GLOBAL_CONF_SCRATCHPAD_6 -#define mmCPU_PQ_INIT_STATUS mmPSOC_GLOBAL_CONF_SCRATCHPAD_7 -#define mmCPU_CQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_8 -#define mmCPU_CQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_9 -#define mmCPU_CQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_10 -#define mmUPD_STS mmPSOC_GLOBAL_CONF_SCRATCHPAD_26 -#define mmUPD_CMD mmPSOC_GLOBAL_CONF_SCRATCHPAD_27 -#define mmPREBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_28 -#define mmUBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_29 -#define mmUBOOT_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_30 -#define mmBTL_ID mmPSOC_GLOBAL_CONF_SCRATCHPAD_31 +#define mmCPU_PQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_0 +#define mmCPU_PQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_1 +#define mmCPU_EQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_2 +#define mmCPU_EQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_3 +#define mmCPU_EQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_4 +#define mmCPU_PQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_5 +#define mmCPU_EQ_CI mmPSOC_GLOBAL_CONF_SCRATCHPAD_6 +#define mmCPU_PQ_INIT_STATUS mmPSOC_GLOBAL_CONF_SCRATCHPAD_7 +#define mmCPU_CQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_8 +#define mmCPU_CQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_9 +#define mmCPU_CQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_10 +#define mmCPU_BOOT_ERR0 mmPSOC_GLOBAL_CONF_SCRATCHPAD_24 +#define mmCPU_BOOT_ERR1 mmPSOC_GLOBAL_CONF_SCRATCHPAD_25 +#define mmUPD_STS mmPSOC_GLOBAL_CONF_SCRATCHPAD_26 +#define mmUPD_CMD mmPSOC_GLOBAL_CONF_SCRATCHPAD_27 +#define mmPREBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_28 +#define mmUBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_29 +#define mmRDWR_TEST mmPSOC_GLOBAL_CONF_SCRATCHPAD_30 +#define mmBTL_ID mmPSOC_GLOBAL_CONF_SCRATCHPAD_31 -#define mmHW_STATE mmPSOC_GLOBAL_CONF_APP_STATUS +#define mmHW_STATE mmPSOC_GLOBAL_CONF_APP_STATUS +#define mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS mmPSOC_GLOBAL_CONF_WARM_REBOOT #endif /* GOYA_REG_MAP_H_ */ diff --git a/drivers/misc/habanalabs/include/hl_boot_if.h b/drivers/misc/habanalabs/include/hl_boot_if.h index 2853a2de8cf6..f7992a69fd3a 100644 --- a/drivers/misc/habanalabs/include/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/hl_boot_if.h @@ -8,20 +8,35 @@ #ifndef HL_BOOT_IF_H #define HL_BOOT_IF_H +#define LKD_HARD_RESET_MAGIC 0xED7BD694 + +/* CPU error bits in BOOT_ERROR registers */ +#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << 0) +#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << 1) +#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << 2) +#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << 3) +#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << 4) +#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << 5) +#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << 6) +#define CPU_BOOT_ERR0_ENABLED (1 << 31) + enum cpu_boot_status { CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */ - CPU_BOOT_STATUS_IN_WFE, - CPU_BOOT_STATUS_DRAM_RDY, - CPU_BOOT_STATUS_SRAM_AVAIL, - CPU_BOOT_STATUS_IN_BTL, /* BTL is H/W FSM */ - CPU_BOOT_STATUS_IN_PREBOOT, - CPU_BOOT_STATUS_IN_SPL, - CPU_BOOT_STATUS_IN_UBOOT, - CPU_BOOT_STATUS_DRAM_INIT_FAIL, - CPU_BOOT_STATUS_FIT_CORRUPTED, - CPU_BOOT_STATUS_UBOOT_NOT_READY, - CPU_BOOT_STATUS_RESERVED, - CPU_BOOT_STATUS_TS_INIT_FAIL, + CPU_BOOT_STATUS_IN_WFE = 1, + CPU_BOOT_STATUS_DRAM_RDY = 2, + CPU_BOOT_STATUS_SRAM_AVAIL = 3, + CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */ + CPU_BOOT_STATUS_IN_PREBOOT = 5, + CPU_BOOT_STATUS_IN_SPL = 6, + CPU_BOOT_STATUS_IN_UBOOT = 7, + CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */ + CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */ + CPU_BOOT_STATUS_UBOOT_NOT_READY = 10, + CPU_BOOT_STATUS_NIC_FW_RDY = 11, + CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */ + CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */ + CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */ + CPU_BOOT_STATUS_READY_TO_BOOT = 15, }; enum kmd_msg { diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index 6c72cb4eff54..a72f766ca470 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -530,7 +530,7 @@ static u64 get_va_block(struct hl_device *hdev, * or not, hence we continue with the biggest possible * granularity. */ - page_size = hdev->asic_prop.pmmu.huge_page_size; + page_size = hdev->asic_prop.pmmu_huge.page_size; else page_size = hdev->asic_prop.dmmu.page_size; @@ -638,13 +638,12 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, struct hl_userptr *userptr, struct hl_vm_phys_pg_pack **pphys_pg_pack) { - struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu; struct hl_vm_phys_pg_pack *phys_pg_pack; struct scatterlist *sg; dma_addr_t dma_addr; u64 page_mask, total_npages; u32 npages, page_size = PAGE_SIZE, - huge_page_size = mmu_prop->huge_page_size; + huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size; bool first = true, is_huge_page_opt = true; int rc, i, j; u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size); @@ -747,7 +746,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, for (i = 0 ; i < phys_pg_pack->npages ; i++) { paddr = phys_pg_pack->pages[i]; - rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size); + rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size, + (i + 1) == phys_pg_pack->npages); if (rc) { dev_err(hdev->dev, "map failed for handle %u, npages: %llu, mapped: %llu", @@ -765,7 +765,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, err: next_vaddr = vaddr; for (i = 0 ; i < mapped_pg_cnt ; i++) { - if (hl_mmu_unmap(ctx, next_vaddr, page_size)) + if (hl_mmu_unmap(ctx, next_vaddr, page_size, + (i + 1) == mapped_pg_cnt)) dev_warn_ratelimited(hdev->dev, "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n", phys_pg_pack->handle, next_vaddr, @@ -794,7 +795,8 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, next_vaddr = vaddr; for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { - if (hl_mmu_unmap(ctx, next_vaddr, page_size)) + if (hl_mmu_unmap(ctx, next_vaddr, page_size, + (i + 1) == phys_pg_pack->npages)) dev_warn_ratelimited(hdev->dev, "unmap failed for vaddr: 0x%llx\n", next_vaddr); @@ -853,6 +855,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_userptr *userptr = NULL; struct hl_vm_hash_node *hnode; + struct hl_va_range *va_range; enum vm_type_t *vm_type; u64 ret_vaddr, hint_addr; u32 handle = 0; @@ -924,9 +927,16 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, goto hnode_err; } - ret_vaddr = get_va_block(hdev, - is_userptr ? &ctx->host_va_range : &ctx->dram_va_range, - phys_pg_pack->total_size, hint_addr, is_userptr); + if (is_userptr) + if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size) + va_range = ctx->host_va_range; + else + va_range = ctx->host_huge_va_range; + else + va_range = ctx->dram_va_range; + + ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size, + hint_addr, is_userptr); if (!ret_vaddr) { dev_err(hdev->dev, "no available va block for handle %u\n", handle); @@ -965,10 +975,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, return 0; map_err: - if (add_va_block(hdev, - is_userptr ? &ctx->host_va_range : &ctx->dram_va_range, - ret_vaddr, - ret_vaddr + phys_pg_pack->total_size - 1)) + if (add_va_block(hdev, va_range, ret_vaddr, + ret_vaddr + phys_pg_pack->total_size - 1)) dev_warn(hdev->dev, "release va block failed for handle 0x%x, vaddr: 0x%llx\n", handle, ret_vaddr); @@ -1030,7 +1038,6 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) if (*vm_type == VM_TYPE_USERPTR) { is_userptr = true; - va_range = &ctx->host_va_range; userptr = hnode->ptr; rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack); @@ -1040,9 +1047,15 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) vaddr); goto vm_type_err; } + + if (phys_pg_pack->page_size == + hdev->asic_prop.pmmu.page_size) + va_range = ctx->host_va_range; + else + va_range = ctx->host_huge_va_range; } else if (*vm_type == VM_TYPE_PHYS_PACK) { is_userptr = false; - va_range = &ctx->dram_va_range; + va_range = ctx->dram_va_range; phys_pg_pack = hnode->ptr; } else { dev_warn(hdev->dev, @@ -1438,19 +1451,18 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, } /* - * hl_va_range_init - initialize virtual addresses range - * - * @hdev : pointer to the habanalabs device structure - * @va_range : pointer to the range to initialize - * @start : range start address - * @end : range end address + * va_range_init - initialize virtual addresses range + * @hdev: pointer to the habanalabs device structure + * @va_range: pointer to the range to initialize + * @start: range start address + * @end: range end address * * This function does the following: * - Initializes the virtual addresses list of the given range with the given * addresses. */ -static int hl_va_range_init(struct hl_device *hdev, - struct hl_va_range *va_range, u64 start, u64 end) +static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range, + u64 start, u64 end) { int rc; @@ -1485,47 +1497,105 @@ static int hl_va_range_init(struct hl_device *hdev, } /* - * hl_vm_ctx_init_with_ranges - initialize virtual memory for context + * va_range_fini() - clear a virtual addresses range + * @hdev: pointer to the habanalabs structure + * va_range: pointer to virtual addresses range * - * @ctx : pointer to the habanalabs context structure - * @host_range_start : host virtual addresses range start - * @host_range_end : host virtual addresses range end - * @dram_range_start : dram virtual addresses range start - * @dram_range_end : dram virtual addresses range end + * This function does the following: + * - Frees the virtual addresses block list and its lock + */ +static void va_range_fini(struct hl_device *hdev, + struct hl_va_range *va_range) +{ + mutex_lock(&va_range->lock); + clear_va_list_locked(hdev, &va_range->list); + mutex_unlock(&va_range->lock); + + mutex_destroy(&va_range->lock); + kfree(va_range); +} + +/* + * vm_ctx_init_with_ranges() - initialize virtual memory for context + * @ctx: pointer to the habanalabs context structure + * @host_range_start: host virtual addresses range start. + * @host_range_end: host virtual addresses range end. + * @host_huge_range_start: host virtual addresses range start for memory + * allocated with huge pages. + * @host_huge_range_end: host virtual addresses range end for memory allocated + * with huge pages. + * @dram_range_start: dram virtual addresses range start. + * @dram_range_end: dram virtual addresses range end. * * This function initializes the following: * - MMU for context * - Virtual address to area descriptor hashtable * - Virtual block list of available virtual memory */ -static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start, - u64 host_range_end, u64 dram_range_start, - u64 dram_range_end) +static int vm_ctx_init_with_ranges(struct hl_ctx *ctx, + u64 host_range_start, + u64 host_range_end, + u64 host_huge_range_start, + u64 host_huge_range_end, + u64 dram_range_start, + u64 dram_range_end) { struct hl_device *hdev = ctx->hdev; int rc; + ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL); + if (!ctx->host_va_range) + return -ENOMEM; + + ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range), + GFP_KERNEL); + if (!ctx->host_huge_va_range) { + rc = -ENOMEM; + goto host_huge_va_range_err; + } + + ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL); + if (!ctx->dram_va_range) { + rc = -ENOMEM; + goto dram_va_range_err; + } + rc = hl_mmu_ctx_init(ctx); if (rc) { dev_err(hdev->dev, "failed to init context %d\n", ctx->asid); - return rc; + goto mmu_ctx_err; } mutex_init(&ctx->mem_hash_lock); hash_init(ctx->mem_hash); - mutex_init(&ctx->host_va_range.lock); + mutex_init(&ctx->host_va_range->lock); - rc = hl_va_range_init(hdev, &ctx->host_va_range, host_range_start, - host_range_end); + rc = va_range_init(hdev, ctx->host_va_range, host_range_start, + host_range_end); if (rc) { dev_err(hdev->dev, "failed to init host vm range\n"); - goto host_vm_err; + goto host_page_range_err; + } + + if (hdev->pmmu_huge_range) { + mutex_init(&ctx->host_huge_va_range->lock); + + rc = va_range_init(hdev, ctx->host_huge_va_range, + host_huge_range_start, + host_huge_range_end); + if (rc) { + dev_err(hdev->dev, + "failed to init host huge vm range\n"); + goto host_hpage_range_err; + } + } else { + ctx->host_huge_va_range = ctx->host_va_range; } - mutex_init(&ctx->dram_va_range.lock); + mutex_init(&ctx->dram_va_range->lock); - rc = hl_va_range_init(hdev, &ctx->dram_va_range, dram_range_start, + rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start, dram_range_end); if (rc) { dev_err(hdev->dev, "failed to init dram vm range\n"); @@ -1537,15 +1607,29 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start, return 0; dram_vm_err: - mutex_destroy(&ctx->dram_va_range.lock); + mutex_destroy(&ctx->dram_va_range->lock); - mutex_lock(&ctx->host_va_range.lock); - clear_va_list_locked(hdev, &ctx->host_va_range.list); - mutex_unlock(&ctx->host_va_range.lock); -host_vm_err: - mutex_destroy(&ctx->host_va_range.lock); + if (hdev->pmmu_huge_range) { + mutex_lock(&ctx->host_huge_va_range->lock); + clear_va_list_locked(hdev, &ctx->host_huge_va_range->list); + mutex_unlock(&ctx->host_huge_va_range->lock); + } +host_hpage_range_err: + if (hdev->pmmu_huge_range) + mutex_destroy(&ctx->host_huge_va_range->lock); + mutex_lock(&ctx->host_va_range->lock); + clear_va_list_locked(hdev, &ctx->host_va_range->list); + mutex_unlock(&ctx->host_va_range->lock); +host_page_range_err: + mutex_destroy(&ctx->host_va_range->lock); mutex_destroy(&ctx->mem_hash_lock); hl_mmu_ctx_fini(ctx); +mmu_ctx_err: + kfree(ctx->dram_va_range); +dram_va_range_err: + kfree(ctx->host_huge_va_range); +host_huge_va_range_err: + kfree(ctx->host_va_range); return rc; } @@ -1553,8 +1637,8 @@ host_vm_err: int hl_vm_ctx_init(struct hl_ctx *ctx) { struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; - u64 host_range_start, host_range_end, dram_range_start, - dram_range_end; + u64 host_range_start, host_range_end, host_huge_range_start, + host_huge_range_end, dram_range_start, dram_range_end; atomic64_set(&ctx->dram_phys_mem, 0); @@ -1566,38 +1650,26 @@ int hl_vm_ctx_init(struct hl_ctx *ctx) * address of the memory related to the given handle. */ if (ctx->hdev->mmu_enable) { - dram_range_start = prop->va_space_dram_start_address; - dram_range_end = prop->va_space_dram_end_address; - host_range_start = prop->va_space_host_start_address; - host_range_end = prop->va_space_host_end_address; + dram_range_start = prop->dmmu.start_addr; + dram_range_end = prop->dmmu.end_addr; + host_range_start = prop->pmmu.start_addr; + host_range_end = prop->pmmu.end_addr; + host_huge_range_start = prop->pmmu_huge.start_addr; + host_huge_range_end = prop->pmmu_huge.end_addr; } else { dram_range_start = prop->dram_user_base_address; dram_range_end = prop->dram_end_address; host_range_start = prop->dram_user_base_address; host_range_end = prop->dram_end_address; + host_huge_range_start = prop->dram_user_base_address; + host_huge_range_end = prop->dram_end_address; } - return hl_vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end, - dram_range_start, dram_range_end); -} - -/* - * hl_va_range_fini - clear a virtual addresses range - * - * @hdev : pointer to the habanalabs structure - * va_range : pointer to virtual addresses range - * - * This function does the following: - * - Frees the virtual addresses block list and its lock - */ -static void hl_va_range_fini(struct hl_device *hdev, - struct hl_va_range *va_range) -{ - mutex_lock(&va_range->lock); - clear_va_list_locked(hdev, &va_range->list); - mutex_unlock(&va_range->lock); - - mutex_destroy(&va_range->lock); + return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end, + host_huge_range_start, + host_huge_range_end, + dram_range_start, + dram_range_end); } /* @@ -1664,8 +1736,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) } spin_unlock(&vm->idr_lock); - hl_va_range_fini(hdev, &ctx->dram_va_range); - hl_va_range_fini(hdev, &ctx->host_va_range); + va_range_fini(hdev, ctx->dram_va_range); + if (hdev->pmmu_huge_range) + va_range_fini(hdev, ctx->host_huge_va_range); + va_range_fini(hdev, ctx->host_va_range); mutex_destroy(&ctx->mem_hash_lock); hl_mmu_ctx_fini(ctx); diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c index 6262b26e2086..a290d6b49d78 100644 --- a/drivers/misc/habanalabs/mmu.c +++ b/drivers/misc/habanalabs/mmu.c @@ -254,6 +254,15 @@ static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr) return phys_hop_addr + pte_offset; } +static bool is_dram_va(struct hl_device *hdev, u64 virt_addr) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->dmmu.start_addr, + prop->dmmu.end_addr); +} + static int dram_default_mapping_init(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; @@ -548,6 +557,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) curr_pte; bool is_huge, clear_hop3 = true; + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; hop0_addr = get_hop0_addr(ctx); @@ -637,29 +647,27 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) clear_hop3 = true; if (!clear_hop3) - goto flush; + goto mapped; clear_pte(ctx, hop3_pte_addr); if (put_pte(ctx, hop3_addr)) - goto flush; + goto mapped; clear_pte(ctx, hop2_pte_addr); if (put_pte(ctx, hop2_addr)) - goto flush; + goto mapped; clear_pte(ctx, hop1_pte_addr); if (put_pte(ctx, hop1_addr)) - goto flush; + goto mapped; clear_pte(ctx, hop0_pte_addr); } -flush: - flush(ctx); - +mapped: return 0; not_mapped: @@ -675,6 +683,7 @@ not_mapped: * @ctx: pointer to the context structure * @virt_addr: virt addr to map from * @page_size: size of the page to unmap + * @flush_pte: whether to do a PCI flush * * This function does the following: * - Check that the virt addr is mapped @@ -685,40 +694,43 @@ not_mapped: * changes the MMU hash, it must be protected by a lock. * However, because it maps only a single page, the lock should be implemented * in a higher level in order to protect the entire mapping of the memory area + * + * For optimization reasons PCI flush may be requested once after unmapping of + * large area. */ -int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) +int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, + bool flush_pte) { struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_mmu_properties *mmu_prop; u64 real_virt_addr; u32 real_page_size, npages; - int i, rc; + int i, rc = 0; bool is_dram_addr; if (!hdev->mmu_enable) return 0; - is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + is_dram_addr = is_dram_va(hdev, virt_addr); - mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + if (is_dram_addr) + mmu_prop = &prop->dmmu; + else if ((page_size % prop->pmmu_huge.page_size) == 0) + mmu_prop = &prop->pmmu_huge; + else + mmu_prop = &prop->pmmu; /* * The H/W handles mapping of specific page sizes. Hence if the page * size is bigger, we break it to sub-pages and unmap them separately. */ - if ((page_size % mmu_prop->huge_page_size) == 0) { - real_page_size = mmu_prop->huge_page_size; - } else if ((page_size % mmu_prop->page_size) == 0) { + if ((page_size % mmu_prop->page_size) == 0) { real_page_size = mmu_prop->page_size; } else { dev_err(hdev->dev, - "page size of %u is not %uKB nor %uMB aligned, can't unmap\n", - page_size, - mmu_prop->page_size >> 10, - mmu_prop->huge_page_size >> 20); + "page size of %u is not %uKB aligned, can't unmap\n", + page_size, mmu_prop->page_size >> 10); return -EFAULT; } @@ -729,12 +741,15 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) for (i = 0 ; i < npages ; i++) { rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr); if (rc) - return rc; + break; real_virt_addr += real_page_size; } - return 0; + if (flush_pte) + flush(ctx); + + return rc; } static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, @@ -753,8 +768,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, hop4_new = false, is_huge; int rc = -ENOMEM; - mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; - /* * This mapping function can map a page or a huge page. For huge page * there are only 3 hops rather than 4. Currently the DRAM allocation @@ -762,11 +775,15 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, * one of the two page sizes. Since this is a common code for all the * three cases, we need this hugs page check. */ - is_huge = page_size == mmu_prop->huge_page_size; - - if (is_dram_addr && !is_huge) { - dev_err(hdev->dev, "DRAM mapping should use huge pages only\n"); - return -EFAULT; + if (is_dram_addr) { + mmu_prop = &prop->dmmu; + is_huge = true; + } else if (page_size == prop->pmmu_huge.page_size) { + mmu_prop = &prop->pmmu_huge; + is_huge = true; + } else { + mmu_prop = &prop->pmmu; + is_huge = false; } hop0_addr = get_hop0_addr(ctx); @@ -885,8 +902,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, get_pte(ctx, hop3_addr); } - flush(ctx); - return 0; err: @@ -909,6 +924,7 @@ err: * @virt_addr: virt addr to map from * @phys_addr: phys addr to map to * @page_size: physical page size + * @flush_pte: whether to do a PCI flush * * This function does the following: * - Check that the virt addr is not mapped @@ -919,8 +935,12 @@ err: * changes the MMU hash, it must be protected by a lock. * However, because it maps only a single page, the lock should be implemented * in a higher level in order to protect the entire mapping of the memory area + * + * For optimization reasons PCI flush may be requested once after mapping of + * large area. */ -int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) +int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, + bool flush_pte) { struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -933,26 +953,25 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) if (!hdev->mmu_enable) return 0; - is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + is_dram_addr = is_dram_va(hdev, virt_addr); - mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + if (is_dram_addr) + mmu_prop = &prop->dmmu; + else if ((page_size % prop->pmmu_huge.page_size) == 0) + mmu_prop = &prop->pmmu_huge; + else + mmu_prop = &prop->pmmu; /* * The H/W handles mapping of specific page sizes. Hence if the page * size is bigger, we break it to sub-pages and map them separately. */ - if ((page_size % mmu_prop->huge_page_size) == 0) { - real_page_size = mmu_prop->huge_page_size; - } else if ((page_size % mmu_prop->page_size) == 0) { + if ((page_size % mmu_prop->page_size) == 0) { real_page_size = mmu_prop->page_size; } else { dev_err(hdev->dev, - "page size of %u is not %dKB nor %dMB aligned, can't unmap\n", - page_size, - mmu_prop->page_size >> 10, - mmu_prop->huge_page_size >> 20); + "page size of %u is not %uKB aligned, can't unmap\n", + page_size, mmu_prop->page_size >> 10); return -EFAULT; } @@ -976,6 +995,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) mapped_cnt++; } + if (flush_pte) + flush(ctx); + return 0; err: @@ -988,6 +1010,8 @@ err: real_virt_addr += real_page_size; } + flush(ctx); + return rc; } diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c index de87693cf557..886459e0ddd9 100644 --- a/drivers/misc/lkdtm/bugs.c +++ b/drivers/misc/lkdtm/bugs.c @@ -11,6 +11,7 @@ #include <linux/sched/signal.h> #include <linux/sched/task_stack.h> #include <linux/uaccess.h> +#include <linux/slab.h> #ifdef CONFIG_X86_32 #include <asm/desc.h> @@ -175,6 +176,80 @@ void lkdtm_HUNG_TASK(void) schedule(); } +volatile unsigned int huge = INT_MAX - 2; +volatile unsigned int ignored; + +void lkdtm_OVERFLOW_SIGNED(void) +{ + int value; + + value = huge; + pr_info("Normal signed addition ...\n"); + value += 1; + ignored = value; + + pr_info("Overflowing signed addition ...\n"); + value += 4; + ignored = value; +} + + +void lkdtm_OVERFLOW_UNSIGNED(void) +{ + unsigned int value; + + value = huge; + pr_info("Normal unsigned addition ...\n"); + value += 1; + ignored = value; + + pr_info("Overflowing unsigned addition ...\n"); + value += 4; + ignored = value; +} + +/* Intentially using old-style flex array definition of 1 byte. */ +struct array_bounds_flex_array { + int one; + int two; + char data[1]; +}; + +struct array_bounds { + int one; + int two; + char data[8]; + int three; +}; + +void lkdtm_ARRAY_BOUNDS(void) +{ + struct array_bounds_flex_array *not_checked; + struct array_bounds *checked; + volatile int i; + + not_checked = kmalloc(sizeof(*not_checked) * 2, GFP_KERNEL); + checked = kmalloc(sizeof(*checked) * 2, GFP_KERNEL); + + pr_info("Array access within bounds ...\n"); + /* For both, touch all bytes in the actual member size. */ + for (i = 0; i < sizeof(checked->data); i++) + checked->data[i] = 'A'; + /* + * For the uninstrumented flex array member, also touch 1 byte + * beyond to verify it is correctly uninstrumented. + */ + for (i = 0; i < sizeof(not_checked->data) + 1; i++) + not_checked->data[i] = 'A'; + + pr_info("Array access beyond bounds ...\n"); + for (i = 0; i < sizeof(checked->data) + 1; i++) + checked->data[i] = 'B'; + + kfree(not_checked); + kfree(checked); +} + void lkdtm_CORRUPT_LIST_ADD(void) { /* @@ -378,3 +453,39 @@ void lkdtm_DOUBLE_FAULT(void) pr_err("XFAIL: this test is ia32-only\n"); #endif } + +#ifdef CONFIG_ARM64_PTR_AUTH +static noinline void change_pac_parameters(void) +{ + /* Reset the keys of current task */ + ptrauth_thread_init_kernel(current); + ptrauth_thread_switch_kernel(current); +} + +#define CORRUPT_PAC_ITERATE 10 +noinline void lkdtm_CORRUPT_PAC(void) +{ + int i; + + if (!system_supports_address_auth()) { + pr_err("FAIL: arm64 pointer authentication feature not present\n"); + return; + } + + pr_info("Change the PAC parameters to force function return failure\n"); + /* + * Pac is a hash value computed from input keys, return address and + * stack pointer. As pac has fewer bits so there is a chance of + * collision, so iterate few times to reduce the collision probability. + */ + for (i = 0; i < CORRUPT_PAC_ITERATE; i++) + change_pac_parameters(); + + pr_err("FAIL: %s test failed. Kernel may be unstable from here\n", __func__); +} +#else /* !CONFIG_ARM64_PTR_AUTH */ +noinline void lkdtm_CORRUPT_PAC(void) +{ + pr_err("FAIL: arm64 pointer authentication config disabled\n"); +} +#endif diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index ee0d6e721441..a5e344df9166 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -116,6 +116,7 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(STACK_GUARD_PAGE_LEADING), CRASHTYPE(STACK_GUARD_PAGE_TRAILING), CRASHTYPE(UNSET_SMEP), + CRASHTYPE(CORRUPT_PAC), CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE), CRASHTYPE(OVERWRITE_ALLOCATION), CRASHTYPE(WRITE_AFTER_FREE), @@ -129,6 +130,9 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(HARDLOCKUP), CRASHTYPE(SPINLOCKUP), CRASHTYPE(HUNG_TASK), + CRASHTYPE(OVERFLOW_SIGNED), + CRASHTYPE(OVERFLOW_UNSIGNED), + CRASHTYPE(ARRAY_BOUNDS), CRASHTYPE(EXEC_DATA), CRASHTYPE(EXEC_STACK), CRASHTYPE(EXEC_KMALLOC), diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index c56d23e37643..601a2156a0d4 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -22,6 +22,9 @@ void lkdtm_SOFTLOCKUP(void); void lkdtm_HARDLOCKUP(void); void lkdtm_SPINLOCKUP(void); void lkdtm_HUNG_TASK(void); +void lkdtm_OVERFLOW_SIGNED(void); +void lkdtm_OVERFLOW_UNSIGNED(void); +void lkdtm_ARRAY_BOUNDS(void); void lkdtm_CORRUPT_LIST_ADD(void); void lkdtm_CORRUPT_LIST_DEL(void); void lkdtm_CORRUPT_USER_DS(void); @@ -31,6 +34,7 @@ void lkdtm_UNSET_SMEP(void); #ifdef CONFIG_X86_32 void lkdtm_DOUBLE_FAULT(void); #endif +void lkdtm_CORRUPT_PAC(void); /* lkdtm_heap.c */ void __init lkdtm_heap_init(void); diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c index d5a084475abc..d1a5c0705be3 100644 --- a/drivers/misc/lkdtm/stackleak.c +++ b/drivers/misc/lkdtm/stackleak.c @@ -16,6 +16,7 @@ void lkdtm_STACKLEAK_ERASING(void) unsigned long *sp, left, found, i; const unsigned long check_depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long); + bool test_failed = false; /* * For the details about the alignment of the poison values, see @@ -34,7 +35,8 @@ void lkdtm_STACKLEAK_ERASING(void) left--; } else { pr_err("FAIL: not enough stack space for the test\n"); - return; + test_failed = true; + goto end; } pr_info("checking unused part of the thread stack (%lu bytes)...\n", @@ -52,22 +54,29 @@ void lkdtm_STACKLEAK_ERASING(void) } if (found <= check_depth) { - pr_err("FAIL: thread stack is not erased (checked %lu bytes)\n", + pr_err("FAIL: the erased part is not found (checked %lu bytes)\n", i * sizeof(unsigned long)); - return; + test_failed = true; + goto end; } - pr_info("first %lu bytes are unpoisoned\n", + pr_info("the erased part begins after %lu not poisoned bytes\n", (i - found) * sizeof(unsigned long)); /* The rest of thread stack should be erased */ for (; i < left; i++) { if (*(sp - i) != STACKLEAK_POISON) { - pr_err("FAIL: thread stack is NOT properly erased\n"); - return; + pr_err("FAIL: bad value number %lu in the erased part: 0x%lx\n", + i, *(sp - i)); + test_failed = true; } } - pr_info("OK: the rest of the thread stack is properly erased\n"); - return; +end: + if (test_failed) { + pr_err("FAIL: the thread stack is NOT properly erased\n"); + dump_stack(); + } else { + pr_info("OK: the rest of the thread stack is properly erased\n"); + } } diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 9ad9c01ddf41..910f059b3384 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -91,7 +91,7 @@ struct mkhi_rule_id { struct mkhi_fwcaps { struct mkhi_rule_id id; u8 len; - u8 data[0]; + u8 data[]; } __packed; struct mkhi_fw_ver_block { @@ -119,7 +119,7 @@ struct mkhi_msg_hdr { struct mkhi_msg { struct mkhi_msg_hdr hdr; - u8 data[0]; + u8 data[]; } __packed; #define MKHI_OSVER_BUF_LEN (sizeof(struct mkhi_msg_hdr) + \ diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 1e3edbbacb1e..204d807e755b 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -1585,7 +1585,7 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, goto err; } - hbuf_len = mei_slots2data(hbuf_slots); + hbuf_len = mei_slots2data(hbuf_slots) & MEI_MSG_MAX_LEN_MASK; dr_slots = mei_dma_ring_empty_slots(dev); dr_len = mei_slots2data(dr_slots); @@ -1718,7 +1718,7 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb) goto out; } - hbuf_len = mei_slots2data(hbuf_slots); + hbuf_len = mei_slots2data(hbuf_slots) & MEI_MSG_MAX_LEN_MASK; dr_slots = mei_dma_ring_empty_slots(dev); dr_len = mei_slots2data(dr_slots); diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 87a0201ba6b3..9392934e3a06 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -75,9 +75,9 @@ #define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */ #define MEI_DEV_ID_CNP_LP 0x9DE0 /* Cannon Point LP */ -#define MEI_DEV_ID_CNP_LP_4 0x9DE4 /* Cannon Point LP 4 (iTouch) */ +#define MEI_DEV_ID_CNP_LP_3 0x9DE4 /* Cannon Point LP 3 (iTouch) */ #define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */ -#define MEI_DEV_ID_CNP_H_4 0xA364 /* Cannon Point H 4 (iTouch) */ +#define MEI_DEV_ID_CNP_H_3 0xA364 /* Cannon Point H 3 (iTouch) */ #define MEI_DEV_ID_CMP_LP 0x02e0 /* Comet Point LP */ #define MEI_DEV_ID_CMP_LP_3 0x02e4 /* Comet Point LP 3 (iTouch) */ @@ -87,6 +87,8 @@ #define MEI_DEV_ID_CMP_H 0x06e0 /* Comet Lake H */ #define MEI_DEV_ID_CMP_H_3 0x06e4 /* Comet Lake H 3 (iTouch) */ +#define MEI_DEV_ID_CDF 0x18D3 /* Cedar Fork */ + #define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ #define MEI_DEV_ID_JSP_N 0x4DE0 /* Jasper Lake Point N */ diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h index d025a5f8317e..b1a8d5ec88b3 100644 --- a/drivers/misc/mei/hw.h +++ b/drivers/misc/mei/hw.h @@ -209,11 +209,14 @@ struct mei_msg_hdr { u32 extension[0]; } __packed; +/* The length is up to 9 bits */ +#define MEI_MSG_MAX_LEN_MASK GENMASK(9, 0) + #define MEI_MSG_HDR_MAX 2 struct mei_bus_message { u8 hbm_cmd; - u8 data[0]; + u8 data[]; } __packed; /** diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index 76f8ff5ff974..3a29db07211d 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -533,7 +533,7 @@ struct mei_device { #endif /* CONFIG_DEBUG_FS */ const struct mei_hw_ops *ops; - char hw[0] __aligned(sizeof(void *)); + char hw[] __aligned(sizeof(void *)); }; static inline unsigned long mei_secs_to_jiffies(unsigned long sec) diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 2711451b3d87..3d21c38e2dbb 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -1,25 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2003-2019, Intel Corporation. All rights reserved. + * Copyright (c) 2003-2020, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ #include <linux/module.h> -#include <linux/moduleparam.h> #include <linux/kernel.h> #include <linux/device.h> -#include <linux/fs.h> #include <linux/errno.h> #include <linux/types.h> -#include <linux/fcntl.h> #include <linux/pci.h> -#include <linux/poll.h> -#include <linux/ioctl.h> -#include <linux/cdev.h> #include <linux/sched.h> -#include <linux/uuid.h> -#include <linux/compat.h> -#include <linux/jiffies.h> #include <linux/interrupt.h> #include <linux/pm_domain.h> @@ -92,9 +83,9 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH12_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_4, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_3, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_3, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)}, @@ -111,6 +102,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CDF, MEI_ME_PCH8_CFG)}, + /* required last entry */ {0, } }; diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c index f1c16a587495..beacf2a2f2b5 100644 --- a/drivers/misc/mei/pci-txe.c +++ b/drivers/misc/mei/pci-txe.c @@ -1,20 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2013-2017, Intel Corporation. All rights reserved. + * Copyright (c) 2013-2020, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/device.h> -#include <linux/fs.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/pci.h> #include <linux/init.h> #include <linux/sched.h> -#include <linux/uuid.h> -#include <linux/jiffies.h> #include <linux/interrupt.h> #include <linux/workqueue.h> #include <linux/pm_domain.h> diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index b6841ba6d922..8f201d019f5a 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -133,8 +133,4 @@ config VOP OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. -if VOP -source "drivers/vhost/Kconfig.vringh" -endif - endmenu diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c index 4f2d9212432c..fb5b3989753d 100644 --- a/drivers/misc/mic/host/mic_boot.c +++ b/drivers/misc/mic/host/mic_boot.c @@ -137,7 +137,7 @@ static void *__mic_dma_alloc(struct device *dev, size_t size, struct scif_hw_dev *scdev = dev_get_drvdata(dev); struct mic_device *mdev = scdev_to_mdev(scdev); dma_addr_t tmp; - void *va = kmalloc(size, gfp | __GFP_ZERO); + void *va = kzalloc(size, gfp); if (va) { tmp = mic_map_single(mdev, va, size); diff --git a/drivers/misc/mic/host/mic_x100.c b/drivers/misc/mic/host/mic_x100.c index a7743312da9c..d18cda966912 100644 --- a/drivers/misc/mic/host/mic_x100.c +++ b/drivers/misc/mic/host/mic_x100.c @@ -350,10 +350,10 @@ mic_x100_load_command_line(struct mic_device *mdev, const struct firmware *fw) if (!buf) return -ENOMEM; - len += snprintf(buf, CMDLINE_SIZE - len, + len += scnprintf(buf, CMDLINE_SIZE - len, " mem=%dM", boot_mem); if (mdev->cosm_dev->cmdline) - snprintf(buf + len, CMDLINE_SIZE - len, " %s", + scnprintf(buf + len, CMDLINE_SIZE - len, " %s", mdev->cosm_dev->cmdline); memcpy_toio(cmd_line_va, buf, strlen(buf) + 1); kfree(buf); diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index a5e317073d95..ef5a1af6bab7 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -17,6 +17,7 @@ #include <linux/mutex.h> #include <linux/random.h> #include <linux/slab.h> +#include <linux/uaccess.h> #include <linux/pci.h> #include <linux/pci_ids.h> @@ -64,6 +65,9 @@ #define PCI_ENDPOINT_TEST_IRQ_TYPE 0x24 #define PCI_ENDPOINT_TEST_IRQ_NUMBER 0x28 +#define PCI_ENDPOINT_TEST_FLAGS 0x2c +#define FLAG_USE_DMA BIT(0) + #define PCI_DEVICE_ID_TI_AM654 0xb00c #define is_am654_pci_dev(pdev) \ @@ -98,11 +102,13 @@ struct pci_endpoint_test { struct completion irq_raised; int last_irq; int num_irqs; + int irq_type; /* mutex to protect the ioctls */ struct mutex mutex; struct miscdevice miscdev; enum pci_barno test_reg_bar; size_t alignment; + const char *name; }; struct pci_endpoint_test_data { @@ -157,6 +163,7 @@ static void pci_endpoint_test_free_irq_vectors(struct pci_endpoint_test *test) struct pci_dev *pdev = test->pdev; pci_free_irq_vectors(pdev); + test->irq_type = IRQ_TYPE_UNDEFINED; } static bool pci_endpoint_test_alloc_irq_vectors(struct pci_endpoint_test *test, @@ -191,6 +198,8 @@ static bool pci_endpoint_test_alloc_irq_vectors(struct pci_endpoint_test *test, irq = 0; res = false; } + + test->irq_type = type; test->num_irqs = irq; return res; @@ -218,7 +227,7 @@ static bool pci_endpoint_test_request_irq(struct pci_endpoint_test *test) for (i = 0; i < test->num_irqs; i++) { err = devm_request_irq(dev, pci_irq_vector(pdev, i), pci_endpoint_test_irqhandler, - IRQF_SHARED, DRV_MODULE_NAME, test); + IRQF_SHARED, test->name, test); if (err) goto fail; } @@ -315,11 +324,16 @@ static bool pci_endpoint_test_msi_irq(struct pci_endpoint_test *test, return false; } -static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size) +static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, + unsigned long arg) { + struct pci_endpoint_test_xfer_param param; bool ret = false; void *src_addr; void *dst_addr; + u32 flags = 0; + bool use_dma; + size_t size; dma_addr_t src_phys_addr; dma_addr_t dst_phys_addr; struct pci_dev *pdev = test->pdev; @@ -330,25 +344,46 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size) dma_addr_t orig_dst_phys_addr; size_t offset; size_t alignment = test->alignment; + int irq_type = test->irq_type; u32 src_crc32; u32 dst_crc32; + int err; + err = copy_from_user(¶m, (void __user *)arg, sizeof(param)); + if (err) { + dev_err(dev, "Failed to get transfer param\n"); + return false; + } + + size = param.size; if (size > SIZE_MAX - alignment) goto err; + use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA); + if (use_dma) + flags |= FLAG_USE_DMA; + if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) { dev_err(dev, "Invalid IRQ type option\n"); goto err; } - orig_src_addr = dma_alloc_coherent(dev, size + alignment, - &orig_src_phys_addr, GFP_KERNEL); + orig_src_addr = kzalloc(size + alignment, GFP_KERNEL); if (!orig_src_addr) { dev_err(dev, "Failed to allocate source buffer\n"); ret = false; goto err; } + get_random_bytes(orig_src_addr, size + alignment); + orig_src_phys_addr = dma_map_single(dev, orig_src_addr, + size + alignment, DMA_TO_DEVICE); + if (dma_mapping_error(dev, orig_src_phys_addr)) { + dev_err(dev, "failed to map source buffer address\n"); + ret = false; + goto err_src_phys_addr; + } + if (alignment && !IS_ALIGNED(orig_src_phys_addr, alignment)) { src_phys_addr = PTR_ALIGN(orig_src_phys_addr, alignment); offset = src_phys_addr - orig_src_phys_addr; @@ -364,15 +399,21 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size) pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_SRC_ADDR, upper_32_bits(src_phys_addr)); - get_random_bytes(src_addr, size); src_crc32 = crc32_le(~0, src_addr, size); - orig_dst_addr = dma_alloc_coherent(dev, size + alignment, - &orig_dst_phys_addr, GFP_KERNEL); + orig_dst_addr = kzalloc(size + alignment, GFP_KERNEL); if (!orig_dst_addr) { dev_err(dev, "Failed to allocate destination address\n"); ret = false; - goto err_orig_src_addr; + goto err_dst_addr; + } + + orig_dst_phys_addr = dma_map_single(dev, orig_dst_addr, + size + alignment, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, orig_dst_phys_addr)) { + dev_err(dev, "failed to map destination buffer address\n"); + ret = false; + goto err_dst_phys_addr; } if (alignment && !IS_ALIGNED(orig_dst_phys_addr, alignment)) { @@ -392,6 +433,7 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size) pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_FLAGS, flags); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, @@ -399,24 +441,34 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size) wait_for_completion(&test->irq_raised); + dma_unmap_single(dev, orig_dst_phys_addr, size + alignment, + DMA_FROM_DEVICE); + dst_crc32 = crc32_le(~0, dst_addr, size); if (dst_crc32 == src_crc32) ret = true; - dma_free_coherent(dev, size + alignment, orig_dst_addr, - orig_dst_phys_addr); +err_dst_phys_addr: + kfree(orig_dst_addr); -err_orig_src_addr: - dma_free_coherent(dev, size + alignment, orig_src_addr, - orig_src_phys_addr); +err_dst_addr: + dma_unmap_single(dev, orig_src_phys_addr, size + alignment, + DMA_TO_DEVICE); + +err_src_phys_addr: + kfree(orig_src_addr); err: return ret; } -static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size) +static bool pci_endpoint_test_write(struct pci_endpoint_test *test, + unsigned long arg) { + struct pci_endpoint_test_xfer_param param; bool ret = false; + u32 flags = 0; + bool use_dma; u32 reg; void *addr; dma_addr_t phys_addr; @@ -426,24 +478,47 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size) dma_addr_t orig_phys_addr; size_t offset; size_t alignment = test->alignment; + int irq_type = test->irq_type; + size_t size; u32 crc32; + int err; + + err = copy_from_user(¶m, (void __user *)arg, sizeof(param)); + if (err != 0) { + dev_err(dev, "Failed to get transfer param\n"); + return false; + } + size = param.size; if (size > SIZE_MAX - alignment) goto err; + use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA); + if (use_dma) + flags |= FLAG_USE_DMA; + if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) { dev_err(dev, "Invalid IRQ type option\n"); goto err; } - orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr, - GFP_KERNEL); + orig_addr = kzalloc(size + alignment, GFP_KERNEL); if (!orig_addr) { dev_err(dev, "Failed to allocate address\n"); ret = false; goto err; } + get_random_bytes(orig_addr, size + alignment); + + orig_phys_addr = dma_map_single(dev, orig_addr, size + alignment, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, orig_phys_addr)) { + dev_err(dev, "failed to map source buffer address\n"); + ret = false; + goto err_phys_addr; + } + if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) { phys_addr = PTR_ALIGN(orig_phys_addr, alignment); offset = phys_addr - orig_phys_addr; @@ -453,8 +528,6 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size) addr = orig_addr; } - get_random_bytes(addr, size); - crc32 = crc32_le(~0, addr, size); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_CHECKSUM, crc32); @@ -466,6 +539,7 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size) pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_FLAGS, flags); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, @@ -477,15 +551,24 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size) if (reg & STATUS_READ_SUCCESS) ret = true; - dma_free_coherent(dev, size + alignment, orig_addr, orig_phys_addr); + dma_unmap_single(dev, orig_phys_addr, size + alignment, + DMA_TO_DEVICE); + +err_phys_addr: + kfree(orig_addr); err: return ret; } -static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size) +static bool pci_endpoint_test_read(struct pci_endpoint_test *test, + unsigned long arg) { + struct pci_endpoint_test_xfer_param param; bool ret = false; + u32 flags = 0; + bool use_dma; + size_t size; void *addr; dma_addr_t phys_addr; struct pci_dev *pdev = test->pdev; @@ -494,24 +577,44 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size) dma_addr_t orig_phys_addr; size_t offset; size_t alignment = test->alignment; + int irq_type = test->irq_type; u32 crc32; + int err; + + err = copy_from_user(¶m, (void __user *)arg, sizeof(param)); + if (err) { + dev_err(dev, "Failed to get transfer param\n"); + return false; + } + size = param.size; if (size > SIZE_MAX - alignment) goto err; + use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA); + if (use_dma) + flags |= FLAG_USE_DMA; + if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) { dev_err(dev, "Invalid IRQ type option\n"); goto err; } - orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr, - GFP_KERNEL); + orig_addr = kzalloc(size + alignment, GFP_KERNEL); if (!orig_addr) { dev_err(dev, "Failed to allocate destination address\n"); ret = false; goto err; } + orig_phys_addr = dma_map_single(dev, orig_addr, size + alignment, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, orig_phys_addr)) { + dev_err(dev, "failed to map source buffer address\n"); + ret = false; + goto err_phys_addr; + } + if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) { phys_addr = PTR_ALIGN(orig_phys_addr, alignment); offset = phys_addr - orig_phys_addr; @@ -528,6 +631,7 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size) pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_FLAGS, flags); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, @@ -535,15 +639,26 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size) wait_for_completion(&test->irq_raised); + dma_unmap_single(dev, orig_phys_addr, size + alignment, + DMA_FROM_DEVICE); + crc32 = crc32_le(~0, addr, size); if (crc32 == pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_CHECKSUM)) ret = true; - dma_free_coherent(dev, size + alignment, orig_addr, orig_phys_addr); +err_phys_addr: + kfree(orig_addr); err: return ret; } +static bool pci_endpoint_test_clear_irq(struct pci_endpoint_test *test) +{ + pci_endpoint_test_release_irq(test); + pci_endpoint_test_free_irq_vectors(test); + return true; +} + static bool pci_endpoint_test_set_irq(struct pci_endpoint_test *test, int req_irq_type) { @@ -555,7 +670,7 @@ static bool pci_endpoint_test_set_irq(struct pci_endpoint_test *test, return false; } - if (irq_type == req_irq_type) + if (test->irq_type == req_irq_type) return true; pci_endpoint_test_release_irq(test); @@ -567,12 +682,10 @@ static bool pci_endpoint_test_set_irq(struct pci_endpoint_test *test, if (!pci_endpoint_test_request_irq(test)) goto err; - irq_type = req_irq_type; return true; err: pci_endpoint_test_free_irq_vectors(test); - irq_type = IRQ_TYPE_UNDEFINED; return false; } @@ -616,6 +729,9 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd, case PCITEST_GET_IRQTYPE: ret = irq_type; break; + case PCITEST_CLEAR_IRQ: + ret = pci_endpoint_test_clear_irq(test); + break; } ret: @@ -633,7 +749,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, { int err; int id; - char name[20]; + char name[24]; enum pci_barno bar; void __iomem *base; struct device *dev = &pdev->dev; @@ -652,6 +768,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, test->test_reg_bar = 0; test->alignment = 0; test->pdev = pdev; + test->irq_type = IRQ_TYPE_UNDEFINED; if (no_msi) irq_type = IRQ_TYPE_LEGACY; @@ -667,6 +784,12 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, init_completion(&test->irq_raised); mutex_init(&test->mutex); + if ((dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)) != 0) && + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)) != 0) { + dev_err(dev, "Cannot set DMA mask\n"); + return -EINVAL; + } + err = pci_enable_device(pdev); if (err) { dev_err(dev, "Cannot enable PCI device\n"); @@ -684,9 +807,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, if (!pci_endpoint_test_alloc_irq_vectors(test, irq_type)) goto err_disable_irq; - if (!pci_endpoint_test_request_irq(test)) - goto err_disable_irq; - for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { base = pci_ioremap_bar(pdev, bar); @@ -716,12 +836,21 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, } snprintf(name, sizeof(name), DRV_MODULE_NAME ".%d", id); + test->name = kstrdup(name, GFP_KERNEL); + if (!test->name) { + err = -ENOMEM; + goto err_ida_remove; + } + + if (!pci_endpoint_test_request_irq(test)) + goto err_kfree_test_name; + misc_device = &test->miscdev; misc_device->minor = MISC_DYNAMIC_MINOR; misc_device->name = kstrdup(name, GFP_KERNEL); if (!misc_device->name) { err = -ENOMEM; - goto err_ida_remove; + goto err_release_irq; } misc_device->fops = &pci_endpoint_test_fops, @@ -736,6 +865,12 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, err_kfree_name: kfree(misc_device->name); +err_release_irq: + pci_endpoint_test_release_irq(test); + +err_kfree_test_name: + kfree(test->name); + err_ida_remove: ida_simple_remove(&pci_endpoint_test_ida, id); @@ -744,7 +879,6 @@ err_iounmap: if (test->bar[bar]) pci_iounmap(pdev, test->bar[bar]); } - pci_endpoint_test_release_irq(test); err_disable_irq: pci_endpoint_test_free_irq_vectors(test); @@ -770,6 +904,7 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) misc_deregister(&test->miscdev); kfree(misc_device->name); + kfree(test->name); ida_simple_remove(&pci_endpoint_test_ida, id); for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { if (test->bar[bar]) @@ -783,6 +918,12 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) pci_disable_device(pdev); } +static const struct pci_endpoint_test_data default_data = { + .test_reg_bar = BAR_0, + .alignment = SZ_4K, + .irq_type = IRQ_TYPE_MSI, +}; + static const struct pci_endpoint_test_data am654_data = { .test_reg_bar = BAR_2, .alignment = SZ_64K, @@ -790,8 +931,12 @@ static const struct pci_endpoint_test_data am654_data = { }; static const struct pci_device_id pci_endpoint_test_tbl[] = { - { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA74x) }, - { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA72x) }, + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA74x), + .driver_data = (kernel_ulong_t)&default_data, + }, + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA72x), + .driver_data = (kernel_ulong_t)&default_data, + }, { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, 0x81c0) }, { PCI_DEVICE_DATA(SYNOPSYS, EDDA, NULL) }, { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_AM654), diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h index e77d1b1f9d05..85c103923632 100644 --- a/drivers/misc/sgi-gru/grulib.h +++ b/drivers/misc/sgi-gru/grulib.h @@ -136,7 +136,7 @@ struct gru_dump_context_header { pid_t pid; unsigned long vaddr; int cch_locked; - unsigned long data[0]; + unsigned long data[]; }; /* diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index a7e44b2eb413..5ce8f3081e96 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -372,7 +372,7 @@ struct gru_thread_state { int ts_data_valid; /* Indicates if ts_gdata has valid data */ struct gru_gseg_statistics ustats; /* User statistics */ - unsigned long ts_gdata[0]; /* save area for GRU data (CB, + unsigned long ts_gdata[]; /* save area for GRU data (CB, DS, CBE) */ }; diff --git a/drivers/misc/uacce/Kconfig b/drivers/misc/uacce/Kconfig new file mode 100644 index 000000000000..5e39b6083b0f --- /dev/null +++ b/drivers/misc/uacce/Kconfig @@ -0,0 +1,13 @@ +config UACCE + tristate "Accelerator Framework for User Land" + depends on IOMMU_API + help + UACCE provides interface for the user process to access the hardware + without interaction with the kernel space in data path. + + The user-space interface is described in + include/uapi/misc/uacce/uacce.h + + See Documentation/misc-devices/uacce.rst for more details. + + If you don't know what to do here, say N. diff --git a/drivers/misc/uacce/Makefile b/drivers/misc/uacce/Makefile new file mode 100644 index 000000000000..5b4374e8b5f2 --- /dev/null +++ b/drivers/misc/uacce/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +obj-$(CONFIG_UACCE) += uacce.o diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c new file mode 100644 index 000000000000..d39307f060bd --- /dev/null +++ b/drivers/misc/uacce/uacce.c @@ -0,0 +1,633 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/compat.h> +#include <linux/dma-mapping.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/poll.h> +#include <linux/uacce.h> + +static struct class *uacce_class; +static dev_t uacce_devt; +static DEFINE_MUTEX(uacce_mutex); +static DEFINE_XARRAY_ALLOC(uacce_xa); + +static int uacce_start_queue(struct uacce_queue *q) +{ + int ret = 0; + + mutex_lock(&uacce_mutex); + + if (q->state != UACCE_Q_INIT) { + ret = -EINVAL; + goto out_with_lock; + } + + if (q->uacce->ops->start_queue) { + ret = q->uacce->ops->start_queue(q); + if (ret < 0) + goto out_with_lock; + } + + q->state = UACCE_Q_STARTED; + +out_with_lock: + mutex_unlock(&uacce_mutex); + + return ret; +} + +static int uacce_put_queue(struct uacce_queue *q) +{ + struct uacce_device *uacce = q->uacce; + + mutex_lock(&uacce_mutex); + + if (q->state == UACCE_Q_ZOMBIE) + goto out; + + if ((q->state == UACCE_Q_STARTED) && uacce->ops->stop_queue) + uacce->ops->stop_queue(q); + + if ((q->state == UACCE_Q_INIT || q->state == UACCE_Q_STARTED) && + uacce->ops->put_queue) + uacce->ops->put_queue(q); + + q->state = UACCE_Q_ZOMBIE; +out: + mutex_unlock(&uacce_mutex); + + return 0; +} + +static long uacce_fops_unl_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + + switch (cmd) { + case UACCE_CMD_START_Q: + return uacce_start_queue(q); + + case UACCE_CMD_PUT_Q: + return uacce_put_queue(q); + + default: + if (!uacce->ops->ioctl) + return -EINVAL; + + return uacce->ops->ioctl(q, cmd, arg); + } +} + +#ifdef CONFIG_COMPAT +static long uacce_fops_compat_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + arg = (unsigned long)compat_ptr(arg); + + return uacce_fops_unl_ioctl(filep, cmd, arg); +} +#endif + +static int uacce_sva_exit(struct device *dev, struct iommu_sva *handle, + void *data) +{ + struct uacce_mm *uacce_mm = data; + struct uacce_queue *q; + + /* + * No new queue can be added concurrently because no caller can have a + * reference to this mm. But there may be concurrent calls to + * uacce_mm_put(), so we need the lock. + */ + mutex_lock(&uacce_mm->lock); + list_for_each_entry(q, &uacce_mm->queues, list) + uacce_put_queue(q); + uacce_mm->mm = NULL; + mutex_unlock(&uacce_mm->lock); + + return 0; +} + +static struct iommu_sva_ops uacce_sva_ops = { + .mm_exit = uacce_sva_exit, +}; + +static struct uacce_mm *uacce_mm_get(struct uacce_device *uacce, + struct uacce_queue *q, + struct mm_struct *mm) +{ + struct uacce_mm *uacce_mm = NULL; + struct iommu_sva *handle = NULL; + int ret; + + lockdep_assert_held(&uacce->mm_lock); + + list_for_each_entry(uacce_mm, &uacce->mm_list, list) { + if (uacce_mm->mm == mm) { + mutex_lock(&uacce_mm->lock); + list_add(&q->list, &uacce_mm->queues); + mutex_unlock(&uacce_mm->lock); + return uacce_mm; + } + } + + uacce_mm = kzalloc(sizeof(*uacce_mm), GFP_KERNEL); + if (!uacce_mm) + return NULL; + + if (uacce->flags & UACCE_DEV_SVA) { + /* + * Safe to pass an incomplete uacce_mm, since mm_exit cannot + * fire while we hold a reference to the mm. + */ + handle = iommu_sva_bind_device(uacce->parent, mm, uacce_mm); + if (IS_ERR(handle)) + goto err_free; + + ret = iommu_sva_set_ops(handle, &uacce_sva_ops); + if (ret) + goto err_unbind; + + uacce_mm->pasid = iommu_sva_get_pasid(handle); + if (uacce_mm->pasid == IOMMU_PASID_INVALID) + goto err_unbind; + } + + uacce_mm->mm = mm; + uacce_mm->handle = handle; + INIT_LIST_HEAD(&uacce_mm->queues); + mutex_init(&uacce_mm->lock); + list_add(&q->list, &uacce_mm->queues); + list_add(&uacce_mm->list, &uacce->mm_list); + + return uacce_mm; + +err_unbind: + if (handle) + iommu_sva_unbind_device(handle); +err_free: + kfree(uacce_mm); + return NULL; +} + +static void uacce_mm_put(struct uacce_queue *q) +{ + struct uacce_mm *uacce_mm = q->uacce_mm; + + lockdep_assert_held(&q->uacce->mm_lock); + + mutex_lock(&uacce_mm->lock); + list_del(&q->list); + mutex_unlock(&uacce_mm->lock); + + if (list_empty(&uacce_mm->queues)) { + if (uacce_mm->handle) + iommu_sva_unbind_device(uacce_mm->handle); + list_del(&uacce_mm->list); + kfree(uacce_mm); + } +} + +static int uacce_fops_open(struct inode *inode, struct file *filep) +{ + struct uacce_mm *uacce_mm = NULL; + struct uacce_device *uacce; + struct uacce_queue *q; + int ret = 0; + + uacce = xa_load(&uacce_xa, iminor(inode)); + if (!uacce) + return -ENODEV; + + q = kzalloc(sizeof(struct uacce_queue), GFP_KERNEL); + if (!q) + return -ENOMEM; + + mutex_lock(&uacce->mm_lock); + uacce_mm = uacce_mm_get(uacce, q, current->mm); + mutex_unlock(&uacce->mm_lock); + if (!uacce_mm) { + ret = -ENOMEM; + goto out_with_mem; + } + + q->uacce = uacce; + q->uacce_mm = uacce_mm; + + if (uacce->ops->get_queue) { + ret = uacce->ops->get_queue(uacce, uacce_mm->pasid, q); + if (ret < 0) + goto out_with_mm; + } + + init_waitqueue_head(&q->wait); + filep->private_data = q; + uacce->inode = inode; + q->state = UACCE_Q_INIT; + + return 0; + +out_with_mm: + mutex_lock(&uacce->mm_lock); + uacce_mm_put(q); + mutex_unlock(&uacce->mm_lock); +out_with_mem: + kfree(q); + return ret; +} + +static int uacce_fops_release(struct inode *inode, struct file *filep) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + + uacce_put_queue(q); + + mutex_lock(&uacce->mm_lock); + uacce_mm_put(q); + mutex_unlock(&uacce->mm_lock); + + kfree(q); + + return 0; +} + +static vm_fault_t uacce_vma_fault(struct vm_fault *vmf) +{ + if (vmf->flags & (FAULT_FLAG_MKWRITE | FAULT_FLAG_WRITE)) + return VM_FAULT_SIGBUS; + + return 0; +} + +static void uacce_vma_close(struct vm_area_struct *vma) +{ + struct uacce_queue *q = vma->vm_private_data; + struct uacce_qfile_region *qfr = NULL; + + if (vma->vm_pgoff < UACCE_MAX_REGION) + qfr = q->qfrs[vma->vm_pgoff]; + + kfree(qfr); +} + +static const struct vm_operations_struct uacce_vm_ops = { + .fault = uacce_vma_fault, + .close = uacce_vma_close, +}; + +static int uacce_fops_mmap(struct file *filep, struct vm_area_struct *vma) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + struct uacce_qfile_region *qfr; + enum uacce_qfrt type = UACCE_MAX_REGION; + int ret = 0; + + if (vma->vm_pgoff < UACCE_MAX_REGION) + type = vma->vm_pgoff; + else + return -EINVAL; + + qfr = kzalloc(sizeof(*qfr), GFP_KERNEL); + if (!qfr) + return -ENOMEM; + + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_WIPEONFORK; + vma->vm_ops = &uacce_vm_ops; + vma->vm_private_data = q; + qfr->type = type; + + mutex_lock(&uacce_mutex); + + if (q->state != UACCE_Q_INIT && q->state != UACCE_Q_STARTED) { + ret = -EINVAL; + goto out_with_lock; + } + + if (q->qfrs[type]) { + ret = -EEXIST; + goto out_with_lock; + } + + switch (type) { + case UACCE_QFRT_MMIO: + if (!uacce->ops->mmap) { + ret = -EINVAL; + goto out_with_lock; + } + + ret = uacce->ops->mmap(q, vma, qfr); + if (ret) + goto out_with_lock; + + break; + + case UACCE_QFRT_DUS: + if (!uacce->ops->mmap) { + ret = -EINVAL; + goto out_with_lock; + } + + ret = uacce->ops->mmap(q, vma, qfr); + if (ret) + goto out_with_lock; + break; + + default: + ret = -EINVAL; + goto out_with_lock; + } + + q->qfrs[type] = qfr; + mutex_unlock(&uacce_mutex); + + return ret; + +out_with_lock: + mutex_unlock(&uacce_mutex); + kfree(qfr); + return ret; +} + +static __poll_t uacce_fops_poll(struct file *file, poll_table *wait) +{ + struct uacce_queue *q = file->private_data; + struct uacce_device *uacce = q->uacce; + + poll_wait(file, &q->wait, wait); + if (uacce->ops->is_q_updated && uacce->ops->is_q_updated(q)) + return EPOLLIN | EPOLLRDNORM; + + return 0; +} + +static const struct file_operations uacce_fops = { + .owner = THIS_MODULE, + .open = uacce_fops_open, + .release = uacce_fops_release, + .unlocked_ioctl = uacce_fops_unl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = uacce_fops_compat_ioctl, +#endif + .mmap = uacce_fops_mmap, + .poll = uacce_fops_poll, +}; + +#define to_uacce_device(dev) container_of(dev, struct uacce_device, dev) + +static ssize_t api_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%s\n", uacce->api_ver); +} + +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%u\n", uacce->flags); +} + +static ssize_t available_instances_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + if (!uacce->ops->get_available_instances) + return -ENODEV; + + return sprintf(buf, "%d\n", + uacce->ops->get_available_instances(uacce)); +} + +static ssize_t algorithms_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%s\n", uacce->algs); +} + +static ssize_t region_mmio_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%lu\n", + uacce->qf_pg_num[UACCE_QFRT_MMIO] << PAGE_SHIFT); +} + +static ssize_t region_dus_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%lu\n", + uacce->qf_pg_num[UACCE_QFRT_DUS] << PAGE_SHIFT); +} + +static DEVICE_ATTR_RO(api); +static DEVICE_ATTR_RO(flags); +static DEVICE_ATTR_RO(available_instances); +static DEVICE_ATTR_RO(algorithms); +static DEVICE_ATTR_RO(region_mmio_size); +static DEVICE_ATTR_RO(region_dus_size); + +static struct attribute *uacce_dev_attrs[] = { + &dev_attr_api.attr, + &dev_attr_flags.attr, + &dev_attr_available_instances.attr, + &dev_attr_algorithms.attr, + &dev_attr_region_mmio_size.attr, + &dev_attr_region_dus_size.attr, + NULL, +}; + +static umode_t uacce_dev_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct uacce_device *uacce = to_uacce_device(dev); + + if (((attr == &dev_attr_region_mmio_size.attr) && + (!uacce->qf_pg_num[UACCE_QFRT_MMIO])) || + ((attr == &dev_attr_region_dus_size.attr) && + (!uacce->qf_pg_num[UACCE_QFRT_DUS]))) + return 0; + + return attr->mode; +} + +static struct attribute_group uacce_dev_group = { + .is_visible = uacce_dev_is_visible, + .attrs = uacce_dev_attrs, +}; + +__ATTRIBUTE_GROUPS(uacce_dev); + +static void uacce_release(struct device *dev) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + kfree(uacce); +} + +/** + * uacce_alloc() - alloc an accelerator + * @parent: pointer of uacce parent device + * @interface: pointer of uacce_interface for register + * + * Returns uacce pointer if success and ERR_PTR if not + * Need check returned negotiated uacce->flags + */ +struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface) +{ + unsigned int flags = interface->flags; + struct uacce_device *uacce; + int ret; + + uacce = kzalloc(sizeof(struct uacce_device), GFP_KERNEL); + if (!uacce) + return ERR_PTR(-ENOMEM); + + if (flags & UACCE_DEV_SVA) { + ret = iommu_dev_enable_feature(parent, IOMMU_DEV_FEAT_SVA); + if (ret) + flags &= ~UACCE_DEV_SVA; + } + + uacce->parent = parent; + uacce->flags = flags; + uacce->ops = interface->ops; + + ret = xa_alloc(&uacce_xa, &uacce->dev_id, uacce, xa_limit_32b, + GFP_KERNEL); + if (ret < 0) + goto err_with_uacce; + + INIT_LIST_HEAD(&uacce->mm_list); + mutex_init(&uacce->mm_lock); + device_initialize(&uacce->dev); + uacce->dev.devt = MKDEV(MAJOR(uacce_devt), uacce->dev_id); + uacce->dev.class = uacce_class; + uacce->dev.groups = uacce_dev_groups; + uacce->dev.parent = uacce->parent; + uacce->dev.release = uacce_release; + dev_set_name(&uacce->dev, "%s-%d", interface->name, uacce->dev_id); + + return uacce; + +err_with_uacce: + if (flags & UACCE_DEV_SVA) + iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA); + kfree(uacce); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(uacce_alloc); + +/** + * uacce_register() - add the accelerator to cdev and export to user space + * @uacce: The initialized uacce device + * + * Return 0 if register succeeded, or an error. + */ +int uacce_register(struct uacce_device *uacce) +{ + if (!uacce) + return -ENODEV; + + uacce->cdev = cdev_alloc(); + if (!uacce->cdev) + return -ENOMEM; + + uacce->cdev->ops = &uacce_fops; + uacce->cdev->owner = THIS_MODULE; + + return cdev_device_add(uacce->cdev, &uacce->dev); +} +EXPORT_SYMBOL_GPL(uacce_register); + +/** + * uacce_remove() - remove the accelerator + * @uacce: the accelerator to remove + */ +void uacce_remove(struct uacce_device *uacce) +{ + struct uacce_mm *uacce_mm; + struct uacce_queue *q; + + if (!uacce) + return; + /* + * unmap remaining mapping from user space, preventing user still + * access the mmaped area while parent device is already removed + */ + if (uacce->inode) + unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1); + + /* ensure no open queue remains */ + mutex_lock(&uacce->mm_lock); + list_for_each_entry(uacce_mm, &uacce->mm_list, list) { + /* + * We don't take the uacce_mm->lock here. Since we hold the + * device's mm_lock, no queue can be added to or removed from + * this uacce_mm. We may run concurrently with mm_exit, but + * uacce_put_queue() is serialized and iommu_sva_unbind_device() + * waits for the lock that mm_exit is holding. + */ + list_for_each_entry(q, &uacce_mm->queues, list) + uacce_put_queue(q); + + if (uacce->flags & UACCE_DEV_SVA) { + iommu_sva_unbind_device(uacce_mm->handle); + uacce_mm->handle = NULL; + } + } + mutex_unlock(&uacce->mm_lock); + + /* disable sva now since no opened queues */ + if (uacce->flags & UACCE_DEV_SVA) + iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA); + + if (uacce->cdev) + cdev_device_del(uacce->cdev, &uacce->dev); + xa_erase(&uacce_xa, uacce->dev_id); + put_device(&uacce->dev); +} +EXPORT_SYMBOL_GPL(uacce_remove); + +static int __init uacce_init(void) +{ + int ret; + + uacce_class = class_create(THIS_MODULE, UACCE_NAME); + if (IS_ERR(uacce_class)) + return PTR_ERR(uacce_class); + + ret = alloc_chrdev_region(&uacce_devt, 0, MINORMASK, UACCE_NAME); + if (ret) + class_destroy(uacce_class); + + return ret; +} + +static __exit void uacce_exit(void) +{ + unregister_chrdev_region(uacce_devt, MINORMASK); + class_destroy(uacce_class); +} + +subsys_initcall(uacce_init); +module_exit(uacce_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Hisilicon Tech. Co., Ltd."); +MODULE_DESCRIPTION("Accelerator interface for Userland applications"); diff --git a/drivers/misc/vexpress-syscfg.c b/drivers/misc/vexpress-syscfg.c index 058fcd7f9f01..a431787c0898 100644 --- a/drivers/misc/vexpress-syscfg.c +++ b/drivers/misc/vexpress-syscfg.c @@ -42,7 +42,7 @@ struct vexpress_syscfg_func { struct vexpress_syscfg *syscfg; struct regmap *regmap; int num_templates; - u32 template[0]; /* Keep it last! */ + u32 template[]; /* Keep it last! */ }; |