aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/misc
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc')
-rw-r--r--drivers/misc/Kconfig1
-rw-r--r--drivers/misc/Makefile1
-rw-r--r--drivers/misc/cardreader/rts5227.c3
-rw-r--r--drivers/misc/cardreader/rts5249.c2
-rw-r--r--drivers/misc/cardreader/rts5260.c2
-rw-r--r--drivers/misc/cardreader/rts5261.c2
-rw-r--r--drivers/misc/eeprom/at24.c4
-rw-r--r--drivers/misc/habanalabs/command_submission.c51
-rw-r--r--drivers/misc/habanalabs/debugfs.c92
-rw-r--r--drivers/misc/habanalabs/device.c2
-rw-r--r--drivers/misc/habanalabs/goya/goya.c204
-rw-r--r--drivers/misc/habanalabs/goya/goya_coresight.c4
-rw-r--r--drivers/misc/habanalabs/goya/goya_hwmgr.c2
-rw-r--r--drivers/misc/habanalabs/habanalabs.h62
-rw-r--r--drivers/misc/habanalabs/habanalabs_drv.c11
-rw-r--r--drivers/misc/habanalabs/hwmon.c106
-rw-r--r--drivers/misc/habanalabs/include/armcp_if.h20
-rw-r--r--drivers/misc/habanalabs/include/goya/goya_async_events.h4
-rw-r--r--drivers/misc/habanalabs/include/goya/goya_reg_map.h39
-rw-r--r--drivers/misc/habanalabs/include/hl_boot_if.h39
-rw-r--r--drivers/misc/habanalabs/memory.c222
-rw-r--r--drivers/misc/habanalabs/mmu.c110
-rw-r--r--drivers/misc/lkdtm/bugs.c111
-rw-r--r--drivers/misc/lkdtm/core.c4
-rw-r--r--drivers/misc/lkdtm/lkdtm.h4
-rw-r--r--drivers/misc/lkdtm/stackleak.c25
-rw-r--r--drivers/misc/mei/bus-fixup.c4
-rw-r--r--drivers/misc/mei/client.c4
-rw-r--r--drivers/misc/mei/hw-me-regs.h6
-rw-r--r--drivers/misc/mei/hw.h5
-rw-r--r--drivers/misc/mei/mei_dev.h2
-rw-r--r--drivers/misc/mei/pci-me.c17
-rw-r--r--drivers/misc/mei/pci-txe.c5
-rw-r--r--drivers/misc/mic/Kconfig4
-rw-r--r--drivers/misc/mic/host/mic_boot.c2
-rw-r--r--drivers/misc/mic/host/mic_x100.c4
-rw-r--r--drivers/misc/pci_endpoint_test.c213
-rw-r--r--drivers/misc/sgi-gru/grulib.h2
-rw-r--r--drivers/misc/sgi-gru/grutables.h2
-rw-r--r--drivers/misc/uacce/Kconfig13
-rw-r--r--drivers/misc/uacce/Makefile2
-rw-r--r--drivers/misc/uacce/uacce.c633
-rw-r--r--drivers/misc/vexpress-syscfg.c2
43 files changed, 1694 insertions, 353 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 7f0d48f406e3..99e151475d8f 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -480,4 +480,5 @@ source "drivers/misc/cxl/Kconfig"
source "drivers/misc/ocxl/Kconfig"
source "drivers/misc/cardreader/Kconfig"
source "drivers/misc/habanalabs/Kconfig"
+source "drivers/misc/uacce/Kconfig"
endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index c1860d35dc7e..9abf2923d831 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -56,4 +56,5 @@ obj-$(CONFIG_OCXL) += ocxl/
obj-y += cardreader/
obj-$(CONFIG_PVPANIC) += pvpanic.o
obj-$(CONFIG_HABANA_AI) += habanalabs/
+obj-$(CONFIG_UACCE) += uacce/
obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o
diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c
index 4feed296a327..3a9467aaa435 100644
--- a/drivers/misc/cardreader/rts5227.c
+++ b/drivers/misc/cardreader/rts5227.c
@@ -394,7 +394,8 @@ static const struct pcr_ops rts522a_pcr_ops = {
void rts522a_init_params(struct rtsx_pcr *pcr)
{
rts5227_init_params(pcr);
-
+ pcr->ops = &rts522a_pcr_ops;
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 20, 11);
pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3;
pcr->option.ocp_en = 1;
diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c
index db936e4d6e56..1a81cda948c1 100644
--- a/drivers/misc/cardreader/rts5249.c
+++ b/drivers/misc/cardreader/rts5249.c
@@ -618,6 +618,7 @@ static const struct pcr_ops rts524a_pcr_ops = {
void rts524a_init_params(struct rtsx_pcr *pcr)
{
rts5249_init_params(pcr);
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11);
pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF;
pcr->option.ltr_l1off_snooze_sspwrgate =
LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF;
@@ -733,6 +734,7 @@ static const struct pcr_ops rts525a_pcr_ops = {
void rts525a_init_params(struct rtsx_pcr *pcr)
{
rts5249_init_params(pcr);
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(25, 29, 11);
pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF;
pcr->option.ltr_l1off_snooze_sspwrgate =
LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF;
diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c
index 4214f02a17fd..711054ebad74 100644
--- a/drivers/misc/cardreader/rts5260.c
+++ b/drivers/misc/cardreader/rts5260.c
@@ -662,7 +662,7 @@ void rts5260_init_params(struct rtsx_pcr *pcr)
pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
pcr->aspm_en = ASPM_L1_EN;
- pcr->tx_initial_phase = SET_CLOCK_PHASE(1, 29, 16);
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11);
pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
pcr->ic_version = rts5260_get_ic_version(pcr);
diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c
index bc4967a6efa1..78c3b1d424c3 100644
--- a/drivers/misc/cardreader/rts5261.c
+++ b/drivers/misc/cardreader/rts5261.c
@@ -764,7 +764,7 @@ void rts5261_init_params(struct rtsx_pcr *pcr)
pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
pcr->aspm_en = ASPM_L1_EN;
- pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 27, 16);
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 11);
pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
pcr->ic_version = rts5261_get_ic_version(pcr);
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 031eb64549af..9ff18d4961ce 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -227,6 +227,7 @@ MODULE_DEVICE_TABLE(of, at24_of_match);
static const struct acpi_device_id at24_acpi_ids[] = {
{ "INT3499", (kernel_ulong_t)&at24_data_INT3499 },
+ { "TPF0001", (kernel_ulong_t)&at24_data_24c1024 },
{ /* END OF LIST */ }
};
MODULE_DEVICE_TABLE(acpi, at24_acpi_ids);
@@ -712,13 +713,14 @@ static int at24_probe(struct i2c_client *client)
* chip is functional.
*/
err = at24_read(at24, 0, &test_byte, 1);
- pm_runtime_idle(dev);
if (err) {
pm_runtime_disable(dev);
regulator_disable(at24->vcc_reg);
return -ENODEV;
}
+ pm_runtime_idle(dev);
+
if (writable)
dev_info(dev, "%u byte %s EEPROM, writable, %u bytes/write\n",
byte_len, client->name, at24->write_max);
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 0bf08678431b..409276b6374d 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -129,6 +129,8 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
spin_unlock(&job->user_cb->lock);
hl_cb_put(job->user_cb);
job->user_cb = NULL;
+ } else if (!rc) {
+ job->job_cb_size = job->user_cb_size;
}
return rc;
@@ -507,7 +509,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
struct hl_cb *cb;
bool int_queues_only = true;
u32 size_to_copy;
- int rc, i, parse_cnt;
+ int rc, i;
*cs_seq = ULLONG_MAX;
@@ -547,7 +549,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
hl_debugfs_add_cs(cs);
/* Validate ALL the CS chunks before submitting the CS */
- for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
+ for (i = 0 ; i < num_chunks ; i++) {
struct hl_cs_chunk *chunk = &cs_chunk_array[i];
enum hl_queue_type queue_type;
bool is_kernel_allocated_cb;
@@ -585,10 +587,6 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
job->cs = cs;
job->user_cb = cb;
job->user_cb_size = chunk->cb_size;
- if (is_kernel_allocated_cb)
- job->job_cb_size = cb->size;
- else
- job->job_cb_size = chunk->cb_size;
job->hw_queue_id = chunk->queue_index;
cs->jobs_in_queue_cnt[job->hw_queue_id]++;
@@ -659,8 +657,8 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
struct hl_device *hdev = hpriv->hdev;
union hl_cs_args *args = data;
struct hl_ctx *ctx = hpriv->ctx;
- void __user *chunks;
- u32 num_chunks;
+ void __user *chunks_execute, *chunks_restore;
+ u32 num_chunks_execute, num_chunks_restore;
u64 cs_seq = ULONG_MAX;
int rc, do_ctx_switch;
bool need_soft_reset = false;
@@ -673,13 +671,25 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
goto out;
}
+ chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
+ num_chunks_execute = args->in.num_chunks_execute;
+
+ if (!num_chunks_execute) {
+ dev_err(hdev->dev,
+ "Got execute CS with 0 chunks, context %d\n",
+ ctx->asid);
+ rc = -EINVAL;
+ goto out;
+ }
+
do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
long ret;
- chunks = (void __user *)(uintptr_t)args->in.chunks_restore;
- num_chunks = args->in.num_chunks_restore;
+ chunks_restore =
+ (void __user *) (uintptr_t) args->in.chunks_restore;
+ num_chunks_restore = args->in.num_chunks_restore;
mutex_lock(&hpriv->restore_phase_mutex);
@@ -707,13 +717,13 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
hdev->asic_funcs->restore_phase_topology(hdev);
- if (num_chunks == 0) {
+ if (!num_chunks_restore) {
dev_dbg(hdev->dev,
"Need to run restore phase but restore CS is empty\n");
rc = 0;
} else {
- rc = _hl_cs_ioctl(hpriv, chunks, num_chunks,
- &cs_seq);
+ rc = _hl_cs_ioctl(hpriv, chunks_restore,
+ num_chunks_restore, &cs_seq);
}
mutex_unlock(&hpriv->restore_phase_mutex);
@@ -726,7 +736,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
}
/* Need to wait for restore completion before execution phase */
- if (num_chunks > 0) {
+ if (num_chunks_restore) {
ret = _hl_cs_wait_ioctl(hdev, ctx,
jiffies_to_usecs(hdev->timeout_jiffies),
cs_seq);
@@ -754,18 +764,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
}
}
- chunks = (void __user *)(uintptr_t)args->in.chunks_execute;
- num_chunks = args->in.num_chunks_execute;
-
- if (num_chunks == 0) {
- dev_err(hdev->dev,
- "Got execute CS with 0 chunks, context %d\n",
- ctx->asid);
- rc = -EINVAL;
- goto out;
- }
-
- rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, &cs_seq);
+ rc = _hl_cs_ioctl(hpriv, chunks_execute, num_chunks_execute, &cs_seq);
out:
if (rc != -EAGAIN) {
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index 20413e350343..756d36ed5d95 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data)
}
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
- prop->va_space_dram_start_address,
- prop->va_space_dram_end_address);
+ prop->dmmu.start_addr,
+ prop->dmmu.end_addr);
+ /* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
mutex_lock(&ctx->mmu_lock);
@@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
goto out;
if (hdev->dram_supports_virtual_memory &&
- addr >= prop->va_space_dram_start_address &&
- addr < prop->va_space_dram_end_address)
+ (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
return true;
- if (addr >= prop->va_space_host_start_address &&
- addr < prop->va_space_host_end_address)
+ if (addr >= prop->pmmu.start_addr &&
+ addr < prop->pmmu.end_addr)
+ return true;
+
+ if (addr >= prop->pmmu_huge.start_addr &&
+ addr < prop->pmmu_huge.end_addr)
return true;
out:
return false;
@@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
}
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
- prop->va_space_dram_start_address,
- prop->va_space_dram_end_address);
+ prop->dmmu.start_addr,
+ prop->dmmu.end_addr);
+ /* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
mutex_lock(&ctx->mmu_lock);
@@ -705,6 +710,65 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
return count;
}
+static ssize_t hl_data_read64(struct file *f, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ char tmp_buf[32];
+ u64 addr = entry->addr;
+ u64 val;
+ ssize_t rc;
+
+ if (*ppos)
+ return 0;
+
+ if (hl_is_device_va(hdev, addr)) {
+ rc = device_va_to_pa(hdev, addr, &addr);
+ if (rc)
+ return rc;
+ }
+
+ rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
+ return rc;
+ }
+
+ sprintf(tmp_buf, "0x%016llx\n", val);
+ return simple_read_from_buffer(buf, count, ppos, tmp_buf,
+ strlen(tmp_buf));
+}
+
+static ssize_t hl_data_write64(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u64 addr = entry->addr;
+ u64 value;
+ ssize_t rc;
+
+ rc = kstrtoull_from_user(buf, count, 16, &value);
+ if (rc)
+ return rc;
+
+ if (hl_is_device_va(hdev, addr)) {
+ rc = device_va_to_pa(hdev, addr, &addr);
+ if (rc)
+ return rc;
+ }
+
+ rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
+ value, addr);
+ return rc;
+ }
+
+ return count;
+}
+
static ssize_t hl_get_power_state(struct file *f, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -912,6 +976,12 @@ static const struct file_operations hl_data32b_fops = {
.write = hl_data_write32
};
+static const struct file_operations hl_data64b_fops = {
+ .owner = THIS_MODULE,
+ .read = hl_data_read64,
+ .write = hl_data_write64
+};
+
static const struct file_operations hl_i2c_data_fops = {
.owner = THIS_MODULE,
.read = hl_i2c_data_read,
@@ -1025,6 +1095,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_data32b_fops);
+ debugfs_create_file("data64",
+ 0644,
+ dev_entry->root,
+ dev_entry,
+ &hl_data64b_fops);
+
debugfs_create_file("set_power_state",
0200,
dev_entry->root,
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index b680b0caa69b..aef4de36b7aa 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -36,7 +36,7 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
status = HL_DEVICE_STATUS_OPERATIONAL;
return status;
-};
+}
static void hpriv_release(struct kref *ref)
{
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index b8a8de24aaf7..68f065607544 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -324,7 +324,11 @@ static u32 goya_all_events[] = {
GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
- GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
+ GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
+ GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
+ GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
+ GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
+ GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};
static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
@@ -393,19 +397,21 @@ void goya_get_fixed_properties(struct hl_device *hdev)
prop->dmmu.hop2_mask = HOP2_MASK;
prop->dmmu.hop3_mask = HOP3_MASK;
prop->dmmu.hop4_mask = HOP4_MASK;
- prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
+ prop->dmmu.start_addr = VA_DDR_SPACE_START;
+ prop->dmmu.end_addr = VA_DDR_SPACE_END;
+ prop->dmmu.page_size = PAGE_SIZE_2MB;
- /* No difference between PMMU and DMMU except of page size */
+ /* shifts and masks are the same in PMMU and DMMU */
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
- prop->dmmu.page_size = PAGE_SIZE_2MB;
+ prop->pmmu.start_addr = VA_HOST_SPACE_START;
+ prop->pmmu.end_addr = VA_HOST_SPACE_END;
prop->pmmu.page_size = PAGE_SIZE_4KB;
- prop->va_space_host_start_address = VA_HOST_SPACE_START;
- prop->va_space_host_end_address = VA_HOST_SPACE_END;
- prop->va_space_dram_start_address = VA_DDR_SPACE_START;
- prop->va_space_dram_end_address = VA_DDR_SPACE_END;
- prop->dram_size_for_default_page_mapping =
- prop->va_space_dram_end_address;
+ /* PMMU and HPMMU are the same except of page size */
+ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
+ prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
+
+ prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
prop->cfg_size = CFG_SIZE;
prop->max_asid = MAX_ASID;
prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
@@ -2573,8 +2579,7 @@ static int goya_hw_init(struct hl_device *hdev)
* After CPU initialization is finished, change DDR bar mapping inside
* iATU to point to the start address of the MMU page tables
*/
- if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
- (MMU_PAGE_TABLES_ADDR &
+ if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
dev_err(hdev->dev,
"failed to map DDR bar to MMU page tables\n");
@@ -3443,12 +3448,13 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
/*
* WA for HW-23.
* We can't allow user to read from Host using QMANs other than 1.
+ * PMMU and HPMMU addresses are equal, check only one of them.
*/
if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
le32_to_cpu(user_dma_pkt->tsize),
- hdev->asic_prop.va_space_host_start_address,
- hdev->asic_prop.va_space_host_end_address)) {
+ hdev->asic_prop.pmmu.start_addr,
+ hdev->asic_prop.pmmu.end_addr)) {
dev_err(hdev->dev,
"Can't DMA from host on queue other then 1\n");
return -EFAULT;
@@ -4178,6 +4184,96 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
return rc;
}
+static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 ddr_bar_addr;
+ int rc = 0;
+
+ if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
+ u32 val_l = RREG32(addr - CFG_BASE);
+ u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
+
+ *val = (((u64) val_h) << 32) | val_l;
+
+ } else if ((addr >= SRAM_BASE_ADDR) &&
+ (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
+
+ *val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
+ (addr - SRAM_BASE_ADDR));
+
+ } else if ((addr >= DRAM_PHYS_BASE) &&
+ (addr <=
+ DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {
+
+ u64 bar_base_addr = DRAM_PHYS_BASE +
+ (addr & ~(prop->dram_pci_bar_size - 0x1ull));
+
+ ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
+ if (ddr_bar_addr != U64_MAX) {
+ *val = readq(hdev->pcie_bar[DDR_BAR_ID] +
+ (addr - bar_base_addr));
+
+ ddr_bar_addr = goya_set_ddr_bar_base(hdev,
+ ddr_bar_addr);
+ }
+ if (ddr_bar_addr == U64_MAX)
+ rc = -EIO;
+
+ } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
+ *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
+
+ } else {
+ rc = -EFAULT;
+ }
+
+ return rc;
+}
+
+static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 ddr_bar_addr;
+ int rc = 0;
+
+ if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
+ WREG32(addr - CFG_BASE, lower_32_bits(val));
+ WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
+
+ } else if ((addr >= SRAM_BASE_ADDR) &&
+ (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
+
+ writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
+ (addr - SRAM_BASE_ADDR));
+
+ } else if ((addr >= DRAM_PHYS_BASE) &&
+ (addr <=
+ DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {
+
+ u64 bar_base_addr = DRAM_PHYS_BASE +
+ (addr & ~(prop->dram_pci_bar_size - 0x1ull));
+
+ ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
+ if (ddr_bar_addr != U64_MAX) {
+ writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
+ (addr - bar_base_addr));
+
+ ddr_bar_addr = goya_set_ddr_bar_base(hdev,
+ ddr_bar_addr);
+ }
+ if (ddr_bar_addr == U64_MAX)
+ rc = -EIO;
+
+ } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
+ *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
+
+ } else {
+ rc = -EFAULT;
+ }
+
+ return rc;
+}
+
static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
struct goya_device *goya = hdev->asic_specific;
@@ -4297,6 +4393,14 @@ static const char *_goya_get_event_desc(u16 event_type)
return "TPC%d_bmon_spmu";
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
return "DMA_bm_ch%d";
+ case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
+ return "POWER_ENV_S";
+ case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
+ return "POWER_ENV_E";
+ case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
+ return "THERMAL_ENV_S";
+ case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
+ return "THERMAL_ENV_E";
default:
return "N/A";
}
@@ -4388,22 +4492,22 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
static void goya_print_razwi_info(struct hl_device *hdev)
{
if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
- dev_err(hdev->dev, "Illegal write to LBW\n");
+ dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
}
if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
- dev_err(hdev->dev, "Illegal read from LBW\n");
+ dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
}
if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
- dev_err(hdev->dev, "Illegal write to HBW\n");
+ dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
}
if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
- dev_err(hdev->dev, "Illegal read from HBW\n");
+ dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
}
}
@@ -4423,7 +4527,8 @@ static void goya_print_mmu_error_info(struct hl_device *hdev)
addr <<= 32;
addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
- dev_err(hdev->dev, "MMU page fault on va 0x%llx\n", addr);
+ dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
+ addr);
WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
}
@@ -4435,7 +4540,7 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
char desc[20] = "";
goya_get_event_desc(event_type, desc, sizeof(desc));
- dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
+ dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
event_type, desc);
if (razwi) {
@@ -4526,6 +4631,33 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
return rc;
}
+static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
+{
+ switch (event_type) {
+ case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
+ dev_info_ratelimited(hdev->dev,
+ "Clock throttling due to power consumption\n");
+ break;
+ case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
+ dev_info_ratelimited(hdev->dev,
+ "Power envelop is safe, back to optimal clock\n");
+ break;
+ case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
+ dev_info_ratelimited(hdev->dev,
+ "Clock throttling due to overheating\n");
+ break;
+ case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
+ dev_info_ratelimited(hdev->dev,
+ "Thermal envelop is safe, back to optimal clock\n");
+ break;
+
+ default:
+ dev_err(hdev->dev, "Received invalid clock change event %d\n",
+ event_type);
+ break;
+ }
+}
+
void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
@@ -4609,6 +4741,14 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
goya_unmask_irq(hdev, event_type);
break;
+ case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
+ case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
+ case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
+ case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
+ goya_print_clk_change_info(hdev, event_type);
+ goya_unmask_irq(hdev, event_type);
+ break;
+
default:
dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
event_type);
@@ -4776,7 +4916,8 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
- prop->dram_base_address + off, PAGE_SIZE_2MB);
+ prop->dram_base_address + off, PAGE_SIZE_2MB,
+ (off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
if (rc) {
dev_err(hdev->dev, "Map failed for address 0x%llx\n",
prop->dram_base_address + off);
@@ -4786,7 +4927,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
- hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
+ hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);
if (rc) {
dev_err(hdev->dev,
@@ -4799,7 +4940,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
rc = hl_mmu_map(hdev->kernel_ctx,
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
hdev->cpu_accessible_dma_address + cpu_off,
- PAGE_SIZE_4KB);
+ PAGE_SIZE_4KB, true);
if (rc) {
dev_err(hdev->dev,
"Map failed for CPU accessible memory\n");
@@ -4825,14 +4966,15 @@ unmap_cpu:
for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
if (hl_mmu_unmap(hdev->kernel_ctx,
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
- PAGE_SIZE_4KB))
+ PAGE_SIZE_4KB, true))
dev_warn_ratelimited(hdev->dev,
"failed to unmap address 0x%llx\n",
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap:
for (; off >= 0 ; off -= PAGE_SIZE_2MB)
if (hl_mmu_unmap(hdev->kernel_ctx,
- prop->dram_base_address + off, PAGE_SIZE_2MB))
+ prop->dram_base_address + off, PAGE_SIZE_2MB,
+ true))
dev_warn_ratelimited(hdev->dev,
"failed to unmap address 0x%llx\n",
prop->dram_base_address + off);
@@ -4857,14 +4999,15 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
- PAGE_SIZE_2MB))
+ PAGE_SIZE_2MB, true))
dev_warn(hdev->dev,
"Failed to unmap CPU accessible memory\n");
} else {
for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
if (hl_mmu_unmap(hdev->kernel_ctx,
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
- PAGE_SIZE_4KB))
+ PAGE_SIZE_4KB,
+ (cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
dev_warn_ratelimited(hdev->dev,
"failed to unmap address 0x%llx\n",
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
@@ -4872,7 +5015,8 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
if (hl_mmu_unmap(hdev->kernel_ctx,
- prop->dram_base_address + off, PAGE_SIZE_2MB))
+ prop->dram_base_address + off, PAGE_SIZE_2MB,
+ (off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
dev_warn_ratelimited(hdev->dev,
"Failed to unmap address 0x%llx\n",
prop->dram_base_address + off);
@@ -5113,6 +5257,7 @@ static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
}
static void goya_hw_queues_lock(struct hl_device *hdev)
+ __acquires(&goya->hw_queues_lock)
{
struct goya_device *goya = hdev->asic_specific;
@@ -5120,6 +5265,7 @@ static void goya_hw_queues_lock(struct hl_device *hdev)
}
static void goya_hw_queues_unlock(struct hl_device *hdev)
+ __releases(&goya->hw_queues_lock)
{
struct goya_device *goya = hdev->asic_specific;
@@ -5180,6 +5326,8 @@ static const struct hl_asic_funcs goya_funcs = {
.restore_phase_topology = goya_restore_phase_topology,
.debugfs_read32 = goya_debugfs_read32,
.debugfs_write32 = goya_debugfs_write32,
+ .debugfs_read64 = goya_debugfs_read64,
+ .debugfs_write64 = goya_debugfs_write64,
.add_device_attr = goya_add_device_attr,
.handle_eqe = goya_handle_eqe,
.set_pll_profile = goya_set_pll_profile,
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
index c1ee6e2b5dff..a1bc930d904f 100644
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -364,8 +364,8 @@ static int goya_etr_validate_address(struct hl_device *hdev, u64 addr,
u64 range_start, range_end;
if (hdev->mmu_enable) {
- range_start = prop->va_space_dram_start_address;
- range_end = prop->va_space_dram_end_address;
+ range_start = prop->dmmu.start_addr;
+ range_end = prop->dmmu.end_addr;
} else {
range_start = prop->dram_user_base_address;
range_end = prop->dram_end_address;
diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c
index b2ebc01e27f4..cdd4903e48fa 100644
--- a/drivers/misc/habanalabs/goya/goya_hwmgr.c
+++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c
@@ -298,8 +298,8 @@ static ssize_t pm_mng_profile_store(struct device *dev,
/* Make sure we are in LOW PLL when changing modes */
if (hdev->pm_mng_profile == PM_MANUAL) {
hdev->curr_pll_profile = PLL_HIGH;
- hl_device_set_frequency(hdev, PLL_LOW);
hdev->pm_mng_profile = PM_AUTO;
+ hl_device_set_frequency(hdev, PLL_LOW);
}
} else if (strncmp("manual", buf, strlen("manual")) == 0) {
if (hdev->pm_mng_profile == PM_AUTO) {
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 00c949f4ccd1..31ebcf9458fe 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -132,6 +132,8 @@ enum hl_device_hw_state {
/**
* struct hl_mmu_properties - ASIC specific MMU address translation properties.
+ * @start_addr: virtual start address of the memory region.
+ * @end_addr: virtual end address of the memory region.
* @hop0_shift: shift of hop 0 mask.
* @hop1_shift: shift of hop 1 mask.
* @hop2_shift: shift of hop 2 mask.
@@ -143,9 +145,10 @@ enum hl_device_hw_state {
* @hop3_mask: mask to get the PTE address in hop 3.
* @hop4_mask: mask to get the PTE address in hop 4.
* @page_size: default page size used to allocate memory.
- * @huge_page_size: page size used to allocate memory with huge pages.
*/
struct hl_mmu_properties {
+ u64 start_addr;
+ u64 end_addr;
u64 hop0_shift;
u64 hop1_shift;
u64 hop2_shift;
@@ -157,7 +160,6 @@ struct hl_mmu_properties {
u64 hop3_mask;
u64 hop4_mask;
u32 page_size;
- u32 huge_page_size;
};
/**
@@ -169,6 +171,8 @@ struct hl_mmu_properties {
* @preboot_ver: F/W Preboot version.
* @dmmu: DRAM MMU address translation properties.
* @pmmu: PCI (host) MMU address translation properties.
+ * @pmmu_huge: PCI (host) MMU address translation properties for memory
+ * allocated with huge pages.
* @sram_base_address: SRAM physical start address.
* @sram_end_address: SRAM physical end address.
* @sram_user_base_address - SRAM physical start address for user access.
@@ -178,14 +182,6 @@ struct hl_mmu_properties {
* @dram_size: DRAM total size.
* @dram_pci_bar_size: size of PCI bar towards DRAM.
* @max_power_default: max power of the device after reset
- * @va_space_host_start_address: base address of virtual memory range for
- * mapping host memory.
- * @va_space_host_end_address: end address of virtual memory range for
- * mapping host memory.
- * @va_space_dram_start_address: base address of virtual memory range for
- * mapping DRAM memory.
- * @va_space_dram_end_address: end address of virtual memory range for
- * mapping DRAM memory.
* @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
* fault.
* @pcie_dbi_base_address: Base address of the PCIE_DBI block.
@@ -218,6 +214,7 @@ struct asic_fixed_properties {
char preboot_ver[VERSION_MAX_LEN];
struct hl_mmu_properties dmmu;
struct hl_mmu_properties pmmu;
+ struct hl_mmu_properties pmmu_huge;
u64 sram_base_address;
u64 sram_end_address;
u64 sram_user_base_address;
@@ -227,10 +224,6 @@ struct asic_fixed_properties {
u64 dram_size;
u64 dram_pci_bar_size;
u64 max_power_default;
- u64 va_space_host_start_address;
- u64 va_space_host_end_address;
- u64 va_space_dram_start_address;
- u64 va_space_dram_end_address;
u64 dram_size_for_default_page_mapping;
u64 pcie_dbi_base_address;
u64 pcie_aux_dbi_reg_addr;
@@ -431,10 +424,12 @@ struct hl_eq {
* enum hl_asic_type - supported ASIC types.
* @ASIC_INVALID: Invalid ASIC type.
* @ASIC_GOYA: Goya device.
+ * @ASIC_GAUDI: Gaudi device.
*/
enum hl_asic_type {
ASIC_INVALID,
- ASIC_GOYA
+ ASIC_GOYA,
+ ASIC_GAUDI
};
struct hl_cs_parser;
@@ -589,6 +584,8 @@ struct hl_asic_funcs {
void (*restore_phase_topology)(struct hl_device *hdev);
int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val);
int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val);
+ int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val);
+ int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val);
void (*add_device_attr)(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void (*handle_eqe)(struct hl_device *hdev,
@@ -658,6 +655,8 @@ struct hl_va_range {
* this hits 0l. It is incremented on CS and CS_WAIT.
* @cs_pending: array of DMA fence objects representing pending CS.
* @host_va_range: holds available virtual addresses for host mappings.
+ * @host_huge_va_range: holds available virtual addresses for host mappings
+ * with huge pages.
* @dram_va_range: holds available virtual addresses for DRAM mappings.
* @mem_hash_lock: protects the mem_hash.
* @mmu_lock: protects the MMU page tables. Any change to the PGT, modifing the
@@ -688,8 +687,9 @@ struct hl_ctx {
struct hl_device *hdev;
struct kref refcount;
struct dma_fence *cs_pending[HL_MAX_PENDING_CS];
- struct hl_va_range host_va_range;
- struct hl_va_range dram_va_range;
+ struct hl_va_range *host_va_range;
+ struct hl_va_range *host_huge_va_range;
+ struct hl_va_range *dram_va_range;
struct mutex mem_hash_lock;
struct mutex mmu_lock;
struct list_head debugfs_list;
@@ -763,7 +763,7 @@ struct hl_userptr {
* @aborted: true if CS was aborted due to some device error.
*/
struct hl_cs {
- u8 jobs_in_queue_cnt[HL_MAX_QUEUES];
+ u16 jobs_in_queue_cnt[HL_MAX_QUEUES];
struct hl_ctx *ctx;
struct list_head job_list;
spinlock_t job_lock;
@@ -1291,6 +1291,8 @@ struct hl_device_idle_busy_ts {
* otherwise.
* @dram_supports_virtual_memory: is MMU enabled towards DRAM.
* @dram_default_page_mapping: is DRAM default page mapping enabled.
+ * @pmmu_huge_range: is a different virtual addresses range used for PMMU with
+ * huge pages.
* @init_done: is the initialization of the device done.
* @mmu_enable: is MMU enabled.
* @device_cpu_disabled: is the device CPU disabled (due to timeouts)
@@ -1372,6 +1374,7 @@ struct hl_device {
u8 reset_on_lockup;
u8 dram_supports_virtual_memory;
u8 dram_default_page_mapping;
+ u8 pmmu_huge_range;
u8 init_done;
u8 device_cpu_disabled;
u8 dma_mask;
@@ -1573,8 +1576,10 @@ int hl_mmu_init(struct hl_device *hdev);
void hl_mmu_fini(struct hl_device *hdev);
int hl_mmu_ctx_init(struct hl_ctx *ctx);
void hl_mmu_ctx_fini(struct hl_ctx *ctx);
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size);
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size);
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+ u32 page_size, bool flush_pte);
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+ bool flush_pte);
void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx);
@@ -1606,11 +1611,18 @@ int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask);
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
-long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
-long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr);
-long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr);
-long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr);
-long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr);
+int hl_get_temperature(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
+int hl_set_temperature(struct hl_device *hdev,
+ int sensor_index, u32 attr, long value);
+int hl_get_voltage(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
+int hl_get_current(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
+int hl_get_fan_speed(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
+int hl_get_pwm_info(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
long value);
u64 hl_get_max_power(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c
index 8c342fb499ca..b670859c677a 100644
--- a/drivers/misc/habanalabs/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/habanalabs_drv.c
@@ -40,12 +40,13 @@ MODULE_PARM_DESC(reset_on_lockup,
#define PCI_VENDOR_ID_HABANALABS 0x1da3
#define PCI_IDS_GOYA 0x0001
+#define PCI_IDS_GAUDI 0x1000
static const struct pci_device_id ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
+ { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
{ 0, }
};
-MODULE_DEVICE_TABLE(pci, ids);
/*
* get_asic_type - translate device id to asic type
@@ -63,6 +64,9 @@ static enum hl_asic_type get_asic_type(u16 device)
case PCI_IDS_GOYA:
asic_type = ASIC_GOYA;
break;
+ case PCI_IDS_GAUDI:
+ asic_type = ASIC_GAUDI;
+ break;
default:
asic_type = ASIC_INVALID;
break;
@@ -263,6 +267,11 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
dev_err(&pdev->dev, "Unsupported ASIC\n");
rc = -ENODEV;
goto free_hdev;
+ } else if (hdev->asic_type == ASIC_GAUDI) {
+ dev_err(&pdev->dev,
+ "GAUDI is not supported by the current kernel\n");
+ rc = -ENODEV;
+ goto free_hdev;
}
} else {
hdev->asic_type = asic_type;
diff --git a/drivers/misc/habanalabs/hwmon.c b/drivers/misc/habanalabs/hwmon.c
index 7be4bace9b4f..a21a26e07c3b 100644
--- a/drivers/misc/habanalabs/hwmon.c
+++ b/drivers/misc/habanalabs/hwmon.c
@@ -113,6 +113,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long *val)
{
struct hl_device *hdev = dev_get_drvdata(dev);
+ int rc;
if (hl_device_disabled_or_in_reset(hdev))
return -ENODEV;
@@ -125,36 +126,40 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
case hwmon_temp_crit:
case hwmon_temp_max_hyst:
case hwmon_temp_crit_hyst:
+ case hwmon_temp_offset:
+ case hwmon_temp_highest:
break;
default:
return -EINVAL;
}
- *val = hl_get_temperature(hdev, channel, attr);
+ rc = hl_get_temperature(hdev, channel, attr, val);
break;
case hwmon_in:
switch (attr) {
case hwmon_in_input:
case hwmon_in_min:
case hwmon_in_max:
+ case hwmon_in_highest:
break;
default:
return -EINVAL;
}
- *val = hl_get_voltage(hdev, channel, attr);
+ rc = hl_get_voltage(hdev, channel, attr, val);
break;
case hwmon_curr:
switch (attr) {
case hwmon_curr_input:
case hwmon_curr_min:
case hwmon_curr_max:
+ case hwmon_curr_highest:
break;
default:
return -EINVAL;
}
- *val = hl_get_current(hdev, channel, attr);
+ rc = hl_get_current(hdev, channel, attr, val);
break;
case hwmon_fan:
switch (attr) {
@@ -165,7 +170,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
default:
return -EINVAL;
}
- *val = hl_get_fan_speed(hdev, channel, attr);
+ rc = hl_get_fan_speed(hdev, channel, attr, val);
break;
case hwmon_pwm:
switch (attr) {
@@ -175,12 +180,12 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
default:
return -EINVAL;
}
- *val = hl_get_pwm_info(hdev, channel, attr);
+ rc = hl_get_pwm_info(hdev, channel, attr, val);
break;
default:
return -EINVAL;
}
- return 0;
+ return rc;
}
static int hl_write(struct device *dev, enum hwmon_sensor_types type,
@@ -192,6 +197,15 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
return -ENODEV;
switch (type) {
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_offset:
+ break;
+ default:
+ return -EINVAL;
+ }
+ hl_set_temperature(hdev, channel, attr, val);
+ break;
case hwmon_pwm:
switch (attr) {
case hwmon_pwm_input:
@@ -219,7 +233,10 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
case hwmon_temp_max_hyst:
case hwmon_temp_crit:
case hwmon_temp_crit_hyst:
+ case hwmon_temp_highest:
return 0444;
+ case hwmon_temp_offset:
+ return 0644;
}
break;
case hwmon_in:
@@ -227,6 +244,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
case hwmon_in_input:
case hwmon_in_min:
case hwmon_in_max:
+ case hwmon_in_highest:
return 0444;
}
break;
@@ -235,6 +253,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
case hwmon_curr_input:
case hwmon_curr_min:
case hwmon_curr_max:
+ case hwmon_curr_highest:
return 0444;
}
break;
@@ -265,10 +284,10 @@ static const struct hwmon_ops hl_hwmon_ops = {
.write = hl_write
};
-long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_temperature(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
- long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -279,22 +298,47 @@ long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, &result);
+ SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get temperature from sensor %d, error %d\n",
sensor_index, rc);
- result = 0;
+ *value = 0;
}
- return result;
+ return rc;
}
-long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_set_temperature(struct hl_device *hdev,
+ int sensor_index, u32 attr, long value)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.sensor_index = __cpu_to_le16(sensor_index);
+ pkt.type = __cpu_to_le16(attr);
+ pkt.value = __cpu_to_le64(value);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SENSORS_PKT_TIMEOUT, NULL);
+
+ if (rc)
+ dev_err(hdev->dev,
+ "Failed to set temperature of sensor %d, error %d\n",
+ sensor_index, rc);
+
+ return rc;
+}
+
+int hl_get_voltage(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
- long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -305,22 +349,22 @@ long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, &result);
+ SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get voltage from sensor %d, error %d\n",
sensor_index, rc);
- result = 0;
+ *value = 0;
}
- return result;
+ return rc;
}
-long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_current(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
- long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -331,22 +375,22 @@ long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, &result);
+ SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get current from sensor %d, error %d\n",
sensor_index, rc);
- result = 0;
+ *value = 0;
}
- return result;
+ return rc;
}
-long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_fan_speed(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
- long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -357,22 +401,22 @@ long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, &result);
+ SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get fan speed from sensor %d, error %d\n",
sensor_index, rc);
- result = 0;
+ *value = 0;
}
- return result;
+ return rc;
}
-long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_pwm_info(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
- long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -383,16 +427,16 @@ long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, &result);
+ SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get pwm info from sensor %d, error %d\n",
sensor_index, rc);
- result = 0;
+ *value = 0;
}
- return result;
+ return rc;
}
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h
index e4c6699a1868..bdd0a4c3a9cf 100644
--- a/drivers/misc/habanalabs/include/armcp_if.h
+++ b/drivers/misc/habanalabs/include/armcp_if.h
@@ -189,6 +189,10 @@ enum pq_init_status {
* ArmCP to write to the structure, to prevent data corruption in case of
* mismatched driver/FW versions.
*
+ * ARMCP_PACKET_TEMPERATURE_SET -
+ * Set the value of the offset property of a specified thermal sensor.
+ * The packet's arguments specify the desired sensor and the field to
+ * set.
*/
enum armcp_packet_id {
@@ -214,6 +218,8 @@ enum armcp_packet_id {
ARMCP_PACKET_MAX_POWER_GET, /* sysfs */
ARMCP_PACKET_MAX_POWER_SET, /* sysfs */
ARMCP_PACKET_EEPROM_DATA_GET, /* sysfs */
+ ARMCP_RESERVED,
+ ARMCP_PACKET_TEMPERATURE_SET, /* sysfs */
};
#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5
@@ -271,24 +277,32 @@ enum armcp_packet_rc {
armcp_packet_fault
};
+/*
+ * armcp_temp_type should adhere to hwmon_temp_attributes
+ * defined in Linux kernel hwmon.h file
+ */
enum armcp_temp_type {
armcp_temp_input,
armcp_temp_max = 6,
armcp_temp_max_hyst,
armcp_temp_crit,
- armcp_temp_crit_hyst
+ armcp_temp_crit_hyst,
+ armcp_temp_offset = 19,
+ armcp_temp_highest = 22
};
enum armcp_in_attributes {
armcp_in_input,
armcp_in_min,
- armcp_in_max
+ armcp_in_max,
+ armcp_in_highest = 7
};
enum armcp_curr_attributes {
armcp_curr_input,
armcp_curr_min,
- armcp_curr_max
+ armcp_curr_max,
+ armcp_curr_highest = 7
};
enum armcp_fan_attributes {
diff --git a/drivers/misc/habanalabs/include/goya/goya_async_events.h b/drivers/misc/habanalabs/include/goya/goya_async_events.h
index bb7a1aa3279e..5fb92362fc5f 100644
--- a/drivers/misc/habanalabs/include/goya/goya_async_events.h
+++ b/drivers/misc/habanalabs/include/goya/goya_async_events.h
@@ -188,6 +188,10 @@ enum goya_async_event_id {
GOYA_ASYNC_EVENT_ID_HALT_MACHINE = 485,
GOYA_ASYNC_EVENT_ID_INTS_REGISTER = 486,
GOYA_ASYNC_EVENT_ID_SOFT_RESET = 487,
+ GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S = 507,
+ GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E = 508,
+ GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S = 509,
+ GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E = 510,
GOYA_ASYNC_EVENT_ID_LAST_VALID_ID = 1023,
GOYA_ASYNC_EVENT_ID_SIZE
};
diff --git a/drivers/misc/habanalabs/include/goya/goya_reg_map.h b/drivers/misc/habanalabs/include/goya/goya_reg_map.h
index cd89723c7f61..08061282cd9c 100644
--- a/drivers/misc/habanalabs/include/goya/goya_reg_map.h
+++ b/drivers/misc/habanalabs/include/goya/goya_reg_map.h
@@ -11,24 +11,27 @@
/*
* PSOC scratch-pad registers
*/
-#define mmCPU_PQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
-#define mmCPU_PQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
-#define mmCPU_EQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
-#define mmCPU_EQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
-#define mmCPU_EQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_4
-#define mmCPU_PQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_5
-#define mmCPU_EQ_CI mmPSOC_GLOBAL_CONF_SCRATCHPAD_6
-#define mmCPU_PQ_INIT_STATUS mmPSOC_GLOBAL_CONF_SCRATCHPAD_7
-#define mmCPU_CQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
-#define mmCPU_CQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_9
-#define mmCPU_CQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_10
-#define mmUPD_STS mmPSOC_GLOBAL_CONF_SCRATCHPAD_26
-#define mmUPD_CMD mmPSOC_GLOBAL_CONF_SCRATCHPAD_27
-#define mmPREBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_28
-#define mmUBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_29
-#define mmUBOOT_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_30
-#define mmBTL_ID mmPSOC_GLOBAL_CONF_SCRATCHPAD_31
+#define mmCPU_PQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
+#define mmCPU_PQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
+#define mmCPU_EQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
+#define mmCPU_EQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
+#define mmCPU_EQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_4
+#define mmCPU_PQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_5
+#define mmCPU_EQ_CI mmPSOC_GLOBAL_CONF_SCRATCHPAD_6
+#define mmCPU_PQ_INIT_STATUS mmPSOC_GLOBAL_CONF_SCRATCHPAD_7
+#define mmCPU_CQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
+#define mmCPU_CQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_9
+#define mmCPU_CQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_10
+#define mmCPU_BOOT_ERR0 mmPSOC_GLOBAL_CONF_SCRATCHPAD_24
+#define mmCPU_BOOT_ERR1 mmPSOC_GLOBAL_CONF_SCRATCHPAD_25
+#define mmUPD_STS mmPSOC_GLOBAL_CONF_SCRATCHPAD_26
+#define mmUPD_CMD mmPSOC_GLOBAL_CONF_SCRATCHPAD_27
+#define mmPREBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_28
+#define mmUBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_29
+#define mmRDWR_TEST mmPSOC_GLOBAL_CONF_SCRATCHPAD_30
+#define mmBTL_ID mmPSOC_GLOBAL_CONF_SCRATCHPAD_31
-#define mmHW_STATE mmPSOC_GLOBAL_CONF_APP_STATUS
+#define mmHW_STATE mmPSOC_GLOBAL_CONF_APP_STATUS
+#define mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS mmPSOC_GLOBAL_CONF_WARM_REBOOT
#endif /* GOYA_REG_MAP_H_ */
diff --git a/drivers/misc/habanalabs/include/hl_boot_if.h b/drivers/misc/habanalabs/include/hl_boot_if.h
index 2853a2de8cf6..f7992a69fd3a 100644
--- a/drivers/misc/habanalabs/include/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/hl_boot_if.h
@@ -8,20 +8,35 @@
#ifndef HL_BOOT_IF_H
#define HL_BOOT_IF_H
+#define LKD_HARD_RESET_MAGIC 0xED7BD694
+
+/* CPU error bits in BOOT_ERROR registers */
+#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << 0)
+#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << 1)
+#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << 2)
+#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << 3)
+#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << 4)
+#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << 5)
+#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << 6)
+#define CPU_BOOT_ERR0_ENABLED (1 << 31)
+
enum cpu_boot_status {
CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */
- CPU_BOOT_STATUS_IN_WFE,
- CPU_BOOT_STATUS_DRAM_RDY,
- CPU_BOOT_STATUS_SRAM_AVAIL,
- CPU_BOOT_STATUS_IN_BTL, /* BTL is H/W FSM */
- CPU_BOOT_STATUS_IN_PREBOOT,
- CPU_BOOT_STATUS_IN_SPL,
- CPU_BOOT_STATUS_IN_UBOOT,
- CPU_BOOT_STATUS_DRAM_INIT_FAIL,
- CPU_BOOT_STATUS_FIT_CORRUPTED,
- CPU_BOOT_STATUS_UBOOT_NOT_READY,
- CPU_BOOT_STATUS_RESERVED,
- CPU_BOOT_STATUS_TS_INIT_FAIL,
+ CPU_BOOT_STATUS_IN_WFE = 1,
+ CPU_BOOT_STATUS_DRAM_RDY = 2,
+ CPU_BOOT_STATUS_SRAM_AVAIL = 3,
+ CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */
+ CPU_BOOT_STATUS_IN_PREBOOT = 5,
+ CPU_BOOT_STATUS_IN_SPL = 6,
+ CPU_BOOT_STATUS_IN_UBOOT = 7,
+ CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */
+ CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */
+ CPU_BOOT_STATUS_UBOOT_NOT_READY = 10,
+ CPU_BOOT_STATUS_NIC_FW_RDY = 11,
+ CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */
+ CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */
+ CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */
+ CPU_BOOT_STATUS_READY_TO_BOOT = 15,
};
enum kmd_msg {
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index 6c72cb4eff54..a72f766ca470 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -530,7 +530,7 @@ static u64 get_va_block(struct hl_device *hdev,
* or not, hence we continue with the biggest possible
* granularity.
*/
- page_size = hdev->asic_prop.pmmu.huge_page_size;
+ page_size = hdev->asic_prop.pmmu_huge.page_size;
else
page_size = hdev->asic_prop.dmmu.page_size;
@@ -638,13 +638,12 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
struct hl_userptr *userptr,
struct hl_vm_phys_pg_pack **pphys_pg_pack)
{
- struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu;
struct hl_vm_phys_pg_pack *phys_pg_pack;
struct scatterlist *sg;
dma_addr_t dma_addr;
u64 page_mask, total_npages;
u32 npages, page_size = PAGE_SIZE,
- huge_page_size = mmu_prop->huge_page_size;
+ huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
bool first = true, is_huge_page_opt = true;
int rc, i, j;
u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
@@ -747,7 +746,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
paddr = phys_pg_pack->pages[i];
- rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
+ rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
+ (i + 1) == phys_pg_pack->npages);
if (rc) {
dev_err(hdev->dev,
"map failed for handle %u, npages: %llu, mapped: %llu",
@@ -765,7 +765,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
err:
next_vaddr = vaddr;
for (i = 0 ; i < mapped_pg_cnt ; i++) {
- if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+ if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+ (i + 1) == mapped_pg_cnt))
dev_warn_ratelimited(hdev->dev,
"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
phys_pg_pack->handle, next_vaddr,
@@ -794,7 +795,8 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
next_vaddr = vaddr;
for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
- if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+ if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+ (i + 1) == phys_pg_pack->npages))
dev_warn_ratelimited(hdev->dev,
"unmap failed for vaddr: 0x%llx\n", next_vaddr);
@@ -853,6 +855,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
struct hl_vm_phys_pg_pack *phys_pg_pack;
struct hl_userptr *userptr = NULL;
struct hl_vm_hash_node *hnode;
+ struct hl_va_range *va_range;
enum vm_type_t *vm_type;
u64 ret_vaddr, hint_addr;
u32 handle = 0;
@@ -924,9 +927,16 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
goto hnode_err;
}
- ret_vaddr = get_va_block(hdev,
- is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
- phys_pg_pack->total_size, hint_addr, is_userptr);
+ if (is_userptr)
+ if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
+ va_range = ctx->host_va_range;
+ else
+ va_range = ctx->host_huge_va_range;
+ else
+ va_range = ctx->dram_va_range;
+
+ ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
+ hint_addr, is_userptr);
if (!ret_vaddr) {
dev_err(hdev->dev, "no available va block for handle %u\n",
handle);
@@ -965,10 +975,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
return 0;
map_err:
- if (add_va_block(hdev,
- is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
- ret_vaddr,
- ret_vaddr + phys_pg_pack->total_size - 1))
+ if (add_va_block(hdev, va_range, ret_vaddr,
+ ret_vaddr + phys_pg_pack->total_size - 1))
dev_warn(hdev->dev,
"release va block failed for handle 0x%x, vaddr: 0x%llx\n",
handle, ret_vaddr);
@@ -1030,7 +1038,6 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
if (*vm_type == VM_TYPE_USERPTR) {
is_userptr = true;
- va_range = &ctx->host_va_range;
userptr = hnode->ptr;
rc = init_phys_pg_pack_from_userptr(ctx, userptr,
&phys_pg_pack);
@@ -1040,9 +1047,15 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
vaddr);
goto vm_type_err;
}
+
+ if (phys_pg_pack->page_size ==
+ hdev->asic_prop.pmmu.page_size)
+ va_range = ctx->host_va_range;
+ else
+ va_range = ctx->host_huge_va_range;
} else if (*vm_type == VM_TYPE_PHYS_PACK) {
is_userptr = false;
- va_range = &ctx->dram_va_range;
+ va_range = ctx->dram_va_range;
phys_pg_pack = hnode->ptr;
} else {
dev_warn(hdev->dev,
@@ -1438,19 +1451,18 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
}
/*
- * hl_va_range_init - initialize virtual addresses range
- *
- * @hdev : pointer to the habanalabs device structure
- * @va_range : pointer to the range to initialize
- * @start : range start address
- * @end : range end address
+ * va_range_init - initialize virtual addresses range
+ * @hdev: pointer to the habanalabs device structure
+ * @va_range: pointer to the range to initialize
+ * @start: range start address
+ * @end: range end address
*
* This function does the following:
* - Initializes the virtual addresses list of the given range with the given
* addresses.
*/
-static int hl_va_range_init(struct hl_device *hdev,
- struct hl_va_range *va_range, u64 start, u64 end)
+static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
+ u64 start, u64 end)
{
int rc;
@@ -1485,47 +1497,105 @@ static int hl_va_range_init(struct hl_device *hdev,
}
/*
- * hl_vm_ctx_init_with_ranges - initialize virtual memory for context
+ * va_range_fini() - clear a virtual addresses range
+ * @hdev: pointer to the habanalabs structure
+ * va_range: pointer to virtual addresses range
*
- * @ctx : pointer to the habanalabs context structure
- * @host_range_start : host virtual addresses range start
- * @host_range_end : host virtual addresses range end
- * @dram_range_start : dram virtual addresses range start
- * @dram_range_end : dram virtual addresses range end
+ * This function does the following:
+ * - Frees the virtual addresses block list and its lock
+ */
+static void va_range_fini(struct hl_device *hdev,
+ struct hl_va_range *va_range)
+{
+ mutex_lock(&va_range->lock);
+ clear_va_list_locked(hdev, &va_range->list);
+ mutex_unlock(&va_range->lock);
+
+ mutex_destroy(&va_range->lock);
+ kfree(va_range);
+}
+
+/*
+ * vm_ctx_init_with_ranges() - initialize virtual memory for context
+ * @ctx: pointer to the habanalabs context structure
+ * @host_range_start: host virtual addresses range start.
+ * @host_range_end: host virtual addresses range end.
+ * @host_huge_range_start: host virtual addresses range start for memory
+ * allocated with huge pages.
+ * @host_huge_range_end: host virtual addresses range end for memory allocated
+ * with huge pages.
+ * @dram_range_start: dram virtual addresses range start.
+ * @dram_range_end: dram virtual addresses range end.
*
* This function initializes the following:
* - MMU for context
* - Virtual address to area descriptor hashtable
* - Virtual block list of available virtual memory
*/
-static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
- u64 host_range_end, u64 dram_range_start,
- u64 dram_range_end)
+static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
+ u64 host_range_start,
+ u64 host_range_end,
+ u64 host_huge_range_start,
+ u64 host_huge_range_end,
+ u64 dram_range_start,
+ u64 dram_range_end)
{
struct hl_device *hdev = ctx->hdev;
int rc;
+ ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
+ if (!ctx->host_va_range)
+ return -ENOMEM;
+
+ ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
+ GFP_KERNEL);
+ if (!ctx->host_huge_va_range) {
+ rc = -ENOMEM;
+ goto host_huge_va_range_err;
+ }
+
+ ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
+ if (!ctx->dram_va_range) {
+ rc = -ENOMEM;
+ goto dram_va_range_err;
+ }
+
rc = hl_mmu_ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
- return rc;
+ goto mmu_ctx_err;
}
mutex_init(&ctx->mem_hash_lock);
hash_init(ctx->mem_hash);
- mutex_init(&ctx->host_va_range.lock);
+ mutex_init(&ctx->host_va_range->lock);
- rc = hl_va_range_init(hdev, &ctx->host_va_range, host_range_start,
- host_range_end);
+ rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
+ host_range_end);
if (rc) {
dev_err(hdev->dev, "failed to init host vm range\n");
- goto host_vm_err;
+ goto host_page_range_err;
+ }
+
+ if (hdev->pmmu_huge_range) {
+ mutex_init(&ctx->host_huge_va_range->lock);
+
+ rc = va_range_init(hdev, ctx->host_huge_va_range,
+ host_huge_range_start,
+ host_huge_range_end);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed to init host huge vm range\n");
+ goto host_hpage_range_err;
+ }
+ } else {
+ ctx->host_huge_va_range = ctx->host_va_range;
}
- mutex_init(&ctx->dram_va_range.lock);
+ mutex_init(&ctx->dram_va_range->lock);
- rc = hl_va_range_init(hdev, &ctx->dram_va_range, dram_range_start,
+ rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
dram_range_end);
if (rc) {
dev_err(hdev->dev, "failed to init dram vm range\n");
@@ -1537,15 +1607,29 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
return 0;
dram_vm_err:
- mutex_destroy(&ctx->dram_va_range.lock);
+ mutex_destroy(&ctx->dram_va_range->lock);
- mutex_lock(&ctx->host_va_range.lock);
- clear_va_list_locked(hdev, &ctx->host_va_range.list);
- mutex_unlock(&ctx->host_va_range.lock);
-host_vm_err:
- mutex_destroy(&ctx->host_va_range.lock);
+ if (hdev->pmmu_huge_range) {
+ mutex_lock(&ctx->host_huge_va_range->lock);
+ clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
+ mutex_unlock(&ctx->host_huge_va_range->lock);
+ }
+host_hpage_range_err:
+ if (hdev->pmmu_huge_range)
+ mutex_destroy(&ctx->host_huge_va_range->lock);
+ mutex_lock(&ctx->host_va_range->lock);
+ clear_va_list_locked(hdev, &ctx->host_va_range->list);
+ mutex_unlock(&ctx->host_va_range->lock);
+host_page_range_err:
+ mutex_destroy(&ctx->host_va_range->lock);
mutex_destroy(&ctx->mem_hash_lock);
hl_mmu_ctx_fini(ctx);
+mmu_ctx_err:
+ kfree(ctx->dram_va_range);
+dram_va_range_err:
+ kfree(ctx->host_huge_va_range);
+host_huge_va_range_err:
+ kfree(ctx->host_va_range);
return rc;
}
@@ -1553,8 +1637,8 @@ host_vm_err:
int hl_vm_ctx_init(struct hl_ctx *ctx)
{
struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
- u64 host_range_start, host_range_end, dram_range_start,
- dram_range_end;
+ u64 host_range_start, host_range_end, host_huge_range_start,
+ host_huge_range_end, dram_range_start, dram_range_end;
atomic64_set(&ctx->dram_phys_mem, 0);
@@ -1566,38 +1650,26 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
* address of the memory related to the given handle.
*/
if (ctx->hdev->mmu_enable) {
- dram_range_start = prop->va_space_dram_start_address;
- dram_range_end = prop->va_space_dram_end_address;
- host_range_start = prop->va_space_host_start_address;
- host_range_end = prop->va_space_host_end_address;
+ dram_range_start = prop->dmmu.start_addr;
+ dram_range_end = prop->dmmu.end_addr;
+ host_range_start = prop->pmmu.start_addr;
+ host_range_end = prop->pmmu.end_addr;
+ host_huge_range_start = prop->pmmu_huge.start_addr;
+ host_huge_range_end = prop->pmmu_huge.end_addr;
} else {
dram_range_start = prop->dram_user_base_address;
dram_range_end = prop->dram_end_address;
host_range_start = prop->dram_user_base_address;
host_range_end = prop->dram_end_address;
+ host_huge_range_start = prop->dram_user_base_address;
+ host_huge_range_end = prop->dram_end_address;
}
- return hl_vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
- dram_range_start, dram_range_end);
-}
-
-/*
- * hl_va_range_fini - clear a virtual addresses range
- *
- * @hdev : pointer to the habanalabs structure
- * va_range : pointer to virtual addresses range
- *
- * This function does the following:
- * - Frees the virtual addresses block list and its lock
- */
-static void hl_va_range_fini(struct hl_device *hdev,
- struct hl_va_range *va_range)
-{
- mutex_lock(&va_range->lock);
- clear_va_list_locked(hdev, &va_range->list);
- mutex_unlock(&va_range->lock);
-
- mutex_destroy(&va_range->lock);
+ return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
+ host_huge_range_start,
+ host_huge_range_end,
+ dram_range_start,
+ dram_range_end);
}
/*
@@ -1664,8 +1736,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
}
spin_unlock(&vm->idr_lock);
- hl_va_range_fini(hdev, &ctx->dram_va_range);
- hl_va_range_fini(hdev, &ctx->host_va_range);
+ va_range_fini(hdev, ctx->dram_va_range);
+ if (hdev->pmmu_huge_range)
+ va_range_fini(hdev, ctx->host_huge_va_range);
+ va_range_fini(hdev, ctx->host_va_range);
mutex_destroy(&ctx->mem_hash_lock);
hl_mmu_ctx_fini(ctx);
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
index 6262b26e2086..a290d6b49d78 100644
--- a/drivers/misc/habanalabs/mmu.c
+++ b/drivers/misc/habanalabs/mmu.c
@@ -254,6 +254,15 @@ static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
return phys_hop_addr + pte_offset;
}
+static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+ return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+ prop->dmmu.start_addr,
+ prop->dmmu.end_addr);
+}
+
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
@@ -548,6 +557,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
curr_pte;
bool is_huge, clear_hop3 = true;
+ /* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
hop0_addr = get_hop0_addr(ctx);
@@ -637,29 +647,27 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
clear_hop3 = true;
if (!clear_hop3)
- goto flush;
+ goto mapped;
clear_pte(ctx, hop3_pte_addr);
if (put_pte(ctx, hop3_addr))
- goto flush;
+ goto mapped;
clear_pte(ctx, hop2_pte_addr);
if (put_pte(ctx, hop2_addr))
- goto flush;
+ goto mapped;
clear_pte(ctx, hop1_pte_addr);
if (put_pte(ctx, hop1_addr))
- goto flush;
+ goto mapped;
clear_pte(ctx, hop0_pte_addr);
}
-flush:
- flush(ctx);
-
+mapped:
return 0;
not_mapped:
@@ -675,6 +683,7 @@ not_mapped:
* @ctx: pointer to the context structure
* @virt_addr: virt addr to map from
* @page_size: size of the page to unmap
+ * @flush_pte: whether to do a PCI flush
*
* This function does the following:
* - Check that the virt addr is mapped
@@ -685,40 +694,43 @@ not_mapped:
* changes the MMU hash, it must be protected by a lock.
* However, because it maps only a single page, the lock should be implemented
* in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after unmapping of
+ * large area.
*/
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+ bool flush_pte)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr;
u32 real_page_size, npages;
- int i, rc;
+ int i, rc = 0;
bool is_dram_addr;
if (!hdev->mmu_enable)
return 0;
- is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
- prop->va_space_dram_start_address,
- prop->va_space_dram_end_address);
+ is_dram_addr = is_dram_va(hdev, virt_addr);
- mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+ if (is_dram_addr)
+ mmu_prop = &prop->dmmu;
+ else if ((page_size % prop->pmmu_huge.page_size) == 0)
+ mmu_prop = &prop->pmmu_huge;
+ else
+ mmu_prop = &prop->pmmu;
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and unmap them separately.
*/
- if ((page_size % mmu_prop->huge_page_size) == 0) {
- real_page_size = mmu_prop->huge_page_size;
- } else if ((page_size % mmu_prop->page_size) == 0) {
+ if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = mmu_prop->page_size;
} else {
dev_err(hdev->dev,
- "page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
- page_size,
- mmu_prop->page_size >> 10,
- mmu_prop->huge_page_size >> 20);
+ "page size of %u is not %uKB aligned, can't unmap\n",
+ page_size, mmu_prop->page_size >> 10);
return -EFAULT;
}
@@ -729,12 +741,15 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
for (i = 0 ; i < npages ; i++) {
rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
if (rc)
- return rc;
+ break;
real_virt_addr += real_page_size;
}
- return 0;
+ if (flush_pte)
+ flush(ctx);
+
+ return rc;
}
static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
@@ -753,8 +768,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
hop4_new = false, is_huge;
int rc = -ENOMEM;
- mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
/*
* This mapping function can map a page or a huge page. For huge page
* there are only 3 hops rather than 4. Currently the DRAM allocation
@@ -762,11 +775,15 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
* one of the two page sizes. Since this is a common code for all the
* three cases, we need this hugs page check.
*/
- is_huge = page_size == mmu_prop->huge_page_size;
-
- if (is_dram_addr && !is_huge) {
- dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
- return -EFAULT;
+ if (is_dram_addr) {
+ mmu_prop = &prop->dmmu;
+ is_huge = true;
+ } else if (page_size == prop->pmmu_huge.page_size) {
+ mmu_prop = &prop->pmmu_huge;
+ is_huge = true;
+ } else {
+ mmu_prop = &prop->pmmu;
+ is_huge = false;
}
hop0_addr = get_hop0_addr(ctx);
@@ -885,8 +902,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
get_pte(ctx, hop3_addr);
}
- flush(ctx);
-
return 0;
err:
@@ -909,6 +924,7 @@ err:
* @virt_addr: virt addr to map from
* @phys_addr: phys addr to map to
* @page_size: physical page size
+ * @flush_pte: whether to do a PCI flush
*
* This function does the following:
* - Check that the virt addr is not mapped
@@ -919,8 +935,12 @@ err:
* changes the MMU hash, it must be protected by a lock.
* However, because it maps only a single page, the lock should be implemented
* in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after mapping of
+ * large area.
*/
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
+ bool flush_pte)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -933,26 +953,25 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
if (!hdev->mmu_enable)
return 0;
- is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
- prop->va_space_dram_start_address,
- prop->va_space_dram_end_address);
+ is_dram_addr = is_dram_va(hdev, virt_addr);
- mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+ if (is_dram_addr)
+ mmu_prop = &prop->dmmu;
+ else if ((page_size % prop->pmmu_huge.page_size) == 0)
+ mmu_prop = &prop->pmmu_huge;
+ else
+ mmu_prop = &prop->pmmu;
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and map them separately.
*/
- if ((page_size % mmu_prop->huge_page_size) == 0) {
- real_page_size = mmu_prop->huge_page_size;
- } else if ((page_size % mmu_prop->page_size) == 0) {
+ if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = mmu_prop->page_size;
} else {
dev_err(hdev->dev,
- "page size of %u is not %dKB nor %dMB aligned, can't unmap\n",
- page_size,
- mmu_prop->page_size >> 10,
- mmu_prop->huge_page_size >> 20);
+ "page size of %u is not %uKB aligned, can't unmap\n",
+ page_size, mmu_prop->page_size >> 10);
return -EFAULT;
}
@@ -976,6 +995,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
mapped_cnt++;
}
+ if (flush_pte)
+ flush(ctx);
+
return 0;
err:
@@ -988,6 +1010,8 @@ err:
real_virt_addr += real_page_size;
}
+ flush(ctx);
+
return rc;
}
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index de87693cf557..886459e0ddd9 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -11,6 +11,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
#include <linux/uaccess.h>
+#include <linux/slab.h>
#ifdef CONFIG_X86_32
#include <asm/desc.h>
@@ -175,6 +176,80 @@ void lkdtm_HUNG_TASK(void)
schedule();
}
+volatile unsigned int huge = INT_MAX - 2;
+volatile unsigned int ignored;
+
+void lkdtm_OVERFLOW_SIGNED(void)
+{
+ int value;
+
+ value = huge;
+ pr_info("Normal signed addition ...\n");
+ value += 1;
+ ignored = value;
+
+ pr_info("Overflowing signed addition ...\n");
+ value += 4;
+ ignored = value;
+}
+
+
+void lkdtm_OVERFLOW_UNSIGNED(void)
+{
+ unsigned int value;
+
+ value = huge;
+ pr_info("Normal unsigned addition ...\n");
+ value += 1;
+ ignored = value;
+
+ pr_info("Overflowing unsigned addition ...\n");
+ value += 4;
+ ignored = value;
+}
+
+/* Intentially using old-style flex array definition of 1 byte. */
+struct array_bounds_flex_array {
+ int one;
+ int two;
+ char data[1];
+};
+
+struct array_bounds {
+ int one;
+ int two;
+ char data[8];
+ int three;
+};
+
+void lkdtm_ARRAY_BOUNDS(void)
+{
+ struct array_bounds_flex_array *not_checked;
+ struct array_bounds *checked;
+ volatile int i;
+
+ not_checked = kmalloc(sizeof(*not_checked) * 2, GFP_KERNEL);
+ checked = kmalloc(sizeof(*checked) * 2, GFP_KERNEL);
+
+ pr_info("Array access within bounds ...\n");
+ /* For both, touch all bytes in the actual member size. */
+ for (i = 0; i < sizeof(checked->data); i++)
+ checked->data[i] = 'A';
+ /*
+ * For the uninstrumented flex array member, also touch 1 byte
+ * beyond to verify it is correctly uninstrumented.
+ */
+ for (i = 0; i < sizeof(not_checked->data) + 1; i++)
+ not_checked->data[i] = 'A';
+
+ pr_info("Array access beyond bounds ...\n");
+ for (i = 0; i < sizeof(checked->data) + 1; i++)
+ checked->data[i] = 'B';
+
+ kfree(not_checked);
+ kfree(checked);
+}
+
void lkdtm_CORRUPT_LIST_ADD(void)
{
/*
@@ -378,3 +453,39 @@ void lkdtm_DOUBLE_FAULT(void)
pr_err("XFAIL: this test is ia32-only\n");
#endif
}
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+static noinline void change_pac_parameters(void)
+{
+ /* Reset the keys of current task */
+ ptrauth_thread_init_kernel(current);
+ ptrauth_thread_switch_kernel(current);
+}
+
+#define CORRUPT_PAC_ITERATE 10
+noinline void lkdtm_CORRUPT_PAC(void)
+{
+ int i;
+
+ if (!system_supports_address_auth()) {
+ pr_err("FAIL: arm64 pointer authentication feature not present\n");
+ return;
+ }
+
+ pr_info("Change the PAC parameters to force function return failure\n");
+ /*
+ * Pac is a hash value computed from input keys, return address and
+ * stack pointer. As pac has fewer bits so there is a chance of
+ * collision, so iterate few times to reduce the collision probability.
+ */
+ for (i = 0; i < CORRUPT_PAC_ITERATE; i++)
+ change_pac_parameters();
+
+ pr_err("FAIL: %s test failed. Kernel may be unstable from here\n", __func__);
+}
+#else /* !CONFIG_ARM64_PTR_AUTH */
+noinline void lkdtm_CORRUPT_PAC(void)
+{
+ pr_err("FAIL: arm64 pointer authentication config disabled\n");
+}
+#endif
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index ee0d6e721441..a5e344df9166 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -116,6 +116,7 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(STACK_GUARD_PAGE_LEADING),
CRASHTYPE(STACK_GUARD_PAGE_TRAILING),
CRASHTYPE(UNSET_SMEP),
+ CRASHTYPE(CORRUPT_PAC),
CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE),
CRASHTYPE(OVERWRITE_ALLOCATION),
CRASHTYPE(WRITE_AFTER_FREE),
@@ -129,6 +130,9 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(HARDLOCKUP),
CRASHTYPE(SPINLOCKUP),
CRASHTYPE(HUNG_TASK),
+ CRASHTYPE(OVERFLOW_SIGNED),
+ CRASHTYPE(OVERFLOW_UNSIGNED),
+ CRASHTYPE(ARRAY_BOUNDS),
CRASHTYPE(EXEC_DATA),
CRASHTYPE(EXEC_STACK),
CRASHTYPE(EXEC_KMALLOC),
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index c56d23e37643..601a2156a0d4 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -22,6 +22,9 @@ void lkdtm_SOFTLOCKUP(void);
void lkdtm_HARDLOCKUP(void);
void lkdtm_SPINLOCKUP(void);
void lkdtm_HUNG_TASK(void);
+void lkdtm_OVERFLOW_SIGNED(void);
+void lkdtm_OVERFLOW_UNSIGNED(void);
+void lkdtm_ARRAY_BOUNDS(void);
void lkdtm_CORRUPT_LIST_ADD(void);
void lkdtm_CORRUPT_LIST_DEL(void);
void lkdtm_CORRUPT_USER_DS(void);
@@ -31,6 +34,7 @@ void lkdtm_UNSET_SMEP(void);
#ifdef CONFIG_X86_32
void lkdtm_DOUBLE_FAULT(void);
#endif
+void lkdtm_CORRUPT_PAC(void);
/* lkdtm_heap.c */
void __init lkdtm_heap_init(void);
diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c
index d5a084475abc..d1a5c0705be3 100644
--- a/drivers/misc/lkdtm/stackleak.c
+++ b/drivers/misc/lkdtm/stackleak.c
@@ -16,6 +16,7 @@ void lkdtm_STACKLEAK_ERASING(void)
unsigned long *sp, left, found, i;
const unsigned long check_depth =
STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
+ bool test_failed = false;
/*
* For the details about the alignment of the poison values, see
@@ -34,7 +35,8 @@ void lkdtm_STACKLEAK_ERASING(void)
left--;
} else {
pr_err("FAIL: not enough stack space for the test\n");
- return;
+ test_failed = true;
+ goto end;
}
pr_info("checking unused part of the thread stack (%lu bytes)...\n",
@@ -52,22 +54,29 @@ void lkdtm_STACKLEAK_ERASING(void)
}
if (found <= check_depth) {
- pr_err("FAIL: thread stack is not erased (checked %lu bytes)\n",
+ pr_err("FAIL: the erased part is not found (checked %lu bytes)\n",
i * sizeof(unsigned long));
- return;
+ test_failed = true;
+ goto end;
}
- pr_info("first %lu bytes are unpoisoned\n",
+ pr_info("the erased part begins after %lu not poisoned bytes\n",
(i - found) * sizeof(unsigned long));
/* The rest of thread stack should be erased */
for (; i < left; i++) {
if (*(sp - i) != STACKLEAK_POISON) {
- pr_err("FAIL: thread stack is NOT properly erased\n");
- return;
+ pr_err("FAIL: bad value number %lu in the erased part: 0x%lx\n",
+ i, *(sp - i));
+ test_failed = true;
}
}
- pr_info("OK: the rest of the thread stack is properly erased\n");
- return;
+end:
+ if (test_failed) {
+ pr_err("FAIL: the thread stack is NOT properly erased\n");
+ dump_stack();
+ } else {
+ pr_info("OK: the rest of the thread stack is properly erased\n");
+ }
}
diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index 9ad9c01ddf41..910f059b3384 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -91,7 +91,7 @@ struct mkhi_rule_id {
struct mkhi_fwcaps {
struct mkhi_rule_id id;
u8 len;
- u8 data[0];
+ u8 data[];
} __packed;
struct mkhi_fw_ver_block {
@@ -119,7 +119,7 @@ struct mkhi_msg_hdr {
struct mkhi_msg {
struct mkhi_msg_hdr hdr;
- u8 data[0];
+ u8 data[];
} __packed;
#define MKHI_OSVER_BUF_LEN (sizeof(struct mkhi_msg_hdr) + \
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index 1e3edbbacb1e..204d807e755b 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -1585,7 +1585,7 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
goto err;
}
- hbuf_len = mei_slots2data(hbuf_slots);
+ hbuf_len = mei_slots2data(hbuf_slots) & MEI_MSG_MAX_LEN_MASK;
dr_slots = mei_dma_ring_empty_slots(dev);
dr_len = mei_slots2data(dr_slots);
@@ -1718,7 +1718,7 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb)
goto out;
}
- hbuf_len = mei_slots2data(hbuf_slots);
+ hbuf_len = mei_slots2data(hbuf_slots) & MEI_MSG_MAX_LEN_MASK;
dr_slots = mei_dma_ring_empty_slots(dev);
dr_len = mei_slots2data(dr_slots);
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
index 87a0201ba6b3..9392934e3a06 100644
--- a/drivers/misc/mei/hw-me-regs.h
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -75,9 +75,9 @@
#define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */
#define MEI_DEV_ID_CNP_LP 0x9DE0 /* Cannon Point LP */
-#define MEI_DEV_ID_CNP_LP_4 0x9DE4 /* Cannon Point LP 4 (iTouch) */
+#define MEI_DEV_ID_CNP_LP_3 0x9DE4 /* Cannon Point LP 3 (iTouch) */
#define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */
-#define MEI_DEV_ID_CNP_H_4 0xA364 /* Cannon Point H 4 (iTouch) */
+#define MEI_DEV_ID_CNP_H_3 0xA364 /* Cannon Point H 3 (iTouch) */
#define MEI_DEV_ID_CMP_LP 0x02e0 /* Comet Point LP */
#define MEI_DEV_ID_CMP_LP_3 0x02e4 /* Comet Point LP 3 (iTouch) */
@@ -87,6 +87,8 @@
#define MEI_DEV_ID_CMP_H 0x06e0 /* Comet Lake H */
#define MEI_DEV_ID_CMP_H_3 0x06e4 /* Comet Lake H 3 (iTouch) */
+#define MEI_DEV_ID_CDF 0x18D3 /* Cedar Fork */
+
#define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */
#define MEI_DEV_ID_JSP_N 0x4DE0 /* Jasper Lake Point N */
diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h
index d025a5f8317e..b1a8d5ec88b3 100644
--- a/drivers/misc/mei/hw.h
+++ b/drivers/misc/mei/hw.h
@@ -209,11 +209,14 @@ struct mei_msg_hdr {
u32 extension[0];
} __packed;
+/* The length is up to 9 bits */
+#define MEI_MSG_MAX_LEN_MASK GENMASK(9, 0)
+
#define MEI_MSG_HDR_MAX 2
struct mei_bus_message {
u8 hbm_cmd;
- u8 data[0];
+ u8 data[];
} __packed;
/**
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index 76f8ff5ff974..3a29db07211d 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -533,7 +533,7 @@ struct mei_device {
#endif /* CONFIG_DEBUG_FS */
const struct mei_hw_ops *ops;
- char hw[0] __aligned(sizeof(void *));
+ char hw[] __aligned(sizeof(void *));
};
static inline unsigned long mei_secs_to_jiffies(unsigned long sec)
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index 2711451b3d87..3d21c38e2dbb 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -1,25 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2003-2019, Intel Corporation. All rights reserved.
+ * Copyright (c) 2003-2020, Intel Corporation. All rights reserved.
* Intel Management Engine Interface (Intel MEI) Linux driver
*/
#include <linux/module.h>
-#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/device.h>
-#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/types.h>
-#include <linux/fcntl.h>
#include <linux/pci.h>
-#include <linux/poll.h>
-#include <linux/ioctl.h>
-#include <linux/cdev.h>
#include <linux/sched.h>
-#include <linux/uuid.h>
-#include <linux/compat.h>
-#include <linux/jiffies.h>
#include <linux/interrupt.h>
#include <linux/pm_domain.h>
@@ -92,9 +83,9 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
{MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH12_CFG)},
- {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_4, MEI_ME_PCH8_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_3, MEI_ME_PCH8_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_CFG)},
- {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_3, MEI_ME_PCH8_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)},
@@ -111,6 +102,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
{MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CDF, MEI_ME_PCH8_CFG)},
+
/* required last entry */
{0, }
};
diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c
index f1c16a587495..beacf2a2f2b5 100644
--- a/drivers/misc/mei/pci-txe.c
+++ b/drivers/misc/mei/pci-txe.c
@@ -1,20 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2013-2017, Intel Corporation. All rights reserved.
+ * Copyright (c) 2013-2020, Intel Corporation. All rights reserved.
* Intel Management Engine Interface (Intel MEI) Linux driver
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/device.h>
-#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/sched.h>
-#include <linux/uuid.h>
-#include <linux/jiffies.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <linux/pm_domain.h>
diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
index b6841ba6d922..8f201d019f5a 100644
--- a/drivers/misc/mic/Kconfig
+++ b/drivers/misc/mic/Kconfig
@@ -133,8 +133,4 @@ config VOP
OS and tools for MIC to use with this driver are available from
<http://software.intel.com/en-us/mic-developer>.
-if VOP
-source "drivers/vhost/Kconfig.vringh"
-endif
-
endmenu
diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c
index 4f2d9212432c..fb5b3989753d 100644
--- a/drivers/misc/mic/host/mic_boot.c
+++ b/drivers/misc/mic/host/mic_boot.c
@@ -137,7 +137,7 @@ static void *__mic_dma_alloc(struct device *dev, size_t size,
struct scif_hw_dev *scdev = dev_get_drvdata(dev);
struct mic_device *mdev = scdev_to_mdev(scdev);
dma_addr_t tmp;
- void *va = kmalloc(size, gfp | __GFP_ZERO);
+ void *va = kzalloc(size, gfp);
if (va) {
tmp = mic_map_single(mdev, va, size);
diff --git a/drivers/misc/mic/host/mic_x100.c b/drivers/misc/mic/host/mic_x100.c
index a7743312da9c..d18cda966912 100644
--- a/drivers/misc/mic/host/mic_x100.c
+++ b/drivers/misc/mic/host/mic_x100.c
@@ -350,10 +350,10 @@ mic_x100_load_command_line(struct mic_device *mdev, const struct firmware *fw)
if (!buf)
return -ENOMEM;
- len += snprintf(buf, CMDLINE_SIZE - len,
+ len += scnprintf(buf, CMDLINE_SIZE - len,
" mem=%dM", boot_mem);
if (mdev->cosm_dev->cmdline)
- snprintf(buf + len, CMDLINE_SIZE - len, " %s",
+ scnprintf(buf + len, CMDLINE_SIZE - len, " %s",
mdev->cosm_dev->cmdline);
memcpy_toio(cmd_line_va, buf, strlen(buf) + 1);
kfree(buf);
diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c
index a5e317073d95..ef5a1af6bab7 100644
--- a/drivers/misc/pci_endpoint_test.c
+++ b/drivers/misc/pci_endpoint_test.c
@@ -17,6 +17,7 @@
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/slab.h>
+#include <linux/uaccess.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
@@ -64,6 +65,9 @@
#define PCI_ENDPOINT_TEST_IRQ_TYPE 0x24
#define PCI_ENDPOINT_TEST_IRQ_NUMBER 0x28
+#define PCI_ENDPOINT_TEST_FLAGS 0x2c
+#define FLAG_USE_DMA BIT(0)
+
#define PCI_DEVICE_ID_TI_AM654 0xb00c
#define is_am654_pci_dev(pdev) \
@@ -98,11 +102,13 @@ struct pci_endpoint_test {
struct completion irq_raised;
int last_irq;
int num_irqs;
+ int irq_type;
/* mutex to protect the ioctls */
struct mutex mutex;
struct miscdevice miscdev;
enum pci_barno test_reg_bar;
size_t alignment;
+ const char *name;
};
struct pci_endpoint_test_data {
@@ -157,6 +163,7 @@ static void pci_endpoint_test_free_irq_vectors(struct pci_endpoint_test *test)
struct pci_dev *pdev = test->pdev;
pci_free_irq_vectors(pdev);
+ test->irq_type = IRQ_TYPE_UNDEFINED;
}
static bool pci_endpoint_test_alloc_irq_vectors(struct pci_endpoint_test *test,
@@ -191,6 +198,8 @@ static bool pci_endpoint_test_alloc_irq_vectors(struct pci_endpoint_test *test,
irq = 0;
res = false;
}
+
+ test->irq_type = type;
test->num_irqs = irq;
return res;
@@ -218,7 +227,7 @@ static bool pci_endpoint_test_request_irq(struct pci_endpoint_test *test)
for (i = 0; i < test->num_irqs; i++) {
err = devm_request_irq(dev, pci_irq_vector(pdev, i),
pci_endpoint_test_irqhandler,
- IRQF_SHARED, DRV_MODULE_NAME, test);
+ IRQF_SHARED, test->name, test);
if (err)
goto fail;
}
@@ -315,11 +324,16 @@ static bool pci_endpoint_test_msi_irq(struct pci_endpoint_test *test,
return false;
}
-static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size)
+static bool pci_endpoint_test_copy(struct pci_endpoint_test *test,
+ unsigned long arg)
{
+ struct pci_endpoint_test_xfer_param param;
bool ret = false;
void *src_addr;
void *dst_addr;
+ u32 flags = 0;
+ bool use_dma;
+ size_t size;
dma_addr_t src_phys_addr;
dma_addr_t dst_phys_addr;
struct pci_dev *pdev = test->pdev;
@@ -330,25 +344,46 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size)
dma_addr_t orig_dst_phys_addr;
size_t offset;
size_t alignment = test->alignment;
+ int irq_type = test->irq_type;
u32 src_crc32;
u32 dst_crc32;
+ int err;
+ err = copy_from_user(&param, (void __user *)arg, sizeof(param));
+ if (err) {
+ dev_err(dev, "Failed to get transfer param\n");
+ return false;
+ }
+
+ size = param.size;
if (size > SIZE_MAX - alignment)
goto err;
+ use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA);
+ if (use_dma)
+ flags |= FLAG_USE_DMA;
+
if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) {
dev_err(dev, "Invalid IRQ type option\n");
goto err;
}
- orig_src_addr = dma_alloc_coherent(dev, size + alignment,
- &orig_src_phys_addr, GFP_KERNEL);
+ orig_src_addr = kzalloc(size + alignment, GFP_KERNEL);
if (!orig_src_addr) {
dev_err(dev, "Failed to allocate source buffer\n");
ret = false;
goto err;
}
+ get_random_bytes(orig_src_addr, size + alignment);
+ orig_src_phys_addr = dma_map_single(dev, orig_src_addr,
+ size + alignment, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, orig_src_phys_addr)) {
+ dev_err(dev, "failed to map source buffer address\n");
+ ret = false;
+ goto err_src_phys_addr;
+ }
+
if (alignment && !IS_ALIGNED(orig_src_phys_addr, alignment)) {
src_phys_addr = PTR_ALIGN(orig_src_phys_addr, alignment);
offset = src_phys_addr - orig_src_phys_addr;
@@ -364,15 +399,21 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size)
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_SRC_ADDR,
upper_32_bits(src_phys_addr));
- get_random_bytes(src_addr, size);
src_crc32 = crc32_le(~0, src_addr, size);
- orig_dst_addr = dma_alloc_coherent(dev, size + alignment,
- &orig_dst_phys_addr, GFP_KERNEL);
+ orig_dst_addr = kzalloc(size + alignment, GFP_KERNEL);
if (!orig_dst_addr) {
dev_err(dev, "Failed to allocate destination address\n");
ret = false;
- goto err_orig_src_addr;
+ goto err_dst_addr;
+ }
+
+ orig_dst_phys_addr = dma_map_single(dev, orig_dst_addr,
+ size + alignment, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, orig_dst_phys_addr)) {
+ dev_err(dev, "failed to map destination buffer address\n");
+ ret = false;
+ goto err_dst_phys_addr;
}
if (alignment && !IS_ALIGNED(orig_dst_phys_addr, alignment)) {
@@ -392,6 +433,7 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size)
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE,
size);
+ pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_FLAGS, flags);
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type);
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1);
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND,
@@ -399,24 +441,34 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size)
wait_for_completion(&test->irq_raised);
+ dma_unmap_single(dev, orig_dst_phys_addr, size + alignment,
+ DMA_FROM_DEVICE);
+
dst_crc32 = crc32_le(~0, dst_addr, size);
if (dst_crc32 == src_crc32)
ret = true;
- dma_free_coherent(dev, size + alignment, orig_dst_addr,
- orig_dst_phys_addr);
+err_dst_phys_addr:
+ kfree(orig_dst_addr);
-err_orig_src_addr:
- dma_free_coherent(dev, size + alignment, orig_src_addr,
- orig_src_phys_addr);
+err_dst_addr:
+ dma_unmap_single(dev, orig_src_phys_addr, size + alignment,
+ DMA_TO_DEVICE);
+
+err_src_phys_addr:
+ kfree(orig_src_addr);
err:
return ret;
}
-static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size)
+static bool pci_endpoint_test_write(struct pci_endpoint_test *test,
+ unsigned long arg)
{
+ struct pci_endpoint_test_xfer_param param;
bool ret = false;
+ u32 flags = 0;
+ bool use_dma;
u32 reg;
void *addr;
dma_addr_t phys_addr;
@@ -426,24 +478,47 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size)
dma_addr_t orig_phys_addr;
size_t offset;
size_t alignment = test->alignment;
+ int irq_type = test->irq_type;
+ size_t size;
u32 crc32;
+ int err;
+
+ err = copy_from_user(&param, (void __user *)arg, sizeof(param));
+ if (err != 0) {
+ dev_err(dev, "Failed to get transfer param\n");
+ return false;
+ }
+ size = param.size;
if (size > SIZE_MAX - alignment)
goto err;
+ use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA);
+ if (use_dma)
+ flags |= FLAG_USE_DMA;
+
if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) {
dev_err(dev, "Invalid IRQ type option\n");
goto err;
}
- orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr,
- GFP_KERNEL);
+ orig_addr = kzalloc(size + alignment, GFP_KERNEL);
if (!orig_addr) {
dev_err(dev, "Failed to allocate address\n");
ret = false;
goto err;
}
+ get_random_bytes(orig_addr, size + alignment);
+
+ orig_phys_addr = dma_map_single(dev, orig_addr, size + alignment,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, orig_phys_addr)) {
+ dev_err(dev, "failed to map source buffer address\n");
+ ret = false;
+ goto err_phys_addr;
+ }
+
if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) {
phys_addr = PTR_ALIGN(orig_phys_addr, alignment);
offset = phys_addr - orig_phys_addr;
@@ -453,8 +528,6 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size)
addr = orig_addr;
}
- get_random_bytes(addr, size);
-
crc32 = crc32_le(~0, addr, size);
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_CHECKSUM,
crc32);
@@ -466,6 +539,7 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size)
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size);
+ pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_FLAGS, flags);
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type);
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1);
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND,
@@ -477,15 +551,24 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size)
if (reg & STATUS_READ_SUCCESS)
ret = true;
- dma_free_coherent(dev, size + alignment, orig_addr, orig_phys_addr);
+ dma_unmap_single(dev, orig_phys_addr, size + alignment,
+ DMA_TO_DEVICE);
+
+err_phys_addr:
+ kfree(orig_addr);
err:
return ret;
}
-static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size)
+static bool pci_endpoint_test_read(struct pci_endpoint_test *test,
+ unsigned long arg)
{
+ struct pci_endpoint_test_xfer_param param;
bool ret = false;
+ u32 flags = 0;
+ bool use_dma;
+ size_t size;
void *addr;
dma_addr_t phys_addr;
struct pci_dev *pdev = test->pdev;
@@ -494,24 +577,44 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size)
dma_addr_t orig_phys_addr;
size_t offset;
size_t alignment = test->alignment;
+ int irq_type = test->irq_type;
u32 crc32;
+ int err;
+
+ err = copy_from_user(&param, (void __user *)arg, sizeof(param));
+ if (err) {
+ dev_err(dev, "Failed to get transfer param\n");
+ return false;
+ }
+ size = param.size;
if (size > SIZE_MAX - alignment)
goto err;
+ use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA);
+ if (use_dma)
+ flags |= FLAG_USE_DMA;
+
if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) {
dev_err(dev, "Invalid IRQ type option\n");
goto err;
}
- orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr,
- GFP_KERNEL);
+ orig_addr = kzalloc(size + alignment, GFP_KERNEL);
if (!orig_addr) {
dev_err(dev, "Failed to allocate destination address\n");
ret = false;
goto err;
}
+ orig_phys_addr = dma_map_single(dev, orig_addr, size + alignment,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, orig_phys_addr)) {
+ dev_err(dev, "failed to map source buffer address\n");
+ ret = false;
+ goto err_phys_addr;
+ }
+
if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) {
phys_addr = PTR_ALIGN(orig_phys_addr, alignment);
offset = phys_addr - orig_phys_addr;
@@ -528,6 +631,7 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size)
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size);
+ pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_FLAGS, flags);
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type);
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1);
pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND,
@@ -535,15 +639,26 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size)
wait_for_completion(&test->irq_raised);
+ dma_unmap_single(dev, orig_phys_addr, size + alignment,
+ DMA_FROM_DEVICE);
+
crc32 = crc32_le(~0, addr, size);
if (crc32 == pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_CHECKSUM))
ret = true;
- dma_free_coherent(dev, size + alignment, orig_addr, orig_phys_addr);
+err_phys_addr:
+ kfree(orig_addr);
err:
return ret;
}
+static bool pci_endpoint_test_clear_irq(struct pci_endpoint_test *test)
+{
+ pci_endpoint_test_release_irq(test);
+ pci_endpoint_test_free_irq_vectors(test);
+ return true;
+}
+
static bool pci_endpoint_test_set_irq(struct pci_endpoint_test *test,
int req_irq_type)
{
@@ -555,7 +670,7 @@ static bool pci_endpoint_test_set_irq(struct pci_endpoint_test *test,
return false;
}
- if (irq_type == req_irq_type)
+ if (test->irq_type == req_irq_type)
return true;
pci_endpoint_test_release_irq(test);
@@ -567,12 +682,10 @@ static bool pci_endpoint_test_set_irq(struct pci_endpoint_test *test,
if (!pci_endpoint_test_request_irq(test))
goto err;
- irq_type = req_irq_type;
return true;
err:
pci_endpoint_test_free_irq_vectors(test);
- irq_type = IRQ_TYPE_UNDEFINED;
return false;
}
@@ -616,6 +729,9 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd,
case PCITEST_GET_IRQTYPE:
ret = irq_type;
break;
+ case PCITEST_CLEAR_IRQ:
+ ret = pci_endpoint_test_clear_irq(test);
+ break;
}
ret:
@@ -633,7 +749,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
{
int err;
int id;
- char name[20];
+ char name[24];
enum pci_barno bar;
void __iomem *base;
struct device *dev = &pdev->dev;
@@ -652,6 +768,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
test->test_reg_bar = 0;
test->alignment = 0;
test->pdev = pdev;
+ test->irq_type = IRQ_TYPE_UNDEFINED;
if (no_msi)
irq_type = IRQ_TYPE_LEGACY;
@@ -667,6 +784,12 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
init_completion(&test->irq_raised);
mutex_init(&test->mutex);
+ if ((dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)) != 0) &&
+ dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)) != 0) {
+ dev_err(dev, "Cannot set DMA mask\n");
+ return -EINVAL;
+ }
+
err = pci_enable_device(pdev);
if (err) {
dev_err(dev, "Cannot enable PCI device\n");
@@ -684,9 +807,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
if (!pci_endpoint_test_alloc_irq_vectors(test, irq_type))
goto err_disable_irq;
- if (!pci_endpoint_test_request_irq(test))
- goto err_disable_irq;
-
for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
base = pci_ioremap_bar(pdev, bar);
@@ -716,12 +836,21 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
}
snprintf(name, sizeof(name), DRV_MODULE_NAME ".%d", id);
+ test->name = kstrdup(name, GFP_KERNEL);
+ if (!test->name) {
+ err = -ENOMEM;
+ goto err_ida_remove;
+ }
+
+ if (!pci_endpoint_test_request_irq(test))
+ goto err_kfree_test_name;
+
misc_device = &test->miscdev;
misc_device->minor = MISC_DYNAMIC_MINOR;
misc_device->name = kstrdup(name, GFP_KERNEL);
if (!misc_device->name) {
err = -ENOMEM;
- goto err_ida_remove;
+ goto err_release_irq;
}
misc_device->fops = &pci_endpoint_test_fops,
@@ -736,6 +865,12 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
err_kfree_name:
kfree(misc_device->name);
+err_release_irq:
+ pci_endpoint_test_release_irq(test);
+
+err_kfree_test_name:
+ kfree(test->name);
+
err_ida_remove:
ida_simple_remove(&pci_endpoint_test_ida, id);
@@ -744,7 +879,6 @@ err_iounmap:
if (test->bar[bar])
pci_iounmap(pdev, test->bar[bar]);
}
- pci_endpoint_test_release_irq(test);
err_disable_irq:
pci_endpoint_test_free_irq_vectors(test);
@@ -770,6 +904,7 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev)
misc_deregister(&test->miscdev);
kfree(misc_device->name);
+ kfree(test->name);
ida_simple_remove(&pci_endpoint_test_ida, id);
for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
if (test->bar[bar])
@@ -783,6 +918,12 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev)
pci_disable_device(pdev);
}
+static const struct pci_endpoint_test_data default_data = {
+ .test_reg_bar = BAR_0,
+ .alignment = SZ_4K,
+ .irq_type = IRQ_TYPE_MSI,
+};
+
static const struct pci_endpoint_test_data am654_data = {
.test_reg_bar = BAR_2,
.alignment = SZ_64K,
@@ -790,8 +931,12 @@ static const struct pci_endpoint_test_data am654_data = {
};
static const struct pci_device_id pci_endpoint_test_tbl[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA74x) },
- { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA72x) },
+ { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA74x),
+ .driver_data = (kernel_ulong_t)&default_data,
+ },
+ { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA72x),
+ .driver_data = (kernel_ulong_t)&default_data,
+ },
{ PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, 0x81c0) },
{ PCI_DEVICE_DATA(SYNOPSYS, EDDA, NULL) },
{ PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_AM654),
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
index e77d1b1f9d05..85c103923632 100644
--- a/drivers/misc/sgi-gru/grulib.h
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -136,7 +136,7 @@ struct gru_dump_context_header {
pid_t pid;
unsigned long vaddr;
int cch_locked;
- unsigned long data[0];
+ unsigned long data[];
};
/*
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
index a7e44b2eb413..5ce8f3081e96 100644
--- a/drivers/misc/sgi-gru/grutables.h
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -372,7 +372,7 @@ struct gru_thread_state {
int ts_data_valid; /* Indicates if ts_gdata has
valid data */
struct gru_gseg_statistics ustats; /* User statistics */
- unsigned long ts_gdata[0]; /* save area for GRU data (CB,
+ unsigned long ts_gdata[]; /* save area for GRU data (CB,
DS, CBE) */
};
diff --git a/drivers/misc/uacce/Kconfig b/drivers/misc/uacce/Kconfig
new file mode 100644
index 000000000000..5e39b6083b0f
--- /dev/null
+++ b/drivers/misc/uacce/Kconfig
@@ -0,0 +1,13 @@
+config UACCE
+ tristate "Accelerator Framework for User Land"
+ depends on IOMMU_API
+ help
+ UACCE provides interface for the user process to access the hardware
+ without interaction with the kernel space in data path.
+
+ The user-space interface is described in
+ include/uapi/misc/uacce/uacce.h
+
+ See Documentation/misc-devices/uacce.rst for more details.
+
+ If you don't know what to do here, say N.
diff --git a/drivers/misc/uacce/Makefile b/drivers/misc/uacce/Makefile
new file mode 100644
index 000000000000..5b4374e8b5f2
--- /dev/null
+++ b/drivers/misc/uacce/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+obj-$(CONFIG_UACCE) += uacce.o
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
new file mode 100644
index 000000000000..d39307f060bd
--- /dev/null
+++ b/drivers/misc/uacce/uacce.c
@@ -0,0 +1,633 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/compat.h>
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/uacce.h>
+
+static struct class *uacce_class;
+static dev_t uacce_devt;
+static DEFINE_MUTEX(uacce_mutex);
+static DEFINE_XARRAY_ALLOC(uacce_xa);
+
+static int uacce_start_queue(struct uacce_queue *q)
+{
+ int ret = 0;
+
+ mutex_lock(&uacce_mutex);
+
+ if (q->state != UACCE_Q_INIT) {
+ ret = -EINVAL;
+ goto out_with_lock;
+ }
+
+ if (q->uacce->ops->start_queue) {
+ ret = q->uacce->ops->start_queue(q);
+ if (ret < 0)
+ goto out_with_lock;
+ }
+
+ q->state = UACCE_Q_STARTED;
+
+out_with_lock:
+ mutex_unlock(&uacce_mutex);
+
+ return ret;
+}
+
+static int uacce_put_queue(struct uacce_queue *q)
+{
+ struct uacce_device *uacce = q->uacce;
+
+ mutex_lock(&uacce_mutex);
+
+ if (q->state == UACCE_Q_ZOMBIE)
+ goto out;
+
+ if ((q->state == UACCE_Q_STARTED) && uacce->ops->stop_queue)
+ uacce->ops->stop_queue(q);
+
+ if ((q->state == UACCE_Q_INIT || q->state == UACCE_Q_STARTED) &&
+ uacce->ops->put_queue)
+ uacce->ops->put_queue(q);
+
+ q->state = UACCE_Q_ZOMBIE;
+out:
+ mutex_unlock(&uacce_mutex);
+
+ return 0;
+}
+
+static long uacce_fops_unl_ioctl(struct file *filep,
+ unsigned int cmd, unsigned long arg)
+{
+ struct uacce_queue *q = filep->private_data;
+ struct uacce_device *uacce = q->uacce;
+
+ switch (cmd) {
+ case UACCE_CMD_START_Q:
+ return uacce_start_queue(q);
+
+ case UACCE_CMD_PUT_Q:
+ return uacce_put_queue(q);
+
+ default:
+ if (!uacce->ops->ioctl)
+ return -EINVAL;
+
+ return uacce->ops->ioctl(q, cmd, arg);
+ }
+}
+
+#ifdef CONFIG_COMPAT
+static long uacce_fops_compat_ioctl(struct file *filep,
+ unsigned int cmd, unsigned long arg)
+{
+ arg = (unsigned long)compat_ptr(arg);
+
+ return uacce_fops_unl_ioctl(filep, cmd, arg);
+}
+#endif
+
+static int uacce_sva_exit(struct device *dev, struct iommu_sva *handle,
+ void *data)
+{
+ struct uacce_mm *uacce_mm = data;
+ struct uacce_queue *q;
+
+ /*
+ * No new queue can be added concurrently because no caller can have a
+ * reference to this mm. But there may be concurrent calls to
+ * uacce_mm_put(), so we need the lock.
+ */
+ mutex_lock(&uacce_mm->lock);
+ list_for_each_entry(q, &uacce_mm->queues, list)
+ uacce_put_queue(q);
+ uacce_mm->mm = NULL;
+ mutex_unlock(&uacce_mm->lock);
+
+ return 0;
+}
+
+static struct iommu_sva_ops uacce_sva_ops = {
+ .mm_exit = uacce_sva_exit,
+};
+
+static struct uacce_mm *uacce_mm_get(struct uacce_device *uacce,
+ struct uacce_queue *q,
+ struct mm_struct *mm)
+{
+ struct uacce_mm *uacce_mm = NULL;
+ struct iommu_sva *handle = NULL;
+ int ret;
+
+ lockdep_assert_held(&uacce->mm_lock);
+
+ list_for_each_entry(uacce_mm, &uacce->mm_list, list) {
+ if (uacce_mm->mm == mm) {
+ mutex_lock(&uacce_mm->lock);
+ list_add(&q->list, &uacce_mm->queues);
+ mutex_unlock(&uacce_mm->lock);
+ return uacce_mm;
+ }
+ }
+
+ uacce_mm = kzalloc(sizeof(*uacce_mm), GFP_KERNEL);
+ if (!uacce_mm)
+ return NULL;
+
+ if (uacce->flags & UACCE_DEV_SVA) {
+ /*
+ * Safe to pass an incomplete uacce_mm, since mm_exit cannot
+ * fire while we hold a reference to the mm.
+ */
+ handle = iommu_sva_bind_device(uacce->parent, mm, uacce_mm);
+ if (IS_ERR(handle))
+ goto err_free;
+
+ ret = iommu_sva_set_ops(handle, &uacce_sva_ops);
+ if (ret)
+ goto err_unbind;
+
+ uacce_mm->pasid = iommu_sva_get_pasid(handle);
+ if (uacce_mm->pasid == IOMMU_PASID_INVALID)
+ goto err_unbind;
+ }
+
+ uacce_mm->mm = mm;
+ uacce_mm->handle = handle;
+ INIT_LIST_HEAD(&uacce_mm->queues);
+ mutex_init(&uacce_mm->lock);
+ list_add(&q->list, &uacce_mm->queues);
+ list_add(&uacce_mm->list, &uacce->mm_list);
+
+ return uacce_mm;
+
+err_unbind:
+ if (handle)
+ iommu_sva_unbind_device(handle);
+err_free:
+ kfree(uacce_mm);
+ return NULL;
+}
+
+static void uacce_mm_put(struct uacce_queue *q)
+{
+ struct uacce_mm *uacce_mm = q->uacce_mm;
+
+ lockdep_assert_held(&q->uacce->mm_lock);
+
+ mutex_lock(&uacce_mm->lock);
+ list_del(&q->list);
+ mutex_unlock(&uacce_mm->lock);
+
+ if (list_empty(&uacce_mm->queues)) {
+ if (uacce_mm->handle)
+ iommu_sva_unbind_device(uacce_mm->handle);
+ list_del(&uacce_mm->list);
+ kfree(uacce_mm);
+ }
+}
+
+static int uacce_fops_open(struct inode *inode, struct file *filep)
+{
+ struct uacce_mm *uacce_mm = NULL;
+ struct uacce_device *uacce;
+ struct uacce_queue *q;
+ int ret = 0;
+
+ uacce = xa_load(&uacce_xa, iminor(inode));
+ if (!uacce)
+ return -ENODEV;
+
+ q = kzalloc(sizeof(struct uacce_queue), GFP_KERNEL);
+ if (!q)
+ return -ENOMEM;
+
+ mutex_lock(&uacce->mm_lock);
+ uacce_mm = uacce_mm_get(uacce, q, current->mm);
+ mutex_unlock(&uacce->mm_lock);
+ if (!uacce_mm) {
+ ret = -ENOMEM;
+ goto out_with_mem;
+ }
+
+ q->uacce = uacce;
+ q->uacce_mm = uacce_mm;
+
+ if (uacce->ops->get_queue) {
+ ret = uacce->ops->get_queue(uacce, uacce_mm->pasid, q);
+ if (ret < 0)
+ goto out_with_mm;
+ }
+
+ init_waitqueue_head(&q->wait);
+ filep->private_data = q;
+ uacce->inode = inode;
+ q->state = UACCE_Q_INIT;
+
+ return 0;
+
+out_with_mm:
+ mutex_lock(&uacce->mm_lock);
+ uacce_mm_put(q);
+ mutex_unlock(&uacce->mm_lock);
+out_with_mem:
+ kfree(q);
+ return ret;
+}
+
+static int uacce_fops_release(struct inode *inode, struct file *filep)
+{
+ struct uacce_queue *q = filep->private_data;
+ struct uacce_device *uacce = q->uacce;
+
+ uacce_put_queue(q);
+
+ mutex_lock(&uacce->mm_lock);
+ uacce_mm_put(q);
+ mutex_unlock(&uacce->mm_lock);
+
+ kfree(q);
+
+ return 0;
+}
+
+static vm_fault_t uacce_vma_fault(struct vm_fault *vmf)
+{
+ if (vmf->flags & (FAULT_FLAG_MKWRITE | FAULT_FLAG_WRITE))
+ return VM_FAULT_SIGBUS;
+
+ return 0;
+}
+
+static void uacce_vma_close(struct vm_area_struct *vma)
+{
+ struct uacce_queue *q = vma->vm_private_data;
+ struct uacce_qfile_region *qfr = NULL;
+
+ if (vma->vm_pgoff < UACCE_MAX_REGION)
+ qfr = q->qfrs[vma->vm_pgoff];
+
+ kfree(qfr);
+}
+
+static const struct vm_operations_struct uacce_vm_ops = {
+ .fault = uacce_vma_fault,
+ .close = uacce_vma_close,
+};
+
+static int uacce_fops_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+ struct uacce_queue *q = filep->private_data;
+ struct uacce_device *uacce = q->uacce;
+ struct uacce_qfile_region *qfr;
+ enum uacce_qfrt type = UACCE_MAX_REGION;
+ int ret = 0;
+
+ if (vma->vm_pgoff < UACCE_MAX_REGION)
+ type = vma->vm_pgoff;
+ else
+ return -EINVAL;
+
+ qfr = kzalloc(sizeof(*qfr), GFP_KERNEL);
+ if (!qfr)
+ return -ENOMEM;
+
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_WIPEONFORK;
+ vma->vm_ops = &uacce_vm_ops;
+ vma->vm_private_data = q;
+ qfr->type = type;
+
+ mutex_lock(&uacce_mutex);
+
+ if (q->state != UACCE_Q_INIT && q->state != UACCE_Q_STARTED) {
+ ret = -EINVAL;
+ goto out_with_lock;
+ }
+
+ if (q->qfrs[type]) {
+ ret = -EEXIST;
+ goto out_with_lock;
+ }
+
+ switch (type) {
+ case UACCE_QFRT_MMIO:
+ if (!uacce->ops->mmap) {
+ ret = -EINVAL;
+ goto out_with_lock;
+ }
+
+ ret = uacce->ops->mmap(q, vma, qfr);
+ if (ret)
+ goto out_with_lock;
+
+ break;
+
+ case UACCE_QFRT_DUS:
+ if (!uacce->ops->mmap) {
+ ret = -EINVAL;
+ goto out_with_lock;
+ }
+
+ ret = uacce->ops->mmap(q, vma, qfr);
+ if (ret)
+ goto out_with_lock;
+ break;
+
+ default:
+ ret = -EINVAL;
+ goto out_with_lock;
+ }
+
+ q->qfrs[type] = qfr;
+ mutex_unlock(&uacce_mutex);
+
+ return ret;
+
+out_with_lock:
+ mutex_unlock(&uacce_mutex);
+ kfree(qfr);
+ return ret;
+}
+
+static __poll_t uacce_fops_poll(struct file *file, poll_table *wait)
+{
+ struct uacce_queue *q = file->private_data;
+ struct uacce_device *uacce = q->uacce;
+
+ poll_wait(file, &q->wait, wait);
+ if (uacce->ops->is_q_updated && uacce->ops->is_q_updated(q))
+ return EPOLLIN | EPOLLRDNORM;
+
+ return 0;
+}
+
+static const struct file_operations uacce_fops = {
+ .owner = THIS_MODULE,
+ .open = uacce_fops_open,
+ .release = uacce_fops_release,
+ .unlocked_ioctl = uacce_fops_unl_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = uacce_fops_compat_ioctl,
+#endif
+ .mmap = uacce_fops_mmap,
+ .poll = uacce_fops_poll,
+};
+
+#define to_uacce_device(dev) container_of(dev, struct uacce_device, dev)
+
+static ssize_t api_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ return sprintf(buf, "%s\n", uacce->api_ver);
+}
+
+static ssize_t flags_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ return sprintf(buf, "%u\n", uacce->flags);
+}
+
+static ssize_t available_instances_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ if (!uacce->ops->get_available_instances)
+ return -ENODEV;
+
+ return sprintf(buf, "%d\n",
+ uacce->ops->get_available_instances(uacce));
+}
+
+static ssize_t algorithms_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ return sprintf(buf, "%s\n", uacce->algs);
+}
+
+static ssize_t region_mmio_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ return sprintf(buf, "%lu\n",
+ uacce->qf_pg_num[UACCE_QFRT_MMIO] << PAGE_SHIFT);
+}
+
+static ssize_t region_dus_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ return sprintf(buf, "%lu\n",
+ uacce->qf_pg_num[UACCE_QFRT_DUS] << PAGE_SHIFT);
+}
+
+static DEVICE_ATTR_RO(api);
+static DEVICE_ATTR_RO(flags);
+static DEVICE_ATTR_RO(available_instances);
+static DEVICE_ATTR_RO(algorithms);
+static DEVICE_ATTR_RO(region_mmio_size);
+static DEVICE_ATTR_RO(region_dus_size);
+
+static struct attribute *uacce_dev_attrs[] = {
+ &dev_attr_api.attr,
+ &dev_attr_flags.attr,
+ &dev_attr_available_instances.attr,
+ &dev_attr_algorithms.attr,
+ &dev_attr_region_mmio_size.attr,
+ &dev_attr_region_dus_size.attr,
+ NULL,
+};
+
+static umode_t uacce_dev_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ if (((attr == &dev_attr_region_mmio_size.attr) &&
+ (!uacce->qf_pg_num[UACCE_QFRT_MMIO])) ||
+ ((attr == &dev_attr_region_dus_size.attr) &&
+ (!uacce->qf_pg_num[UACCE_QFRT_DUS])))
+ return 0;
+
+ return attr->mode;
+}
+
+static struct attribute_group uacce_dev_group = {
+ .is_visible = uacce_dev_is_visible,
+ .attrs = uacce_dev_attrs,
+};
+
+__ATTRIBUTE_GROUPS(uacce_dev);
+
+static void uacce_release(struct device *dev)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ kfree(uacce);
+}
+
+/**
+ * uacce_alloc() - alloc an accelerator
+ * @parent: pointer of uacce parent device
+ * @interface: pointer of uacce_interface for register
+ *
+ * Returns uacce pointer if success and ERR_PTR if not
+ * Need check returned negotiated uacce->flags
+ */
+struct uacce_device *uacce_alloc(struct device *parent,
+ struct uacce_interface *interface)
+{
+ unsigned int flags = interface->flags;
+ struct uacce_device *uacce;
+ int ret;
+
+ uacce = kzalloc(sizeof(struct uacce_device), GFP_KERNEL);
+ if (!uacce)
+ return ERR_PTR(-ENOMEM);
+
+ if (flags & UACCE_DEV_SVA) {
+ ret = iommu_dev_enable_feature(parent, IOMMU_DEV_FEAT_SVA);
+ if (ret)
+ flags &= ~UACCE_DEV_SVA;
+ }
+
+ uacce->parent = parent;
+ uacce->flags = flags;
+ uacce->ops = interface->ops;
+
+ ret = xa_alloc(&uacce_xa, &uacce->dev_id, uacce, xa_limit_32b,
+ GFP_KERNEL);
+ if (ret < 0)
+ goto err_with_uacce;
+
+ INIT_LIST_HEAD(&uacce->mm_list);
+ mutex_init(&uacce->mm_lock);
+ device_initialize(&uacce->dev);
+ uacce->dev.devt = MKDEV(MAJOR(uacce_devt), uacce->dev_id);
+ uacce->dev.class = uacce_class;
+ uacce->dev.groups = uacce_dev_groups;
+ uacce->dev.parent = uacce->parent;
+ uacce->dev.release = uacce_release;
+ dev_set_name(&uacce->dev, "%s-%d", interface->name, uacce->dev_id);
+
+ return uacce;
+
+err_with_uacce:
+ if (flags & UACCE_DEV_SVA)
+ iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA);
+ kfree(uacce);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(uacce_alloc);
+
+/**
+ * uacce_register() - add the accelerator to cdev and export to user space
+ * @uacce: The initialized uacce device
+ *
+ * Return 0 if register succeeded, or an error.
+ */
+int uacce_register(struct uacce_device *uacce)
+{
+ if (!uacce)
+ return -ENODEV;
+
+ uacce->cdev = cdev_alloc();
+ if (!uacce->cdev)
+ return -ENOMEM;
+
+ uacce->cdev->ops = &uacce_fops;
+ uacce->cdev->owner = THIS_MODULE;
+
+ return cdev_device_add(uacce->cdev, &uacce->dev);
+}
+EXPORT_SYMBOL_GPL(uacce_register);
+
+/**
+ * uacce_remove() - remove the accelerator
+ * @uacce: the accelerator to remove
+ */
+void uacce_remove(struct uacce_device *uacce)
+{
+ struct uacce_mm *uacce_mm;
+ struct uacce_queue *q;
+
+ if (!uacce)
+ return;
+ /*
+ * unmap remaining mapping from user space, preventing user still
+ * access the mmaped area while parent device is already removed
+ */
+ if (uacce->inode)
+ unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1);
+
+ /* ensure no open queue remains */
+ mutex_lock(&uacce->mm_lock);
+ list_for_each_entry(uacce_mm, &uacce->mm_list, list) {
+ /*
+ * We don't take the uacce_mm->lock here. Since we hold the
+ * device's mm_lock, no queue can be added to or removed from
+ * this uacce_mm. We may run concurrently with mm_exit, but
+ * uacce_put_queue() is serialized and iommu_sva_unbind_device()
+ * waits for the lock that mm_exit is holding.
+ */
+ list_for_each_entry(q, &uacce_mm->queues, list)
+ uacce_put_queue(q);
+
+ if (uacce->flags & UACCE_DEV_SVA) {
+ iommu_sva_unbind_device(uacce_mm->handle);
+ uacce_mm->handle = NULL;
+ }
+ }
+ mutex_unlock(&uacce->mm_lock);
+
+ /* disable sva now since no opened queues */
+ if (uacce->flags & UACCE_DEV_SVA)
+ iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA);
+
+ if (uacce->cdev)
+ cdev_device_del(uacce->cdev, &uacce->dev);
+ xa_erase(&uacce_xa, uacce->dev_id);
+ put_device(&uacce->dev);
+}
+EXPORT_SYMBOL_GPL(uacce_remove);
+
+static int __init uacce_init(void)
+{
+ int ret;
+
+ uacce_class = class_create(THIS_MODULE, UACCE_NAME);
+ if (IS_ERR(uacce_class))
+ return PTR_ERR(uacce_class);
+
+ ret = alloc_chrdev_region(&uacce_devt, 0, MINORMASK, UACCE_NAME);
+ if (ret)
+ class_destroy(uacce_class);
+
+ return ret;
+}
+
+static __exit void uacce_exit(void)
+{
+ unregister_chrdev_region(uacce_devt, MINORMASK);
+ class_destroy(uacce_class);
+}
+
+subsys_initcall(uacce_init);
+module_exit(uacce_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Hisilicon Tech. Co., Ltd.");
+MODULE_DESCRIPTION("Accelerator interface for Userland applications");
diff --git a/drivers/misc/vexpress-syscfg.c b/drivers/misc/vexpress-syscfg.c
index 058fcd7f9f01..a431787c0898 100644
--- a/drivers/misc/vexpress-syscfg.c
+++ b/drivers/misc/vexpress-syscfg.c
@@ -42,7 +42,7 @@ struct vexpress_syscfg_func {
struct vexpress_syscfg *syscfg;
struct regmap *regmap;
int num_templates;
- u32 template[0]; /* Keep it last! */
+ u32 template[]; /* Keep it last! */
};