diff options
Diffstat (limited to 'drivers/gpu/host1x')
-rw-r--r-- | drivers/gpu/host1x/Kconfig | 2 | ||||
-rw-r--r-- | drivers/gpu/host1x/bus.c | 2 | ||||
-rw-r--r-- | drivers/gpu/host1x/cdma.c | 6 | ||||
-rw-r--r-- | drivers/gpu/host1x/channel.c | 13 | ||||
-rw-r--r-- | drivers/gpu/host1x/channel.h | 1 | ||||
-rw-r--r-- | drivers/gpu/host1x/dev.c | 236 | ||||
-rw-r--r-- | drivers/gpu/host1x/dev.h | 3 | ||||
-rw-r--r-- | drivers/gpu/host1x/intr.c | 1 | ||||
-rw-r--r-- | drivers/gpu/host1x/job.c | 91 | ||||
-rw-r--r-- | drivers/gpu/host1x/job.h | 4 |
10 files changed, 248 insertions, 111 deletions
diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig index cf987a317a55..6dab94adf25e 100644 --- a/drivers/gpu/host1x/Kconfig +++ b/drivers/gpu/host1x/Kconfig @@ -2,7 +2,7 @@ config TEGRA_HOST1X tristate "NVIDIA Tegra host1x driver" depends on ARCH_TEGRA || (ARM && COMPILE_TEST) - select IOMMU_IOVA if IOMMU_SUPPORT + select IOMMU_IOVA help Driver for the NVIDIA Tegra host1x hardware. diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 742aa9ff21b8..2c8559ff3481 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -445,7 +445,7 @@ static int host1x_device_add(struct host1x *host1x, of_dma_configure(&device->dev, host1x->dev->of_node, true); device->dev.dma_parms = &device->dma_parms; - dma_set_max_seg_size(&device->dev, SZ_4M); + dma_set_max_seg_size(&device->dev, UINT_MAX); err = host1x_device_parse_dt(device, driver); if (err < 0) { diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 48c84c48299c..e8d3fda91d8a 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -232,9 +232,9 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, * * Must be called with the cdma lock held. */ -int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x, - struct host1x_cdma *cdma, - unsigned int needed) +static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x, + struct host1x_cdma *cdma, + unsigned int needed) { while (true) { struct push_buffer *pb = &cdma->push_buffer; diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c index 1436295aa450..4cd212bb570d 100644 --- a/drivers/gpu/host1x/channel.c +++ b/drivers/gpu/host1x/channel.c @@ -115,14 +115,14 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host) /** * host1x_channel_request() - Allocate a channel - * @device: Host1x unit this channel will be used to send commands to + * @client: Host1x client this channel will be used to send commands to * - * Allocates a new host1x channel for @device. May return NULL if CDMA + * Allocates a new host1x channel for @client. May return NULL if CDMA * initialization fails. */ -struct host1x_channel *host1x_channel_request(struct device *dev) +struct host1x_channel *host1x_channel_request(struct host1x_client *client) { - struct host1x *host = dev_get_drvdata(dev->parent); + struct host1x *host = dev_get_drvdata(client->dev->parent); struct host1x_channel_list *chlist = &host->channel_list; struct host1x_channel *channel; int err; @@ -133,7 +133,8 @@ struct host1x_channel *host1x_channel_request(struct device *dev) kref_init(&channel->refcount); mutex_init(&channel->submitlock); - channel->dev = dev; + channel->client = client; + channel->dev = client->dev; err = host1x_hw_channel_init(host, channel, channel->id); if (err < 0) @@ -148,7 +149,7 @@ struct host1x_channel *host1x_channel_request(struct device *dev) fail: clear_bit(channel->id, chlist->allocated_channels); - dev_err(dev, "failed to initialize channel\n"); + dev_err(client->dev, "failed to initialize channel\n"); return NULL; } diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h index 4fd694834f74..39044ff6c3aa 100644 --- a/drivers/gpu/host1x/channel.h +++ b/drivers/gpu/host1x/channel.h @@ -26,6 +26,7 @@ struct host1x_channel { unsigned int id; struct mutex submitlock; void __iomem *regs; + struct host1x_client *client; struct device *dev; struct host1x_cdma cdma; }; diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 5a3f797240d4..a738ea55e407 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -18,10 +18,6 @@ #include <trace/events/host1x.h> #undef CREATE_TRACE_POINTS -#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) -#include <asm/dma-iommu.h> -#endif - #include "bus.h" #include "channel.h" #include "debug.h" @@ -77,6 +73,10 @@ static const struct host1x_info host1x01_info = { .init = host1x01_init, .sync_offset = 0x3000, .dma_mask = DMA_BIT_MASK(32), + .has_wide_gather = false, + .has_hypervisor = false, + .num_sid_entries = 0, + .sid_table = NULL, }; static const struct host1x_info host1x02_info = { @@ -87,6 +87,10 @@ static const struct host1x_info host1x02_info = { .init = host1x02_init, .sync_offset = 0x3000, .dma_mask = DMA_BIT_MASK(32), + .has_wide_gather = false, + .has_hypervisor = false, + .num_sid_entries = 0, + .sid_table = NULL, }; static const struct host1x_info host1x04_info = { @@ -97,6 +101,10 @@ static const struct host1x_info host1x04_info = { .init = host1x04_init, .sync_offset = 0x2100, .dma_mask = DMA_BIT_MASK(34), + .has_wide_gather = false, + .has_hypervisor = false, + .num_sid_entries = 0, + .sid_table = NULL, }; static const struct host1x_info host1x05_info = { @@ -107,6 +115,10 @@ static const struct host1x_info host1x05_info = { .init = host1x05_init, .sync_offset = 0x2100, .dma_mask = DMA_BIT_MASK(34), + .has_wide_gather = false, + .has_hypervisor = false, + .num_sid_entries = 0, + .sid_table = NULL, }; static const struct host1x_sid_entry tegra186_sid_table[] = { @@ -126,6 +138,7 @@ static const struct host1x_info host1x06_info = { .init = host1x06_init, .sync_offset = 0x0, .dma_mask = DMA_BIT_MASK(40), + .has_wide_gather = true, .has_hypervisor = true, .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), .sid_table = tegra186_sid_table, @@ -148,6 +161,7 @@ static const struct host1x_info host1x07_info = { .init = host1x07_init, .sync_offset = 0x0, .dma_mask = DMA_BIT_MASK(40), + .has_wide_gather = true, .has_hypervisor = true, .num_sid_entries = ARRAY_SIZE(tegra194_sid_table), .sid_table = tegra194_sid_table, @@ -178,6 +192,117 @@ static void host1x_setup_sid_table(struct host1x *host) } } +static struct iommu_domain *host1x_iommu_attach(struct host1x *host) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev); + int err; + + /* + * If the host1x firewall is enabled, there's no need to enable IOMMU + * support. Similarly, if host1x is already attached to an IOMMU (via + * the DMA API), don't try to attach again. + */ + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) || domain) + return domain; + + host->group = iommu_group_get(host->dev); + if (host->group) { + struct iommu_domain_geometry *geometry; + dma_addr_t start, end; + unsigned long order; + + err = iova_cache_get(); + if (err < 0) + goto put_group; + + host->domain = iommu_domain_alloc(&platform_bus_type); + if (!host->domain) { + err = -ENOMEM; + goto put_cache; + } + + err = iommu_attach_group(host->domain, host->group); + if (err) { + if (err == -ENODEV) + err = 0; + + goto free_domain; + } + + geometry = &host->domain->geometry; + start = geometry->aperture_start & host->info->dma_mask; + end = geometry->aperture_end & host->info->dma_mask; + + order = __ffs(host->domain->pgsize_bitmap); + init_iova_domain(&host->iova, 1UL << order, start >> order); + host->iova_end = end; + + domain = host->domain; + } + + return domain; + +free_domain: + iommu_domain_free(host->domain); + host->domain = NULL; +put_cache: + iova_cache_put(); +put_group: + iommu_group_put(host->group); + host->group = NULL; + + return ERR_PTR(err); +} + +static int host1x_iommu_init(struct host1x *host) +{ + u64 mask = host->info->dma_mask; + struct iommu_domain *domain; + int err; + + domain = host1x_iommu_attach(host); + if (IS_ERR(domain)) { + err = PTR_ERR(domain); + dev_err(host->dev, "failed to attach to IOMMU: %d\n", err); + return err; + } + + /* + * If we're not behind an IOMMU make sure we don't get push buffers + * that are allocated outside of the range addressable by the GATHER + * opcode. + * + * Newer generations of Tegra (Tegra186 and later) support a wide + * variant of the GATHER opcode that allows addressing more bits. + */ + if (!domain && !host->info->has_wide_gather) + mask = DMA_BIT_MASK(32); + + err = dma_coerce_mask_and_coherent(host->dev, mask); + if (err < 0) { + dev_err(host->dev, "failed to set DMA mask: %d\n", err); + return err; + } + + return 0; +} + +static void host1x_iommu_exit(struct host1x *host) +{ + if (host->domain) { + put_iova_domain(&host->iova); + iommu_detach_group(host->domain, host->group); + + iommu_domain_free(host->domain); + host->domain = NULL; + + iova_cache_put(); + + iommu_group_put(host->group); + host->group = NULL; + } +} + static int host1x_probe(struct platform_device *pdev) { struct host1x *host; @@ -237,7 +362,8 @@ static int host1x_probe(struct platform_device *pdev) return PTR_ERR(host->hv_regs); } - dma_set_mask_and_coherent(host->dev, host->info->dma_mask); + host->dev->dma_parms = &host->dma_parms; + dma_set_max_seg_size(host->dev, UINT_MAX); if (host->info->init) { err = host->info->init(host); @@ -261,87 +387,42 @@ static int host1x_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to get reset: %d\n", err); return err; } -#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) - if (host->dev->archdata.mapping) { - struct dma_iommu_mapping *mapping = - to_dma_iommu_mapping(host->dev); - arm_iommu_detach_device(host->dev); - arm_iommu_release_mapping(mapping); - } -#endif - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) - goto skip_iommu; - - host->group = iommu_group_get(&pdev->dev); - if (host->group) { - struct iommu_domain_geometry *geometry; - u64 mask = dma_get_mask(host->dev); - dma_addr_t start, end; - unsigned long order; - - err = iova_cache_get(); - if (err < 0) - goto put_group; - - host->domain = iommu_domain_alloc(&platform_bus_type); - if (!host->domain) { - err = -ENOMEM; - goto put_cache; - } - err = iommu_attach_group(host->domain, host->group); - if (err) { - if (err == -ENODEV) { - iommu_domain_free(host->domain); - host->domain = NULL; - iova_cache_put(); - iommu_group_put(host->group); - host->group = NULL; - goto skip_iommu; - } - - goto fail_free_domain; - } - - geometry = &host->domain->geometry; - start = geometry->aperture_start & mask; - end = geometry->aperture_end & mask; - - order = __ffs(host->domain->pgsize_bitmap); - init_iova_domain(&host->iova, 1UL << order, start >> order); - host->iova_end = end; + err = host1x_iommu_init(host); + if (err < 0) { + dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err); + return err; } -skip_iommu: err = host1x_channel_list_init(&host->channel_list, host->info->nb_channels); if (err) { dev_err(&pdev->dev, "failed to initialize channel list\n"); - goto fail_detach_device; + goto iommu_exit; } err = clk_prepare_enable(host->clk); if (err < 0) { dev_err(&pdev->dev, "failed to enable clock\n"); - goto fail_free_channels; + goto free_channels; } err = reset_control_deassert(host->rst); if (err < 0) { dev_err(&pdev->dev, "failed to deassert reset: %d\n", err); - goto fail_unprepare_disable; + goto unprepare_disable; } err = host1x_syncpt_init(host); if (err) { dev_err(&pdev->dev, "failed to initialize syncpts\n"); - goto fail_reset_assert; + goto reset_assert; } err = host1x_intr_init(host, syncpt_irq); if (err) { dev_err(&pdev->dev, "failed to initialize interrupts\n"); - goto fail_deinit_syncpt; + goto deinit_syncpt; } host1x_debug_init(host); @@ -351,33 +432,22 @@ skip_iommu: err = host1x_register(host); if (err < 0) - goto fail_deinit_intr; + goto deinit_intr; return 0; -fail_deinit_intr: +deinit_intr: host1x_intr_deinit(host); -fail_deinit_syncpt: +deinit_syncpt: host1x_syncpt_deinit(host); -fail_reset_assert: +reset_assert: reset_control_assert(host->rst); -fail_unprepare_disable: +unprepare_disable: clk_disable_unprepare(host->clk); -fail_free_channels: +free_channels: host1x_channel_list_free(&host->channel_list); -fail_detach_device: - if (host->group && host->domain) { - put_iova_domain(&host->iova); - iommu_detach_group(host->domain, host->group); - } -fail_free_domain: - if (host->domain) - iommu_domain_free(host->domain); -put_cache: - if (host->group) - iova_cache_put(); -put_group: - iommu_group_put(host->group); +iommu_exit: + host1x_iommu_exit(host); return err; } @@ -387,18 +457,12 @@ static int host1x_remove(struct platform_device *pdev) struct host1x *host = platform_get_drvdata(pdev); host1x_unregister(host); + host1x_debug_deinit(host); host1x_intr_deinit(host); host1x_syncpt_deinit(host); reset_control_assert(host->rst); clk_disable_unprepare(host->clk); - - if (host->domain) { - put_iova_domain(&host->iova); - iommu_detach_group(host->domain, host->group); - iommu_domain_free(host->domain); - iova_cache_put(); - iommu_group_put(host->group); - } + host1x_iommu_exit(host); return 0; } diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index ff56f5e23a02..f781a9b0f39d 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -97,6 +97,7 @@ struct host1x_info { int (*init)(struct host1x *host1x); /* initialize per SoC ops */ unsigned int sync_offset; /* offset of syncpoint registers */ u64 dma_mask; /* mask of addressable memory */ + bool has_wide_gather; /* supports GATHER_W opcode */ bool has_hypervisor; /* has hypervisor registers */ unsigned int num_sid_entries; const struct host1x_sid_entry *sid_table; @@ -140,6 +141,8 @@ struct host1x { struct list_head devices; struct list_head list; + + struct device_dma_parameters dma_parms; }; void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v); diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 26f3c741d085..9245add23b5d 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -105,7 +105,6 @@ static void action_submit_complete(struct host1x_waitlist *waiter) /* Add nr_completed to trace */ trace_host1x_channel_submit_complete(dev_name(channel->dev), waiter->count, waiter->thresh); - } static void action_wakeup(struct host1x_waitlist *waiter) diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index eaa5c3352c13..25ca54de8fc5 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -99,6 +99,8 @@ EXPORT_SYMBOL(host1x_job_add_gather); static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { + struct host1x_client *client = job->client; + struct device *dev = client->dev; unsigned int i; int err; @@ -106,8 +108,8 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) for (i = 0; i < job->num_relocs; i++) { struct host1x_reloc *reloc = &job->relocs[i]; + dma_addr_t phys_addr, *phys; struct sg_table *sgt; - dma_addr_t phys_addr; reloc->target.bo = host1x_bo_get(reloc->target.bo); if (!reloc->target.bo) { @@ -115,7 +117,50 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - phys_addr = host1x_bo_pin(reloc->target.bo, &sgt); + if (client->group) + phys = &phys_addr; + else + phys = NULL; + + sgt = host1x_bo_pin(dev, reloc->target.bo, phys); + if (IS_ERR(sgt)) { + err = PTR_ERR(sgt); + goto unpin; + } + + if (sgt) { + unsigned long mask = HOST1X_RELOC_READ | + HOST1X_RELOC_WRITE; + enum dma_data_direction dir; + + switch (reloc->flags & mask) { + case HOST1X_RELOC_READ: + dir = DMA_TO_DEVICE; + break; + + case HOST1X_RELOC_WRITE: + dir = DMA_FROM_DEVICE; + break; + + case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE: + dir = DMA_BIDIRECTIONAL; + break; + + default: + err = -EINVAL; + goto unpin; + } + + err = dma_map_sg(dev, sgt->sgl, sgt->nents, dir); + if (!err) { + err = -ENOMEM; + goto unpin; + } + + job->unpins[job->num_unpins].dev = dev; + job->unpins[job->num_unpins].dir = dir; + phys_addr = sg_dma_address(sgt->sgl); + } job->addr_phys[job->num_unpins] = phys_addr; job->unpins[job->num_unpins].bo = reloc->target.bo; @@ -139,7 +184,11 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - phys_addr = host1x_bo_pin(g->bo, &sgt); + sgt = host1x_bo_pin(host->dev, g->bo, NULL); + if (IS_ERR(sgt)) { + err = PTR_ERR(sgt); + goto unpin; + } if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) { for_each_sg(sgt->sgl, sg, sgt->nents, j) @@ -163,15 +212,24 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - job->addr_phys[job->num_unpins] = - iova_dma_addr(&host->iova, alloc); job->unpins[job->num_unpins].size = gather_size; + phys_addr = iova_dma_addr(&host->iova, alloc); } else { - job->addr_phys[job->num_unpins] = phys_addr; + err = dma_map_sg(host->dev, sgt->sgl, sgt->nents, + DMA_TO_DEVICE); + if (!err) { + err = -ENOMEM; + goto unpin; + } + + job->unpins[job->num_unpins].dev = host->dev; + phys_addr = sg_dma_address(sgt->sgl); } - job->gather_addr_phys[i] = job->addr_phys[job->num_unpins]; + job->addr_phys[job->num_unpins] = phys_addr; + job->gather_addr_phys[i] = phys_addr; + job->unpins[job->num_unpins].dir = DMA_TO_DEVICE; job->unpins[job->num_unpins].bo = g->bo; job->unpins[job->num_unpins].sgt = sgt; job->num_unpins++; @@ -436,7 +494,8 @@ out: return err; } -static inline int copy_gathers(struct host1x_job *job, struct device *dev) +static inline int copy_gathers(struct device *host, struct host1x_job *job, + struct device *dev) { struct host1x_firewall fw; size_t size = 0; @@ -459,12 +518,12 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) * Try a non-blocking allocation from a higher priority pools first, * as awaiting for the allocation here is a major performance hit. */ - job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy, + job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, GFP_NOWAIT); /* the higher priority allocation failed, try the generic-blocking */ if (!job->gather_copy_mapped) - job->gather_copy_mapped = dma_alloc_wc(dev, size, + job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, GFP_KERNEL); if (!job->gather_copy_mapped) @@ -512,7 +571,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) goto out; if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { - err = copy_gathers(job, dev); + err = copy_gathers(host->dev, job, dev); if (err) goto out; } @@ -557,6 +616,8 @@ void host1x_job_unpin(struct host1x_job *job) for (i = 0; i < job->num_unpins; i++) { struct host1x_job_unpin_data *unpin = &job->unpins[i]; + struct device *dev = unpin->dev ?: host->dev; + struct sg_table *sgt = unpin->sgt; if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && unpin->size && host->domain) { @@ -566,14 +627,18 @@ void host1x_job_unpin(struct host1x_job *job) iova_pfn(&host->iova, job->addr_phys[i])); } - host1x_bo_unpin(unpin->bo, unpin->sgt); + if (unpin->dev && sgt) + dma_unmap_sg(unpin->dev, sgt->sgl, sgt->nents, + unpin->dir); + + host1x_bo_unpin(dev, unpin->bo, sgt); host1x_bo_put(unpin->bo); } job->num_unpins = 0; if (job->gather_copy_size) - dma_free_wc(job->channel->dev, job->gather_copy_size, + dma_free_wc(host->dev, job->gather_copy_size, job->gather_copy_mapped, job->gather_copy); } EXPORT_SYMBOL(host1x_job_unpin); diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h index 62b8805e6b35..94bc2e4ae241 100644 --- a/drivers/gpu/host1x/job.h +++ b/drivers/gpu/host1x/job.h @@ -8,6 +8,8 @@ #ifndef __HOST1X_JOB_H #define __HOST1X_JOB_H +#include <linux/dma-direction.h> + struct host1x_job_gather { unsigned int words; dma_addr_t base; @@ -19,7 +21,9 @@ struct host1x_job_gather { struct host1x_job_unpin_data { struct host1x_bo *bo; struct sg_table *sgt; + struct device *dev; size_t size; + enum dma_data_direction dir; }; /* |