author | 2025-03-28 17:44:52 -0700
---|---
committer | 2025-03-28 17:44:52 -0700
commit | 0c86b42439b6c11d758b3392a21117934fef00c1 (patch)
tree | 6aa7071476067661867af33767cc0e1863397a04 /drivers/gpu/drm/xe/xe_pt.c
parent | Merge tag 'fbdev-for-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev (diff)
parent | Merge tag 'drm-intel-gt-next-2025-03-12' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-next (diff)
Merge tag 'drm-next-2025-03-28' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie:
"Outside of drm there are some rust patches from Danilo who maintains
that area in here, and some pieces for drm header check tests.
The major things in here are a new driver supporting the touchbar
displays on M1/M2, the nova-core stub driver which is just the vehicle
for adding rust abstractions and start developing a real driver inside
of.
xe adds support for SVM, with a non-driver-specific SVM core
abstraction that will hopefully be useful for other drivers, along
with shrinker support for TTM devices. I'm sure xe and AMD support
new devices, but with the pipeline depth on these things it's hard to
know when they will end up in the marketplace!
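The GPU SVM work builds on the kernel's MMU interval-notifier API. A
minimal sketch of that pattern follows; my_svm_range, my_svm_invalidate
and my_svm_ops are illustrative names, not the actual GPU SVM interface:
```c
#include <linux/mmu_notifier.h>

/* Hypothetical per-range state; the real GPU SVM layer tracks much more. */
struct my_svm_range {
	struct mmu_interval_notifier notifier;
};

/* Called by core MM whenever CPU mappings covering the range change. */
static bool my_svm_invalidate(struct mmu_interval_notifier *mni,
			      const struct mmu_notifier_range *range,
			      unsigned long cur_seq)
{
	/*
	 * Under the driver's notifier lock (elided here), bump the sequence
	 * count so a concurrently staged GPU bind sees the invalidation and
	 * retries; the -EAGAIN paths in the xe_pt.c diff at the end of this
	 * page implement exactly that re-check.
	 */
	mmu_interval_set_seq(mni, cur_seq);

	/* Zap the GPU PTEs for [range->start, range->end) and flush TLBs. */

	return true;
}

static const struct mmu_interval_notifier_ops my_svm_ops = {
	.invalidate = my_svm_invalidate,
};
```
The bind side samples mmu_interval_read_begin() before faulting pages in
and re-checks with mmu_interval_read_retry() under the same lock before
committing page-table state.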
uapi:
- add mediatek tiled fourcc
- add support for notifying userspace on device wedged
new driver:
- appletbdrm: support for Apple Touchbar displays on M1/M2
- nova-core: skeleton rust driver to develop nova inside of
firmware:
- add some rust firmware pieces
rust:
- add 'LocalModule' type alias
component:
- add helper to query bound status
fbdev:
- fbtft: remove access to page->index
media:
- cec: tda998x: import driver from drm
dma-buf:
- add fast path for single fence merging
tests:
- fix lockdep warnings
atomic:
- allow full modeset on connector changes
- clarify semantics of allow_modeset and drm_atomic_helper_check
- async-flip: support on arbitrary planes
- writeback: fix UAF
- Document atomic-state history
format-helper:
- support ARGB8888 to ARGB4444 conversions
buddy:
- fix multi-root cleanup
ci:
- update IGT
dp:
- support extended wake timeout
- mst: fix RAD to string conversion
- increase DPCD eDP control CAP size to 5 bytes
- add DPCD eDP v1.5 definition
- add helpers for LTTPR transparent mode (see the sketch below the pull message)
panic:
- encode QR code according to FIDO 2.2
scheduler:
- add parameter struct for init
- improve job peek/pop operations
- optimise drm_sched_job struct layout
ttm:
- refactor pool allocation
- add helpers for TTM shrinker
panel-orientation:
- add a bunch of new quirks
panel:
- convert panels to multi-style functions
- edp: Add support for B140UAN04.4, BOE NV140FHM-NZ, CSW MNB601LS1-3,
LG LP079QX1-SP0V, MNE007QS3-7, STA 116QHD024002, Starry
116KHD024006, Lenovo T14s Gen6 Snapdragon
- himax-hx83102: Add support for CSOT PNA957QT1-1, Kingdisplay
kd110n11-51ie, Starry 2082109qfh040022-50e
- visionox-r66451: use multi-style MIPI-DSI functions
- raydium-rm67200: Add driver for Raydium RM67200
- simple: Add support for BOE AV123Z7M-N17
- sony-td4353-jdi: Use MIPI-DSI multi-func interface
- summit: Add driver for Apple Summit display panel
- visionox-rm692e5: Add driver for Visionox RM692E5
bridge:
- pass full atomic state to various callbacks
- adv7511: Report correct capabilities
- it6505: Fix HDCP V compare
- sn65dsi86: fix device IDs
- nwl-dsi: set bridge type
- ti-sn65dsi83: add error recovery and set bridge type
- synopsys: add HDMI audio support
xe:
- support device-wedged event
- add mmap support for PCI memory barrier
- perf pmu integration and expose per-engine activity
- add EU stall sampling support
- GPU SVM and Xe SVM implementation
- use TTM shrinker
- add survivability mode to allow the driver to do firmware updates
in critical failure states
- PXP HWDRM support for MTL and LNL
- expose package/vram temps over hwmon
- enable DP tunneling
- drop mmio_ext abstraction
- Reject BO eviction if BO is bound to current VM
- Xe suballocator improvements
- re-use display vmas when possible
- add GuC Buffer Cache abstraction
- PCI ID update for Panther Lake and Battlemage
- Enable SRIOV for Panther Lake
- Refactor VRAM manager location
i915:
- enable extended wake timeout
- support device-wedged event
- Enable DP 128b/132b SST DSC
- FBC dirty rectangle support for display version 30+
- convert i915/xe to drm client setup
- Compute HDMI PLLs for rates not in fixed tables
- Allow DSB usage when PSR is enabled on LNL+
- Enable panel replay without full modeset
- Enable async flips with compressed buffers on ICL+
- support luminance based brightness via DPCD for eDP
- enable VRR enable/disable without full modeset
- allow GuC SLPC default strategies on MTL+ for performance
- lots of display refactoring in move to struct intel_display
amdgpu:
- add device wedged event
- support async page flips on overlay planes
- enable broadcast RGB drm property
- add info ioctl for virt mode
- OEM i2c support for RGB lights
- GC 11.5.2 + 11.5.3 support
- SDMA 6.1.3 support
- NBIO 7.9.1 + 7.11.2 support
- MMHUB 1.8.1 + 3.3.2 support
- DCN 3.6.0 support
- Add dynamic workload profile switching for GC 10-12
- support larger VBIOS sizes
- Mark gttsize parameters as deprecated
- Initial JPEG queue reset support
amdkfd:
- add KFD per process flags for setting precision
- sync pasid values between KGD and KFD
- improve GTT/VRAM handling for APUs
- fix user queue validation on GC7/8
- SDMA queue reset support
radeon:
- rs400 hyperz fix
i2c:
- tda998x: drop platform_data, split driver into media and bridge
ast:
- transmitter chip detection refactoring
- vbios display mode refactoring
- astdp: fix connection status and filter unsupported modes
- cursor handling refactoring
imagination:
- check job dependencies with sched helper
ivpu:
- improve command queue handling
- use workqueue for IRQ handling
- add support for HW fault injection
- locking fixes
mgag200:
- add support for G200eH5
msm:
- dpu: add concurrent writeback support for DPU 10.x+
- use LTTPR helpers
- GPU:
- Fix obscure GMU suspend failure
- Expose syncobj timeline support
- Extend GPU devcoredump with pagetable info
- a623 support
- Fix a6xx gen1/gen2 indexed-register blocks in gpu snapshot /
devcoredump
- Display:
- Add cpu-cfg interconnect paths on SM8550 and SM8650
- Introduce KMS IOMMU fault handler, causing devcoredump snapshot
- Fixed error pointer dereference in msm_kms_init_aspace()
- DPU:
- Fix mode_changing handling
- Add writeback support on SM6150 (QCS615)
- Fix DSC programming in 1:1:1 topology
- Reworked hardware resource allocation, moving it to the CRTC code
- Enabled support for Concurrent WriteBack (CWB) on SM8650
- Enabled CDM blocks on all relevant platforms
- Reworked debugfs interface for BW/clocks debugging
- Clear perf params before calculating bw
- Support YUV formats on writeback
- Fixed double inclusion
- Fixed writeback in YUV formats when using cloned output; dropped
wb2_formats_rgb
- Corrected dpu_crtc_check_mode_changed and struct dpu_encoder_virt
kerneldocs
- Fixed uninitialized variable in dpu_crtc_kickoff_clone_mode()
- DSI:
- DSC-related fixes
- Rework clock programming
- DSI PHY:
- Fix 7nm (and lower) PHY programming
- Add proper DT schema definitions for DSI PHY clocks
- HDMI:
- Rework the driver, enabling the use of the HDMI Connector
framework
- Bindings:
- Added eDP PHY on SA8775P
nouveau:
- move drm_slave_encoder interface into driver
- nvkm: refactor GSP RPC
- use LTTPR helpers
mediatek:
- HDMI fixup and refinement
- add MT8188 dsc compatible
- MT8365 SoC support
panthor:
- Expose sizes of internal BOs via fdinfo
- Fix race between reset and suspend
- Improve locking
qaic:
- Add support for AIC200
renesas:
- Fix limits in DT bindings
rockchip:
- support rk3562-mali
- rk3576: Add HDMI support
- vop2: Add new display modes on RK3588 HDMI0 up to 4K
- Don't change HDMI reference clock rate
- Fix DT bindings
- analogix_dp: add eDP support
- fix shutdown
solomon:
- Set SPI device table to silence warnings
- Fix pixel and scanline encoding
v3d:
- handle clock
vc4:
- Use drm_exec
- Use dma-resv for wait-BO ioctl
- Remove seqno infrastructure
virtgpu:
- Support partial mappings of GEM objects
- Reserve VGA resources during initialization
- Fix UAF in virtgpu_dma_buf_free_obj()
- Add panic support
vkms:
- Switch to a managed modesetting pipeline
- Add support for ARGB8888
- fix UAF
xlnx:
- Set correct DMA segment size
- use mutex guards
- Fix error handling
- Fix docs"
* tag 'drm-next-2025-03-28' of https://gitlab.freedesktop.org/drm/kernel: (1762 commits)
drm/amd/pm: Update feature list for smu_v13_0_6
drm/amdgpu: Add parameter documentation for amdgpu_sync_fence
drm/amdgpu/discovery: optionally use fw based ip discovery
drm/amdgpu/discovery: use specific ip_discovery.bin for legacy asics
drm/amdgpu/discovery: check ip_discovery fw file available
drm/amd/pm: Remove unnecessary UQ10 to UINT conversion
drm/amd/pm: Remove unnecessary UQ10 to UINT conversion
drm/amdgpu/sdma_v4_4_2: update VM flush implementation for SDMA
drm/amdgpu: Optimize VM invalidation engine allocation and synchronize GPU TLB flush
drm/amd/amdgpu: Increase max rings to enable SDMA page ring
drm/amdgpu: Decode deferred error type in gfx aca bank parser
drm/amdgpu/gfx11: Add Cleaner Shader Support for GFX11.5 GPUs
drm/amdgpu/mes: clean up SDMA HQD loop
drm/amdgpu/mes: enable compute pipes across all MEC
drm/amdgpu/mes: drop MES 10.x leftovers
drm/amdgpu/mes: optimize compute loop handling
drm/amdgpu/sdma: guilty tracking is per instance
drm/amdgpu/sdma: fix engine reset handling
drm/amdgpu: remove invalid usage of sched.ready
drm/amdgpu: add cleaner shader trace point
...
Diffstat (limited to 'drivers/gpu/drm/xe/xe_pt.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_pt.c | 399
1 file changed, 348 insertions(+), 51 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index dc24baa84092..ffaf0d02dc7d 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -20,6 +20,7 @@
 #include "xe_res_cursor.h"
 #include "xe_sched_job.h"
 #include "xe_sync.h"
+#include "xe_svm.h"
 #include "xe_trace.h"
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_vm.h"
@@ -219,6 +220,20 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
 }
 
 /**
+ * xe_pt_clear() - Clear a page-table.
+ * @xe: xe device.
+ * @pt: The page-table.
+ *
+ * Clears page-table by setting to zero.
+ */
+void xe_pt_clear(struct xe_device *xe, struct xe_pt *pt)
+{
+	struct iosys_map *map = &pt->bo->vmap;
+
+	xe_map_memset(xe, map, 0, 0, SZ_4K);
+}
+
+/**
  * DOC: Pagetable building
  *
  * Below we use the term "page-table" for both page-directories, containing
@@ -593,6 +608,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
  * range.
  * @tile: The tile we're building for.
  * @vma: The vma indicating the address range.
+ * @range: The range indicating the address range.
  * @entries: Storage for the update entries used for connecting the tree to
  * the main tree at commit time.
  * @num_entries: On output contains the number of @entries used.
@@ -608,6 +624,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
  */
 static int
 xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
+		 struct xe_svm_range *range,
 		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
 {
 	struct xe_device *xe = tile_to_xe(tile);
@@ -625,14 +642,43 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		.vm = xe_vma_vm(vma),
 		.tile = tile,
 		.curs = &curs,
-		.va_curs_start = xe_vma_start(vma),
+		.va_curs_start = range ? range->base.itree.start :
+			xe_vma_start(vma),
 		.vma = vma,
 		.wupd.entries = entries,
-		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem,
 	};
 	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
 	int ret;
 
+	if (range) {
+		/* Move this entire thing to xe_svm.c? */
+		xe_svm_notifier_lock(xe_vma_vm(vma));
+		if (!xe_svm_range_pages_valid(range)) {
+			xe_svm_range_debug(range, "BIND PREPARE - RETRY");
+			xe_svm_notifier_unlock(xe_vma_vm(vma));
+			return -EAGAIN;
+		}
+		if (xe_svm_range_has_dma_mapping(range)) {
+			xe_res_first_dma(range->base.dma_addr, 0,
+					 range->base.itree.last + 1 - range->base.itree.start,
+					 &curs);
+			is_devmem = xe_res_is_vram(&curs);
+			if (is_devmem)
+				xe_svm_range_debug(range, "BIND PREPARE - DMA VRAM");
+			else
+				xe_svm_range_debug(range, "BIND PREPARE - DMA");
+		} else {
+			xe_assert(xe, false);
+		}
+		/*
+		 * Note, when unlocking the resource cursor dma addresses may become
+		 * stale, but the bind will be aborted anyway at commit time.
+		 */
+		xe_svm_notifier_unlock(xe_vma_vm(vma));
+	}
+
+	xe_walk.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem;
+
 	/**
 	 * Default atomic expectations for different allocation scenarios are as follows:
 	 *
@@ -654,7 +700,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 			 * gets migrated to LMEM, bind such allocations with
 			 * device atomics enabled.
 			 */
-			else if (is_devmem && !xe_bo_has_single_placement(bo))
+			else if (is_devmem)
 				xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
 		} else {
 			xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
@@ -670,15 +716,16 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 
 	if (is_devmem) {
 		xe_walk.default_pte |= XE_PPGTT_PTE_DM;
-		xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
+		xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
 	}
 
 	if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
 		xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
 
-	xe_bo_assert_held(bo);
+	if (!range)
+		xe_bo_assert_held(bo);
 
-	if (!xe_vma_is_null(vma)) {
+	if (!xe_vma_is_null(vma) && !range) {
 		if (xe_vma_is_userptr(vma))
 			xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0,
 					xe_vma_size(vma), &curs);
@@ -688,12 +735,14 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		else
 			xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma),
 					xe_vma_size(vma), &curs);
-	} else {
+	} else if (!range) {
 		curs.size = xe_vma_size(vma);
 	}
 
-	ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma),
-			       xe_vma_end(vma), &xe_walk.base);
+	ret = xe_pt_walk_range(&pt->base, pt->level,
+			       range ? range->base.itree.start : xe_vma_start(vma),
+			       range ? range->base.itree.last + 1 : xe_vma_end(vma),
+			       &xe_walk.base);
 
 	*num_entries = xe_walk.wupd.num_used_entries;
 	return ret;
@@ -837,6 +886,46 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
 	return xe_walk.needs_invalidate;
 }
 
+/**
+ * xe_pt_zap_ptes_range() - Zap (zero) gpu ptes of a SVM range
+ * @tile: The tile we're zapping for.
+ * @vm: The VM we're zapping for.
+ * @range: The SVM range we're zapping for.
+ *
+ * SVM invalidation needs to be able to zap the gpu ptes of a given address
+ * range. In order to be able to do that, that function needs access to the
+ * shared page-table entries so it can either clear the leaf PTEs or
+ * clear the pointers to lower-level page-tables. The caller is required
+ * to hold the SVM notifier lock.
+ *
+ * Return: Whether ptes were actually updated and a TLB invalidation is
+ * required.
+ */
+bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm,
+			  struct xe_svm_range *range)
+{
+	struct xe_pt_zap_ptes_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_zap_ptes_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.tile = tile,
+	};
+	struct xe_pt *pt = vm->pt_root[tile->id];
+	u8 pt_mask = (range->tile_present & ~range->tile_invalidated);
+
+	xe_svm_assert_in_notifier(vm);
+
+	if (!(pt_mask & BIT(tile->id)))
+		return false;
+
+	(void)xe_pt_walk_shared(&pt->base, pt->level, range->base.itree.start,
+				range->base.itree.last + 1, &xe_walk.base);
+
+	return xe_walk.needs_invalidate;
+}
+
 static void
 xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile,
 		       struct iosys_map *map, void *data,
@@ -880,13 +969,19 @@ static void xe_pt_cancel_bind(struct xe_vma *vma,
 	}
 }
 
+#define XE_INVALID_VMA	((struct xe_vma *)(0xdeaddeadull))
+
 static void xe_pt_commit_prepare_locks_assert(struct xe_vma *vma)
 {
-	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_vm *vm;
+
+	if (vma == XE_INVALID_VMA)
+		return;
 
+	vm = xe_vma_vm(vma);
 	lockdep_assert_held(&vm->lock);
 
-	if (!xe_vma_is_userptr(vma) && !xe_vma_is_null(vma))
+	if (!xe_vma_has_no_bo(vma))
 		dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv);
 
 	xe_vm_assert_held(vm);
@@ -894,8 +989,12 @@ static void xe_pt_commit_prepare_locks_assert(struct xe_vma *vma)
 
 static void xe_pt_commit_locks_assert(struct xe_vma *vma)
 {
-	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_vm *vm;
+
+	if (vma == XE_INVALID_VMA)
+		return;
 
+	vm = xe_vma_vm(vma);
 	xe_pt_commit_prepare_locks_assert(vma);
 
 	if (xe_vma_is_userptr(vma))
@@ -923,7 +1022,8 @@ static void xe_pt_commit(struct xe_vma *vma,
 			int j_ = j + entries[i].ofs;
 
 			pt_dir->children[j_] = pt_dir->staging[j_];
-			xe_pt_destroy(oldpte, xe_vma_vm(vma)->flags, deferred);
+			xe_pt_destroy(oldpte, (vma == XE_INVALID_VMA) ? 0 :
+				      xe_vma_vm(vma)->flags, deferred);
 		}
 	}
 }
@@ -1002,12 +1102,13 @@ static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries,
 
 static int
 xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
+		   struct xe_svm_range *range,
 		   struct xe_vm_pgtable_update *entries, u32 *num_entries)
 {
 	int err;
 
 	*num_entries = 0;
-	err = xe_pt_stage_bind(tile, vma, entries, num_entries);
+	err = xe_pt_stage_bind(tile, vma, range, entries, num_entries);
 	if (!err)
 		xe_tile_assert(tile, *num_entries);
 
@@ -1090,6 +1191,11 @@ static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op,
 {
 	int err = 0;
 
+	/*
+	 * No need to check for is_cpu_addr_mirror here as vma_add_deps is a
+	 * NOP if VMA is_cpu_addr_mirror
+	 */
+
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
 		if (!op->map.immediate && xe_vm_in_fault_mode(vm))
@@ -1108,6 +1214,8 @@ static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op,
 	case DRM_GPUVA_OP_PREFETCH:
 		err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job);
 		break;
+	case DRM_GPUVA_OP_DRIVER:
+		break;
 	default:
 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 	}
@@ -1312,6 +1420,40 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
 	return err;
 }
 
+static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+	struct xe_vm *vm = pt_update->vops->vm;
+	struct xe_vma_ops *vops = pt_update->vops;
+	struct xe_vma_op *op;
+	int err;
+
+	err = xe_pt_pre_commit(pt_update);
+	if (err)
+		return err;
+
+	xe_svm_notifier_lock(vm);
+
+	list_for_each_entry(op, &vops->list, link) {
+		struct xe_svm_range *range = op->map_range.range;
+
+		if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
+			continue;
+
+		xe_svm_range_debug(range, "PRE-COMMIT");
+
+		xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
+		xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
+
+		if (!xe_svm_range_pages_valid(range)) {
+			xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
+			xe_svm_notifier_unlock(vm);
+			return -EAGAIN;
+		}
+	}
+
+	return 0;
+}
+
 struct invalidation_fence {
 	struct xe_gt_tlb_invalidation_fence base;
 	struct xe_gt *gt;
@@ -1497,7 +1639,9 @@ static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
  * xe_pt_stage_unbind() - Build page-table update structures for an unbind
  * operation
  * @tile: The tile we're unbinding for.
+ * @vm: The vm
  * @vma: The vma we're unbinding.
+ * @range: The range we're unbinding.
  * @entries: Caller-provided storage for the update structures.
  *
  * Builds page-table update structures for an unbind operation. The function
@@ -1507,9 +1651,14 @@ static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
  *
 * Return: The number of entries used.
 */
-static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma,
+static unsigned int xe_pt_stage_unbind(struct xe_tile *tile,
+				       struct xe_vm *vm,
+				       struct xe_vma *vma,
+				       struct xe_svm_range *range,
 				       struct xe_vm_pgtable_update *entries)
 {
+	u64 start = range ? range->base.itree.start : xe_vma_start(vma);
+	u64 end = range ? range->base.itree.last + 1 : xe_vma_end(vma);
 	struct xe_pt_stage_unbind_walk xe_walk = {
 		.base = {
 			.ops = &xe_pt_stage_unbind_ops,
@@ -1518,14 +1667,14 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma,
 			.staging = true,
 		},
 		.tile = tile,
-		.modified_start = xe_vma_start(vma),
-		.modified_end = xe_vma_end(vma),
+		.modified_start = start,
+		.modified_end = end,
 		.wupd.entries = entries,
 	};
-	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+	struct xe_pt *pt = vm->pt_root[tile->id];
 
-	(void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma),
-				xe_vma_end(vma), &xe_walk.base);
+	(void)xe_pt_walk_shared(&pt->base, pt->level, start, end,
+				&xe_walk.base);
 
 	return xe_walk.wupd.num_used_entries;
 }
@@ -1605,12 +1754,12 @@ xe_pt_commit_prepare_unbind(struct xe_vma *vma,
 
 static void
 xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops,
-				 struct xe_vma *vma)
+				 u64 start, u64 end)
 {
+	u64 last;
 	u32 current_op = pt_update_ops->current_op;
 	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
 	int i, level = 0;
-	u64 start, last;
 
 	for (i = 0; i < pt_op->num_entries; i++) {
 		const struct xe_vm_pgtable_update *entry = &pt_op->entries[i];
@@ -1620,8 +1769,8 @@ xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops,
 	}
 
 	/* Greedy (non-optimal) calculation but simple */
-	start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level));
-	last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1;
+	start = ALIGN_DOWN(start, 0x1ull << xe_pt_shift(level));
+	last = ALIGN(end, 0x1ull << xe_pt_shift(level)) - 1;
 
 	if (start < pt_update_ops->start)
 		pt_update_ops->start = start;
@@ -1648,6 +1797,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
 	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
 	int err;
 
+	xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
 	xe_bo_assert_held(xe_vma_bo(vma));
 
 	vm_dbg(&xe_vma_vm(vma)->xe->drm,
@@ -1662,7 +1812,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
 	if (err)
 		return err;
 
-	err = xe_pt_prepare_bind(tile, vma, pt_op->entries,
+	err = xe_pt_prepare_bind(tile, vma, NULL, pt_op->entries,
 				 &pt_op->num_entries);
 	if (!err) {
 		xe_tile_assert(tile, pt_op->num_entries <=
@@ -1670,7 +1820,9 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
 		xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
 					pt_op->num_entries, true);
 
-		xe_pt_update_ops_rfence_interval(pt_update_ops, vma);
+		xe_pt_update_ops_rfence_interval(pt_update_ops,
+						 xe_vma_start(vma),
+						 xe_vma_end(vma));
 		++pt_update_ops->current_op;
 		pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
 
@@ -1704,6 +1856,48 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
 	return err;
 }
 
+static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile,
+			      struct xe_vm_pgtable_update_ops *pt_update_ops,
+			      struct xe_vma *vma, struct xe_svm_range *range)
+{
+	u32 current_op = pt_update_ops->current_op;
+	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
+	int err;
+
+	xe_tile_assert(tile, xe_vma_is_cpu_addr_mirror(vma));
+
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "Preparing bind, with range [%lx...%lx)\n",
+	       range->base.itree.start, range->base.itree.last);
+
+	pt_op->vma = NULL;
+	pt_op->bind = true;
+	pt_op->rebind = BIT(tile->id) & range->tile_present;
+
+	err = xe_pt_prepare_bind(tile, vma, range, pt_op->entries,
+				 &pt_op->num_entries);
+	if (!err) {
+		xe_tile_assert(tile, pt_op->num_entries <=
+			       ARRAY_SIZE(pt_op->entries));
+		xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
+					pt_op->num_entries, true);
+
+		xe_pt_update_ops_rfence_interval(pt_update_ops,
+						 range->base.itree.start,
+						 range->base.itree.last + 1);
+		++pt_update_ops->current_op;
+		pt_update_ops->needs_svm_lock = true;
+
+		pt_op->vma = vma;
+		xe_pt_commit_prepare_bind(vma, pt_op->entries,
+					  pt_op->num_entries, pt_op->rebind);
+	} else {
+		xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries);
+	}
+
+	return err;
+}
+
 static int unbind_op_prepare(struct xe_tile *tile,
 			     struct xe_vm_pgtable_update_ops *pt_update_ops,
 			     struct xe_vma *vma)
@@ -1715,19 +1909,13 @@ static int unbind_op_prepare(struct xe_tile *tile,
 	if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id)))
 		return 0;
 
+	xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
 	xe_bo_assert_held(xe_vma_bo(vma));
 
 	vm_dbg(&xe_vma_vm(vma)->xe->drm,
 	       "Preparing unbind, with range [%llx...%llx)\n",
 	       xe_vma_start(vma), xe_vma_end(vma) - 1);
 
-	/*
-	 * Wait for invalidation to complete. Can corrupt internal page table
-	 * state if an invalidation is running while preparing an unbind.
-	 */
-	if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma)))
-		mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier);
-
 	pt_op->vma = vma;
 	pt_op->bind = false;
 	pt_op->rebind = false;
@@ -1736,11 +1924,13 @@ static int unbind_op_prepare(struct xe_tile *tile,
 	if (err)
 		return err;
 
-	pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries);
+	pt_op->num_entries = xe_pt_stage_unbind(tile, xe_vma_vm(vma),
+						vma, NULL, pt_op->entries);
 
 	xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
 				pt_op->num_entries, false);
-	xe_pt_update_ops_rfence_interval(pt_update_ops, vma);
+	xe_pt_update_ops_rfence_interval(pt_update_ops, xe_vma_start(vma),
+					 xe_vma_end(vma));
 	++pt_update_ops->current_op;
 	pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
 	pt_update_ops->needs_invalidation = true;
@@ -1750,6 +1940,42 @@ static int unbind_op_prepare(struct xe_tile *tile,
 	return 0;
 }
 
+static int unbind_range_prepare(struct xe_vm *vm,
+				struct xe_tile *tile,
+				struct xe_vm_pgtable_update_ops *pt_update_ops,
+				struct xe_svm_range *range)
+{
+	u32 current_op = pt_update_ops->current_op;
+	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
+
+	if (!(range->tile_present & BIT(tile->id)))
+		return 0;
+
+	vm_dbg(&vm->xe->drm,
+	       "Preparing unbind, with range [%lx...%lx)\n",
+	       range->base.itree.start, range->base.itree.last);
+
+	pt_op->vma = XE_INVALID_VMA;
+	pt_op->bind = false;
+	pt_op->rebind = false;
+
+	pt_op->num_entries = xe_pt_stage_unbind(tile, vm, NULL, range,
+						pt_op->entries);
+
+	xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
+				pt_op->num_entries, false);
+	xe_pt_update_ops_rfence_interval(pt_update_ops, range->base.itree.start,
+					 range->base.itree.last + 1);
+	++pt_update_ops->current_op;
+	pt_update_ops->needs_svm_lock = true;
+	pt_update_ops->needs_invalidation = true;
+
+	xe_pt_commit_prepare_unbind(XE_INVALID_VMA, pt_op->entries,
+				    pt_op->num_entries);
+
+	return 0;
+}
+
 static int op_prepare(struct xe_vm *vm,
 		      struct xe_tile *tile,
 		      struct xe_vm_pgtable_update_ops *pt_update_ops,
@@ -1761,15 +1987,21 @@ static int op_prepare(struct xe_vm *vm,
 
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
-		if (!op->map.immediate && xe_vm_in_fault_mode(vm))
+		if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) ||
+		    op->map.is_cpu_addr_mirror)
 			break;
 
 		err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma);
 		pt_update_ops->wait_vm_kernel = true;
 		break;
 	case DRM_GPUVA_OP_REMAP:
-		err = unbind_op_prepare(tile, pt_update_ops,
-					gpuva_to_vma(op->base.remap.unmap->va));
+	{
+		struct xe_vma *old = gpuva_to_vma(op->base.remap.unmap->va);
+
+		if (xe_vma_is_cpu_addr_mirror(old))
+			break;
+
+		err = unbind_op_prepare(tile, pt_update_ops, old);
 
 		if (!err && op->remap.prev) {
 			err = bind_op_prepare(vm, tile, pt_update_ops,
@@ -1782,15 +2014,40 @@ static int op_prepare(struct xe_vm *vm,
 			pt_update_ops->wait_vm_bookkeep = true;
 		}
 		break;
+	}
 	case DRM_GPUVA_OP_UNMAP:
-		err = unbind_op_prepare(tile, pt_update_ops,
-					gpuva_to_vma(op->base.unmap.va));
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
+
+		if (xe_vma_is_cpu_addr_mirror(vma))
+			break;
+
+		err = unbind_op_prepare(tile, pt_update_ops, vma);
 		break;
+	}
 	case DRM_GPUVA_OP_PREFETCH:
-		err = bind_op_prepare(vm, tile, pt_update_ops,
-				      gpuva_to_vma(op->base.prefetch.va));
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+
+		if (xe_vma_is_cpu_addr_mirror(vma))
+			break;
+
+		err = bind_op_prepare(vm, tile, pt_update_ops, vma);
 		pt_update_ops->wait_vm_kernel = true;
 		break;
+	}
+	case DRM_GPUVA_OP_DRIVER:
+		if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
+			xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
+
+			err = bind_range_prepare(vm, tile, pt_update_ops,
+						 op->map_range.vma,
+						 op->map_range.range);
+		} else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) {
+			err = unbind_range_prepare(vm, tile, pt_update_ops,
						   op->unmap_range.range);
+		}
+		break;
 	default:
 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 	}
@@ -1860,6 +2117,8 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
 			   struct xe_vma *vma, struct dma_fence *fence,
 			   struct dma_fence *fence2)
 {
+	xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
+
 	if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
 		dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
 				   pt_update_ops->wait_vm_bookkeep ?
@@ -1893,6 +2152,8 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
 			     struct xe_vma *vma, struct dma_fence *fence,
 			     struct dma_fence *fence2)
 {
+	xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
+
 	if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
 		dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
 				   pt_update_ops->wait_vm_bookkeep ?
@@ -1927,16 +2188,21 @@ static void op_commit(struct xe_vm *vm,
 
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
-		if (!op->map.immediate && xe_vm_in_fault_mode(vm))
+		if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) ||
+		    op->map.is_cpu_addr_mirror)
 			break;
 
 		bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence,
 			       fence2);
 		break;
 	case DRM_GPUVA_OP_REMAP:
-		unbind_op_commit(vm, tile, pt_update_ops,
-				 gpuva_to_vma(op->base.remap.unmap->va), fence,
-				 fence2);
+	{
+		struct xe_vma *old = gpuva_to_vma(op->base.remap.unmap->va);
+
+		if (xe_vma_is_cpu_addr_mirror(old))
+			break;
+
+		unbind_op_commit(vm, tile, pt_update_ops, old, fence, fence2);
 
 		if (op->remap.prev)
 			bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
@@ -1945,14 +2211,35 @@ static void op_commit(struct xe_vm *vm,
 			bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
 				       fence, fence2);
 		break;
+	}
 	case DRM_GPUVA_OP_UNMAP:
-		unbind_op_commit(vm, tile, pt_update_ops,
-				 gpuva_to_vma(op->base.unmap.va), fence, fence2);
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
+
+		if (!xe_vma_is_cpu_addr_mirror(vma))
+			unbind_op_commit(vm, tile, pt_update_ops, vma, fence,
+					 fence2);
 		break;
+	}
 	case DRM_GPUVA_OP_PREFETCH:
-		bind_op_commit(vm, tile, pt_update_ops,
-			       gpuva_to_vma(op->base.prefetch.va), fence, fence2);
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+
+		if (!xe_vma_is_cpu_addr_mirror(vma))
+			bind_op_commit(vm, tile, pt_update_ops, vma, fence,
+				       fence2);
+		break;
+	}
+	case DRM_GPUVA_OP_DRIVER:
+	{
+		if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
+			op->map_range.range->tile_present |= BIT(tile->id);
+			op->map_range.range->tile_invalidated &= ~BIT(tile->id);
+		} else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) {
+			op->unmap_range.range->tile_present &= ~BIT(tile->id);
+		}
 		break;
+	}
 	default:
 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 	}
@@ -1970,6 +2257,12 @@ static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
 	.pre_commit = xe_pt_userptr_pre_commit,
 };
 
+static const struct xe_migrate_pt_update_ops svm_migrate_ops = {
+	.populate = xe_vm_populate_pgtable,
+	.clear = xe_migrate_clear_pgtable_callback,
+	.pre_commit = xe_pt_svm_pre_commit,
+};
+
 /**
  * xe_pt_update_ops_run() - Run PT update operations
  * @tile: Tile of PT update operations
@@ -1995,7 +2288,9 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
 	struct xe_vma_op *op;
 	int err = 0, i;
 	struct xe_migrate_pt_update update = {
-		.ops = pt_update_ops->needs_userptr_lock ?
+		.ops = pt_update_ops->needs_svm_lock ?
+			&svm_migrate_ops :
+			pt_update_ops->needs_userptr_lock ?
 			&userptr_migrate_ops :
 			&migrate_ops,
 		.vops = vops,
@@ -2116,6 +2411,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
 				   &ifence->base.base, &mfence->base.base);
 	}
 
+	if (pt_update_ops->needs_svm_lock)
+		xe_svm_notifier_unlock(vm);
 	if (pt_update_ops->needs_userptr_lock)
 		up_read(&vm->userptr.notifier_lock);
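For context on the new error flow: both xe_pt_stage_bind() and
xe_pt_svm_pre_commit() above return -EAGAIN when an SVM range was
invalidated between staging and commit, and the caller is expected to
refault and retry. A sketch of that caller-side shape, with hypothetical
helper names standing in for the real xe_svm fault path:
```c
/* Hypothetical stand-ins for the real xe_svm fault-path entry points. */
int get_and_map_pages(struct xe_vm *vm, struct xe_svm_range *range);
int stage_and_commit_bind(struct xe_vm *vm, struct xe_svm_range *range);

static int bind_range_with_retry(struct xe_vm *vm, struct xe_svm_range *range)
{
	int err;

	do {
		/* Fault in and DMA-map the CPU pages backing the range. */
		err = get_and_map_pages(vm, range);
		if (err)
			break;

		/*
		 * Stage and commit the page-table update. The pre-commit hook
		 * re-validates the range under the SVM notifier lock and
		 * returns -EAGAIN if an invalidation raced with staging.
		 */
		err = stage_and_commit_bind(vm, range);
	} while (err == -EAGAIN);

	return err;
}
```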