The new VM_BIND interface only supported 4K pages. This was problematic, as it left performance on the table because GPUs don't have sophisticated TLB hardware. Additionally, it meant that we couldn't enable compression on the userspace side, as the HW supports compression only on larger page sizes, which was a major (>50% in some cases) performance loss. This patchset sets out to add support for larger page sizes and also to enable compression when userspace binds with the corresponding PTE kinds and alignment. Ben Skeggs (2): drm/nouveau/mmu/gp100: Remove unused/broken support for compression drm/nouveau/mmu/tu102: Add support for compressed kinds Mary Guillemard (2): drm/nouveau/uvmm: Prepare for larger pages drm/nouveau/uvmm: Allow larger pages Mohamed Ahmed (1): drm/nouveau/drm: Bump the driver version to 1.4.1 to report new features drivers/gpu/drm/nouveau/nouveau_drv.h | 4 +- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 106 ++++++++++++++---- drivers/gpu/drm/nouveau/nouveau_uvmm.h | 1 + .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 69 +++++++----- .../drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c | 4 +- 5 files changed, 131 insertions(+), 53 deletions(-) -- 2.51.0
From: Mary Guillemard <mary@mary.zone> Currently memory allocated by the VM_BIND uAPI can only have a granularity matching PAGE_SIZE (4KiB in the common case). To allow better memory management and big (64KiB) and huge (2MiB) pages later in the series, we are now passing the page shift all around the internals of UVMM. Co-developed-by: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com> Signed-off-by: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com> Signed-off-by: Mary Guillemard <mary@mary.zone> --- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 55 ++++++++++++++++---------- drivers/gpu/drm/nouveau/nouveau_uvmm.h | 1 + 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_vmm_sparse_unref(struct nouveau_uvmm *uvmm, static int nouveau_uvmm_vmm_get(struct nouveau_uvmm *uvmm, - u64 addr, u64 range) + u64 addr, u64 range, u8 page_shift) { struct nvif_vmm *vmm = &uvmm->vmm.vmm; - return nvif_vmm_raw_get(vmm, addr, range, PAGE_SHIFT); + return nvif_vmm_raw_get(vmm, addr, range, page_shift); } static int nouveau_uvmm_vmm_put(struct nouveau_uvmm *uvmm, - u64 addr, u64 range) + u64 addr, u64 range, u8 page_shift) { struct nvif_vmm *vmm = &uvmm->vmm.vmm; - return nvif_vmm_raw_put(vmm, addr, range, PAGE_SHIFT); + return nvif_vmm_raw_put(vmm, addr, range, page_shift); } static int nouveau_uvmm_vmm_unmap(struct nouveau_uvmm *uvmm, - u64 addr, u64 range, bool sparse) + u64 addr, u64 range, u8 page_shift, bool sparse) { struct nvif_vmm *vmm = &uvmm->vmm.vmm; - return nvif_vmm_raw_unmap(vmm, addr, range, PAGE_SHIFT, sparse); + return nvif_vmm_raw_unmap(vmm, addr, range, page_shift, sparse); } static int nouveau_uvmm_vmm_map(struct nouveau_uvmm *uvmm, - u64 addr, u64 range, + u64 addr, u64 range, u8 page_shift, u64 bo_offset, u8 kind, struct nouveau_mem *mem) { @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_vmm_map(struct nouveau_uvmm *uvmm, return -ENOSYS; } - return nvif_vmm_raw_map(vmm, addr, range, PAGE_SHIFT, + return nvif_vmm_raw_map(vmm, addr, range, page_shift, &args, argc, &mem->mem, bo_offset); } @@ -XXX,XX +XXX,XX @@ nouveau_uvma_vmm_put(struct nouveau_uvma *uvma) { u64 addr = uvma->va.va.addr; u64 range = uvma->va.va.range; + u8 page_shift = uvma->page_shift; - return nouveau_uvmm_vmm_put(to_uvmm(uvma), addr, range); + return nouveau_uvmm_vmm_put(to_uvmm(uvma), addr, range, page_shift); } static int @@ -XXX,XX +XXX,XX @@ nouveau_uvma_map(struct nouveau_uvma *uvma, u64 addr = uvma->va.va.addr; u64 offset = uvma->va.gem.offset; u64 range = uvma->va.va.range; + u8 page_shift = uvma->page_shift; return nouveau_uvmm_vmm_map(to_uvmm(uvma), addr, range, - offset, uvma->kind, mem); + page_shift, offset, uvma->kind, + mem); } static int @@ -XXX,XX +XXX,XX @@ nouveau_uvma_unmap(struct nouveau_uvma *uvma) { u64 addr = uvma->va.va.addr; u64 range = uvma->va.va.range; + u8 page_shift = uvma->page_shift; bool sparse = !!uvma->region; if (drm_gpuva_invalidated(&uvma->va)) return 0; - return nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, sparse); + return nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, page_shift, sparse); } static int @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, if (vmm_get_range) nouveau_uvmm_vmm_put(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, + PAGE_SHIFT); break; } case DRM_GPUVA_OP_REMAP: { @@ -XXX,XX +XXX,XX @@ 
nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, u64 ustart = va->va.addr; u64 urange = va->va.range; u64 uend = ustart + urange; + u8 page_shift = uvma_from_va(va)->page_shift; /* Nothing to do for mappings we merge with. */ if (uend == vmm_get_start || @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, u64 vmm_get_range = ustart - vmm_get_start; nouveau_uvmm_vmm_put(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, + page_shift); } vmm_get_start = uend; break; @@ -XXX,XX +XXX,XX @@ static int op_map_prepare(struct nouveau_uvmm *uvmm, struct nouveau_uvma **puvma, struct drm_gpuva_op_map *op, - struct uvmm_map_args *args) + struct uvmm_map_args *args, + u8 page_shift) { struct nouveau_uvma *uvma; int ret; @@ -XXX,XX +XXX,XX @@ op_map_prepare(struct nouveau_uvmm *uvmm, uvma->region = args->region; uvma->kind = args->kind; + uvma->page_shift = page_shift; drm_gpuva_map(&uvmm->base, &uvma->va, op); @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, case DRM_GPUVA_OP_MAP: { u64 vmm_get_range = vmm_get_end - vmm_get_start; - ret = op_map_prepare(uvmm, &new->map, &op->map, args); + ret = op_map_prepare(uvmm, &new->map, &op->map, args, PAGE_SHIFT); if (ret) goto unwind; if (vmm_get_range) { ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, + new->map->page_shift); if (ret) { op_map_prepare_unwind(new->map); goto unwind; @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, if (r->prev) { ret = op_map_prepare(uvmm, &new->prev, r->prev, - &remap_args); + &remap_args, uvma_from_va(va)->page_shift); if (ret) goto unwind; @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, if (r->next) { ret = op_map_prepare(uvmm, &new->next, r->next, - &remap_args); + &remap_args, uvma_from_va(va)->page_shift); if (ret) { if (r->prev) op_map_prepare_unwind(new->prev); @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, u64 ustart = va->va.addr; u64 urange = va->va.range; u64 uend = ustart + urange; + u8 page_shift = uvma_from_va(va)->page_shift; op_unmap_prepare(u); @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, u64 vmm_get_range = ustart - vmm_get_start; ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, page_shift); if (ret) { op_unmap_prepare_unwind(va); goto unwind; @@ -XXX,XX +XXX,XX @@ op_unmap_range(struct drm_gpuva_op_unmap *u, u64 addr, u64 range) { struct nouveau_uvma *uvma = uvma_from_va(u->va); + u8 page_shift = uvma->page_shift; bool sparse = !!uvma->region; if (!drm_gpuva_invalidated(u->va)) - nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, sparse); + nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, page_shift, sparse); } static void @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_cleanup(struct nouveau_uvmm *uvmm, struct drm_gpuva_op_map *n = r->next; struct drm_gpuva *va = r->unmap->va; struct nouveau_uvma *uvma = uvma_from_va(va); + u8 page_shift = uvma->page_shift; if (unmap) { u64 addr = va->va.addr; @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_cleanup(struct nouveau_uvmm *uvmm, if (n) end = n->va.addr; - nouveau_uvmm_vmm_put(uvmm, addr, end - addr); + nouveau_uvmm_vmm_put(uvmm, addr, end - addr, page_shift); } nouveau_uvma_gem_put(uvma); diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h @@ -XXX,XX +XXX,XX @@ struct nouveau_uvma { 
struct nouveau_uvma_region *region; u8 kind; + u8 page_shift; }; #define uvmm_from_gpuvm(x) container_of((x), struct nouveau_uvmm, base) -- 2.51.0
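To make the win from larger pages concrete, here is a rough standalone sketch (not nouveau code; the mapping size is made up) of how many PTEs the GPU MMU must populate and walk for a 2MiB mapping at each page shift this series cares about:

#include <stdio.h>

int main(void)
{
	const unsigned long long range = 2ULL << 20;	/* a 2MiB mapping */
	const unsigned shifts[] = { 12, 16, 21 };	/* 4KiB, 64KiB, 2MiB */

	/* One PTE per page: larger shifts mean fewer PTEs and fewer TLB
	 * entries, which matters on GPUs with shallow TLB hardware. */
	for (unsigned i = 0; i < sizeof(shifts) / sizeof(shifts[0]); i++)
		printf("shift %2u: %llu PTEs\n", shifts[i],
		       range >> shifts[i]);
	return 0;
}

For the same 2MiB mapping this prints 512, 32, and 1 PTEs respectively, which is why threading the page shift through UVMM instead of hardcoding PAGE_SHIFT is worthwhile.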
From: Mary Guillemard <mary@mary.zone> Now that everything in UVMM knows about the variable page shift, we can select larger values. The proposed approach relies on nouveau_bo::page unless it would cause alignment issues (in which case we fall back to searching for an appropriate shift). Co-developed-by: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com> Signed-off-by: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com> Signed-off-by: Mary Guillemard <mary@mary.zone> --- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 55 +++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -XXX,XX +XXX,XX @@ op_unmap_prepare_unwind(struct drm_gpuva *va) drm_gpuva_insert(va->vm, va); } +static bool +op_map_aligned_to_page_shift(const struct drm_gpuva_op_map *op, u8 page_shift) +{ + u64 page_size = 1ULL << page_shift; + + return op->va.addr % page_size == 0 && op->va.range % page_size == 0 && + op->gem.offset % page_size == 0; +} + +static u8 +select_page_shift(struct nouveau_uvmm *uvmm, struct drm_gpuva_op_map *op) +{ + struct nouveau_bo *nvbo = nouveau_gem_object(op->gem.obj); + + if (nvbo) { + /* If the BO preferred page shift already fits, use it. */ + if (op_map_aligned_to_page_shift(op, nvbo->page)) + return nvbo->page; + + struct nouveau_mem *mem = nouveau_mem(nvbo->bo.resource); + struct nvif_vmm *vmm = &uvmm->vmm.vmm; + int i; + + /* Otherwise let's find a granularity that will fit. */ + for (i = 0; i < vmm->page_nr; i++) { + /* Ignore anything that is bigger or identical to the BO preference. */ + if (vmm->page[i].shift >= nvbo->page) + continue; + + /* Skip incompatible domains. */ + if ((mem->mem.type & NVIF_MEM_VRAM) && !vmm->page[i].vram) + continue; + if ((mem->mem.type & NVIF_MEM_HOST) && + (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT)) + continue; + + /* If it fits, return the proposed shift. */ + if (op_map_aligned_to_page_shift(op, vmm->page[i].shift)) + return vmm->page[i].shift; + } + + /* If we get here then nothing can reconcile the requirements. This should never + * happen. + */ + WARN_ON(1); + } + + return PAGE_SHIFT; +} + static void nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, struct nouveau_uvma_prealloc *new, @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, if (vmm_get_range) nouveau_uvmm_vmm_put(uvmm, vmm_get_start, vmm_get_range, - PAGE_SHIFT); + select_page_shift(uvmm, &op->map)); break; } case DRM_GPUVA_OP_REMAP: { @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, case DRM_GPUVA_OP_MAP: { u64 vmm_get_range = vmm_get_end - vmm_get_start; - ret = op_map_prepare(uvmm, &new->map, &op->map, args, PAGE_SHIFT); + ret = op_map_prepare(uvmm, &new->map, &op->map, args, + select_page_shift(uvmm, &op->map)); if (ret) goto unwind; -- 2.51.0
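As a worked example of the selection logic above — a simplified, standalone sketch that keeps only the alignment test and drops the VRAM/host domain checks, with made-up addresses:

#include <stdbool.h>
#include <stdio.h>

/* Mirrors op_map_aligned_to_page_shift(): address, range, and BO offset
 * must all be multiples of the candidate page size. */
static bool aligned_to_shift(unsigned long long addr, unsigned long long range,
			     unsigned long long offset, unsigned shift)
{
	unsigned long long page_size = 1ULL << shift;

	return addr % page_size == 0 && range % page_size == 0 &&
	       offset % page_size == 0;
}

int main(void)
{
	/* Hypothetical bind: 2MiB-aligned address, 192KiB range,
	 * 64KiB BO offset. */
	unsigned long long addr = 0x200000, range = 0x30000, offset = 0x10000;
	const unsigned shifts[] = { 21, 16, 12 };	/* largest first */

	for (unsigned i = 0; i < 3; i++) {
		if (aligned_to_shift(addr, range, offset, shifts[i])) {
			printf("selected page shift: %u\n", shifts[i]);
			break;
		}
	}
	return 0;
}

The 192KiB range rules out 2MiB pages, so the search settles on the 64KiB shift — roughly the outcome select_page_shift() reaches when the BO prefers a large page but the mapping's geometry only allows a smaller one.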
From: Ben Skeggs <bskeggs@nvidia.com> From GP100 onwards it's not possible to initialise comptag RAM without PMU firmware, which nouveau has no support for. As such, this code is essentially a no-op and will always revert to the equivalent non-compressed kind due to comptag allocation failure. It's also broken for the needs of VM_BIND/Vulkan. Remove the code entirely to make way for supporting compression on GPUs that support GSP-RM. Signed-off-by: Ben Skeggs <bskeggs@nvidia.com> --- .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 39 ++----------------- .../drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c | 4 +- 2 files changed, 6 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -XXX,XX +XXX,XX @@ */ #include "vmm.h" -#include <core/client.h> #include <subdev/fb.h> -#include <subdev/ltc.h> #include <subdev/timer.h> #include <engine/gr.h> @@ -XXX,XX +XXX,XX @@ gp100_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, { u64 data = (addr >> 4) | map->type; - map->type += ptes * map->ctag; - while (ptes--) { VMM_WO064(pt, vmm, ptei++ * 8, data); data += map->next; @@ -XXX,XX +XXX,XX @@ gp100_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, while (ptes--) { const u64 data = (*map->dma++ >> 4) | map->type; VMM_WO064(pt, vmm, ptei++ * 8, data); - map->type += map->ctag; } nvkm_done(pt->memory); return; @@ -XXX,XX +XXX,XX @@ gp100_vmm_pd0_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, { u64 data = (addr >> 4) | map->type; - map->type += ptes * map->ctag; - while (ptes--) { VMM_WO128(pt, vmm, ptei++ * 0x10, data, 0ULL); data += map->next; @@ -XXX,XX +XXX,XX @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, struct gp100_vmm_map_vn vn; struct gp100_vmm_map_v0 v0; } *args = argv; - struct nvkm_device *device = vmm->mmu->subdev.device; - struct nvkm_memory *memory = map->memory; u8 kind, kind_inv, priv, ro, vol; int kindn, aper, ret = -ENOSYS; const u8 *kindm; @@ -XXX,XX +XXX,XX @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, } if (kindm[kind] != kind) { - u64 tags = nvkm_memory_size(memory) >> 16; - if (aper != 0 || !(page->type & NVKM_VMM_PAGE_COMP)) { - VMM_DEBUG(vmm, "comp %d %02x", aper, page->type); - return -EINVAL; - } - - if (!map->no_comp) { - ret = nvkm_memory_tags_get(memory, device, tags, - nvkm_ltc_tags_clear, - &map->tags); - if (ret) { - VMM_DEBUG(vmm, "comp %d", ret); - return ret; - } - } - - if (!map->no_comp && map->tags->mn) { - tags = map->tags->mn->offset + (map->offset >> 16); - map->ctag |= ((1ULL << page->shift) >> 16) << 36; - map->type |= tags << 36; - map->next |= map->ctag; - } else { - kind = kindm[kind]; - } + /* Revert to non-compressed kind. 
*/ + kind = kindm[kind]; } map->type |= BIT(0); @@ -XXX,XX +XXX,XX @@ gp100_vmm = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, { 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx }, - { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxC }, - { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxC }, + { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxx }, + { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxx }, { 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SVHx }, {} } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c @@ -XXX,XX +XXX,XX @@ gp10b_vmm = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, { 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx }, - { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SxHC }, - { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SxHC }, + { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SxHx }, + { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SxHx }, { 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SxHx }, {} } -- 2.51.0
From: Ben Skeggs <bskeggs@nvidia.com> Allow compressed PTE kinds to be written into PTEs when GSP-RM is present, rather than reverting to their non-compressed versions. Signed-off-by: Ben Skeggs <bskeggs@nvidia.com> --- .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 46 ++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -XXX,XX +XXX,XX @@ gp100_vmm_pgt_pfn(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, nvkm_done(pt->memory); } +static inline u64 +gp100_vmm_comptag_nr(u64 size) +{ + return size >> 16; /* One comptag per 64KiB VRAM. */ +} + +static inline u64 +gp100_vmm_pte_comptagline_base(u64 addr) +{ + /* RM allocates enough comptags for all of VRAM, so use a 1:1 mapping. */ + return (1 + gp100_vmm_comptag_nr(addr)) << 36; /* NV_MMU_VER2_PTE_COMPTAGLINE */ +} + +static inline u64 +gp100_vmm_pte_comptagline_incr(u32 page_size) +{ + return gp100_vmm_comptag_nr(page_size) << 36; /* NV_MMU_VER2_PTE_COMPTAGLINE */ +} + static inline void gp100_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) { u64 data = (addr >> 4) | map->type; + if (map->ctag) + data |= gp100_vmm_pte_comptagline_base(addr); + while (ptes--) { VMM_WO064(pt, vmm, ptei++ * 8, data); data += map->next; @@ -XXX,XX +XXX,XX @@ gp100_vmm_pd0_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, { u64 data = (addr >> 4) | map->type; + if (map->ctag) + data |= gp100_vmm_pte_comptagline_base(addr); + while (ptes--) { VMM_WO128(pt, vmm, ptei++ * 0x10, data, 0ULL); data += map->next; @@ -XXX,XX +XXX,XX @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, return -EINVAL; } + /* Handle compression. */ if (kindm[kind] != kind) { - /* Revert to non-compressed kind. */ - kind = kindm[kind]; + struct nvkm_device *device = vmm->mmu->subdev.device; + + /* Compression is only supported when using GSP-RM, as + * PMU firmware is required in order to initialise the + * compbit backing store. + */ + if (nvkm_gsp_rm(device->gsp)) { + /* Turing GPUs require PTE_COMPTAGLINE to be filled, + * in addition to specifying a compressed kind. + */ + if (device->card_type < GA100) { + map->ctag = gp100_vmm_pte_comptagline_incr(1 << map->page->shift); + map->next |= map->ctag; + } + } else { + /* Revert to non-compressed kind. */ + kind = kindm[kind]; + } } map->type |= BIT(0); -- 2.51.0
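The comptag arithmetic in this patch is easier to follow with concrete numbers. A minimal standalone sketch (the address is illustrative; the shift by 36 is the PTE_COMPTAGLINE field position noted in the patch comments):

#include <stdio.h>

int main(void)
{
	/* One comptag per 64KiB of VRAM, so an address picks its comptag
	 * line by dropping the low 16 bits (a 1:1 mapping, since RM
	 * allocates enough comptags for all of VRAM). */
	unsigned long long addr = 0x200000;		/* 2MiB into VRAM */
	unsigned long long page_size = 1ULL << 16;	/* one 64KiB page */

	unsigned long long base = (1 + (addr >> 16)) << 36;
	unsigned long long incr = (page_size >> 16) << 36;

	/* base seeds the first PTE's COMPTAGLINE field; incr is folded
	 * into map->next, so each successive 64KiB PTE advances the
	 * comptag line by one. */
	printf("first comptag line: %llu, step per PTE: %llu\n",
	       base >> 36, incr >> 36);
	return 0;
}

This prints 33 and 1: a mapping starting 2MiB into VRAM uses comptag line 33 for its first 64KiB page and consecutive lines for the pages after it.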
This is so that NVK can enable compression for kernels that support it and avoid cases where an older kernel would MMU fault when a newer mesa tries to use compression. Signed-off-by: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com> --- drivers/gpu/drm/nouveau/nouveau_drv.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -XXX,XX +XXX,XX @@ #define DRIVER_MAJOR 1 #define DRIVER_MINOR 4 -#define DRIVER_PATCHLEVEL 0 +#define DRIVER_PATCHLEVEL 1 /* * 1.1.1: @@ -XXX,XX +XXX,XX @@ * programs that get directly linked with NVKM. * 1.3.1: * - implemented limited ABI16/NVIF interop + * 1.4.1: + * - add variable page sizes and compression for Turing+ */ #include <linux/notifier.h> -- 2.51.0
The new VM_BIND interface only supported 4K pages. This was problematic as it left performance on the table because GPUs don't have sophisticated TLB and page walker hardware. Additionally, the HW can only do compression on large (64K) and huge (2M) pages; compression is a major performance booster (>50% in some cases). This patchset sets out to add support for larger page sizes and also enable compression and set the compression tags when userspace binds with the corresponding PTE kinds and alignment. It also increments the nouveau version number, which allows userspace to use compression only when the kernel actually supports both features, and avoids breaking the system if a newer mesa version is paired with an older kernel version. For the associated userspace MR, please see !36450: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36450 - v2: Implement review comments. - v1: Initial implementation. Ben Skeggs (2): drm/nouveau/mmu/gp100: Remove unused/broken support for compression drm/nouveau/mmu/tu102: Add support for compressed kinds Mary Guillemard (2): drm/nouveau/uvmm: Prepare for larger pages drm/nouveau/uvmm: Allow larger pages Mohamed Ahmed (1): drm/nouveau/drm: Bump the driver version to 1.4.1 to report new features drivers/gpu/drm/nouveau/nouveau_drv.h | 4 +- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 71 ++++++++++++++----- drivers/gpu/drm/nouveau/nouveau_uvmm.h | 1 + .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 69 ++++++++++-------- .../drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c | 4 +- 5 files changed, 100 insertions(+), 49 deletions(-) -- 2.51.0
From: Mary Guillemard <mary@mary.zone> Currently memory allocated by the VM_BIND uAPI can only have a granularity matching PAGE_SIZE (4KiB in the common case). To allow better memory management and big (64KiB) and huge (2MiB) pages later in the series, we are now passing the page shift all around the internals of UVMM. Signed-off-by: Mary Guillemard <mary@mary.zone> Co-developed-by: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com> Signed-off-by: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com> --- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 46 ++++++++++++++++---------- drivers/gpu/drm/nouveau/nouveau_uvmm.h | 1 + 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_vmm_sparse_unref(struct nouveau_uvmm *uvmm, static int nouveau_uvmm_vmm_get(struct nouveau_uvmm *uvmm, - u64 addr, u64 range) + u64 addr, u64 range, u8 page_shift) { struct nvif_vmm *vmm = &uvmm->vmm.vmm; - return nvif_vmm_raw_get(vmm, addr, range, PAGE_SHIFT); + return nvif_vmm_raw_get(vmm, addr, range, page_shift); } static int nouveau_uvmm_vmm_put(struct nouveau_uvmm *uvmm, - u64 addr, u64 range) + u64 addr, u64 range, u8 page_shift) { struct nvif_vmm *vmm = &uvmm->vmm.vmm; - return nvif_vmm_raw_put(vmm, addr, range, PAGE_SHIFT); + return nvif_vmm_raw_put(vmm, addr, range, page_shift); } static int nouveau_uvmm_vmm_unmap(struct nouveau_uvmm *uvmm, - u64 addr, u64 range, bool sparse) + u64 addr, u64 range, u8 page_shift, bool sparse) { struct nvif_vmm *vmm = &uvmm->vmm.vmm; - return nvif_vmm_raw_unmap(vmm, addr, range, PAGE_SHIFT, sparse); + return nvif_vmm_raw_unmap(vmm, addr, range, page_shift, sparse); } static int nouveau_uvmm_vmm_map(struct nouveau_uvmm *uvmm, - u64 addr, u64 range, + u64 addr, u64 range, u8 page_shift, u64 bo_offset, u8 kind, struct nouveau_mem *mem) { @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_vmm_map(struct nouveau_uvmm *uvmm, return -ENOSYS; } - return nvif_vmm_raw_map(vmm, addr, range, PAGE_SHIFT, + return nvif_vmm_raw_map(vmm, addr, range, page_shift, &args, argc, &mem->mem, bo_offset); } @@ -XXX,XX +XXX,XX @@ nouveau_uvma_vmm_put(struct nouveau_uvma *uvma) { u64 addr = uvma->va.va.addr; u64 range = uvma->va.va.range; + u8 page_shift = uvma->page_shift; - return nouveau_uvmm_vmm_put(to_uvmm(uvma), addr, range); + return nouveau_uvmm_vmm_put(to_uvmm(uvma), addr, range, page_shift); } static int @@ -XXX,XX +XXX,XX @@ nouveau_uvma_map(struct nouveau_uvma *uvma, u64 addr = uvma->va.va.addr; u64 offset = uvma->va.gem.offset; u64 range = uvma->va.va.range; + u8 page_shift = uvma->page_shift; return nouveau_uvmm_vmm_map(to_uvmm(uvma), addr, range, - offset, uvma->kind, mem); + page_shift, offset, uvma->kind, + mem); } static int @@ -XXX,XX +XXX,XX @@ nouveau_uvma_unmap(struct nouveau_uvma *uvma) { u64 addr = uvma->va.va.addr; u64 range = uvma->va.va.range; + u8 page_shift = uvma->page_shift; bool sparse = !!uvma->region; if (drm_gpuva_invalidated(&uvma->va)) return 0; - return nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, sparse); + return nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, page_shift, sparse); } static int @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, if (vmm_get_range) nouveau_uvmm_vmm_put(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, + PAGE_SHIFT); break; } case DRM_GPUVA_OP_REMAP: { @@ -XXX,XX +XXX,XX @@ 
nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, u64 ustart = va->va.addr; u64 urange = va->va.range; u64 uend = ustart + urange; + u8 page_shift = uvma_from_va(va)->page_shift; /* Nothing to do for mappings we merge with. */ if (uend == vmm_get_start || @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, u64 vmm_get_range = ustart - vmm_get_start; nouveau_uvmm_vmm_put(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, + page_shift); } vmm_get_start = uend; break; @@ -XXX,XX +XXX,XX @@ op_map_prepare(struct nouveau_uvmm *uvmm, uvma->region = args->region; uvma->kind = args->kind; + uvma->page_shift = PAGE_SHIFT; drm_gpuva_map(&uvmm->base, &uvma->va, op); @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, if (vmm_get_range) { ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, + new->map->page_shift); if (ret) { op_map_prepare_unwind(new->map); goto unwind; @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, u64 ustart = va->va.addr; u64 urange = va->va.range; u64 uend = ustart + urange; + u8 page_shift = uvma_from_va(va)->page_shift; op_unmap_prepare(u); @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, u64 vmm_get_range = ustart - vmm_get_start; ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, page_shift); if (ret) { op_unmap_prepare_unwind(va); goto unwind; @@ -XXX,XX +XXX,XX @@ op_unmap_range(struct drm_gpuva_op_unmap *u, u64 addr, u64 range) { struct nouveau_uvma *uvma = uvma_from_va(u->va); + u8 page_shift = uvma->page_shift; bool sparse = !!uvma->region; if (!drm_gpuva_invalidated(u->va)) - nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, sparse); + nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, page_shift, sparse); } static void @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_cleanup(struct nouveau_uvmm *uvmm, struct drm_gpuva_op_map *n = r->next; struct drm_gpuva *va = r->unmap->va; struct nouveau_uvma *uvma = uvma_from_va(va); + u8 page_shift = uvma->page_shift; if (unmap) { u64 addr = va->va.addr; @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_cleanup(struct nouveau_uvmm *uvmm, if (n) end = n->va.addr; - nouveau_uvmm_vmm_put(uvmm, addr, end - addr); + nouveau_uvmm_vmm_put(uvmm, addr, end - addr, page_shift); } nouveau_uvma_gem_put(uvma); diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h @@ -XXX,XX +XXX,XX @@ struct nouveau_uvma { struct nouveau_uvma_region *region; u8 kind; + u8 page_shift; }; #define uvmm_from_gpuvm(x) container_of((x), struct nouveau_uvmm, base) -- 2.51.0
From: Mary Guillemard <mary@mary.zone> Now that everything in UVMM knows about the variable page shift, we can select larger values. The proposed approach relies on nouveau_bo::page unless it would cause alignment issues, in which case we warn and fall back to PAGE_SHIFT. Signed-off-by: Mary Guillemard <mary@mary.zone> Co-developed-by: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com> Signed-off-by: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com> --- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 29 ++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -XXX,XX +XXX,XX @@ op_unmap_prepare_unwind(struct drm_gpuva *va) drm_gpuva_insert(va->vm, va); } +static bool +op_map_aligned_to_page_shift(const struct drm_gpuva_op_map *op, u8 page_shift) +{ + u64 page_size = 1ULL << page_shift; + + return op->va.addr % page_size == 0 && op->va.range % page_size == 0 && + op->gem.offset % page_size == 0; +} + +static u8 +select_page_shift(struct nouveau_uvmm *uvmm, struct drm_gpuva_op_map *op) +{ + struct nouveau_bo *nvbo = nouveau_gem_object(op->gem.obj); + + /* nouveau_bo_fixup_align() guarantees that the mapping is aligned + * to the BO page size, but verify it just in case. + */ + if (op_map_aligned_to_page_shift(op, nvbo->page)) + return nvbo->page; + + /* This should never happen, but raise a warning and return 4K if we get here. */ + WARN_ON(1); + return PAGE_SHIFT; +} + static void nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, struct nouveau_uvma_prealloc *new, @@ -XXX,XX +XXX,XX @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, if (vmm_get_range) nouveau_uvmm_vmm_put(uvmm, vmm_get_start, vmm_get_range, - PAGE_SHIFT); + select_page_shift(uvmm, &op->map)); break; } case DRM_GPUVA_OP_REMAP: { @@ -XXX,XX +XXX,XX @@ op_map_prepare(struct nouveau_uvmm *uvmm, uvma->region = args->region; uvma->kind = args->kind; - uvma->page_shift = PAGE_SHIFT; + uvma->page_shift = select_page_shift(uvmm, op); drm_gpuva_map(&uvmm->base, &uvma->va, op); -- 2.51.0
From: Ben Skeggs <bskeggs@nvidia.com> From GP100 onwards it's not possible to initialise comptag RAM without PMU firmware, which nouveau has no support for. As such, this code is essentially a no-op and will always revert to the equivalent non-compressed kind due to comptag allocation failure. It's also broken for the needs of VM_BIND/Vulkan. Remove the code entirely to make way for supporting compression on GPUs that support GSP-RM. Signed-off-by: Ben Skeggs <bskeggs@nvidia.com> Signed-off-by: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com> --- .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 39 ++----------------- .../drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c | 4 +- 2 files changed, 6 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -XXX,XX +XXX,XX @@ */ #include "vmm.h" -#include <core/client.h> #include <subdev/fb.h> -#include <subdev/ltc.h> #include <subdev/timer.h> #include <engine/gr.h> @@ -XXX,XX +XXX,XX @@ gp100_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, { u64 data = (addr >> 4) | map->type; - map->type += ptes * map->ctag; - while (ptes--) { VMM_WO064(pt, vmm, ptei++ * 8, data); data += map->next; @@ -XXX,XX +XXX,XX @@ gp100_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, while (ptes--) { const u64 data = (*map->dma++ >> 4) | map->type; VMM_WO064(pt, vmm, ptei++ * 8, data); - map->type += map->ctag; } nvkm_done(pt->memory); return; @@ -XXX,XX +XXX,XX @@ gp100_vmm_pd0_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, { u64 data = (addr >> 4) | map->type; - map->type += ptes * map->ctag; - while (ptes--) { VMM_WO128(pt, vmm, ptei++ * 0x10, data, 0ULL); data += map->next; @@ -XXX,XX +XXX,XX @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, struct gp100_vmm_map_vn vn; struct gp100_vmm_map_v0 v0; } *args = argv; - struct nvkm_device *device = vmm->mmu->subdev.device; - struct nvkm_memory *memory = map->memory; u8 kind, kind_inv, priv, ro, vol; int kindn, aper, ret = -ENOSYS; const u8 *kindm; @@ -XXX,XX +XXX,XX @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, } if (kindm[kind] != kind) { - u64 tags = nvkm_memory_size(memory) >> 16; - if (aper != 0 || !(page->type & NVKM_VMM_PAGE_COMP)) { - VMM_DEBUG(vmm, "comp %d %02x", aper, page->type); - return -EINVAL; - } - - if (!map->no_comp) { - ret = nvkm_memory_tags_get(memory, device, tags, - nvkm_ltc_tags_clear, - &map->tags); - if (ret) { - VMM_DEBUG(vmm, "comp %d", ret); - return ret; - } - } - - if (!map->no_comp && map->tags->mn) { - tags = map->tags->mn->offset + (map->offset >> 16); - map->ctag |= ((1ULL << page->shift) >> 16) << 36; - map->type |= tags << 36; - map->next |= map->ctag; - } else { - kind = kindm[kind]; - } + /* Revert to non-compressed kind. 
*/ + kind = kindm[kind]; } map->type |= BIT(0); @@ -XXX,XX +XXX,XX @@ gp100_vmm = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, { 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx }, - { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxC }, - { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxC }, + { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxx }, + { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxx }, { 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SVHx }, {} } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c @@ -XXX,XX +XXX,XX @@ gp10b_vmm = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, { 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx }, - { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SxHC }, - { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SxHC }, + { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SxHx }, + { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SxHx }, { 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SxHx }, {} } -- 2.51.0
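For context on the kindm[] test that both versions of this patch keep: the kind map sends each PTE kind to its closest non-compressed equivalent, so kindm[kind] != kind is exactly the test for "this kind is compressed". A toy sketch with an invented table:

#include <stdio.h>

int main(void)
{
	/* Toy kind map (values invented): index is the requested kind,
	 * the entry is its non-compressed fallback. Kinds that map to
	 * themselves have no compressed variant. */
	const unsigned char kindm[] = { 0x00, 0x01, 0x01, 0x03 };
	unsigned char kind = 0x02;	/* a compressed variant of 0x01 */

	if (kindm[kind] != kind) {
		/* Without firmware-managed comptags, revert to the
		 * equivalent non-compressed kind, as the patch does. */
		kind = kindm[kind];
	}
	printf("effective kind: 0x%02x\n", kind);
	return 0;
}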
From: Ben Skeggs <bskeggs@nvidia.com> Allow compressed PTE kinds to be written into PTEs when GSP-RM is present, rather than reverting to their non-compressed versions. Signed-off-by: Ben Skeggs <bskeggs@nvidia.com> Signed-off-by: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com> --- .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 46 ++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -XXX,XX +XXX,XX @@ gp100_vmm_pgt_pfn(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, nvkm_done(pt->memory); } +static inline u64 +gp100_vmm_comptag_nr(u64 size) +{ + return size >> 16; /* One comptag per 64KiB VRAM. */ +} + +static inline u64 +gp100_vmm_pte_comptagline_base(u64 addr) +{ + /* RM allocates enough comptags for all of VRAM, so use a 1:1 mapping. */ + return (1 + gp100_vmm_comptag_nr(addr)) << 36; /* NV_MMU_VER2_PTE_COMPTAGLINE */ +} + +static inline u64 +gp100_vmm_pte_comptagline_incr(u32 page_size) +{ + return gp100_vmm_comptag_nr(page_size) << 36; /* NV_MMU_VER2_PTE_COMPTAGLINE */ +} + static inline void gp100_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) { u64 data = (addr >> 4) | map->type; + if (map->ctag) + data |= gp100_vmm_pte_comptagline_base(addr); + while (ptes--) { VMM_WO064(pt, vmm, ptei++ * 8, data); data += map->next; @@ -XXX,XX +XXX,XX @@ gp100_vmm_pd0_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, { u64 data = (addr >> 4) | map->type; + if (map->ctag) + data |= gp100_vmm_pte_comptagline_base(addr); + while (ptes--) { VMM_WO128(pt, vmm, ptei++ * 0x10, data, 0ULL); data += map->next; @@ -XXX,XX +XXX,XX @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, return -EINVAL; } + /* Handle compression. */ if (kindm[kind] != kind) { - /* Revert to non-compressed kind. */ - kind = kindm[kind]; + struct nvkm_device *device = vmm->mmu->subdev.device; + + /* Compression is only supported when using GSP-RM, as + * PMU firmware is required in order to initialise the + * compbit backing store. + */ + if (nvkm_gsp_rm(device->gsp)) { + /* Turing GPUs require PTE_COMPTAGLINE to be filled, + * in addition to specifying a compressed kind. + */ + if (device->card_type < GA100) { + map->ctag = gp100_vmm_pte_comptagline_incr(1 << map->page->shift); + map->next |= map->ctag; + } + } else { + /* Revert to non-compressed kind. */ + kind = kindm[kind]; + } } map->type |= BIT(0); -- 2.51.0
The HW can only do compression on large and huge pages, and enabling it on 4K pages leads to an MMU fault. Compression also needs kernel support for handling the compressed kinds and managing the compression tags. This increments the nouveau version number, which allows NVK to enable it only when the kernel actually supports both features and avoid breaking the system if a newer mesa version is paired with an older kernel version. For the associated userspace MR, please see !36450: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36450 Signed-off-by: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com> --- drivers/gpu/drm/nouveau/nouveau_drv.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index XXXXXXX..XXXXXXX 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -XXX,XX +XXX,XX @@ #define DRIVER_MAJOR 1 #define DRIVER_MINOR 4 -#define DRIVER_PATCHLEVEL 0 +#define DRIVER_PATCHLEVEL 1 /* * 1.1.1: @@ -XXX,XX +XXX,XX @@ * programs that get directly linked with NVKM. * 1.3.1: * - implemented limited ABI16/NVIF interop + * 1.4.1: + * - add variable page sizes and compression for Turing+ */ #include <linux/notifier.h> -- 2.51.0
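On the userspace side, this patchlevel bump is what a driver like NVK can key off. A minimal sketch of such a gate using libdrm's version query (the function name and how the fd is obtained are assumptions, not NVK code):

#include <stdbool.h>
#include <xf86drm.h>

/* Returns true if the kernel advertises nouveau >= 1.4.1, i.e. the
 * variable page size and compression support added by this series. */
static bool nouveau_has_compression(int fd)
{
	drmVersionPtr v = drmGetVersion(fd);
	bool ok;

	if (!v)
		return false;
	ok = v->version_major > 1 ||
	     (v->version_major == 1 &&
	      (v->version_minor > 4 ||
	       (v->version_minor == 4 && v->version_patchlevel >= 1)));
	drmFreeVersion(v);
	return ok;
}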