From: Boris Brezillon <boris.brezillon@collabora.com>
We are going to add flags/properties that will impact the VA merging
ability. Instead of sprinkling tests all over the place in
__drm_gpuvm_sm_map(), let's add a helper aggregating all these checks
and call it for every existing VA we walk through in the
__drm_gpuvm_sm_map() loop.
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Caterina Shablia <caterina.shablia@collabora.com>
---
drivers/gpu/drm/drm_gpuvm.c | 47 +++++++++++++++++++++++++++++--------
1 file changed, 37 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index 05978c5c38b1..dc3c2f906400 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -2098,12 +2098,48 @@ op_unmap_cb(const struct drm_gpuvm_ops *fn, void *priv,
return fn->sm_step_unmap(&op, priv);
}
+static bool can_merge(struct drm_gpuvm *gpuvm, const struct drm_gpuva *a,
+ const struct drm_gpuva *b)
+{
+ /* Only GEM-based mappings can be merged, and they must point to
+ * the same GEM object.
+ */
+ if (a->gem.obj != b->gem.obj || !a->gem.obj)
+ return false;
+
+ /* Let's keep things simple for now and force all flags to match. */
+ if (a->flags != b->flags)
+ return false;
+
+ /* Order VAs for the rest of the checks. */
+ if (a->va.addr > b->va.addr)
+ swap(a, b);
+
+ /* We assume the caller already checked that VAs overlap or are
+ * contiguous.
+ */
+ if (drm_WARN_ON(gpuvm->drm, b->va.addr > a->va.addr + a->va.range))
+ return false;
+
+ /* We intentionally ignore u64 underflows because all we care about
+ * here is whether the VA diff matches the GEM offset diff.
+ */
+ return b->va.addr - a->va.addr == b->gem.offset - a->gem.offset;
+}
+
static int
__drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm,
const struct drm_gpuvm_ops *ops, void *priv,
const struct drm_gpuvm_map_req *req)
{
struct drm_gpuva *va, *next;
+ struct drm_gpuva reqva = {
+ .va.addr = req->va.addr,
+ .va.range = req->va.range,
+ .gem.offset = req->gem.offset,
+ .gem.obj = req->gem.obj,
+ .flags = req->flags,
+ };
u64 req_end = req->va.addr + req->va.range;
int ret;
@@ -2116,12 +2152,9 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm,
u64 addr = va->va.addr;
u64 range = va->va.range;
u64 end = addr + range;
- bool merge = !!va->gem.obj;
+ bool merge = can_merge(gpuvm, va, &reqva);
if (addr == req->va.addr) {
- merge &= obj == req->gem.obj &&
- offset == req->gem.offset;
-
if (end == req_end) {
ret = op_unmap_cb(ops, priv, va, merge);
if (ret)
@@ -2163,8 +2196,6 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm,
};
struct drm_gpuva_op_unmap u = { .va = va };
- merge &= obj == req->gem.obj &&
- offset + ls_range == req->gem.offset;
u.keep = merge;
if (end == req_end) {
@@ -2196,10 +2227,6 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm,
break;
}
} else if (addr > req->va.addr) {
- merge &= obj == req->gem.obj &&
- offset == req->gem.offset +
- (addr - req->va.addr);
-
if (end == req_end) {
ret = op_unmap_cb(ops, priv, va, merge);
if (ret)
--
2.47.2
On 07.07.2025 17:04, Caterina Shablia wrote: > From: Boris Brezillon <boris.brezillon@collabora.com> > > We are going to add flags/properties that will impact the VA merging > ability. Instead of sprinkling tests all over the place in > __drm_gpuvm_sm_map(), let's add a helper aggregating all these checks > can call it for every existing VA we walk through in the > __drm_gpuvm_sm_map() loop. > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > Signed-off-by: Caterina Shablia <caterina.shablia@collabora.com> > --- > drivers/gpu/drm/drm_gpuvm.c | 47 +++++++++++++++++++++++++++++-------- > 1 file changed, 37 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c > index 05978c5c38b1..dc3c2f906400 100644 > --- a/drivers/gpu/drm/drm_gpuvm.c > +++ b/drivers/gpu/drm/drm_gpuvm.c > @@ -2098,12 +2098,48 @@ op_unmap_cb(const struct drm_gpuvm_ops *fn, void *priv, > return fn->sm_step_unmap(&op, priv); > } > > +static bool can_merge(struct drm_gpuvm *gpuvm, const struct drm_gpuva *a, > + const struct drm_gpuva *b) > +{ > + /* Only GEM-based mappings can be merged, and they must point to > + * the same GEM object. > + */ > + if (a->gem.obj != b->gem.obj || !a->gem.obj) > + return false; > + > + /* Let's keep things simple for now and force all flags to match. */ > + if (a->flags != b->flags) > + return false; > + > + /* Order VAs for the rest of the checks. */ > + if (a->va.addr > b->va.addr) > + swap(a, b); > + > + /* We assume the caller already checked that VAs overlap or are > + * contiguous. > + */ > + if (drm_WARN_ON(gpuvm->drm, b->va.addr > a->va.addr + a->va.range)) > + return false; > + > + /* We intentionally ignore u64 underflows because all we care about > + * here is whether the VA diff matches the GEM offset diff. > + */ > + return b->va.addr - a->va.addr == b->gem.offset - a->gem.offset; If we're reordering the VAs for the rest of the checks, when could underflow happen? 
> +} > + > static int > __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, > const struct drm_gpuvm_ops *ops, void *priv, > const struct drm_gpuvm_map_req *req) > { > struct drm_gpuva *va, *next; > + struct drm_gpuva reqva = { > + .va.addr = req->va.addr, > + .va.range = req->va.range, > + .gem.offset = req->gem.offset, > + .gem.obj = req->gem.obj, > + .flags = req->flags, > + }; > u64 req_end = req->va.addr + req->va.range; > int ret; > > @@ -2116,12 +2152,9 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, > u64 addr = va->va.addr; > u64 range = va->va.range; > u64 end = addr + range; > - bool merge = !!va->gem.obj; > + bool merge = can_merge(gpuvm, va, &reqva); > > if (addr == req->va.addr) { > - merge &= obj == req->gem.obj && > - offset == req->gem.offset; > - > if (end == req_end) { > ret = op_unmap_cb(ops, priv, va, merge); > if (ret) > @@ -2163,8 +2196,6 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, > }; > struct drm_gpuva_op_unmap u = { .va = va }; > > - merge &= obj == req->gem.obj && > - offset + ls_range == req->gem.offset; > u.keep = merge; > > if (end == req_end) { > @@ -2196,10 +2227,6 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, > break; > } > } else if (addr > req->va.addr) { > - merge &= obj == req->gem.obj && > - offset == req->gem.offset + > - (addr - req->va.addr); > - > if (end == req_end) { > ret = op_unmap_cb(ops, priv, va, merge); > if (ret) > -- > 2.47.2 Adrian Larumbe
On Tue, 22 Jul 2025 20:17:14 +0100 Adrian Larumbe <adrian.larumbe@collabora.com> wrote: > On 07.07.2025 17:04, Caterina Shablia wrote: > > From: Boris Brezillon <boris.brezillon@collabora.com> > > > > We are going to add flags/properties that will impact the VA merging > > ability. Instead of sprinkling tests all over the place in > > __drm_gpuvm_sm_map(), let's add a helper aggregating all these checks > > can call it for every existing VA we walk through in the > > __drm_gpuvm_sm_map() loop. > > > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > > Signed-off-by: Caterina Shablia <caterina.shablia@collabora.com> > > --- > > drivers/gpu/drm/drm_gpuvm.c | 47 +++++++++++++++++++++++++++++-------- > > 1 file changed, 37 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c > > index 05978c5c38b1..dc3c2f906400 100644 > > --- a/drivers/gpu/drm/drm_gpuvm.c > > +++ b/drivers/gpu/drm/drm_gpuvm.c > > @@ -2098,12 +2098,48 @@ op_unmap_cb(const struct drm_gpuvm_ops *fn, void *priv, > > return fn->sm_step_unmap(&op, priv); > > } > > > > +static bool can_merge(struct drm_gpuvm *gpuvm, const struct drm_gpuva *a, > > + const struct drm_gpuva *b) > > +{ > > + /* Only GEM-based mappings can be merged, and they must point to > > + * the same GEM object. > > + */ > > + if (a->gem.obj != b->gem.obj || !a->gem.obj) > > + return false; > > + > > + /* Let's keep things simple for now and force all flags to match. */ > > + if (a->flags != b->flags) > > + return false; > > + > > + /* Order VAs for the rest of the checks. */ > > + if (a->va.addr > b->va.addr) > > + swap(a, b); > > + > > + /* We assume the caller already checked that VAs overlap or are > > + * contiguous. > > + */ > > + if (drm_WARN_ON(gpuvm->drm, b->va.addr > a->va.addr + a->va.range)) > > + return false; > > + > > + /* We intentionally ignore u64 underflows because all we care about > > + * here is whether the VA diff matches the GEM offset diff. 
> > + */ > > + return b->va.addr - a->va.addr == b->gem.offset - a->gem.offset; > > If we're reordering the VAs for the rest of the checks, when could underflow happen? I think this comments predates the re-ordering (I originally tried not to order VAs). > > > +} > > + > > static int > > __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, > > const struct drm_gpuvm_ops *ops, void *priv, > > const struct drm_gpuvm_map_req *req) > > { > > struct drm_gpuva *va, *next; > > + struct drm_gpuva reqva = { > > + .va.addr = req->va.addr, > > + .va.range = req->va.range, > > + .gem.offset = req->gem.offset, > > + .gem.obj = req->gem.obj, > > + .flags = req->flags, > > + }; > > u64 req_end = req->va.addr + req->va.range; > > int ret; > > > > @@ -2116,12 +2152,9 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, > > u64 addr = va->va.addr; > > u64 range = va->va.range; > > u64 end = addr + range; > > - bool merge = !!va->gem.obj; > > + bool merge = can_merge(gpuvm, va, &reqva); > > > > if (addr == req->va.addr) { > > - merge &= obj == req->gem.obj && > > - offset == req->gem.offset; > > - > > if (end == req_end) { > > ret = op_unmap_cb(ops, priv, va, merge); > > if (ret) > > @@ -2163,8 +2196,6 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, > > }; > > struct drm_gpuva_op_unmap u = { .va = va }; > > > > - merge &= obj == req->gem.obj && > > - offset + ls_range == req->gem.offset; > > u.keep = merge; > > > > if (end == req_end) { > > @@ -2196,10 +2227,6 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, > > break; > > } > > } else if (addr > req->va.addr) { > > - merge &= obj == req->gem.obj && > > - offset == req->gem.offset + > > - (addr - req->va.addr); > > - > > if (end == req_end) { > > ret = op_unmap_cb(ops, priv, va, merge); > > if (ret) > > -- > > 2.47.2 > > > Adrian Larumbe
On Mon Jul 7, 2025 at 7:04 PM CEST, Caterina Shablia wrote: > diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c > index 05978c5c38b1..dc3c2f906400 100644 > --- a/drivers/gpu/drm/drm_gpuvm.c > +++ b/drivers/gpu/drm/drm_gpuvm.c > @@ -2098,12 +2098,48 @@ op_unmap_cb(const struct drm_gpuvm_ops *fn, void *priv, > return fn->sm_step_unmap(&op, priv); > } > > +static bool can_merge(struct drm_gpuvm *gpuvm, const struct drm_gpuva *a, > + const struct drm_gpuva *b) > +{ > + /* Only GEM-based mappings can be merged, and they must point to > + * the same GEM object. > + */ > + if (a->gem.obj != b->gem.obj || !a->gem.obj) > + return false; > + > + /* Let's keep things simple for now and force all flags to match. */ > + if (a->flags != b->flags) > + return false; > + > + /* Order VAs for the rest of the checks. */ > + if (a->va.addr > b->va.addr) > + swap(a, b); > + > + /* We assume the caller already checked that VAs overlap or are > + * contiguous. > + */ > + if (drm_WARN_ON(gpuvm->drm, b->va.addr > a->va.addr + a->va.range)) > + return false; > + > + /* We intentionally ignore u64 underflows because all we care about > + * here is whether the VA diff matches the GEM offset diff. > + */ > + return b->va.addr - a->va.addr == b->gem.offset - a->gem.offset; > +} > + > static int > __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, > const struct drm_gpuvm_ops *ops, void *priv, > const struct drm_gpuvm_map_req *req) > { > struct drm_gpuva *va, *next; > + struct drm_gpuva reqva = { > + .va.addr = req->va.addr, > + .va.range = req->va.range, > + .gem.offset = req->gem.offset, > + .gem.obj = req->gem.obj, > + .flags = req->flags, Huh? Where does req->flags come from? I don't remember that this flag exists in struct drm_gpuvm_map_req in the preceding patch? 
> + }; > u64 req_end = req->va.addr + req->va.range; > int ret; > > @@ -2116,12 +2152,9 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, > u64 addr = va->va.addr; > u64 range = va->va.range; > u64 end = addr + range; > - bool merge = !!va->gem.obj; > + bool merge = can_merge(gpuvm, va, &reqva); I know you want to do the swap() trick above, but I don't like creating a temporary struct drm_gpuva with all the other uninitialized fields. If you really want this, can we please limit the scope? Maybe the following helper: static bool can_merge(struct drm_gpuvm *gpuvm, const struct drm_gpuva *va, struct drm_gpuvm_map_req *req) { struct drm_gpuva reqva = { ... }; return __can_merge(gpuvm, va, reqva); }
On Mon, 07 Jul 2025 21:00:54 +0200 "Danilo Krummrich" <dakr@kernel.org> wrote: > On Mon Jul 7, 2025 at 7:04 PM CEST, Caterina Shablia wrote: > > diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c > > index 05978c5c38b1..dc3c2f906400 100644 > > --- a/drivers/gpu/drm/drm_gpuvm.c > > +++ b/drivers/gpu/drm/drm_gpuvm.c > > @@ -2098,12 +2098,48 @@ op_unmap_cb(const struct drm_gpuvm_ops *fn, void *priv, > > return fn->sm_step_unmap(&op, priv); > > } > > > > +static bool can_merge(struct drm_gpuvm *gpuvm, const struct drm_gpuva *a, > > + const struct drm_gpuva *b) > > +{ > > + /* Only GEM-based mappings can be merged, and they must point to > > + * the same GEM object. > > + */ > > + if (a->gem.obj != b->gem.obj || !a->gem.obj) > > + return false; > > + > > + /* Let's keep things simple for now and force all flags to match. */ > > + if (a->flags != b->flags) > > + return false; > > + > > + /* Order VAs for the rest of the checks. */ > > + if (a->va.addr > b->va.addr) > > + swap(a, b); > > + > > + /* We assume the caller already checked that VAs overlap or are > > + * contiguous. > > + */ > > + if (drm_WARN_ON(gpuvm->drm, b->va.addr > a->va.addr + a->va.range)) > > + return false; > > + > > + /* We intentionally ignore u64 underflows because all we care about > > + * here is whether the VA diff matches the GEM offset diff. > > + */ > > + return b->va.addr - a->va.addr == b->gem.offset - a->gem.offset; > > +} > > + > > static int > > __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, > > const struct drm_gpuvm_ops *ops, void *priv, > > const struct drm_gpuvm_map_req *req) > > { > > struct drm_gpuva *va, *next; > > + struct drm_gpuva reqva = { > > + .va.addr = req->va.addr, > > + .va.range = req->va.range, > > + .gem.offset = req->gem.offset, > > + .gem.obj = req->gem.obj, > > + .flags = req->flags, > > Huh? Where does req->flags come from? I don't remember that this flag exists in > struct drm_gpuvm_map_req in the preceding patch? 
Oops, I re-ordered commits, and forgot to verify that the series was bisectable. This should be part of patch 4 actually. > > > + }; > > u64 req_end = req->va.addr + req->va.range; > > int ret; > > > > @@ -2116,12 +2152,9 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, > > u64 addr = va->va.addr; > > u64 range = va->va.range; > > u64 end = addr + range; > > - bool merge = !!va->gem.obj; > > + bool merge = can_merge(gpuvm, va, &reqva); > > I know you want to do the swap() trick above, but I don't like creating a > temporary struct drm_gpuva with all the other uninitialized fields. I mean, I could do it the other way around (gpuva -> op_map), but it means doing it on each va we cross. > > If you really want this, can we please limit the scope? Maybe the following > helper: > > static bool can_merge(struct drm_gpuvm *gpuvm, > const struct drm_gpuva *va, > struct drm_gpuvm_map_req *req) > { > struct drm_gpuva reqva = { ... }; > return __can_merge(gpuvm, va, reqva); It's a bit of a shame though, because then this reqva is initialized every time can_merge() is called, instead of once at the beginning of an sm_map() operation. But maybe the compiler is smart enough to see through it when inlining (assuming it actually inlines the check). > }
On Mon Jul 7, 2025 at 9:00 PM CEST, Danilo Krummrich wrote: > On Mon Jul 7, 2025 at 7:04 PM CEST, Caterina Shablia wrote: >> diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c >> index 05978c5c38b1..dc3c2f906400 100644 >> --- a/drivers/gpu/drm/drm_gpuvm.c >> +++ b/drivers/gpu/drm/drm_gpuvm.c >> @@ -2098,12 +2098,48 @@ op_unmap_cb(const struct drm_gpuvm_ops *fn, void *priv, >> return fn->sm_step_unmap(&op, priv); >> } >> >> +static bool can_merge(struct drm_gpuvm *gpuvm, const struct drm_gpuva *a, >> + const struct drm_gpuva *b) >> +{ >> + /* Only GEM-based mappings can be merged, and they must point to >> + * the same GEM object. >> + */ >> + if (a->gem.obj != b->gem.obj || !a->gem.obj) >> + return false; >> + >> + /* Let's keep things simple for now and force all flags to match. */ >> + if (a->flags != b->flags) >> + return false; Forgot to mention, this can include driver specific flags. How do we know from the generic code whether this condition makes sense? *At least* it would need to be documented. However, I think it would be better to provide an optional callback for drivers to check whether merge makes sense or not. This doesn't mean we need drivers to do those common checks, this can remain here in the common code. >> + >> + /* Order VAs for the rest of the checks. */ >> + if (a->va.addr > b->va.addr) >> + swap(a, b); >> + >> + /* We assume the caller already checked that VAs overlap or are >> + * contiguous. >> + */ >> + if (drm_WARN_ON(gpuvm->drm, b->va.addr > a->va.addr + a->va.range)) >> + return false; >> + >> + /* We intentionally ignore u64 underflows because all we care about >> + * here is whether the VA diff matches the GEM offset diff. 
>> + */ >> + return b->va.addr - a->va.addr == b->gem.offset - a->gem.offset; >> +} >> + >> static int >> __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, >> const struct drm_gpuvm_ops *ops, void *priv, >> const struct drm_gpuvm_map_req *req) >> { >> struct drm_gpuva *va, *next; >> + struct drm_gpuva reqva = { >> + .va.addr = req->va.addr, >> + .va.range = req->va.range, >> + .gem.offset = req->gem.offset, >> + .gem.obj = req->gem.obj, >> + .flags = req->flags, > > Huh? Where does req->flags come from? I don't remember that this flag exists in > struct drm_gpuvm_map_req in the preceding patch? > >> + }; >> u64 req_end = req->va.addr + req->va.range; >> int ret; >> >> @@ -2116,12 +2152,9 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, >> u64 addr = va->va.addr; >> u64 range = va->va.range; >> u64 end = addr + range; >> - bool merge = !!va->gem.obj; >> + bool merge = can_merge(gpuvm, va, &reqva); > > I know you want to do the swap() trick above, but I don't like creating a > temporary struct drm_gpuva with all the other uninitialized fields. > > If you really want this, can we please limit the scope? Maybe the following > helper: > > static bool can_merge(struct drm_gpuvm *gpuvm, > const struct drm_gpuva *va, > struct drm_gpuvm_map_req *req) > { > struct drm_gpuva reqva = { ... }; > return __can_merge(gpuvm, va, reqva); > }
On Mon, 07 Jul 2025 21:06:50 +0200 "Danilo Krummrich" <dakr@kernel.org> wrote: > On Mon Jul 7, 2025 at 9:00 PM CEST, Danilo Krummrich wrote: > > On Mon Jul 7, 2025 at 7:04 PM CEST, Caterina Shablia wrote: > >> diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c > >> index 05978c5c38b1..dc3c2f906400 100644 > >> --- a/drivers/gpu/drm/drm_gpuvm.c > >> +++ b/drivers/gpu/drm/drm_gpuvm.c > >> @@ -2098,12 +2098,48 @@ op_unmap_cb(const struct drm_gpuvm_ops *fn, void *priv, > >> return fn->sm_step_unmap(&op, priv); > >> } > >> > >> +static bool can_merge(struct drm_gpuvm *gpuvm, const struct drm_gpuva *a, > >> + const struct drm_gpuva *b) > >> +{ > >> + /* Only GEM-based mappings can be merged, and they must point to > >> + * the same GEM object. > >> + */ > >> + if (a->gem.obj != b->gem.obj || !a->gem.obj) > >> + return false; > >> + > >> + /* Let's keep things simple for now and force all flags to match. */ > >> + if (a->flags != b->flags) > >> + return false; > > Forgot to mention, this can include driver specific flags. How do we know from > the generic code whether this condition makes sense? *At least* it would need to > be documented. You're right, it should have been: if ((a->flags & DRM_GPUVA_MERGEABLE_FLAGS_MASK) != (b->flags & DRM_GPUVA_MERGEABLE_FLAGS_MASK)) return false; with DRM_GPUVA_MERGEABLE_FLAGS_MASK set to the set of flags that matter when merging. > > However, I think it would be better to provide an optional callback for drivers > to check whether merge makes sense or not. This doesn't mean we need drivers to > do those common checks, this can remain here in the common code. Seems a bit premature to me. Again, if there's a need for drivers to add extra checks we can always add a callback at this point, but until this is the case, I'd rather stick to these common checks.
© 2016 - 2025 Red Hat, Inc.