drivers/gpu/drm/drm_buddy.c | 78 +++++++++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 7 deletions(-)
Currently drm-buddy does not have full knowledge of continuous memory.
Lets consider scenario below.
order 1: L R
order 0: LL LR RL RR
for order 1 allocation, it can offer L or R or LR+RL.
For now, we only implement L or R case for continuous memory allocation.
So this patch aims to implement the LR+RL case.
Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
change from v2:
search continuous block in nearby root if needed
change from v1:
implement top-down continuous allocation
---
drivers/gpu/drm/drm_buddy.c | 78 +++++++++++++++++++++++++++++++++----
1 file changed, 71 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..ff58eb3136d2 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -386,6 +386,58 @@ alloc_range_bias(struct drm_buddy *mm,
return ERR_PTR(err);
}
+static struct drm_buddy_block *
+find_continuous_blocks(struct drm_buddy *mm,
+ int order,
+ unsigned long flags,
+ struct drm_buddy_block **rn)
+{
+ struct list_head *head = &mm->free_list[order];
+ struct drm_buddy_block *node, *parent, *free_node, *max_node = NULL;
+ int i;
+
+ list_for_each_entry(free_node, head, link) {
+ if (max_node) {
+ if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION))
+ break;
+
+ if (drm_buddy_block_offset(free_node) <
+ drm_buddy_block_offset(max_node))
+ continue;
+ }
+
+ parent = free_node;
+ do {
+ node = parent;
+ parent = parent->parent;
+ } while (parent && parent->right == node);
+
+ if (!parent) {
+ for (i = 0; i < mm->n_roots - 1; i++)
+ if (mm->roots[i] == node)
+ break;
+ if (i == mm->n_roots - 1)
+ continue;
+ node = mm->roots[i + 1];
+ } else {
+ node = parent->right;
+ }
+
+ while (drm_buddy_block_is_split(node))
+ node = node->left;
+
+ if (drm_buddy_block_is_free(node) &&
+ drm_buddy_block_order(node) == order) {
+ *rn = node;
+ max_node = free_node;
+ BUG_ON(drm_buddy_block_offset(node) !=
+ drm_buddy_block_offset(max_node) +
+ drm_buddy_block_size(mm, max_node));
+ }
+ }
+ return max_node;
+}
+
static struct drm_buddy_block *
get_maxblock(struct list_head *head)
{
@@ -637,7 +689,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
struct list_head *blocks,
unsigned long flags)
{
- struct drm_buddy_block *block = NULL;
+ struct drm_buddy_block *block = NULL, *rblock = NULL;
unsigned int min_order, order;
unsigned long pages;
LIST_HEAD(allocated);
@@ -689,17 +741,29 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
break;
if (order-- == min_order) {
+ if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
+ min_order != 0 && pages == BIT(order + 1)) {
+ block = find_continuous_blocks(mm,
+ order,
+ flags,
+ &rblock);
+ if (block)
+ break;
+ }
err = -ENOSPC;
goto err_free;
}
} while (1);
- mark_allocated(block);
- mm->avail -= drm_buddy_block_size(mm, block);
- kmemleak_update_trace(block);
- list_add_tail(&block->link, &allocated);
-
- pages -= BIT(order);
+ do {
+ mark_allocated(block);
+ mm->avail -= drm_buddy_block_size(mm, block);
+ kmemleak_update_trace(block);
+ list_add_tail(&block->link, &allocated);
+ pages -= BIT(order);
+ block = rblock;
+ rblock = NULL;
+ } while (block);
if (!pages)
break;
--
2.34.1
Hi Xinhui, On 11/28/2022 12:04 PM, xinhui pan wrote: > Currently drm-buddy does not have full knowledge of continuous memory. > > Lets consider scenario below. > order 1: L R > order 0: LL LR RL RR > for order 1 allocation, it can offer L or R or LR+RL. > > For now, we only implement L or R case for continuous memory allocation. > So this patch aims to implement the LR+RL case. > > Signed-off-by: xinhui pan <xinhui.pan@amd.com> > --- > change from v2: > search continuous block in nearby root if needed > > change from v1: > implement top-down continuous allocation > --- > drivers/gpu/drm/drm_buddy.c | 78 +++++++++++++++++++++++++++++++++---- > 1 file changed, 71 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c > index 11bb59399471..ff58eb3136d2 100644 > --- a/drivers/gpu/drm/drm_buddy.c > +++ b/drivers/gpu/drm/drm_buddy.c > @@ -386,6 +386,58 @@ alloc_range_bias(struct drm_buddy *mm, > return ERR_PTR(err); > } > > +static struct drm_buddy_block * > +find_continuous_blocks(struct drm_buddy *mm, > + int order, > + unsigned long flags, > + struct drm_buddy_block **rn) > +{ > + struct list_head *head = &mm->free_list[order]; > + struct drm_buddy_block *node, *parent, *free_node, *max_node = NULL; NIT: We usually name the variable as *block or ***_block for drm buddy and we have *node or ***_node for drm mm manager. > + int i; > + > + list_for_each_entry(free_node, head, link) { > + if (max_node) { > + if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION)) > + break; > + > + if (drm_buddy_block_offset(free_node) < > + drm_buddy_block_offset(max_node)) > + continue; > + } > + > + parent = free_node; > + do { > + node = parent; > + parent = parent->parent; > + } while (parent && parent->right == node); > + > + if (!parent) { > + for (i = 0; i < mm->n_roots - 1; i++) > + if (mm->roots[i] == node) > + break; > + if (i == mm->n_roots - 1) > + continue; > + node = mm->roots[i + 1]; > + } else { > + node = parent->right; > + } > + > + while (drm_buddy_block_is_split(node)) > + node = node->left; > + > + if (drm_buddy_block_is_free(node) && > + drm_buddy_block_order(node) == order) { > + *rn = node; > + max_node = free_node; > + BUG_ON(drm_buddy_block_offset(node) != > + drm_buddy_block_offset(max_node) + > + drm_buddy_block_size(mm, max_node)); > + } > + } > + return max_node; > +} > + > static struct drm_buddy_block * > get_maxblock(struct list_head *head) > { > @@ -637,7 +689,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, > struct list_head *blocks, > unsigned long flags) > { > - struct drm_buddy_block *block = NULL; > + struct drm_buddy_block *block = NULL, *rblock = NULL; > unsigned int min_order, order; > unsigned long pages; > LIST_HEAD(allocated); > @@ -689,17 +741,29 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, > break; > > if (order-- == min_order) { > + if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) && > + min_order != 0 && pages == BIT(order + 1)) { > + block = find_continuous_blocks(mm, > + order, > + flags, > + &rblock); > + if (block) > + break; > + } > err = -ENOSPC; > goto err_free; > } > } while (1); > > - mark_allocated(block); > - mm->avail -= drm_buddy_block_size(mm, block); > - kmemleak_update_trace(block); > - list_add_tail(&block->link, &allocated); > - > - pages -= BIT(order); > + do { > + mark_allocated(block); > + mm->avail -= drm_buddy_block_size(mm, block); > + kmemleak_update_trace(block); > + list_add_tail(&block->link, &allocated); > + pages -= BIT(order); > + block = rblock; > + rblock = NULL; > + } while (block); I think with this approach, if we are lucky enough we may get contiguous blocks in one order level down in RL combination from the freelist? Regards, Arun > > if (!pages) > break;
[AMD Official Use Only - General] Hi Arun, Thanks for your reply. comments are inline. ________________________________________ 发件人: Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam@amd.com> 发送时间: 2022年11月29日 1:09 收件人: Pan, Xinhui; amd-gfx@lists.freedesktop.org 抄送: linux-kernel@vger.kernel.org; dri-devel@lists.freedesktop.org; matthew.auld@intel.com; daniel@ffwll.ch; Koenig, Christian 主题: Re: [PATCH v3] drm: Optimise for continuous memory allocation Hi Xinhui, On 11/28/2022 12:04 PM, xinhui pan wrote: > Currently drm-buddy does not have full knowledge of continuous memory. > > Lets consider scenario below. > order 1: L R > order 0: LL LR RL RR > for order 1 allocation, it can offer L or R or LR+RL. > > For now, we only implement L or R case for continuous memory allocation. > So this patch aims to implement the LR+RL case. > > Signed-off-by: xinhui pan <xinhui.pan@amd.com> > --- > change from v2: > search continuous block in nearby root if needed > > change from v1: > implement top-down continuous allocation > --- > drivers/gpu/drm/drm_buddy.c | 78 +++++++++++++++++++++++++++++++++---- > 1 file changed, 71 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c > index 11bb59399471..ff58eb3136d2 100644 > --- a/drivers/gpu/drm/drm_buddy.c > +++ b/drivers/gpu/drm/drm_buddy.c > @@ -386,6 +386,58 @@ alloc_range_bias(struct drm_buddy *mm, > return ERR_PTR(err); > } > > +static struct drm_buddy_block * > +find_continuous_blocks(struct drm_buddy *mm, > + int order, > + unsigned long flags, > + struct drm_buddy_block **rn) > +{ > + struct list_head *head = &mm->free_list[order]; > + struct drm_buddy_block *node, *parent, *free_node, *max_node = NULL; NIT: We usually name the variable as *block or ***_block for drm buddy and we have *node or ***_node for drm mm manager. [xh] Oh, yes. The code naming is important. Will fix it. > + int i; > + > + list_for_each_entry(free_node, head, link) { > + if (max_node) { > + if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION)) > + break; > + > + if (drm_buddy_block_offset(free_node) < > + drm_buddy_block_offset(max_node)) > + continue; > + } > + > + parent = free_node; > + do { > + node = parent; > + parent = parent->parent; > + } while (parent && parent->right == node); > + > + if (!parent) { > + for (i = 0; i < mm->n_roots - 1; i++) > + if (mm->roots[i] == node) > + break; > + if (i == mm->n_roots - 1) > + continue; > + node = mm->roots[i + 1]; > + } else { > + node = parent->right; > + } > + > + while (drm_buddy_block_is_split(node)) > + node = node->left; > + > + if (drm_buddy_block_is_free(node) && > + drm_buddy_block_order(node) == order) { > + *rn = node; > + max_node = free_node; > + BUG_ON(drm_buddy_block_offset(node) != > + drm_buddy_block_offset(max_node) + > + drm_buddy_block_size(mm, max_node)); > + } > + } > + return max_node; > +} > + > static struct drm_buddy_block * > get_maxblock(struct list_head *head) > { > @@ -637,7 +689,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, > struct list_head *blocks, > unsigned long flags) > { > - struct drm_buddy_block *block = NULL; > + struct drm_buddy_block *block = NULL, *rblock = NULL; > unsigned int min_order, order; > unsigned long pages; > LIST_HEAD(allocated); > @@ -689,17 +741,29 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, > break; > > if (order-- == min_order) { > + if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) && > + min_order != 0 && pages == BIT(order + 1)) { > + block = find_continuous_blocks(mm, > + order, > + flags, > + &rblock); > + if (block) > + break; > + } > err = -ENOSPC; > goto err_free; > } > } while (1); > > - mark_allocated(block); > - mm->avail -= drm_buddy_block_size(mm, block); > - kmemleak_update_trace(block); > - list_add_tail(&block->link, &allocated); > - > - pages -= BIT(order); > + do { > + mark_allocated(block); > + mm->avail -= drm_buddy_block_size(mm, block); > + kmemleak_update_trace(block); > + list_add_tail(&block->link, &allocated); > + pages -= BIT(order); > + block = rblock; > + rblock = NULL; > + } while (block); I think with this approach, if we are lucky enough we may get contiguous blocks in one order level down in RL combination from the freelist? [xh] That is just what I did at first time like something below. list_for_each_entry(node, head, link) list_for_each_entry_reverse(rnode, head, link) I do the test with one ROCM application while running gdm restart background which is a very normal scenario. The test result shows there is about 4% chance to find the continuous blocks in this case. This 4% is really good enough as eviction is much more expensive. So after that, I need take care of the rest 96% to not introduce too much workload. Compared with the two loops, walking through the tree is a little cheaper. thanks xinhui Regards, Arun > > if (!pages) > break;
© 2016 - 2025 Red Hat, Inc.