[PATCH] xen/mm: page_alloc fix duplicated order shift operation in the loop
Posted by Paran Lee 2 years, 7 months ago
It doesn't seem necessary to repeat the calculation of the order
shift 2^@order on every iteration of the loops.

In addition, change the type of total_avail_pages from long to
unsigned long: the local variables it is combined with in the
page_alloc functions are already unsigned long.

Signed-off-by: Paran Lee <p4ranlee@gmail.com>
---
 xen/common/page_alloc.c | 51 ++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 319029140f..9a955ce84e 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -456,7 +456,7 @@ static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
 static unsigned long node_need_scrub[MAX_NUMNODES];
 
 static unsigned long *avail[MAX_NUMNODES];
-static long total_avail_pages;
+static unsigned long total_avail_pages;
 
 static DEFINE_SPINLOCK(heap_lock);
 static long outstanding_claims; /* total outstanding claims by all domains */
@@ -922,8 +922,9 @@ static struct page_info *alloc_heap_pages(
     struct domain *d)
 {
     nodeid_t node;
-    unsigned int i, buddy_order, zone, first_dirty;
-    unsigned long request = 1UL << order;
+    unsigned int buddy_order, zone, first_dirty;
+    unsigned int buddy_request;
+    unsigned long i, request = 1UL << order;
     struct page_info *pg;
     bool need_tlbflush = false;
     uint32_t tlbflush_timestamp = 0;
@@ -975,16 +976,17 @@ static struct page_info *alloc_heap_pages(
     while ( buddy_order != order )
     {
         buddy_order--;
+        buddy_request = 1U << buddy_order;
         page_list_add_scrub(pg, node, zone, buddy_order,
-                            (1U << buddy_order) > first_dirty ?
+                            buddy_request > first_dirty ?
                             first_dirty : INVALID_DIRTY_IDX);
-        pg += 1U << buddy_order;
+        pg += buddy_request;
 
         if ( first_dirty != INVALID_DIRTY_IDX )
         {
             /* Adjust first_dirty */
-            if ( first_dirty >= 1U << buddy_order )
-                first_dirty -= 1U << buddy_order;
+            if ( first_dirty >= buddy_request )
+                first_dirty -= buddy_request;
             else
                 first_dirty = 0; /* We've moved past original first_dirty */
         }
@@ -1000,13 +1002,13 @@ static struct page_info *alloc_heap_pages(
     if ( d != NULL )
         d->last_alloc_node = node;
 
-    for ( i = 0; i < (1 << order); i++ )
+    for ( i = 0; i < request; i++ )
     {
         /* Reference count must continuously be zero for free pages. */
         if ( (pg[i].count_info & ~PGC_need_scrub) != PGC_state_free )
         {
             printk(XENLOG_ERR
-                   "pg[%u] MFN %"PRI_mfn" c=%#lx o=%u v=%#lx t=%#x\n",
+                   "pg[%lu] MFN %"PRI_mfn" c=%#lx o=%u v=%#lx t=%#x\n",
                    i, mfn_x(page_to_mfn(pg + i)),
                    pg[i].count_info, pg[i].v.free.order,
                    pg[i].u.free.val, pg[i].tlbflush_timestamp);
@@ -1034,7 +1036,7 @@ static struct page_info *alloc_heap_pages(
     if ( first_dirty != INVALID_DIRTY_IDX ||
          (scrub_debug && !(memflags & MEMF_no_scrub)) )
     {
-        for ( i = 0; i < (1U << order); i++ )
+        for ( i = 0; i < request; i++ )
         {
             if ( test_and_clear_bit(_PGC_need_scrub, &pg[i].count_info) )
             {
@@ -1063,7 +1065,7 @@ static struct page_info *alloc_heap_pages(
      * can control its own visibility of/through the cache.
      */
     mfn = page_to_mfn(pg);
-    for ( i = 0; i < (1U << order); i++ )
+    for ( i = 0; i < request; i++ )
         flush_page_to_ram(mfn_x(mfn) + i, !(memflags & MEMF_no_icache_flush));
 
     return pg;
@@ -1437,15 +1439,16 @@ static void free_heap_pages(
 {
     unsigned long mask;
     mfn_t mfn = page_to_mfn(pg);
-    unsigned int i, node = phys_to_nid(mfn_to_maddr(mfn));
+    unsigned int node = phys_to_nid(mfn_to_maddr(mfn));
     unsigned int zone = page_to_zone(pg);
+    unsigned long i, request = 1UL << order;
     bool pg_offlined = false;
 
     ASSERT(order <= MAX_ORDER);
 
     spin_lock(&heap_lock);
 
-    for ( i = 0; i < (1 << order); i++ )
+    for ( i = 0; i < request; i++ )
     {
         if ( mark_page_free(&pg[i], mfn_add(mfn, i)) )
             pg_offlined = true;
@@ -1457,11 +1460,11 @@ static void free_heap_pages(
         }
     }
 
-    avail[node][zone] += 1 << order;
-    total_avail_pages += 1 << order;
+    avail[node][zone] += request;
+    total_avail_pages += request;
     if ( need_scrub )
     {
-        node_need_scrub[node] += 1 << order;
+        node_need_scrub[node] += request;
         pg->u.free.first_dirty = 0;
     }
     else
@@ -1490,7 +1493,7 @@ static void free_heap_pages(
             /* Update predecessor's first_dirty if necessary. */
             if ( predecessor->u.free.first_dirty == INVALID_DIRTY_IDX &&
                  pg->u.free.first_dirty != INVALID_DIRTY_IDX )
-                predecessor->u.free.first_dirty = (1U << order) +
+                predecessor->u.free.first_dirty = mask +
                                                   pg->u.free.first_dirty;
 
             pg = predecessor;
@@ -1511,7 +1514,7 @@ static void free_heap_pages(
             /* Update pg's first_dirty if necessary. */
             if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX &&
                  successor->u.free.first_dirty != INVALID_DIRTY_IDX )
-                pg->u.free.first_dirty = (1U << order) +
+                pg->u.free.first_dirty = mask +
                                          successor->u.free.first_dirty;
 
             page_list_del(successor, &heap(node, zone, order));
@@ -2416,7 +2419,7 @@ struct page_info *alloc_domheap_pages(
 void free_domheap_pages(struct page_info *pg, unsigned int order)
 {
     struct domain *d = page_get_owner(pg);
-    unsigned int i;
+    unsigned long i, request = 1UL << order;
     bool drop_dom_ref;
 
     ASSERT(!in_irq());
@@ -2426,10 +2429,10 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
         /* NB. May recursively lock from relinquish_memory(). */
         spin_lock_recursive(&d->page_alloc_lock);
 
-        for ( i = 0; i < (1 << order); i++ )
+        for ( i = 0; i < request; i++ )
             arch_free_heap_page(d, &pg[i]);
 
-        d->xenheap_pages -= 1 << order;
+        d->xenheap_pages -= request;
         drop_dom_ref = (d->xenheap_pages == 0);
 
         spin_unlock_recursive(&d->page_alloc_lock);
@@ -2443,12 +2446,12 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
             /* NB. May recursively lock from relinquish_memory(). */
             spin_lock_recursive(&d->page_alloc_lock);
 
-            for ( i = 0; i < (1 << order); i++ )
+            for ( i = 0; i < request; i++ )
             {
                 if ( pg[i].u.inuse.type_info & PGT_count_mask )
                 {
                     printk(XENLOG_ERR
-                           "pg[%u] MFN %"PRI_mfn" c=%#lx o=%u v=%#lx t=%#x\n",
+                           "pg[%lu] MFN %"PRI_mfn" c=%#lx o=%u v=%#lx t=%#x\n",
                            i, mfn_x(page_to_mfn(pg + i)),
                            pg[i].count_info, pg[i].v.free.order,
                            pg[i].u.free.val, pg[i].tlbflush_timestamp);
@@ -2462,7 +2465,7 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
                 }
             }
 
-            drop_dom_ref = !domain_adjust_tot_pages(d, -(1 << order));
+            drop_dom_ref = !domain_adjust_tot_pages(d, -request);
 
             spin_unlock_recursive(&d->page_alloc_lock);
 
-- 
2.25.1
Re: [PATCH] xen/mm: page_alloc fix duplicated order shift operation in the loop
Posted by Jan Beulich 2 years, 7 months ago
On 23.04.2022 21:35, Paran Lee wrote:
> It doesn't seem necessary to repeat the calculation of the order
> shift 2^@order on every iteration of the loops.

Once again I'm not convinced. As in the other patch, the compiler could
do this transformation via its CSE pass if it sees fit.
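
For illustration only (do_work() is a made-up placeholder, not anything
in the tree), the two forms below are normally equivalent after
optimisation, as long as "order" isn't modified inside the loop body:

    /* As written today: the shift is loop-invariant, so the compiler
     * is free to compute it once and keep the result in a register. */
    for ( i = 0; i < (1U << order); i++ )
        do_work(i);

    /* Manually hoisted form, as the patch proposes. */
    request = 1UL << order;
    for ( i = 0; i < request; i++ )
        do_work(i);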

Also (applicable as well to the other patch) I think "fix" in the
subject is misleading: There's nothing wrong with the original code.

> In addition, change the type of total_avail_pages from long to
> unsigned long: the local variables it is combined with in the
> page_alloc functions are already unsigned long.

You've made more changes than just these, some of which are questionable.

> @@ -922,8 +922,9 @@ static struct page_info *alloc_heap_pages(
>      struct domain *d)
>  {
>      nodeid_t node;
> -    unsigned int i, buddy_order, zone, first_dirty;
> -    unsigned long request = 1UL << order;
> +    unsigned int buddy_order, zone, first_dirty;
> +    unsigned int buddy_request;
> +    unsigned long i, request = 1UL << order;

E.g. it's not clear why both of these need to be unsigned long when the
largest chunk which can be allocated in one go is 1GiB (MAX_ORDER).
At least on x86, operations on 32-bit quantities are generally
slightly more efficient than ones on 64-bit values. If we wanted to
cater for architectures setting MAX_ORDER to 32 or higher, I think
the type used should become a typedef picking "unsigned int" at least
on x86.
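
Purely as a sketch of that suggestion (the typedef name below is
invented for illustration and doesn't exist in the tree):

    /* Hypothetical: a chunk-count type wide enough for 1 << MAX_ORDER,
     * remaining "unsigned int" on architectures (like x86) where
     * MAX_ORDER is well below 32. */
    #if MAX_ORDER < 32
    typedef unsigned int chunk_count_t;
    #else
    typedef unsigned long chunk_count_t;
    #endif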

> @@ -975,16 +976,17 @@ static struct page_info *alloc_heap_pages(
>      while ( buddy_order != order )
>      {
>          buddy_order--;
> +        buddy_request = 1U << buddy_order;

Such a local variable would better have the narrowest possible scope.
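
I.e., as a sketch only, something along these lines:

    while ( buddy_order != order )
    {
        unsigned int buddy_request;

        buddy_order--;
        buddy_request = 1U << buddy_order;
        page_list_add_scrub(pg, node, zone, buddy_order,
                            buddy_request > first_dirty ?
                            first_dirty : INVALID_DIRTY_IDX);
        pg += buddy_request;
        ...
    }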

> @@ -1490,7 +1493,7 @@ static void free_heap_pages(
>              /* Update predecessor's first_dirty if necessary. */
>              if ( predecessor->u.free.first_dirty == INVALID_DIRTY_IDX &&
>                   pg->u.free.first_dirty != INVALID_DIRTY_IDX )
> -                predecessor->u.free.first_dirty = (1U << order) +
> +                predecessor->u.free.first_dirty = mask +
>                                                    pg->u.free.first_dirty;
>  
>              pg = predecessor;
> @@ -1511,7 +1514,7 @@ static void free_heap_pages(
>              /* Update pg's first_dirty if necessary. */
>              if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX &&
>                   successor->u.free.first_dirty != INVALID_DIRTY_IDX )
> -                pg->u.free.first_dirty = (1U << order) +
> +                pg->u.free.first_dirty = mask +
>                                           successor->u.free.first_dirty;

This re-use of an existing local variable looks reasonable to me.
It would be nice though if the variable's scope was shrunk and its
type was also adjusted to unsigned int.
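
E.g. (again only sketching the shape, with the rest of the merge loop
left as-is):

    while ( order < MAX_ORDER )
    {
        unsigned int mask = 1U << order;

        ...
    }

instead of the function-scope "unsigned long mask;".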

Jan