[PATCH v9 03/15] xen/bitops: implement fls{l}() in common logic

Posted by Oleksii Kurochko 1 year, 9 months ago
To avoid the compilation error below, the two places in common/page_alloc.c
where flsl() is used need to be updated, since flsl() now returns unsigned int:

./include/xen/kernel.h:18:21: error: comparison of distinct pointer types lacks a cast [-Werror]
       18 |         (void) (&_x == &_y);            \
          |                     ^~
    common/page_alloc.c:1843:34: note: in expansion of macro 'min'
     1843 |         unsigned int inc_order = min(MAX_ORDER, flsl(e - s) - 1);
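
For context, the check that fires is Xen's type-safe min() macro, which
compares the addresses of two temporaries so that mixing an int (MAX_ORDER)
with the now unsigned int result of flsl() is diagnosed at compile time. A
simplified sketch of the idiom (see xen/include/xen/kernel.h for the real
definition):

    /*
     * Simplified sketch of the type-checking min() idiom.  The (void)
     * comparison of &_x and &_y has no run-time effect, but the
     * "comparison of distinct pointer types" warning is raised whenever
     * the two arguments have different types, e.g. int vs unsigned int.
     */
    #define min(x, y) ({                \
        const typeof(x) _x = (x);       \
        const typeof(y) _y = (y);       \
        (void) (&_x == &_y);            \
        _x < _y ? _x : _y;              \
    })

Hence the "+ 0U" in the page_alloc.c changes below: adding 0U promotes
MAX_ORDER (and PADDR_BITS) to unsigned int, so both min() arguments end up
with the same type.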

generic_fls{l}() was used instead of __builtin_clz{l}(x) because, if x is 0,
the result of __builtin_clz{l}() is undefined.
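
For illustration, the contract generic_fls{l}() provides can be sketched as
follows (sketch_fls is a hypothetical name; the in-tree generic_fls() is
implemented more efficiently, but with the same semantics):

    /* Well-defined for x == 0, unlike __builtin_clz(0). */
    static inline unsigned int sketch_fls(unsigned int x)
    {
        unsigned int r = 0;

        /* Count how far the highest set bit is from bit 0, 1-based. */
        while ( x )
        {
            x >>= 1;
            r++;
        }

        return r;    /* sketch_fls(0) == 0, sketch_fls(1) == 1 */
    }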

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
 The patch is almost independent from Andrew's patch series
 ( https://lore.kernel.org/xen-devel/20240313172716.2325427-1-andrew.cooper3@citrix.com/T/#t ),
 except for the test_fls() function, which IMO can be merged as a separate
 patch once Andrew's series is fully ready.
---
Changes in V9:
 - update the return type of fls() and flsl() to unsigned int to align with
   the other bit ops.
 - update the places where the return value of fls() and flsl() is compared
   with an int.
 - update the commit message.
---
Changes in V8:
 - do a proper rebase: move the definition of fls{l}() back into this patch.
 - drop the changes which removed ffz() for PPC; that should be done in a
   separate patch, not this one.
 - add a note after the Signed-off-by line.
---
Changes in V7:
 - Code style fixes
---
Changes in V6:
 - new patch for the patch series.
---
 xen/arch/arm/include/asm/arm32/bitops.h |  2 +-
 xen/arch/arm/include/asm/arm64/bitops.h |  6 ++----
 xen/arch/arm/include/asm/bitops.h       |  7 ++-----
 xen/arch/ppc/include/asm/bitops.h       |  3 ---
 xen/arch/x86/include/asm/bitops.h       |  6 ++++--
 xen/common/bitops.c                     | 22 ++++++++++++++++++++++
 xen/common/page_alloc.c                 |  4 ++--
 xen/include/xen/bitops.h                | 24 ++++++++++++++++++++++++
 8 files changed, 57 insertions(+), 17 deletions(-)

diff --git a/xen/arch/arm/include/asm/arm32/bitops.h b/xen/arch/arm/include/asm/arm32/bitops.h
index d0309d47c1..5552d4e945 100644
--- a/xen/arch/arm/include/asm/arm32/bitops.h
+++ b/xen/arch/arm/include/asm/arm32/bitops.h
@@ -1,7 +1,7 @@
 #ifndef _ARM_ARM32_BITOPS_H
 #define _ARM_ARM32_BITOPS_H
 
-#define flsl fls
+#define arch_flsl fls
 
 /*
  * Little endian assembly bitops.  nr = 0 -> byte 0 bit 0.
diff --git a/xen/arch/arm/include/asm/arm64/bitops.h b/xen/arch/arm/include/asm/arm64/bitops.h
index 0efde29068..5f5d97faa0 100644
--- a/xen/arch/arm/include/asm/arm64/bitops.h
+++ b/xen/arch/arm/include/asm/arm64/bitops.h
@@ -22,17 +22,15 @@ static /*__*/always_inline unsigned long __ffs(unsigned long word)
  */
 #define ffz(x)  __ffs(~(x))
 
-static inline int flsl(unsigned long x)
+static inline int arch_flsl(unsigned long x)
 {
         uint64_t ret;
 
-        if (__builtin_constant_p(x))
-               return generic_flsl(x);
-
         asm("clz\t%0, %1" : "=r" (ret) : "r" (x));
 
         return BITS_PER_LONG - ret;
 }
+#define arch_flsl arch_flsl
 
 /* Based on linux/include/asm-generic/bitops/find.h */
 
diff --git a/xen/arch/arm/include/asm/bitops.h b/xen/arch/arm/include/asm/bitops.h
index 8e16335e76..860d6d4689 100644
--- a/xen/arch/arm/include/asm/bitops.h
+++ b/xen/arch/arm/include/asm/bitops.h
@@ -78,17 +78,14 @@ bool clear_mask16_timeout(uint16_t mask, volatile void *p,
  * the clz instruction for much better code efficiency.
  */
 
-static inline int fls(unsigned int x)
+static inline int arch_fls(unsigned int x)
 {
         int ret;
 
-        if (__builtin_constant_p(x))
-               return generic_fls(x);
-
         asm("clz\t%"__OP32"0, %"__OP32"1" : "=r" (ret) : "r" (x));
         return 32 - ret;
 }
-
+#define arch_fls arch_fls
 
 #define arch_ffs(x) ({ unsigned int __t = (x); fls(ISOLATE_LSB(__t)); })
 #define arch_ffsl(x) ({ unsigned long __t = (x); flsl(ISOLATE_LSB(__t)); })
diff --git a/xen/arch/ppc/include/asm/bitops.h b/xen/arch/ppc/include/asm/bitops.h
index e2b6473c8c..ca308fd62b 100644
--- a/xen/arch/ppc/include/asm/bitops.h
+++ b/xen/arch/ppc/include/asm/bitops.h
@@ -119,9 +119,6 @@ static inline int test_and_set_bit(unsigned int nr, volatile void *addr)
         (volatile unsigned int *)addr + BITOP_WORD(nr)) != 0;
 }
 
-#define flsl(x) generic_flsl(x)
-#define fls(x) generic_fls(x)
-
 /* Based on linux/include/asm-generic/bitops/ffz.h */
 /*
  * ffz - find first zero in word.
diff --git a/xen/arch/x86/include/asm/bitops.h b/xen/arch/x86/include/asm/bitops.h
index 23f09fdb7a..8f3d76fe44 100644
--- a/xen/arch/x86/include/asm/bitops.h
+++ b/xen/arch/x86/include/asm/bitops.h
@@ -425,7 +425,7 @@ static always_inline unsigned int arch_ffsl(unsigned long x)
  *
  * This is defined the same way as ffs.
  */
-static inline int flsl(unsigned long x)
+static always_inline int arch_flsl(unsigned long x)
 {
     long r;
 
@@ -435,8 +435,9 @@ static inline int flsl(unsigned long x)
           "1:" : "=r" (r) : "rm" (x));
     return (int)r+1;
 }
+#define arch_flsl arch_flsl
 
-static inline int fls(unsigned int x)
+static always_inline int arch_fls(unsigned int x)
 {
     int r;
 
@@ -446,6 +447,7 @@ static inline int fls(unsigned int x)
           "1:" : "=r" (r) : "rm" (x));
     return r + 1;
 }
+#define arch_fls arch_fls
 
 /**
  * hweightN - returns the hamming weight of a N-bit word
diff --git a/xen/common/bitops.c b/xen/common/bitops.c
index a8c32f6767..95bc47176b 100644
--- a/xen/common/bitops.c
+++ b/xen/common/bitops.c
@@ -62,9 +62,31 @@ static void test_ffs(void)
     CHECK(ffs64, (uint64_t)0x8000000000000000, 64);
 }
 
+static void test_fls(void)
+{
+    /* unsigned int fls(unsigned int) */
+    CHECK(fls, 1, 1);
+    CHECK(fls, 3, 2);
+    CHECK(fls, 3U << 30, 32);
+
+    /* unsigned int flsl(unsigned long) */
+    CHECK(flsl, 1, 1);
+    CHECK(flsl, 1UL << (BITS_PER_LONG - 1), BITS_PER_LONG);
+#if BITS_PER_LONG > 32
+    CHECK(flsl, 3UL << 32, 34);
+#endif
+
+    /* unsigned int fls64(uint64_t) */
+    CHECK(fls64, 1, 1);
+    CHECK(fls64, 0x00000000C0000000ULL, 32);
+    CHECK(fls64, 0x0000000180000000ULL, 33);
+    CHECK(fls64, 0xC000000000000000ULL, 64);
+}
+
 static int __init cf_check test_bitops(void)
 {
     test_ffs();
+    test_fls();
 
     return 0;
 }
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index be4ba3962a..eed6b2a901 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1842,7 +1842,7 @@ static void _init_heap_pages(const struct page_info *pg,
          * Note that the value of ffsl() and flsl() starts from 1 so we need
          * to decrement it by 1.
          */
-        unsigned int inc_order = min(MAX_ORDER, flsl(e - s) - 1);
+        unsigned int inc_order = min(MAX_ORDER + 0U, flsl(e - s) - 1);
 
         if ( s )
             inc_order = min(inc_order, ffsl(s) - 1U);
@@ -2266,7 +2266,7 @@ void __init xenheap_max_mfn(unsigned long mfn)
     ASSERT(!first_node_initialised);
     ASSERT(!xenheap_bits);
     BUILD_BUG_ON((PADDR_BITS - PAGE_SHIFT) >= BITS_PER_LONG);
-    xenheap_bits = min(flsl(mfn + 1) - 1 + PAGE_SHIFT, PADDR_BITS);
+    xenheap_bits = min(flsl(mfn + 1) - 1 + PAGE_SHIFT, PADDR_BITS + 0U);
     printk(XENLOG_INFO "Xen heap: %u bits\n", xenheap_bits);
 }
 
diff --git a/xen/include/xen/bitops.h b/xen/include/xen/bitops.h
index 4f3399273a..f5be10b928 100644
--- a/xen/include/xen/bitops.h
+++ b/xen/include/xen/bitops.h
@@ -203,6 +203,30 @@ static always_inline bool test_bit(int nr, const volatile void *addr)
     test_bit(nr, addr);                                 \
 })
 
+static always_inline __pure unsigned int fls(unsigned int x)
+{
+    if ( __builtin_constant_p(x) )
+        return generic_fls(x);
+
+#ifndef arch_fls
+#define arch_fls generic_fls
+#endif
+
+    return arch_fls(x);
+}
+
+static always_inline __pure unsigned int flsl(unsigned long x)
+{
+    if ( __builtin_constant_p(x) )
+        return generic_flsl(x);
+
+#ifndef arch_flsl
+#define arch_flsl generic_flsl
+#endif
+
+    return arch_flsl(x);
+}
+
 /*
  * Find First Set bit.  Bits are labelled from 1.
  */
-- 
2.45.0
Re: [PATCH v9 03/15] xen/bitops: implement fls{l}() in common logic
Posted by Jan Beulich 1 year, 9 months ago
On 06.05.2024 12:15, Oleksii Kurochko wrote:
> Changes in V9:
>  - update return type of fls and flsl() to unsigned int to be aligned with other
>    bit ops.

But this then needs carrying through to ...

> --- a/xen/arch/arm/include/asm/arm64/bitops.h
> +++ b/xen/arch/arm/include/asm/arm64/bitops.h
> @@ -22,17 +22,15 @@ static /*__*/always_inline unsigned long __ffs(unsigned long word)
>   */
>  #define ffz(x)  __ffs(~(x))
>  
> -static inline int flsl(unsigned long x)
> +static inline int arch_flsl(unsigned long x)

... e.g. here. You don't want to introduce signed/unsigned mismatches.

Also why do you keep "inline" here, while ...

> --- a/xen/arch/x86/include/asm/bitops.h
> +++ b/xen/arch/x86/include/asm/bitops.h
> @@ -425,7 +425,7 @@ static always_inline unsigned int arch_ffsl(unsigned long x)
>   *
>   * This is defined the same way as ffs.
>   */
> -static inline int flsl(unsigned long x)
> +static always_inline int arch_flsl(unsigned long x)

... you switch to always_inline here?

(replying out of order)

> --- a/xen/arch/arm/include/asm/arm32/bitops.h
> +++ b/xen/arch/arm/include/asm/arm32/bitops.h
> @@ -1,7 +1,7 @@
>  #ifndef _ARM_ARM32_BITOPS_H
>  #define _ARM_ARM32_BITOPS_H
>  
> -#define flsl fls
> +#define arch_flsl fls

It's for the Arm maintainers to ultimately judge, but I'd be inclined to suggest

#define arch_flsl arch_fls

instead. That's not only behaviorally closer to what was there before, but
also reduces (a tiny bit) the amount of work the compiler needs to carry out.
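
(For reference: unsigned long is 32 bits on arm32, so the clz-based
arch_fls() already covers the full unsigned long range, and a sketch of the
suggestion is just

    #define arch_flsl arch_fls

which resolves arch_flsl() directly to the arch implementation rather than
routing it back through the common fls() wrapper.)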

Jan
Re: [PATCH v9 03/15] xen/bitops: implement fls{l}() in common logic
Posted by Oleksii K. 1 year, 8 months ago
On Wed, 2024-05-15 at 11:09 +0200, Jan Beulich wrote:
> But this then needs carrying through to ...
> 
> > --- a/xen/arch/arm/include/asm/arm64/bitops.h
> > +++ b/xen/arch/arm/include/asm/arm64/bitops.h
> > @@ -22,17 +22,15 @@ static /*__*/always_inline unsigned long
> > __ffs(unsigned long word)
> >    */
> >   #define ffz(x)  __ffs(~(x))
> >   
> > -static inline int flsl(unsigned long x)
> > +static inline int arch_flsl(unsigned long x)
> 
> ... e.g. here. You don't want to introduce signed/unsigned
> mismatches.
Is it critical for x86 to return int for flsl() and fls(), or can I
update the return type for x86 too?

   static always_inline int arch_flsl(unsigned long x)
   {
       long r;
   
       asm ( "bsr %1,%0\n\t"
             "jnz 1f\n\t"
             "mov $-1,%0\n"
             "1:" : "=r" (r) : "rm" (x));
       return (int)r+1;
   }
   #define arch_flsl arch_flsl
   
   static always_inline int arch_fls(unsigned int x)
   {
       int r;
   
       asm ( "bsr %1,%0\n\t"
             "jnz 1f\n\t"
             "mov $-1,%0\n"
             "1:" : "=r" (r) : "rm" (x));
       return r + 1;
   }
   #define arch_fls arch_fls

Any specific reason why 'long' and 'int' types for r are used?

~ Oleksii
Re: [PATCH v9 03/15] xen/bitops: implement fls{l}() in common logic
Posted by Jan Beulich 1 year, 8 months ago
On 17.05.2024 11:06, Oleksii K. wrote:
> On Wed, 2024-05-15 at 11:09 +0200, Jan Beulich wrote:
>> But this then needs carrying through to ...
>>
>>> --- a/xen/arch/arm/include/asm/arm64/bitops.h
>>> +++ b/xen/arch/arm/include/asm/arm64/bitops.h
>>> @@ -22,17 +22,15 @@ static /*__*/always_inline unsigned long
>>> __ffs(unsigned long word)
>>>    */
>>>   #define ffz(x)  __ffs(~(x))
>>>   
>>> -static inline int flsl(unsigned long x)
>>> +static inline int arch_flsl(unsigned long x)
>>
>> ... e.g. here. You don't want to introduce signed/unsigned
>> mismatches.
> Is it critical for x86 to return int for flsl() and fls(), or can I
> update the return type for x86 too?

The return types ought to be changed imo, for everything to end up
consistent.
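
I.e., a sketch of the consistent end result, with the arch hooks matching
the unsigned common wrappers:

    static always_inline unsigned int arch_fls(unsigned int x);
    static always_inline unsigned int arch_flsl(unsigned long x);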

>    static always_inline int arch_flsl(unsigned long x)
>    {
>        long r;
>    
>        asm ( "bsr %1,%0\n\t"
>              "jnz 1f\n\t"
>              "mov $-1,%0\n"
>              "1:" : "=r" (r) : "rm" (x));
>        return (int)r+1;
>    }
>    #define arch_flsl arch_flsl
>    
>    static always_inline int arch_fls(unsigned int x)
>    {
>        int r;
>    
>        asm ( "bsr %1,%0\n\t"
>              "jnz 1f\n\t"
>              "mov $-1,%0\n"
>              "1:" : "=r" (r) : "rm" (x));
>        return r + 1;
>    }
>    #define arch_fls arch_fls
> 
> Any specific reason why 'long' and 'int' types for r are used?

I don't think so. I expect it merely fits with no-one having cared about
signedness back at the time.
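
(For what it's worth, a C-level sketch of what the bsr idiom computes, under
a hypothetical name:

    static inline unsigned int sketch_arch_fls(unsigned int x)
    {
        /*
         * bsr yields the index of the most significant set bit; the
         * jnz/mov pair forces -1 when x == 0, so adding 1 gives fls
         * semantics: fls(0) == 0, fls(1) == 1, fls(0x80000000) == 32.
         */
        int r = x ? 31 - __builtin_clz(x) : -1;

        return r + 1;
    }

The result is always in [0, 32], so nothing is lost by making the return
type unsigned.)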

Jan

Re: [PATCH v9 03/15] xen/bitops: implement fls{l}() in common logic
Posted by Oleksii K. 1 year, 9 months ago
On Wed, 2024-05-15 at 11:09 +0200, Jan Beulich wrote:
> On 06.05.2024 12:15, Oleksii Kurochko wrote:
> > Changes in V9:
> >  - update return type of fls and flsl() to unsigned int to be
> > aligned with other
> >    bit ops.
> 
> But this then needs carrying through to ...
> 
> > --- a/xen/arch/arm/include/asm/arm64/bitops.h
> > +++ b/xen/arch/arm/include/asm/arm64/bitops.h
> > @@ -22,17 +22,15 @@ static /*__*/always_inline unsigned long
> > __ffs(unsigned long word)
> >   */
> >  #define ffz(x)  __ffs(~(x))
> >  
> > -static inline int flsl(unsigned long x)
> > +static inline int arch_flsl(unsigned long x)
> 
> ... e.g. here. You don't want to introduce signed/unsigned
> mismatches.
Do you mean that generic flsl() uses 'unsigned int' as a return type,
but arch_flsl() continues to use 'int'?

> 
> Also why do you keep "inline" here, while ...
> 
> > --- a/xen/arch/x86/include/asm/bitops.h
> > +++ b/xen/arch/x86/include/asm/bitops.h
> > @@ -425,7 +425,7 @@ static always_inline unsigned int
> > arch_ffsl(unsigned long x)
> >   *
> >   * This is defined the same way as ffs.
> >   */
> > -static inline int flsl(unsigned long x)
> > +static always_inline int arch_flsl(unsigned long x)
> 
> ... you switch to always_inline here?
Because Andrew's patch with bitops.h for x86 changes to always_inline,
so to be consistent, at least, within the architecture.

~ Oleksii

> 
> (replying out of order)
> 
> > --- a/xen/arch/arm/include/asm/arm32/bitops.h
> > +++ b/xen/arch/arm/include/asm/arm32/bitops.h
> > @@ -1,7 +1,7 @@
> >  #ifndef _ARM_ARM32_BITOPS_H
> >  #define _ARM_ARM32_BITOPS_H
> >  
> > -#define flsl fls
> > +#define arch_flsl fls
> 
> It's for the Arm maintainers to ultimately judge, but I'd be inclined to
> suggest
> 
> #define arch_flsl arch_fls
> 
> instead. That's not only behaviorally closer to what was there
> before, but
> also reduces (a tiny bit) the amount of work the compiler needs to
> carry out.
> 
> Jan
Re: [PATCH v9 03/15] xen/bitops: implement fls{l}() in common logic
Posted by Jan Beulich 1 year, 9 months ago
On 15.05.2024 15:55, Oleksii K. wrote:
> On Wed, 2024-05-15 at 11:09 +0200, Jan Beulich wrote:
>> On 06.05.2024 12:15, Oleksii Kurochko wrote:
>>> Changes in V9:
>>>  - update return type of fls and flsl() to unsigned int to be
>>> aligned with other
>>>    bit ops.
>>
>> But this then needs carrying through to ...
>>
>>> --- a/xen/arch/arm/include/asm/arm64/bitops.h
>>> +++ b/xen/arch/arm/include/asm/arm64/bitops.h
>>> @@ -22,17 +22,15 @@ static /*__*/always_inline unsigned long
>>> __ffs(unsigned long word)
>>>   */
>>>  #define ffz(x)  __ffs(~(x))
>>>  
>>> -static inline int flsl(unsigned long x)
>>> +static inline int arch_flsl(unsigned long x)
>>
>> ... e.g. here. You don't want to introduce signed/unsigned
>> mismatches.
> Do you mean that generic flsl() uses 'unsigned int' as a return type,
> but arch_flsl() continues to use 'int'?

Yes.

>> Also why do you keep "inline" here, while ...
>>
>>> --- a/xen/arch/x86/include/asm/bitops.h
>>> +++ b/xen/arch/x86/include/asm/bitops.h
>>> @@ -425,7 +425,7 @@ static always_inline unsigned int
>>> arch_ffsl(unsigned long x)
>>>   *
>>>   * This is defined the same way as ffs.
>>>   */
>>> -static inline int flsl(unsigned long x)
>>> +static always_inline int arch_flsl(unsigned long x)
>>
>> ... you switch to always_inline here?
> Because Andrew's patch with bitops.h for x86 changes to always_inline,
> so to be consistent, at least, within the architecture.

And why not extend this to Arm?

Jan

Re: [PATCH v9 03/15] xen/bitops: implement fls{l}() in common logic
Posted by Oleksii K. 1 year, 9 months ago
On Wed, 2024-05-15 at 16:07 +0200, Jan Beulich wrote:
> On 15.05.2024 15:55, Oleksii K. wrote:
> > On Wed, 2024-05-15 at 11:09 +0200, Jan Beulich wrote:
> > > On 06.05.2024 12:15, Oleksii Kurochko wrote:
> > > > Changes in V9:
> > > >  - update return type of fls and flsl() to unsigned int to be
> > > > aligned with other
> > > >    bit ops.
> > > 
> > > But this then needs carrying through to ...
> > > 
> > > > --- a/xen/arch/arm/include/asm/arm64/bitops.h
> > > > +++ b/xen/arch/arm/include/asm/arm64/bitops.h
> > > > @@ -22,17 +22,15 @@ static /*__*/always_inline unsigned long
> > > > __ffs(unsigned long word)
> > > >   */
> > > >  #define ffz(x)  __ffs(~(x))
> > > >  
> > > > -static inline int flsl(unsigned long x)
> > > > +static inline int arch_flsl(unsigned long x)
> > > 
> > > ... e.g. here. You don't want to introduce signed/unsigned
> > > mismatches.
> > Do you mean that generic flsl() uses 'unsigned int' as a return
> > type,
> > but arch_flsl() continues to use 'int'?
> 
> Yes.
> 
> > > Also why do you keep "inline" here, while ...
> > > 
> > > > --- a/xen/arch/x86/include/asm/bitops.h
> > > > +++ b/xen/arch/x86/include/asm/bitops.h
> > > > @@ -425,7 +425,7 @@ static always_inline unsigned int
> > > > arch_ffsl(unsigned long x)
> > > >   *
> > > >   * This is defined the same way as ffs.
> > > >   */
> > > > -static inline int flsl(unsigned long x)
> > > > +static always_inline int arch_flsl(unsigned long x)
> > > 
> > > ... you switch to always_inline here?
> > Because Andrew's patch with bitops.h for x86 changes to
> > always_inline,
> > so to be consistent, at least, within the architecture.
> 
> And why not extend this to Arm?
No specific reason, I just wanted to make the minimal amount of necessary
changes. I'll do that in the next patch version.

~ Oleksii