[v2] ppc: Enable full Xen build

[PATCH v2 4/8] xen/ppc: Implement bitops.h

Posted by Shawn Anastasio 2 years, 5 months ago

Implement bitops.h, based on Linux's implementation as of commit
5321d1b1afb9a17302c6cec79f0cbf823eb0d3fc. Though it is based off of
Linux's implementation, this code diverges significantly in a number of
ways:
  - Bitmap entries changed to 32-bit words to match X86 and Arm on Xen
  - PPC32-specific code paths dropped
  - Formatting completely re-done to more closely line up with Xen.
    Including 4 space indentation.

Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com>
---
v2:
  - Clarify changes from Linux implementation in commit message
  - Use PPC_ATOMIC_{ENTRY,EXIT}_BARRIER macros from <asm/memory.h> instead
    of hardcoded "sync" instructions in inline assembly blocks.
  - Fix macro line-continuing backslash spacing
  - Fix hard-tab usage in find_*_bit C functions.

 xen/arch/ppc/include/asm/bitops.h | 332 +++++++++++++++++++++++++++++-
 1 file changed, 329 insertions(+), 3 deletions(-)

diff --git a/xen/arch/ppc/include/asm/bitops.h b/xen/arch/ppc/include/asm/bitops.h
index 548e97b414..dc35e54838 100644
--- a/xen/arch/ppc/include/asm/bitops.h
+++ b/xen/arch/ppc/include/asm/bitops.h
@@ -1,9 +1,335 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Adapted from Linux's arch/powerpc/include/asm/bitops.h.
+ *
+ * Merged version by David Gibson <david@gibson.dropbear.id.au>.
+ * Based on ppc64 versions by: Dave Engebretsen, Todd Inglett, Don
+ * Reed, Pat McCarthy, Peter Bergner, Anton Blanchard.  They
+ * originally took it from the ppc32 code.
+ */
 #ifndef _ASM_PPC_BITOPS_H
 #define _ASM_PPC_BITOPS_H

+#include <asm/memory.h>
+
+#define __set_bit(n,p)            set_bit(n,p)
+#define __clear_bit(n,p)          clear_bit(n,p)
+
+#define BITOP_BITS_PER_WORD     32
+#define BITOP_MASK(nr)          (1UL << ((nr) % BITOP_BITS_PER_WORD))
+#define BITOP_WORD(nr)          ((nr) / BITOP_BITS_PER_WORD)
+#define BITS_PER_BYTE           8
+
 /* PPC bit number conversion */
-#define PPC_BITLSHIFT(be)	(BITS_PER_LONG - 1 - (be))
-#define PPC_BIT(bit)		(1UL << PPC_BITLSHIFT(bit))
-#define PPC_BITMASK(bs, be)	((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+#define PPC_BITLSHIFT(be)    (BITS_PER_LONG - 1 - (be))
+#define PPC_BIT(bit)         (1UL << PPC_BITLSHIFT(bit))
+#define PPC_BITMASK(bs, be)  ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+
+/* Macro for generating the ***_bits() functions */
+#define DEFINE_BITOP(fn, op, prefix)                                           \
+static inline void fn(unsigned long mask,                                      \
+        volatile unsigned int *_p)                                             \
+{                                                                              \
+    unsigned long old;                                                         \
+    unsigned int *p = (unsigned int *)_p;                                      \
+    asm volatile (                                                             \
+    prefix                                                                     \
+"1: lwarx %0,0,%3,0\n"                                                         \
+    #op "%I2 %0,%0,%2\n"                                                       \
+    "stwcx. %0,0,%3\n"                                                         \
+    "bne- 1b\n"                                                                \
+    : "=&r" (old), "+m" (*p)                                                   \
+    : "rK" (mask), "r" (p)                                                     \
+    : "cc", "memory");                                                         \
+}
+
+DEFINE_BITOP(set_bits, or, "")
+DEFINE_BITOP(change_bits, xor, "")
+
+#define DEFINE_CLROP(fn, prefix)                                               \
+static inline void fn(unsigned long mask, volatile unsigned int *_p)           \
+{                                                                              \
+    unsigned long old;                                                         \
+    unsigned int *p = (unsigned int *)_p;                                      \
+    asm volatile (                                                             \
+    prefix                                                                     \
+"1: lwarx %0,0,%3,0\n"                                                         \
+    "andc %0,%0,%2\n"                                                          \
+    "stwcx. %0,0,%3\n"                                                         \
+    "bne- 1b\n"                                                                \
+    : "=&r" (old), "+m" (*p)                                                   \
+    : "r" (mask), "r" (p)                                                      \
+    : "cc", "memory");                                                         \
+}
+
+DEFINE_CLROP(clear_bits, "")
+
+static inline void set_bit(int nr, volatile void *addr)
+{
+    set_bits(BITOP_MASK(nr), (volatile unsigned int *)addr + BITOP_WORD(nr));
+}
+static inline void clear_bit(int nr, volatile void *addr)
+{
+    clear_bits(BITOP_MASK(nr), (volatile unsigned int *)addr + BITOP_WORD(nr));
+}
+
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static inline int test_bit(int nr, const volatile void *addr)
+{
+        const volatile unsigned long *p = (const volatile unsigned long *)addr;
+        return 1UL & (p[BITOP_WORD(nr)] >> (nr & (BITOP_BITS_PER_WORD-1)));
+}
+
+static inline unsigned long test_and_clear_bits(unsigned long mask, volatile void *_p)
+{
+    unsigned long old, t;
+    unsigned int *p = (unsigned int *)_p;
+
+    asm volatile (
+        PPC_ATOMIC_ENTRY_BARRIER
+        "1: lwarx %0,0,%3,0\n"
+        "andc %1,%0,%2\n"
+        "stwcx. %1,0,%3\n"
+        "bne- 1b\n"
+        PPC_ATOMIC_EXIT_BARRIER
+        : "=&r" (old), "=&r" (t)
+        : "r" (mask), "r" (p)
+        : "cc", "memory");
+
+    return (old & mask);
+}
+
+static inline int test_and_clear_bit(unsigned int nr,
+                                       volatile void *addr)
+{
+    return test_and_clear_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr)) != 0;
+}
+
+#define DEFINE_TESTOP(fn, op, eh)                                              \
+static inline unsigned long fn(                                                \
+        unsigned long mask,                                                    \
+        volatile unsigned int *_p)                                             \
+{                                                                              \
+    unsigned long old, t;                                                      \
+    unsigned int *p = (unsigned int *)_p;                                      \
+    __asm__ __volatile__ (                                                     \
+    PPC_ATOMIC_ENTRY_BARRIER                                                   \
+"1:" "lwarx %0,0,%3,%4\n"                                                      \
+    #op "%I2 %1,%0,%2\n"                                                       \
+    "stwcx. %1,0,%3\n"                                                         \
+    "bne- 1b\n"                                                                \
+    PPC_ATOMIC_EXIT_BARRIER                                                    \
+    : "=&r" (old), "=&r" (t)                                                   \
+    : "rK" (mask), "r" (p), "n" (eh)                                           \
+    : "cc", "memory");                                                         \
+    return (old & mask);                                                       \
+}
+
+DEFINE_TESTOP(test_and_set_bits, or, 0)
+
+static inline int test_and_set_bit(unsigned long nr, volatile void *addr)
+{
+    return test_and_set_bits(BITOP_MASK(nr), (volatile unsigned int *)addr + BITOP_WORD(nr)) != 0;
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_set_bit(int nr, volatile void *addr)
+{
+        unsigned int mask = BITOP_MASK(nr);
+        volatile unsigned int *p =
+                ((volatile unsigned int *)addr) + BITOP_WORD(nr);
+        unsigned int old = *p;
+
+        *p = old | mask;
+        return (old & mask) != 0;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_clear_bit(int nr, volatile void *addr)
+{
+        unsigned int mask = BITOP_MASK(nr);
+        volatile unsigned int *p =
+                ((volatile unsigned int *)addr) + BITOP_WORD(nr);
+        unsigned int old = *p;
+
+        *p = old & ~mask;
+        return (old & mask) != 0;
+}
+
+#define flsl(x) generic_flsl(x)
+#define fls(x) generic_fls(x)
+#define ffs(x) ({ unsigned int __t = (x); fls(__t & -__t); })
+#define ffsl(x) ({ unsigned long __t = (x); flsl(__t & -__t); })
+
+/* Based on linux/include/asm-generic/bitops/ffz.h */
+/*
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+#define ffz(x)  __ffs(~(x))
+
+/**
+ * hweightN - returns the hamming weight of a N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+#define hweight64(x) generic_hweight64(x)
+#define hweight32(x) generic_hweight32(x)
+#define hweight16(x) generic_hweight16(x)
+#define hweight8(x) generic_hweight8(x)
+
+/* Based on linux/include/asm-generic/bitops/builtin-__ffs.h */
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static /*__*/always_inline unsigned long __ffs(unsigned long word)
+{
+        return __builtin_ctzl(word);
+}
+
+/**
+ * find_first_set_bit - find the first set bit in @word
+ * @word: the word to search
+ *
+ * Returns the bit-number of the first set bit (first bit being 0).
+ * The input must *not* be zero.
+ */
+#define find_first_set_bit(x) ({ ffsl(x) - 1; })
+
+/*
+ * Find the first set bit in a memory region.
+ */
+static inline unsigned long find_first_bit(const unsigned long *addr,
+                                           unsigned long size)
+{
+    const unsigned long *p = addr;
+    unsigned long result = 0;
+    unsigned long tmp;
+
+    while (size & ~(BITS_PER_LONG-1)) {
+        if ((tmp = *(p++)))
+            goto found;
+        result += BITS_PER_LONG;
+        size -= BITS_PER_LONG;
+    }
+    if (!size)
+        return result;
+
+    tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
+    if (tmp == 0UL)        /* Are any bits set? */
+        return result + size;    /* Nope. */
+found:
+    return result + __ffs(tmp);
+}
+
+static inline unsigned long find_next_bit(const unsigned long *addr,
+                                          unsigned long size,
+                                          unsigned long offset)
+{
+    const unsigned long *p = addr + BITOP_WORD(offset);
+    unsigned long result = offset & ~(BITS_PER_LONG-1);
+    unsigned long tmp;
+
+    if (offset >= size)
+        return size;
+    size -= result;
+    offset %= BITS_PER_LONG;
+    if (offset) {
+        tmp = *(p++);
+        tmp &= (~0UL << offset);
+        if (size < BITS_PER_LONG)
+            goto found_first;
+        if (tmp)
+            goto found_middle;
+        size -= BITS_PER_LONG;
+        result += BITS_PER_LONG;
+    }
+    while (size & ~(BITS_PER_LONG-1)) {
+        if ((tmp = *(p++)))
+            goto found_middle;
+        result += BITS_PER_LONG;
+        size -= BITS_PER_LONG;
+    }
+    if (!size)
+        return result;
+    tmp = *p;
+
+found_first:
+    tmp &= (~0UL >> (BITS_PER_LONG - size));
+    if (tmp == 0UL)        /* Are any bits set? */
+        return result + size;    /* Nope. */
+found_middle:
+    return result + __ffs(tmp);
+}
+
+/*
+ * This implementation of find_{first,next}_zero_bit was stolen from
+ * Linus' asm-alpha/bitops.h.
+ */
+static inline unsigned long find_next_zero_bit(const unsigned long *addr,
+                                               unsigned long size,
+                                               unsigned long offset)
+{
+    const unsigned long *p = addr + BITOP_WORD(offset);
+    unsigned long result = offset & ~(BITS_PER_LONG-1);
+    unsigned long tmp;
+
+    if (offset >= size)
+        return size;
+    size -= result;
+    offset %= BITS_PER_LONG;
+    if (offset) {
+        tmp = *(p++);
+        tmp |= ~0UL >> (BITS_PER_LONG - offset);
+        if (size < BITS_PER_LONG)
+            goto found_first;
+        if (~tmp)
+            goto found_middle;
+        size -= BITS_PER_LONG;
+        result += BITS_PER_LONG;
+    }
+    while (size & ~(BITS_PER_LONG-1)) {
+        if (~(tmp = *(p++)))
+            goto found_middle;
+        result += BITS_PER_LONG;
+        size -= BITS_PER_LONG;
+    }
+    if (!size)
+        return result;
+    tmp = *p;
+
+found_first:
+    tmp |= ~0UL << size;
+    if (tmp == ~0UL)    /* Are any bits zero? */
+        return result + size;    /* Nope. */
+found_middle:
+    return result + ffz(tmp);
+}

 #endif /* _ASM_PPC_BITOPS_H */
--
2.30.2

Re: [PATCH v2 4/8] xen/ppc: Implement bitops.h

Posted by Jan Beulich 2 years, 5 months ago

On 23.08.2023 22:07, Shawn Anastasio wrote:
> Implement bitops.h, based on Linux's implementation as of commit
> 5321d1b1afb9a17302c6cec79f0cbf823eb0d3fc. Though it is based off of
> Linux's implementation, this code diverges significantly in a number of
> ways:
>   - Bitmap entries changed to 32-bit words to match X86 and Arm on Xen
>   - PPC32-specific code paths dropped
>   - Formatting completely re-done to more closely line up with Xen.
>     Including 4 space indentation.

With this goal, ...

> --- a/xen/arch/ppc/include/asm/bitops.h
> +++ b/xen/arch/ppc/include/asm/bitops.h
> @@ -1,9 +1,335 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * Adapted from Linux's arch/powerpc/include/asm/bitops.h.
> + *
> + * Merged version by David Gibson <david@gibson.dropbear.id.au>.
> + * Based on ppc64 versions by: Dave Engebretsen, Todd Inglett, Don
> + * Reed, Pat McCarthy, Peter Bergner, Anton Blanchard.  They
> + * originally took it from the ppc32 code.
> + */
>  #ifndef _ASM_PPC_BITOPS_H
>  #define _ASM_PPC_BITOPS_H
> 
> +#include <asm/memory.h>
> +
> +#define __set_bit(n,p)            set_bit(n,p)
> +#define __clear_bit(n,p)          clear_bit(n,p)

... you want to add blanks after the commas as well. (You might also
simply omit parameters altogether.)

> +#define BITOP_BITS_PER_WORD     32
> +#define BITOP_MASK(nr)          (1UL << ((nr) % BITOP_BITS_PER_WORD))
> +#define BITOP_WORD(nr)          ((nr) / BITOP_BITS_PER_WORD)
> +#define BITS_PER_BYTE           8
> +
>  /* PPC bit number conversion */
> -#define PPC_BITLSHIFT(be)	(BITS_PER_LONG - 1 - (be))
> -#define PPC_BIT(bit)		(1UL << PPC_BITLSHIFT(bit))
> -#define PPC_BITMASK(bs, be)	((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
> +#define PPC_BITLSHIFT(be)    (BITS_PER_LONG - 1 - (be))
> +#define PPC_BIT(bit)         (1UL << PPC_BITLSHIFT(bit))
> +#define PPC_BITMASK(bs, be)  ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
> +
> +/* Macro for generating the ***_bits() functions */
> +#define DEFINE_BITOP(fn, op, prefix)                                           \
> +static inline void fn(unsigned long mask,                                      \
> +        volatile unsigned int *_p)                                             \

Nit: Style. Either

static inline void fn(unsigned long mask,                                      \
                      volatile unsigned int *_p)                               \

or

static inline void fn(unsigned long mask,                                      \
    volatile unsigned int *_p)                                                 \

. Also there's again an underscore-prefixed identifier here.

> +{                                                                              \
> +    unsigned long old;                                                         \
> +    unsigned int *p = (unsigned int *)_p;                                      \
> +    asm volatile (                                                             \
> +    prefix                                                                     \
> +"1: lwarx %0,0,%3,0\n"                                                         \
> +    #op "%I2 %0,%0,%2\n"                                                       \
> +    "stwcx. %0,0,%3\n"                                                         \
> +    "bne- 1b\n"                                                                \
> +    : "=&r" (old), "+m" (*p)                                                   \
> +    : "rK" (mask), "r" (p)                                                     \
> +    : "cc", "memory");                                                         \

The asm() body wants indenting by another four blanks (more instances below).

> +}
> +
> +DEFINE_BITOP(set_bits, or, "")
> +DEFINE_BITOP(change_bits, xor, "")
> +
> +#define DEFINE_CLROP(fn, prefix)                                               \
> +static inline void fn(unsigned long mask, volatile unsigned int *_p)           \
> +{                                                                              \
> +    unsigned long old;                                                         \
> +    unsigned int *p = (unsigned int *)_p;                                      \
> +    asm volatile (                                                             \
> +    prefix                                                                     \
> +"1: lwarx %0,0,%3,0\n"                                                         \
> +    "andc %0,%0,%2\n"                                                          \
> +    "stwcx. %0,0,%3\n"                                                         \
> +    "bne- 1b\n"                                                                \
> +    : "=&r" (old), "+m" (*p)                                                   \
> +    : "r" (mask), "r" (p)                                                      \
> +    : "cc", "memory");                                                         \
> +}
> +
> +DEFINE_CLROP(clear_bits, "")
> +
> +static inline void set_bit(int nr, volatile void *addr)
> +{
> +    set_bits(BITOP_MASK(nr), (volatile unsigned int *)addr + BITOP_WORD(nr));
> +}
> +static inline void clear_bit(int nr, volatile void *addr)
> +{
> +    clear_bits(BITOP_MASK(nr), (volatile unsigned int *)addr + BITOP_WORD(nr));
> +}
> +
> +/**
> + * test_bit - Determine whether a bit is set
> + * @nr: bit number to test
> + * @addr: Address to start counting from
> + */
> +static inline int test_bit(int nr, const volatile void *addr)
> +{
> +        const volatile unsigned long *p = (const volatile unsigned long *)addr;
> +        return 1UL & (p[BITOP_WORD(nr)] >> (nr & (BITOP_BITS_PER_WORD-1)));

Nit: Too deep indentation. Plus blanks around - please. I also don't see
the need for the UL suffix, when the function returns int only (and really
means to return bool, I assume, but int is in line with x86 and Arm, I
expect).

> +}
> +
> +static inline unsigned long test_and_clear_bits(unsigned long mask, volatile void *_p)
> +{
> +    unsigned long old, t;
> +    unsigned int *p = (unsigned int *)_p;
> +
> +    asm volatile (
> +        PPC_ATOMIC_ENTRY_BARRIER
> +        "1: lwarx %0,0,%3,0\n"
> +        "andc %1,%0,%2\n"
> +        "stwcx. %1,0,%3\n"
> +        "bne- 1b\n"
> +        PPC_ATOMIC_EXIT_BARRIER
> +        : "=&r" (old), "=&r" (t)
> +        : "r" (mask), "r" (p)
> +        : "cc", "memory");
> +
> +    return (old & mask);
> +}
> +
> +static inline int test_and_clear_bit(unsigned int nr,
> +                                       volatile void *addr)

Nit: Too deep indentation again.

> +{
> +    return test_and_clear_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr)) != 0;
> +}
> +
> +#define DEFINE_TESTOP(fn, op, eh)                                              \
> +static inline unsigned long fn(                                                \
> +        unsigned long mask,                                                    \
> +        volatile unsigned int *_p)                                             \

And yet once more (and there are more below).

> +{                                                                              \
> +    unsigned long old, t;                                                      \
> +    unsigned int *p = (unsigned int *)_p;                                      \
> +    __asm__ __volatile__ (                                                     \
> +    PPC_ATOMIC_ENTRY_BARRIER                                                   \
> +"1:" "lwarx %0,0,%3,%4\n"                                                      \
> +    #op "%I2 %1,%0,%2\n"                                                       \
> +    "stwcx. %1,0,%3\n"                                                         \
> +    "bne- 1b\n"                                                                \
> +    PPC_ATOMIC_EXIT_BARRIER                                                    \
> +    : "=&r" (old), "=&r" (t)                                                   \
> +    : "rK" (mask), "r" (p), "n" (eh)                                           \
> +    : "cc", "memory");                                                         \
> +    return (old & mask);                                                       \
> +}
> +
> +DEFINE_TESTOP(test_and_set_bits, or, 0)
> +
> +static inline int test_and_set_bit(unsigned long nr, volatile void *addr)
> +{
> +    return test_and_set_bits(BITOP_MASK(nr), (volatile unsigned int *)addr + BITOP_WORD(nr)) != 0;

Too long line.

> +}
> +
> +/**
> + * __test_and_set_bit - Set a bit and return its old value
> + * @nr: Bit to set
> + * @addr: Address to count from
> + *
> + * This operation is non-atomic and can be reordered.
> + * If two examples of this operation race, one can appear to succeed
> + * but actually fail.  You must protect multiple accesses with a lock.
> + */
> +static inline int __test_and_set_bit(int nr, volatile void *addr)
> +{
> +        unsigned int mask = BITOP_MASK(nr);
> +        volatile unsigned int *p =
> +                ((volatile unsigned int *)addr) + BITOP_WORD(nr);
> +        unsigned int old = *p;
> +
> +        *p = old | mask;
> +        return (old & mask) != 0;
> +}
> +
> +/**
> + * __test_and_clear_bit - Clear a bit and return its old value
> + * @nr: Bit to clear
> + * @addr: Address to count from
> + *
> + * This operation is non-atomic and can be reordered.
> + * If two examples of this operation race, one can appear to succeed
> + * but actually fail.  You must protect multiple accesses with a lock.
> + */
> +static inline int __test_and_clear_bit(int nr, volatile void *addr)
> +{
> +        unsigned int mask = BITOP_MASK(nr);
> +        volatile unsigned int *p =
> +                ((volatile unsigned int *)addr) + BITOP_WORD(nr);
> +        unsigned int old = *p;
> +
> +        *p = old & ~mask;
> +        return (old & mask) != 0;
> +}
> +
> +#define flsl(x) generic_flsl(x)
> +#define fls(x) generic_fls(x)
> +#define ffs(x) ({ unsigned int __t = (x); fls(__t & -__t); })
> +#define ffsl(x) ({ unsigned long __t = (x); flsl(__t & -__t); })

Hmm, here you even have two underscores as prefixes.

> +/* Based on linux/include/asm-generic/bitops/ffz.h */
> +/*
> + * ffz - find first zero in word.
> + * @word: The word to search
> + *
> + * Undefined if no zero exists, so code should check against ~0UL first.
> + */
> +#define ffz(x)  __ffs(~(x))
> +
> +/**
> + * hweightN - returns the hamming weight of a N-bit word
> + * @x: the word to weigh
> + *
> + * The Hamming Weight of a number is the total number of bits set in it.
> + */
> +#define hweight64(x) generic_hweight64(x)
> +#define hweight32(x) generic_hweight32(x)
> +#define hweight16(x) generic_hweight16(x)
> +#define hweight8(x) generic_hweight8(x)

Not using popcnt{b,w,d}, e.g. via a compiler builtin?

> +/* Based on linux/include/asm-generic/bitops/builtin-__ffs.h */
> +/**
> + * __ffs - find first bit in word.
> + * @word: The word to search
> + *
> + * Undefined if no bit exists, so code should check against 0 first.
> + */
> +static /*__*/always_inline unsigned long __ffs(unsigned long word)

What's this odd comment about here?

> +{
> +        return __builtin_ctzl(word);
> +}
> +
> +/**
> + * find_first_set_bit - find the first set bit in @word
> + * @word: the word to search
> + *
> + * Returns the bit-number of the first set bit (first bit being 0).
> + * The input must *not* be zero.
> + */
> +#define find_first_set_bit(x) ({ ffsl(x) - 1; })

Simply

#define find_first_set_bit(x) (ffsl(x) - 1)

without use of any extensions?

> +/*
> + * Find the first set bit in a memory region.
> + */
> +static inline unsigned long find_first_bit(const unsigned long *addr,
> +                                           unsigned long size)
> +{
> +    const unsigned long *p = addr;
> +    unsigned long result = 0;
> +    unsigned long tmp;
> +
> +    while (size & ~(BITS_PER_LONG-1)) {
> +        if ((tmp = *(p++)))
> +            goto found;
> +        result += BITS_PER_LONG;
> +        size -= BITS_PER_LONG;
> +    }
> +    if (!size)
> +        return result;

Just using 4-blank indentation isn't enough to make this Xen style.
(More such elsewhere.)

> +    tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
> +    if (tmp == 0UL)        /* Are any bits set? */
> +        return result + size;    /* Nope. */
> +found:

Labels indented by at least one blank please. (More elsewhere.)

Jan

Re: [PATCH v2 4/8] xen/ppc: Implement bitops.h

Posted by Shawn Anastasio 2 years, 5 months ago

On 8/29/23 8:59 AM, Jan Beulich wrote:
> On 23.08.2023 22:07, Shawn Anastasio wrote:
>> Implement bitops.h, based on Linux's implementation as of commit
>> 5321d1b1afb9a17302c6cec79f0cbf823eb0d3fc. Though it is based off of
>> Linux's implementation, this code diverges significantly in a number of
>> ways:
>>   - Bitmap entries changed to 32-bit words to match X86 and Arm on Xen
>>   - PPC32-specific code paths dropped
>>   - Formatting completely re-done to more closely line up with Xen.
>>     Including 4 space indentation.
> 
> With this goal, ...
> 
>> --- a/xen/arch/ppc/include/asm/bitops.h
>> +++ b/xen/arch/ppc/include/asm/bitops.h
>> @@ -1,9 +1,335 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +/*
>> + * Adapted from Linux's arch/powerpc/include/asm/bitops.h.
>> + *
>> + * Merged version by David Gibson <david@gibson.dropbear.id.au>.
>> + * Based on ppc64 versions by: Dave Engebretsen, Todd Inglett, Don
>> + * Reed, Pat McCarthy, Peter Bergner, Anton Blanchard.  They
>> + * originally took it from the ppc32 code.
>> + */
>>  #ifndef _ASM_PPC_BITOPS_H
>>  #define _ASM_PPC_BITOPS_H
>>
>> +#include <asm/memory.h>
>> +
>> +#define __set_bit(n,p)            set_bit(n,p)
>> +#define __clear_bit(n,p)          clear_bit(n,p)
> 
> ... you want to add blanks after the commas as well. (You might also
> simply omit parameters altogether.)
> 
>> +#define BITOP_BITS_PER_WORD     32
>> +#define BITOP_MASK(nr)          (1UL << ((nr) % BITOP_BITS_PER_WORD))
>> +#define BITOP_WORD(nr)          ((nr) / BITOP_BITS_PER_WORD)
>> +#define BITS_PER_BYTE           8
>> +
>>  /* PPC bit number conversion */
>> -#define PPC_BITLSHIFT(be)	(BITS_PER_LONG - 1 - (be))
>> -#define PPC_BIT(bit)		(1UL << PPC_BITLSHIFT(bit))
>> -#define PPC_BITMASK(bs, be)	((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
>> +#define PPC_BITLSHIFT(be)    (BITS_PER_LONG - 1 - (be))
>> +#define PPC_BIT(bit)         (1UL << PPC_BITLSHIFT(bit))
>> +#define PPC_BITMASK(bs, be)  ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
>> +
>> +/* Macro for generating the ***_bits() functions */
>> +#define DEFINE_BITOP(fn, op, prefix)                                           \
>> +static inline void fn(unsigned long mask,                                      \
>> +        volatile unsigned int *_p)                                             \
> 
> Nit: Style. Either
> 
> static inline void fn(unsigned long mask,                                      \
>                       volatile unsigned int *_p)                               \
> 
> or
> 
> static inline void fn(unsigned long mask,                                      \
>     volatile unsigned int *_p)                                                 \
> 
> . Also there's again an underscore-prefixed identifier here.
>

Will fix both.

>> +{                                                                              \
>> +    unsigned long old;                                                         \
>> +    unsigned int *p = (unsigned int *)_p;                                      \
>> +    asm volatile (                                                             \
>> +    prefix                                                                     \
>> +"1: lwarx %0,0,%3,0\n"                                                         \
>> +    #op "%I2 %0,%0,%2\n"                                                       \
>> +    "stwcx. %0,0,%3\n"                                                         \
>> +    "bne- 1b\n"                                                                \
>> +    : "=&r" (old), "+m" (*p)                                                   \
>> +    : "rK" (mask), "r" (p)                                                     \
>> +    : "cc", "memory");                                                         \
> 
> The asm() body wants indenting by another four blanks (more instances below).
>

If I were to match the style used in the previous patch's atomic.h, the
body should be indented to line up with the opening ( of the asm
statement, right? I'll go ahead and do that for consistency's sake
unless you think it would be better to just leave it as-is with an extra
4 spaces of indentation.

>> +}
>> +
>> +DEFINE_BITOP(set_bits, or, "")
>> +DEFINE_BITOP(change_bits, xor, "")
>> +
>> +#define DEFINE_CLROP(fn, prefix)                                               \
>> +static inline void fn(unsigned long mask, volatile unsigned int *_p)           \
>> +{                                                                              \
>> +    unsigned long old;                                                         \
>> +    unsigned int *p = (unsigned int *)_p;                                      \
>> +    asm volatile (                                                             \
>> +    prefix                                                                     \
>> +"1: lwarx %0,0,%3,0\n"                                                         \
>> +    "andc %0,%0,%2\n"                                                          \
>> +    "stwcx. %0,0,%3\n"                                                         \
>> +    "bne- 1b\n"                                                                \
>> +    : "=&r" (old), "+m" (*p)                                                   \
>> +    : "r" (mask), "r" (p)                                                      \
>> +    : "cc", "memory");                                                         \
>> +}
>> +
>> +DEFINE_CLROP(clear_bits, "")
>> +
>> +static inline void set_bit(int nr, volatile void *addr)
>> +{
>> +    set_bits(BITOP_MASK(nr), (volatile unsigned int *)addr + BITOP_WORD(nr));
>> +}
>> +static inline void clear_bit(int nr, volatile void *addr)
>> +{
>> +    clear_bits(BITOP_MASK(nr), (volatile unsigned int *)addr + BITOP_WORD(nr));
>> +}
>> +
>> +/**
>> + * test_bit - Determine whether a bit is set
>> + * @nr: bit number to test
>> + * @addr: Address to start counting from
>> + */
>> +static inline int test_bit(int nr, const volatile void *addr)
>> +{
>> +        const volatile unsigned long *p = (const volatile unsigned long *)addr;
>> +        return 1UL & (p[BITOP_WORD(nr)] >> (nr & (BITOP_BITS_PER_WORD-1)));
> 
> Nit: Too deep indentation. Plus blanks around - please. I also don't see
> the need for the UL suffix, when the function returns int only (and really
> means to return bool, I assume, but int is in line with x86 and Arm, I
> expect).
>

Will fix the indentation and spacing around -. I'll also drop the UL
suffix, but will keep the int return type, since as you guessed it's
what Arm/x86 do.

>> +}
>> +
>> +static inline unsigned long test_and_clear_bits(unsigned long mask, volatile void *_p)
>> +{
>> +    unsigned long old, t;
>> +    unsigned int *p = (unsigned int *)_p;
>> +
>> +    asm volatile (
>> +        PPC_ATOMIC_ENTRY_BARRIER
>> +        "1: lwarx %0,0,%3,0\n"
>> +        "andc %1,%0,%2\n"
>> +        "stwcx. %1,0,%3\n"
>> +        "bne- 1b\n"
>> +        PPC_ATOMIC_EXIT_BARRIER
>> +        : "=&r" (old), "=&r" (t)
>> +        : "r" (mask), "r" (p)
>> +        : "cc", "memory");
>> +
>> +    return (old & mask);
>> +}
>> +
>> +static inline int test_and_clear_bit(unsigned int nr,
>> +                                       volatile void *addr)
> 
> Nit: Too deep indentation again.
>

Will fix this and all subsequent occurrences.

>> +DEFINE_TESTOP(test_and_set_bits, or, 0)
>> +
>> +static inline int test_and_set_bit(unsigned long nr, volatile void *addr)
>> +{
>> +    return test_and_set_bits(BITOP_MASK(nr), (volatile unsigned int *)addr + BITOP_WORD(nr)) != 0;
> 
> Too long line.
>

Will fix.

>> +}
>> +
>> +/**
>> + * __test_and_set_bit - Set a bit and return its old value
>> + * @nr: Bit to set
>> + * @addr: Address to count from
>> + *
>> + * This operation is non-atomic and can be reordered.
>> + * If two examples of this operation race, one can appear to succeed
>> + * but actually fail.  You must protect multiple accesses with a lock.
>> + */
>> +static inline int __test_and_set_bit(int nr, volatile void *addr)
>> +{
>> +        unsigned int mask = BITOP_MASK(nr);
>> +        volatile unsigned int *p =
>> +                ((volatile unsigned int *)addr) + BITOP_WORD(nr);
>> +        unsigned int old = *p;
>> +
>> +        *p = old | mask;
>> +        return (old & mask) != 0;
>> +}
>> +
>> +/**
>> + * __test_and_clear_bit - Clear a bit and return its old value
>> + * @nr: Bit to clear
>> + * @addr: Address to count from
>> + *
>> + * This operation is non-atomic and can be reordered.
>> + * If two examples of this operation race, one can appear to succeed
>> + * but actually fail.  You must protect multiple accesses with a lock.
>> + */
>> +static inline int __test_and_clear_bit(int nr, volatile void *addr)
>> +{
>> +        unsigned int mask = BITOP_MASK(nr);
>> +        volatile unsigned int *p =
>> +                ((volatile unsigned int *)addr) + BITOP_WORD(nr);
>> +        unsigned int old = *p;
>> +
>> +        *p = old & ~mask;
>> +        return (old & mask) != 0;
>> +}
>> +
>> +#define flsl(x) generic_flsl(x)
>> +#define fls(x) generic_fls(x)
>> +#define ffs(x) ({ unsigned int __t = (x); fls(__t & -__t); })
>> +#define ffsl(x) ({ unsigned long __t = (x); flsl(__t & -__t); })
> 
> Hmm, here you even have two underscores as prefixes.
>

Another carryover from Arm, sorry. Will fix.

>> +/* Based on linux/include/asm-generic/bitops/ffz.h */
>> +/*
>> + * ffz - find first zero in word.
>> + * @word: The word to search
>> + *
>> + * Undefined if no zero exists, so code should check against ~0UL first.
>> + */
>> +#define ffz(x)  __ffs(~(x))
>> +
>> +/**
>> + * hweightN - returns the hamming weight of a N-bit word
>> + * @x: the word to weigh
>> + *
>> + * The Hamming Weight of a number is the total number of bits set in it.
>> + */
>> +#define hweight64(x) generic_hweight64(x)
>> +#define hweight32(x) generic_hweight32(x)
>> +#define hweight16(x) generic_hweight16(x)
>> +#define hweight8(x) generic_hweight8(x)
> 
> Not using popcnt{b,w,d}, e.g. via a compiler builtin?
>

Excellent point. It looks like gcc's __builtin_popcount* family of
builtins will do what we want here. I suppose the other architectures in
Xen don't do this because they target toolchains old enough to not have
these builtins?

>> +/* Based on linux/include/asm-generic/bitops/builtin-__ffs.h */
>> +/**
>> + * __ffs - find first bit in word.
>> + * @word: The word to search
>> + *
>> + * Undefined if no bit exists, so code should check against 0 first.
>> + */
>> +static /*__*/always_inline unsigned long __ffs(unsigned long word)
> 
> What's this odd comment about here?
>

(Yet another) carryover from Arm. Will fix.

>> +{
>> +        return __builtin_ctzl(word);
>> +}
>> +
>> +/**
>> + * find_first_set_bit - find the first set bit in @word
>> + * @word: the word to search
>> + *
>> + * Returns the bit-number of the first set bit (first bit being 0).
>> + * The input must *not* be zero.
>> + */
>> +#define find_first_set_bit(x) ({ ffsl(x) - 1; })
> 
> Simply
> 
> #define find_first_set_bit(x) (ffsl(x) - 1)
> 
> without use of any extensions?
>

Good point, the extension usage here is obviously unnecessary. Will
drop.

>> +/*
>> + * Find the first set bit in a memory region.
>> + */
>> +static inline unsigned long find_first_bit(const unsigned long *addr,
>> +                                           unsigned long size)
>> +{
>> +    const unsigned long *p = addr;
>> +    unsigned long result = 0;
>> +    unsigned long tmp;
>> +
>> +    while (size & ~(BITS_PER_LONG-1)) {
>> +        if ((tmp = *(p++)))
>> +            goto found;
>> +        result += BITS_PER_LONG;
>> +        size -= BITS_PER_LONG;
>> +    }
>> +    if (!size)
>> +        return result;
> 
> Just using 4-blank indentation isn't enough to make this Xen style.
> (More such elsewhere.)
>

Yes, fair enough. I'll reformat it.

>> +    tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
>> +    if (tmp == 0UL)        /* Are any bits set? */
>> +        return result + size;    /* Nope. */
>> +found:
> 
> Labels indented by at least one blank please. (More elsewhere.)

I wasn't aware of this style convention -- so a single blank before the
label would be correct? I'll add that in, then.

> Jan

Thanks,
Shawn

Re: [PATCH v2 4/8] xen/ppc: Implement bitops.h

Posted by Jan Beulich 2 years, 5 months ago

On 31.08.2023 22:06, Shawn Anastasio wrote:
> On 8/29/23 8:59 AM, Jan Beulich wrote:
>> On 23.08.2023 22:07, Shawn Anastasio wrote:
>>> +{                                                                              \
>>> +    unsigned long old;                                                         \
>>> +    unsigned int *p = (unsigned int *)_p;                                      \
>>> +    asm volatile (                                                             \
>>> +    prefix                                                                     \
>>> +"1: lwarx %0,0,%3,0\n"                                                         \
>>> +    #op "%I2 %0,%0,%2\n"                                                       \
>>> +    "stwcx. %0,0,%3\n"                                                         \
>>> +    "bne- 1b\n"                                                                \
>>> +    : "=&r" (old), "+m" (*p)                                                   \
>>> +    : "rK" (mask), "r" (p)                                                     \
>>> +    : "cc", "memory");                                                         \
>>
>> The asm() body wants indenting by another four blanks (more instances below).
>>
> 
> If I were to match the style used in the previous patch's atomic.h, the
> body should be indented to line up with the opening ( of the asm
> statement, right?

Depends on whether the ( is to remain the last token on its line. If so, ...

> I'll go ahead and do that for consistency's sake
> unless you think it would be better to just leave it as-is with an extra
> 4 spaces of indentation.

... this style of indentation would want using. Either is fine, in case you
have a specific preference.

>>> +/* Based on linux/include/asm-generic/bitops/ffz.h */
>>> +/*
>>> + * ffz - find first zero in word.
>>> + * @word: The word to search
>>> + *
>>> + * Undefined if no zero exists, so code should check against ~0UL first.
>>> + */
>>> +#define ffz(x)  __ffs(~(x))
>>> +
>>> +/**
>>> + * hweightN - returns the hamming weight of a N-bit word
>>> + * @x: the word to weigh
>>> + *
>>> + * The Hamming Weight of a number is the total number of bits set in it.
>>> + */
>>> +#define hweight64(x) generic_hweight64(x)
>>> +#define hweight32(x) generic_hweight32(x)
>>> +#define hweight16(x) generic_hweight16(x)
>>> +#define hweight8(x) generic_hweight8(x)
>>
>> Not using popcnt{b,w,d}, e.g. via a compiler builtin?
>>
> 
> Excellent point. It looks like gcc's __builtin_popcount* family of
> builtins will do what we want here. I suppose the other architectures in
> Xen don't do this because they target toolchains old enough to not have
> these builtins?

Well, on x86 a patch introducing its use is actually pending ("x86: use
POPCNT for hweight<N>() when available").

>>> +    tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
>>> +    if (tmp == 0UL)        /* Are any bits set? */
>>> +        return result + size;    /* Nope. */
>>> +found:
>>
>> Labels indented by at least one blank please. (More elsewhere.)
> 
> I wasn't aware of this style convention -- so a single blank before the
> label would be correct?

Yes. (There are cases where deeper indentation is neater, but this isn't
one of them.)

Jan