Instead of byte-wise memset() calls, use the 32- or 64-bit version
depending on the compile-time BITS_PER_LONG value.
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
include/linux/bitmap.h | 16 ++++++++++++----
lib/bitmap.c | 4 ++--
2 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 2d5042d1b501..6eec4d4fd623 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -234,22 +234,30 @@ extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp,
 static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
 {
-	unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+	unsigned int len = BITS_TO_LONGS(nbits);
 
 	if (small_const_nbits(nbits))
 		*dst = 0;
 	else
-		memset(dst, 0, len);
+#if BITS_PER_LONG == 64
+		memset64((uint64_t *)dst, 0, len);
+#else
+		memset32((uint32_t *)dst, 0, len);
+#endif
 }
 static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
 {
-	unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+	unsigned int len = BITS_TO_LONGS(nbits);
 
 	if (small_const_nbits(nbits))
 		*dst = ~0UL;
 	else
-		memset(dst, 0xff, len);
+#if BITS_PER_LONG == 64
+		memset64((uint64_t *)dst, GENMASK(63, 0), len);
+#else
+		memset32((uint32_t *)dst, GENMASK(31, 0), len);
+#endif
 }
static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 935e0f96e785..df0fb37a5732 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -128,7 +128,7 @@ void __bitmap_shift_right(unsigned long *dst, const unsigned long *src,
 		dst[k] = lower | upper;
 	}
 	if (off)
-		memset(&dst[lim - off], 0, off*sizeof(unsigned long));
+		bitmap_zero(&dst[lim - off], off);
 }
EXPORT_SYMBOL(__bitmap_shift_right);
@@ -166,7 +166,7 @@ void __bitmap_shift_left(unsigned long *dst, const unsigned long *src,
 		dst[k + off] = lower | upper;
 	}
 	if (off)
-		memset(dst, 0, off*sizeof(unsigned long));
+		bitmap_zero(dst, off);
 }
EXPORT_SYMBOL(__bitmap_shift_left);
--
2.40.0.1.gaa8946217a0b
On 17/08/2023 18.54, Andy Shevchenko wrote:
> Instead of byte-wise memset() calls, use the 32- or 64-bit version
> depending on the compile-time BITS_PER_LONG value.
>
> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> ---
> include/linux/bitmap.h | 16 ++++++++++++----
> lib/bitmap.c | 4 ++--
> 2 files changed, 14 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
> index 2d5042d1b501..6eec4d4fd623 100644
> --- a/include/linux/bitmap.h
> +++ b/include/linux/bitmap.h
> @@ -234,22 +234,30 @@ extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp,
>
> static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
> {
> -	unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
> +	unsigned int len = BITS_TO_LONGS(nbits);
>
> 	if (small_const_nbits(nbits))
> 		*dst = 0;
> 	else
> -		memset(dst, 0, len);
> +#if BITS_PER_LONG == 64
> +		memset64((uint64_t *)dst, 0, len);
> +#else
> +		memset32((uint32_t *)dst, 0, len);
> +#endif
> }
>
So _if_ this is worth it at all, all those new '#if BITS_PER_LONG == 64'
blocks suggest that we should instead have a new helper memset_long(), no?
In fact, string.h already has that:
static inline void *memset_l(unsigned long *p, unsigned long v,
		__kernel_size_t n)
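
With memset_l() the whole #if/#else dance disappears, since it takes a
count of longs; e.g. bitmap_zero() could then read (a sketch of the
idea, untested):

	static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
	{
		if (small_const_nbits(nbits))
			*dst = 0;
		else
			/* memset_l() fills n longs, so pass a word count */
			memset_l(dst, 0, BITS_TO_LONGS(nbits));
	}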
> static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
> {
> -	unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
> +	unsigned int len = BITS_TO_LONGS(nbits);
>
> 	if (small_const_nbits(nbits))
> 		*dst = ~0UL;
> 	else
> -		memset(dst, 0xff, len);
> +#if BITS_PER_LONG == 64
> +		memset64((uint64_t *)dst, GENMASK(63, 0), len);
> +#else
> +		memset32((uint32_t *)dst, GENMASK(31, 0), len);
> +#endif
> }
>
Please just spell an all-ones long as "~0UL"; that also matches the
small_const case.
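
(Combining both points, the bitmap_fill() hunk would then be just
memset_l(dst, ~0UL, BITS_TO_LONGS(nbits)), identical on 32- and 64-bit.)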
> static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
> diff --git a/lib/bitmap.c b/lib/bitmap.c
> index 935e0f96e785..df0fb37a5732 100644
> --- a/lib/bitmap.c
> +++ b/lib/bitmap.c
> @@ -128,7 +128,7 @@ void __bitmap_shift_right(unsigned long *dst, const unsigned long *src,
> 		dst[k] = lower | upper;
> 	}
> 	if (off)
> -		memset(&dst[lim - off], 0, off*sizeof(unsigned long));
> +		bitmap_zero(&dst[lim - off], off);
This... can't be right. bitmap_zero() still takes an argument which is
the size in bits, while off is the whole number of words to shift. So if
called with, say, shift=128, we'd have off==2, and that bitmap_zero()
would, because bitmap_zero() rounds the bit count up to a whole number
of words, end up clearing just one word instead of two.
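
To spell out the arithmetic (64-bit example):

	/* shift = 128 -> off = 2 words that must be cleared */
	bitmap_zero(&dst[lim - 2], 2);
	/*
	 * Inside bitmap_zero(), BITS_TO_LONGS(2) == DIV_ROUND_UP(2, 64) == 1,
	 * so only dst[lim - 2] is zeroed; dst[lim - 1] keeps stale bits.
	 */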
Perhaps a chance to add some more test cases? Maybe we're not exercising
any of the "shift more than BITS_PER_LONG" logic.
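
Something along these lines in lib/test_bitmap.c should catch it (just a
sketch, assuming the expect_eq_bitmap() helper that file already uses):

	DECLARE_BITMAP(src, 256);
	DECLARE_BITMAP(dst, 256);
	DECLARE_BITMAP(exp, 256);

	bitmap_fill(src, 256);
	bitmap_zero(exp, 256);
	bitmap_set(exp, 0, 128);	/* all-ones >> 128: low half survives */
	__bitmap_shift_right(dst, src, 128, 256);
	expect_eq_bitmap(exp, dst, 256);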
But rather than using bitmap_zero() here, forcing you to convert off to
a number of bits and then bitmap_zero to divide again, if you do this at
all, just change that memset() to memset_l().
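
i.e. (sketch):

	if (off)
		memset_l(&dst[lim - off], 0, off);

since off is already a count of longs, which is exactly what memset_l()
takes; the same applies to the __bitmap_shift_left() case below.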
> }
> EXPORT_SYMBOL(__bitmap_shift_right);
>
> @@ -166,7 +166,7 @@ void __bitmap_shift_left(unsigned long *dst, const unsigned long *src,
> 		dst[k + off] = lower | upper;
> 	}
> 	if (off)
> -		memset(dst, 0, off*sizeof(unsigned long));
> +		bitmap_zero(dst, off);
> }
Same here. Cannot possibly be correct, but will work by chance for off
<= 1, i.e. shift < 2*BITS_PER_LONG.
Rasmus
On Fri, Aug 18, 2023 at 08:53:38AM +0200, Rasmus Villemoes wrote:
> On 17/08/2023 18.54, Andy Shevchenko wrote:

...

> > +#if BITS_PER_LONG == 64
> > +		memset64((uint64_t *)dst, 0, len);
> > +#else
> > +		memset32((uint32_t *)dst, 0, len);
> > +#endif
> > }
>
> So _if_ this is worth it at all,

Yury, I believe you have a set of performance tests for bitmaps; perhaps
you can give it a try and see if we get a benefit here.

That said, not all architectures have a custom implementation of the
optimized memset()/memcpy(), but at least ARM and x86 do.

> all those new '#if BITS_PER_LONG == 64'
> blocks suggest that we should instead have a new helper memset_long(), no?
>
> In fact, string.h already has that:
>
> static inline void *memset_l(unsigned long *p, unsigned long v,
> 		__kernel_size_t n)

My grep skills suddenly degraded: my thoughts were exactly the same as
yours, but I grepped for _long instead of _l and missed it.

...

> Please just spell an all-ones long as "~0UL"; that also matches the
> small_const case.

OK!

...

> > -		memset(&dst[lim - off], 0, off*sizeof(unsigned long));
> > +		bitmap_zero(&dst[lim - off], off);
>
> This... can't be right.

Indeed, I had memset_long() in mind and halfway through replaced it with
bitmap_zero().

> bitmap_zero() still takes an argument which is
> the size in bits, while off is the whole number of words to shift. So if
> called with, say, shift=128, we'd have off==2, and that bitmap_zero()
> would, because bitmap_zero() rounds the bit count up to a whole number
> of words, end up clearing just one word instead of two.
>
> Perhaps a chance to add some more test cases? Maybe we're not exercising
> any of the "shift more than BITS_PER_LONG" logic.
>
> But rather than using bitmap_zero() here, forcing you to convert off to
> a number of bits and then bitmap_zero to divide again, if you do this at
> all, just change that memset() to memset_l().

Agree.

-- 
With Best Regards,
Andy Shevchenko