[PATCH] alpha: Remove IO memcpy and memset

Julian Vetter posted 1 patch 1 year ago
arch/alpha/include/asm/io.h  |  22 -----
arch/alpha/include/asm/vga.h |   2 +-
arch/alpha/kernel/io.c       | 171 -----------------------------------
3 files changed, 1 insertion(+), 194 deletions(-)
[PATCH] alpha: Remove IO memcpy and memset
Posted by Julian Vetter 1 year ago
Recently a new IO memcpy and memset was added in libs/iomem_copy.c. So,
remove the alpha specific implementations and use the one from the lib.

Signed-off-by: Julian Vetter <julian@outer-limits.org>
---
 arch/alpha/include/asm/io.h  |  22 -----
 arch/alpha/include/asm/vga.h |   2 +-
 arch/alpha/kernel/io.c       | 171 -----------------------------------
 3 files changed, 1 insertion(+), 194 deletions(-)

diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index 65fe1e54c6da..2de75404bda1 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -587,28 +587,6 @@ extern inline u64 readq_relaxed(const volatile void __iomem *addr)
 #define writel_relaxed	writel
 #define writeq_relaxed	writeq
 
-/*
- * String version of IO memory access ops:
- */
-extern void memcpy_fromio(void *, const volatile void __iomem *, long);
-extern void memcpy_toio(volatile void __iomem *, const void *, long);
-extern void _memset_c_io(volatile void __iomem *, unsigned long, long);
-
-static inline void memset_io(volatile void __iomem *addr, u8 c, long len)
-{
-	_memset_c_io(addr, 0x0101010101010101UL * c, len);
-}
-
-#define __HAVE_ARCH_MEMSETW_IO
-static inline void memsetw_io(volatile void __iomem *addr, u16 c, long len)
-{
-	_memset_c_io(addr, 0x0001000100010001UL * c, len);
-}
-
-#define memset_io memset_io
-#define memcpy_fromio memcpy_fromio
-#define memcpy_toio memcpy_toio
-
 /*
  * String versions of in/out ops:
  */
diff --git a/arch/alpha/include/asm/vga.h b/arch/alpha/include/asm/vga.h
index 919931cb5b63..cac735bc3e16 100644
--- a/arch/alpha/include/asm/vga.h
+++ b/arch/alpha/include/asm/vga.h
@@ -34,7 +34,7 @@ static inline u16 scr_readw(volatile const u16 *addr)
 static inline void scr_memsetw(u16 *s, u16 c, unsigned int count)
 {
 	if (__is_ioaddr(s))
-		memsetw_io((u16 __iomem *) s, c, count);
+		memset_io((u16 __iomem *) s, c, count);
 	else
 		memset16(s, c, count / 2);
 }
diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c
index c28035d6d1e6..0d24a6ad682c 100644
--- a/arch/alpha/kernel/io.c
+++ b/arch/alpha/kernel/io.c
@@ -476,177 +476,6 @@ void outsl(unsigned long port, const void *src, unsigned long count)
 EXPORT_SYMBOL(iowrite32_rep);
 EXPORT_SYMBOL(outsl);
 
-
-/*
- * Copy data from IO memory space to "real" memory space.
- * This needs to be optimized.
- */
-void memcpy_fromio(void *to, const volatile void __iomem *from, long count)
-{
-	/* Optimize co-aligned transfers.  Everything else gets handled
-	   a byte at a time. */
-
-	if (count >= 8 && ((u64)to & 7) == ((u64)from & 7)) {
-		count -= 8;
-		do {
-			*(u64 *)to = __raw_readq(from);
-			count -= 8;
-			to += 8;
-			from += 8;
-		} while (count >= 0);
-		count += 8;
-	}
-
-	if (count >= 4 && ((u64)to & 3) == ((u64)from & 3)) {
-		count -= 4;
-		do {
-			*(u32 *)to = __raw_readl(from);
-			count -= 4;
-			to += 4;
-			from += 4;
-		} while (count >= 0);
-		count += 4;
-	}
-
-	if (count >= 2 && ((u64)to & 1) == ((u64)from & 1)) {
-		count -= 2;
-		do {
-			*(u16 *)to = __raw_readw(from);
-			count -= 2;
-			to += 2;
-			from += 2;
-		} while (count >= 0);
-		count += 2;
-	}
-
-	while (count > 0) {
-		*(u8 *) to = __raw_readb(from);
-		count--;
-		to++;
-		from++;
-	}
-	mb();
-}
-
-EXPORT_SYMBOL(memcpy_fromio);
-
-
-/*
- * Copy data from "real" memory space to IO memory space.
- * This needs to be optimized.
- */
-void memcpy_toio(volatile void __iomem *to, const void *from, long count)
-{
-	/* Optimize co-aligned transfers.  Everything else gets handled
-	   a byte at a time. */
-	/* FIXME -- align FROM.  */
-
-	if (count >= 8 && ((u64)to & 7) == ((u64)from & 7)) {
-		count -= 8;
-		do {
-			__raw_writeq(*(const u64 *)from, to);
-			count -= 8;
-			to += 8;
-			from += 8;
-		} while (count >= 0);
-		count += 8;
-	}
-
-	if (count >= 4 && ((u64)to & 3) == ((u64)from & 3)) {
-		count -= 4;
-		do {
-			__raw_writel(*(const u32 *)from, to);
-			count -= 4;
-			to += 4;
-			from += 4;
-		} while (count >= 0);
-		count += 4;
-	}
-
-	if (count >= 2 && ((u64)to & 1) == ((u64)from & 1)) {
-		count -= 2;
-		do {
-			__raw_writew(*(const u16 *)from, to);
-			count -= 2;
-			to += 2;
-			from += 2;
-		} while (count >= 0);
-		count += 2;
-	}
-
-	while (count > 0) {
-		__raw_writeb(*(const u8 *) from, to);
-		count--;
-		to++;
-		from++;
-	}
-	mb();
-}
-
-EXPORT_SYMBOL(memcpy_toio);
-
-
-/*
- * "memset" on IO memory space.
- */
-void _memset_c_io(volatile void __iomem *to, unsigned long c, long count)
-{
-	/* Handle any initial odd byte */
-	if (count > 0 && ((u64)to & 1)) {
-		__raw_writeb(c, to);
-		to++;
-		count--;
-	}
-
-	/* Handle any initial odd halfword */
-	if (count >= 2 && ((u64)to & 2)) {
-		__raw_writew(c, to);
-		to += 2;
-		count -= 2;
-	}
-
-	/* Handle any initial odd word */
-	if (count >= 4 && ((u64)to & 4)) {
-		__raw_writel(c, to);
-		to += 4;
-		count -= 4;
-	}
-
-	/* Handle all full-sized quadwords: we're aligned
-	   (or have a small count) */
-	count -= 8;
-	if (count >= 0) {
-		do {
-			__raw_writeq(c, to);
-			to += 8;
-			count -= 8;
-		} while (count >= 0);
-	}
-	count += 8;
-
-	/* The tail is word-aligned if we still have count >= 4 */
-	if (count >= 4) {
-		__raw_writel(c, to);
-		to += 4;
-		count -= 4;
-	}
-
-	/* The tail is half-word aligned if we have count >= 2 */
-	if (count >= 2) {
-		__raw_writew(c, to);
-		to += 2;
-		count -= 2;
-	}
-
-	/* And finally, one last byte.. */
-	if (count) {
-		__raw_writeb(c, to);
-	}
-	mb();
-}
-
-EXPORT_SYMBOL(_memset_c_io);
-
 #if IS_ENABLED(CONFIG_VGA_CONSOLE) || IS_ENABLED(CONFIG_MDA_CONSOLE)
 
 #include <asm/vga.h>
-- 
2.34.1
Re: [PATCH] alpha: Remove IO memcpy and memset
Posted by Maciej W. Rozycki 1 year ago
On Mon, 3 Feb 2025, Julian Vetter wrote:

> Recently a new IO memcpy and memset was added in libs/iomem_copy.c. So,
                                                      ^
 This has to be lib/iomem_copy.c.

> remove the alpha specific implementations and use the one from the lib.

 You're dropping the memory barriers, which the generic code doesn't have, 
so should we agree to switch to the generic version it has to be confirmed 
what the right course of action is:

1. Use wrapper calls with memory barriers.

2. Add memory barriers to generic code.

3. Conclude that dropping is correct and adjust the callers where 
   applicable.

4. Something else maybe.

See below for the actual places.

> diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c
> index c28035d6d1e6..0d24a6ad682c 100644
> --- a/arch/alpha/kernel/io.c
> +++ b/arch/alpha/kernel/io.c
> @@ -476,177 +476,6 @@ void outsl(unsigned long port, const void *src, unsigned long count)
>  EXPORT_SYMBOL(iowrite32_rep);
>  EXPORT_SYMBOL(outsl);
>  
> -
> -/*
> - * Copy data from IO memory space to "real" memory space.
> - * This needs to be optimized.
> - */
> -void memcpy_fromio(void *to, const volatile void __iomem *from, long count)
> -{
[...]
> -	while (count > 0) {
> -		*(u8 *) to = __raw_readb(from);
> -		count--;
> -		to++;
> -		from++;
> -	}
> -	mb();

 Here...

> -void memcpy_toio(volatile void __iomem *to, const void *from, long count)
> -{
[...]
> -	while (count > 0) {
> -		__raw_writeb(*(const u8 *) from, to);
> -		count--;
> -		to++;
> -		from++;
> -	}
> -	mb();

... here...

> -void _memset_c_io(volatile void __iomem *to, unsigned long c, long count)
> -{
[...]
> -	/* And finally, one last byte.. */
> -	if (count) {
> -		__raw_writeb(c, to);
> -	}
> -	mb();

... and here.

  Maciej
Re: [PATCH] alpha: Remove IO memcpy and memset
Posted by Arnd Bergmann 1 year ago
On Mon, Feb 3, 2025, at 15:18, Julian Vetter wrote:

> diff --git a/arch/alpha/include/asm/vga.h b/arch/alpha/include/asm/vga.h
> index 919931cb5b63..cac735bc3e16 100644
> --- a/arch/alpha/include/asm/vga.h
> +++ b/arch/alpha/include/asm/vga.h
> @@ -34,7 +34,7 @@ static inline u16 scr_readw(volatile const u16 *addr)
>  static inline void scr_memsetw(u16 *s, u16 c, unsigned int count)
>  {
>  	if (__is_ioaddr(s))
> -		memsetw_io((u16 __iomem *) s, c, count);
> +		memset_io((u16 __iomem *) s, c, count);
>  	else
>  		memset16(s, c, count / 2);
>  }

I don't think this is a correct conversion, memset_io() will
set every byte to the same value and ignore the upper half of
the 16-bit value.

On all other architectures, scr_memsetw() turns into a memset(),
but that does not work on older alpha machines since MMIO access
has additional constraints.

scr_memsetw() is the only caller of _memset_c_io(), so I think it
makes sense to move both inside of the CONFIG_VGA_CONSOLE block
along with scr_memcpyw() and scr_memmovew().

> -void _memset_c_io(volatile void __iomem *to, unsigned long c, long count)
> -{
> -	/* Handle any initial odd byte */
> -	if (count > 0 && ((u64)to & 1)) {
> -		__raw_writeb(c, to);
> -		to++;
> -		count--;
> -	}
> -
> -	/* Handle any initial odd halfword */
> -	if (count >= 2 && ((u64)to & 2)) {
> -		__raw_writew(c, to);
> -		to += 2;
> -		count -= 2;
> -	}
> -
> -	/* Handle any initial odd word */
> -	if (count >= 4 && ((u64)to & 4)) {
> -		__raw_writel(c, to);
> -		to += 4;
> -		count -= 4;
> -	}
> -

For this function I think it's close enough, the generic
version is slightly simpler since it skips the 2-byte and
4-byte stores between single-byte and 'long' stores.

      Arnd