arch/sh/include/asm/io.h | 8 --- arch/sh/kernel/io.c | 111 --------------------------------------- 2 files changed, 119 deletions(-) delete mode 100644 arch/sh/kernel/io.c
Remove IO memcpy and memset from sh specific code and fall back to the
new implementations from lib/iomem_copy.c. They use word accesses if the
buffers are aligned and only fall back to byte accesses for potentially
unaligned parts of a buffer.
Signed-off-by: Julian Vetter <julian@outer-limits.org>
---
Changes for V2:
- Removed also SH4 specific memcpy_fromio code
---
arch/sh/include/asm/io.h | 8 ---
arch/sh/kernel/io.c | 111 ---------------------------------------
2 files changed, 119 deletions(-)
delete mode 100644 arch/sh/kernel/io.c
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index cf5eab840d57..1e33a1c8b72d 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -268,14 +268,6 @@ __BUILD_IOPORT_STRING(q, u64)
#define IO_SPACE_LIMIT 0xffffffff
-/* We really want to try and get these to memcpy etc */
-#define memset_io memset_io
-#define memcpy_fromio memcpy_fromio
-#define memcpy_toio memcpy_toio
-void memcpy_fromio(void *, const volatile void __iomem *, unsigned long);
-void memcpy_toio(volatile void __iomem *, const void *, unsigned long);
-void memset_io(volatile void __iomem *, int, unsigned long);
-
/* Quad-word real-mode I/O, don't ask.. */
unsigned long long peek_real_address_q(unsigned long long addr);
unsigned long long poke_real_address_q(unsigned long long addr,
diff --git a/arch/sh/kernel/io.c b/arch/sh/kernel/io.c
deleted file mode 100644
index da22f3b32d30..000000000000
--- a/arch/sh/kernel/io.c
+++ /dev/null
@@ -1,111 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * arch/sh/kernel/io.c - Machine independent I/O functions.
- *
- * Copyright (C) 2000 - 2009 Stuart Menefy
- * Copyright (C) 2005 Paul Mundt
- */
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <asm/machvec.h>
-#include <asm/io.h>
-
-/*
- * Copy data from IO memory space to "real" memory space.
- */
-void memcpy_fromio(void *to, const volatile void __iomem *from, unsigned long count)
-{
- /*
- * Would it be worthwhile doing byte and long transfers first
- * to try and get aligned?
- */
-#ifdef CONFIG_CPU_SH4
- if ((count >= 0x20) &&
- (((u32)to & 0x1f) == 0) && (((u32)from & 0x3) == 0)) {
- int tmp2, tmp3, tmp4, tmp5, tmp6;
-
- __asm__ __volatile__(
- "1: \n\t"
- "mov.l @%7+, r0 \n\t"
- "mov.l @%7+, %2 \n\t"
- "movca.l r0, @%0 \n\t"
- "mov.l @%7+, %3 \n\t"
- "mov.l @%7+, %4 \n\t"
- "mov.l @%7+, %5 \n\t"
- "mov.l @%7+, %6 \n\t"
- "mov.l @%7+, r7 \n\t"
- "mov.l @%7+, r0 \n\t"
- "mov.l %2, @(0x04,%0) \n\t"
- "mov #0x20, %2 \n\t"
- "mov.l %3, @(0x08,%0) \n\t"
- "sub %2, %1 \n\t"
- "mov.l %4, @(0x0c,%0) \n\t"
- "cmp/hi %1, %2 ! T if 32 > count \n\t"
- "mov.l %5, @(0x10,%0) \n\t"
- "mov.l %6, @(0x14,%0) \n\t"
- "mov.l r7, @(0x18,%0) \n\t"
- "mov.l r0, @(0x1c,%0) \n\t"
- "bf.s 1b \n\t"
- " add #0x20, %0 \n\t"
- : "=&r" (to), "=&r" (count),
- "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4),
- "=&r" (tmp5), "=&r" (tmp6), "=&r" (from)
- : "7"(from), "0" (to), "1" (count)
- : "r0", "r7", "t", "memory");
- }
-#endif
-
- if ((((u32)to | (u32)from) & 0x3) == 0) {
- for (; count > 3; count -= 4) {
- *(u32 *)to = *(volatile u32 *)from;
- to += 4;
- from += 4;
- }
- }
-
- for (; count > 0; count--) {
- *(u8 *)to = *(volatile u8 *)from;
- to++;
- from++;
- }
-
- mb();
-}
-EXPORT_SYMBOL(memcpy_fromio);
-
-/*
- * Copy data from "real" memory space to IO memory space.
- */
-void memcpy_toio(volatile void __iomem *to, const void *from, unsigned long count)
-{
- if ((((u32)to | (u32)from) & 0x3) == 0) {
- for ( ; count > 3; count -= 4) {
- *(volatile u32 *)to = *(u32 *)from;
- to += 4;
- from += 4;
- }
- }
-
- for (; count > 0; count--) {
- *(volatile u8 *)to = *(u8 *)from;
- to++;
- from++;
- }
-
- mb();
-}
-EXPORT_SYMBOL(memcpy_toio);
-
-/*
- * "memset" on IO memory space.
- * This needs to be optimized.
- */
-void memset_io(volatile void __iomem *dst, int c, unsigned long count)
-{
- while (count) {
- count--;
- writeb(c, dst);
- dst++;
- }
-}
-EXPORT_SYMBOL(memset_io);
--
2.34.1
Hi Julian,
On Tue, 28 Jan 2025 at 16:22, Julian Vetter <julian@outer-limits.org> wrote:
> Remove IO memcpy and memset from sh specific code and fall back to the
> new implementations from lib/iomem_copy.c. They use word accesses if the
> buffers are aligned and only fall back to byte accesses for potentially
> unaligned parts of a buffer.
>
> Signed-off-by: Julian Vetter <julian@outer-limits.org>
> ---
> Changes for V2:
> - Removed also SH4 specific memcpy_fromio code
Thanks for the update!
> --- a/arch/sh/kernel/io.c
> +++ /dev/null
> @@ -1,111 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0
> -/*
> - * arch/sh/kernel/io.c - Machine independent I/O functions.
> - *
> - * Copyright (C) 2000 - 2009 Stuart Menefy
> - * Copyright (C) 2005 Paul Mundt
> - */
> -#include <linux/module.h>
> -#include <linux/pci.h>
> -#include <asm/machvec.h>
> -#include <asm/io.h>
> -
> -/*
> - * Copy data from IO memory space to "real" memory space.
> - */
> -void memcpy_fromio(void *to, const volatile void __iomem *from, unsigned long count)
> -{
[...]
> - mb();
> -}
> -EXPORT_SYMBOL(memcpy_fromio);
> -
> -/*
> - * Copy data from "real" memory space to IO memory space.
> - */
> -void memcpy_toio(volatile void __iomem *to, const void *from, unsigned long count)
> -{
[...]
> -
> - mb();
> -}
> -EXPORT_SYMBOL(memcpy_toio);
LGTM. My only worry is the removal of the mb() calls, cfr. the
scary warning at
https://elixir.bootlin.com/linux/v6.13/source/arch/sh/include/asm/barrier.h#L13
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
Hi Julian,
On Tue, 2025-01-28 at 11:13 +0100, Julian Vetter wrote:
> Remove IO memcpy and memset from sh specific code and fall back to the
> new implementations from lib/iomem_copy.c. They use word accesses if the
> buffers are aligned and only fall back to byte accesses for potentially
> unaligned parts of a buffer.
>
> Signed-off-by: Julian Vetter <julian@outer-limits.org>
> ---
> Changes for V2:
> - Removed also SH4 specific memcpy_fromio code
> ---
> arch/sh/include/asm/io.h | 8 ---
> arch/sh/kernel/io.c | 111 ---------------------------------------
> 2 files changed, 119 deletions(-)
> delete mode 100644 arch/sh/kernel/io.c
>
> diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
> index cf5eab840d57..1e33a1c8b72d 100644
> --- a/arch/sh/include/asm/io.h
> +++ b/arch/sh/include/asm/io.h
> @@ -268,14 +268,6 @@ __BUILD_IOPORT_STRING(q, u64)
>
> #define IO_SPACE_LIMIT 0xffffffff
>
> -/* We really want to try and get these to memcpy etc */
> -#define memset_io memset_io
> -#define memcpy_fromio memcpy_fromio
> -#define memcpy_toio memcpy_toio
> -void memcpy_fromio(void *, const volatile void __iomem *, unsigned long);
> -void memcpy_toio(volatile void __iomem *, const void *, unsigned long);
> -void memset_io(volatile void __iomem *, int, unsigned long);
> -
> /* Quad-word real-mode I/O, don't ask.. */
> unsigned long long peek_real_address_q(unsigned long long addr);
> unsigned long long poke_real_address_q(unsigned long long addr,
> diff --git a/arch/sh/kernel/io.c b/arch/sh/kernel/io.c
> deleted file mode 100644
> index da22f3b32d30..000000000000
> --- a/arch/sh/kernel/io.c
> +++ /dev/null
> @@ -1,111 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0
> -/*
> - * arch/sh/kernel/io.c - Machine independent I/O functions.
> - *
> - * Copyright (C) 2000 - 2009 Stuart Menefy
> - * Copyright (C) 2005 Paul Mundt
> - */
> -#include <linux/module.h>
> -#include <linux/pci.h>
> -#include <asm/machvec.h>
> -#include <asm/io.h>
> -
> -/*
> - * Copy data from IO memory space to "real" memory space.
> - */
> -void memcpy_fromio(void *to, const volatile void __iomem *from, unsigned long count)
> -{
> - /*
> - * Would it be worthwhile doing byte and long transfers first
> - * to try and get aligned?
> - */
> -#ifdef CONFIG_CPU_SH4
> - if ((count >= 0x20) &&
> - (((u32)to & 0x1f) == 0) && (((u32)from & 0x3) == 0)) {
> - int tmp2, tmp3, tmp4, tmp5, tmp6;
> -
> - __asm__ __volatile__(
> - "1: \n\t"
> - "mov.l @%7+, r0 \n\t"
> - "mov.l @%7+, %2 \n\t"
> - "movca.l r0, @%0 \n\t"
> - "mov.l @%7+, %3 \n\t"
> - "mov.l @%7+, %4 \n\t"
> - "mov.l @%7+, %5 \n\t"
> - "mov.l @%7+, %6 \n\t"
> - "mov.l @%7+, r7 \n\t"
> - "mov.l @%7+, r0 \n\t"
> - "mov.l %2, @(0x04,%0) \n\t"
> - "mov #0x20, %2 \n\t"
> - "mov.l %3, @(0x08,%0) \n\t"
> - "sub %2, %1 \n\t"
> - "mov.l %4, @(0x0c,%0) \n\t"
> - "cmp/hi %1, %2 ! T if 32 > count \n\t"
> - "mov.l %5, @(0x10,%0) \n\t"
> - "mov.l %6, @(0x14,%0) \n\t"
> - "mov.l r7, @(0x18,%0) \n\t"
> - "mov.l r0, @(0x1c,%0) \n\t"
> - "bf.s 1b \n\t"
> - " add #0x20, %0 \n\t"
> - : "=&r" (to), "=&r" (count),
> - "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4),
> - "=&r" (tmp5), "=&r" (tmp6), "=&r" (from)
> - : "7"(from), "0" (to), "1" (count)
> - : "r0", "r7", "t", "memory");
> - }
> -#endif
> -
> - if ((((u32)to | (u32)from) & 0x3) == 0) {
> - for (; count > 3; count -= 4) {
> - *(u32 *)to = *(volatile u32 *)from;
> - to += 4;
> - from += 4;
> - }
> - }
> -
> - for (; count > 0; count--) {
> - *(u8 *)to = *(volatile u8 *)from;
> - to++;
> - from++;
> - }
> -
> - mb();
> -}
> -EXPORT_SYMBOL(memcpy_fromio);
> -
> -/*
> - * Copy data from "real" memory space to IO memory space.
> - */
> -void memcpy_toio(volatile void __iomem *to, const void *from, unsigned long count)
> -{
> - if ((((u32)to | (u32)from) & 0x3) == 0) {
> - for ( ; count > 3; count -= 4) {
> - *(volatile u32 *)to = *(u32 *)from;
> - to += 4;
> - from += 4;
> - }
> - }
> -
> - for (; count > 0; count--) {
> - *(volatile u8 *)to = *(u8 *)from;
> - to++;
> - from++;
> - }
> -
> - mb();
> -}
> -EXPORT_SYMBOL(memcpy_toio);
> -
> -/*
> - * "memset" on IO memory space.
> - * This needs to be optimized.
> - */
> -void memset_io(volatile void __iomem *dst, int c, unsigned long count)
> -{
> - while (count) {
> - count--;
> - writeb(c, dst);
> - dst++;
> - }
> -}
> -EXPORT_SYMBOL(memset_io);
I'm not sure that I understand the motivation to remove hand-optimized sh4 assembler
code for memset and drop it in favor of potentially slower generic C code. What is
the reasoning behind this?
Also, it seems that this patch would make your other patch
"sh: Remove memset_io from sh specific code"
obsolete.
Geert, do you have any opinions on this patch?
Thanks,
Adrian
--
.''`. John Paul Adrian Glaubitz
: :' : Debian Developer
`. `' Physicist
`- GPG: 62FF 8A75 84E0 2956 9546 0006 7426 3B37 F5B5 F913
Hi Adrian,
On Thu, 30 Jan 2025 at 09:44, John Paul Adrian Glaubitz
<glaubitz@physik.fu-berlin.de> wrote:
> On Tue, 2025-01-28 at 11:13 +0100, Julian Vetter wrote:
> > Remove IO memcpy and memset from sh specific code and fall back to the
> > new implementations from lib/iomem_copy.c. They use word accesses if the
> > buffers are aligned and only fall back to byte accesses for potentially
> > unaligned parts of a buffer.
> >
> > Signed-off-by: Julian Vetter <julian@outer-limits.org>
> > ---
> > Changes for V2:
> > - Removed also SH4 specific memcpy_fromio code
> I'm not sure that I understand the motivation to remove hand-optimized sh4 assembler
> code for memset and drop it in favor of potentially slower generic C code. What is
> the reasoning behind this?
See Arnd's feedback on v1
https://lore.kernel.org/all/ffe019a1-11b4-4ad7-bbe2-8ef3e01ffeb0@app.fastmail.com
> Also, it seems that this patch would make your other patch
>
> "sh: Remove memset_io from sh specific code"
>
> obsolete.
Yeah, that should have been mentioned under the ---.
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
Hi Geert, On Thu, 2025-01-30 at 10:13 +0100, Geert Uytterhoeven wrote: > Hi Adrian, > > On Thu, 30 Jan 2025 at 09:44, John Paul Adrian Glaubitz > <glaubitz@physik.fu-berlin.de> wrote: > > On Tue, 2025-01-28 at 11:13 +0100, Julian Vetter wrote: > > > Remove IO memcpy and memset from sh specific code and fall back to the > > > new implementations from lib/iomem_copy.c. They use word accesses if the > > > buffers are aligned and only fall back to byte accesses for potentially > > > unaligned parts of a buffer. > > > > > > Signed-off-by: Julian Vetter <julian@outer-limits.org> > > > --- > > > Changes for V2: > > > - Removed also SH4 specific memcpy_fromio code > > > I'm not sure that I understand the motivation to remove hand-optimized sh4 assembler > > code for memset and drop it in favor of potentially slower generic C code. What is > > the reasoning behind this? > > See Arnd's feedback on v1 > https://lore.kernel.org/all/ffe019a1-11b4-4ad7-bbe2-8ef3e01ffeb0@app.fastmail.com I read Arnd's feedback but I don't really know whether GCC produces better code than this hand-written assembly. Is there any compelling argument? I'm just worried we would slow down something as fundamental as memset(). > > Also, it seems that this patch would make your other patch > > > > "sh: Remove memset_io from sh specific code" > > > > obsolete. > > Yeah, that should have been mentioned under the ---. Thought so. Adrian -- .''`. John Paul Adrian Glaubitz : :' : Debian Developer `. `' Physicist `- GPG: 62FF 8A75 84E0 2956 9546 0006 7426 3B37 F5B5 F913
Hi Adrian,
On Thu, 30 Jan 2025 at 10:31, John Paul Adrian Glaubitz
<glaubitz@physik.fu-berlin.de> wrote:
> On Thu, 2025-01-30 at 10:13 +0100, Geert Uytterhoeven wrote:
> > On Thu, 30 Jan 2025 at 09:44, John Paul Adrian Glaubitz
> > <glaubitz@physik.fu-berlin.de> wrote:
> > > On Tue, 2025-01-28 at 11:13 +0100, Julian Vetter wrote:
> > > > Remove IO memcpy and memset from sh specific code and fall back to the
> > > > new implementations from lib/iomem_copy.c. They use word accesses if the
> > > > buffers are aligned and only fall back to byte accesses for potentially
> > > > unaligned parts of a buffer.
> > > >
> > > > Signed-off-by: Julian Vetter <julian@outer-limits.org>
> > > > ---
> > > > Changes for V2:
> > > > - Removed also SH4 specific memcpy_fromio code
> >
> > > I'm not sure that I understand the motivation to remove hand-optimized sh4 assembler
> > > code for memset and drop it in favor of potentially slower generic C code. What is
> > > the reasoning behind this?
> >
> > See Arnd's feedback on v1
> > https://lore.kernel.org/all/ffe019a1-11b4-4ad7-bbe2-8ef3e01ffeb0@app.fastmail.com
>
> I read Arnd's feedback but I don't really know whether GCC produces better code than
> this hand-written assembly. Is there any compelling argument?
>
> I'm just worried we would slow down something as fundamental as memset().
it's not memset(), but memset_io(), i.e. clearing (slow) mapped I/O memory.
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
Hi Geert, On Thu, 2025-01-30 at 10:35 +0100, Geert Uytterhoeven wrote: > Hi Adrian, > > On Thu, 30 Jan 2025 at 10:31, John Paul Adrian Glaubitz > <glaubitz@physik.fu-berlin.de> wrote: > > On Thu, 2025-01-30 at 10:13 +0100, Geert Uytterhoeven wrote: > > > On Thu, 30 Jan 2025 at 09:44, John Paul Adrian Glaubitz > > > <glaubitz@physik.fu-berlin.de> wrote: > > > > On Tue, 2025-01-28 at 11:13 +0100, Julian Vetter wrote: > > > > > Remove IO memcpy and memset from sh specific code and fall back to the > > > > > new implementations from lib/iomem_copy.c. They use word accesses if the > > > > > buffers are aligned and only fall back to byte accesses for potentially > > > > > unaligned parts of a buffer. > > > > > > > > > > Signed-off-by: Julian Vetter <julian@outer-limits.org> > > > > > --- > > > > > Changes for V2: > > > > > - Removed also SH4 specific memcpy_fromio code > > > > > > > I'm not sure that I understand the motivation to remove hand-optimized sh4 assembler > > > > code for memset and drop it in favor of potentially slower generic C code. What is > > > > the reasoning behind this? > > > > > > See Arnd's feedback on v1 > > > https://lore.kernel.org/all/ffe019a1-11b4-4ad7-bbe2-8ef3e01ffeb0@app.fastmail.com > > > > I read Arnd's feedback but I don't really know whether GCC produces better code than > > this hand-written assembly. Is there any compelling argument? > > > > I'm just worried we would slow down something as fundamental as memset(). > > it's not memset(), but memset_io(), i.e. clearing (slow) mapped I/O memory. Could you review the patch as well, so I have some peace of mind? ;-) Thanks, Adrian -- .''`. John Paul Adrian Glaubitz : :' : Debian Developer `. `' Physicist `- GPG: 62FF 8A75 84E0 2956 9546 0006 7426 3B37 F5B5 F913
On Tue, Jan 28, 2025, at 11:13, Julian Vetter wrote: > Remove IO memcpy and memset from sh specific code and fall back to the > new implementations from lib/iomem_copy.c. They use word accesses if the > buffers are aligned and only fall back to byte accesses for potentially > unaligned parts of a buffer. > > Signed-off-by: Julian Vetter <julian@outer-limits.org> > --- > Changes for V2: > - Removed also SH4 specific memcpy_fromio code Reviewed-by: Arnd Bergmann <arnd@arndb.de>
© 2016 - 2026 Red Hat, Inc.