drivers/mtd/devices/phram.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
One can't use memcpy on memory obtained by ioremap, because IO memory
may have different alignment and access size restrictions than system
memory. Use memremap, as the phram driver operates on RAM.
This fixes an unaligned access on ARM64, which could be triggered with
e.g. dd if=/dev/phram/by-name/testdev bs=8190 count=1
Unable to handle kernel paging request at virtual address ffffffc01208bfbf
Mem abort info:
ESR = 0x96000021
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
Data abort info:
ISV = 0, ISS = 0x00000021
CM = 0, WnR = 0
swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000000cd5000
[ffffffc01208bfbf] pgd=00000002fffff003, p4d=00000002fffff003, pud=00000002fffff003, pmd=0000000100b43003, pte=0068000022221717
Internal error: Oops: 96000021 [#1] PREEMPT SMP
CPU: 2 PID: 14768 Comm: dd Tainted: G O 5.10.116-f13ddced70 #1
Hardware name: AXM56xx Victoria (DT)
pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--)
pc : __memcpy+0x168/0x230
lr : phram_read+0x68/0xb0 [phram]
sp : ffffffc0138f3bd0
x29: ffffffc0138f3bd0 x28: 0000000034a50090
x27: 0000000000000000 x26: ffffff81176ce000
x25: 0000000000000000 x24: 0000000000000000
x23: ffffffc0138f3cb8 x22: ffffff8109475000
x21: 0000000000000000 x20: ffffff81176ce000
x19: 0000000000001fff x18: 0000000000000020
x17: 0000000000000000 x16: 0000000000000000
x15: ffffff8125861410 x14: 0000000000000000
x13: 0000000000000000 x12: 0000000000000000
x11: 0000000000000000 x10: 0000000000000000
x9 : 0000000000000000 x8 : 0000000000000000
x7 : 0000000000000000 x6 : 0000000000000000
x5 : ffffff81176cffff x4 : ffffffc01208bfff
x3 : ffffff81176cff80 x2 : ffffffffffffffef
x1 : ffffffc01208bfc0 x0 : ffffff81176ce000
Call trace:
__memcpy+0x168/0x230
mtd_read_oob_std+0x80/0x90
mtd_read_oob+0x8c/0x150
mtd_read+0x54/0x80
mtdchar_read+0xdc/0x2c0
vfs_read+0xb8/0x1e4
ksys_read+0x78/0x10c
__arm64_sys_read+0x28/0x34
do_el0_svc+0x94/0x1f0
el0_svc+0x20/0x30
el0_sync_handler+0x1a4/0x1c0
el0_sync+0x180/0x1c0
Code: a984346c a9c4342c f1010042 54fffee8 (a97c3c8e)
---[ end trace 5707221d643416b6 ]---
Signed-off-by: Petr Malat <oss@malat.biz>
---
drivers/mtd/devices/phram.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c
index d503821a3e60..25d3674b4e51 100644
--- a/drivers/mtd/devices/phram.c
+++ b/drivers/mtd/devices/phram.c
@@ -83,7 +83,7 @@ static void unregister_devices(void)
list_for_each_entry_safe(this, safe, &phram_list, list) {
mtd_device_unregister(&this->mtd);
- iounmap(this->mtd.priv);
+ memunmap(this->mtd.priv);
kfree(this->mtd.name);
kfree(this);
}
@@ -99,9 +99,9 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e
goto out0;
ret = -EIO;
- new->mtd.priv = ioremap(start, len);
+ new->mtd.priv = memremap(start, len, MEMREMAP_WB);
if (!new->mtd.priv) {
- pr_err("ioremap failed\n");
+ pr_err("memremap failed\n");
goto out1;
}
@@ -129,7 +129,7 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e
return 0;
out2:
- iounmap(new->mtd.priv);
+ memunmap(new->mtd.priv);
out1:
kfree(new);
out0:
--
2.30.2
On Mon, May 23, 2022 at 04:28:25PM +0200, Petr Malat wrote: > One can't use memcpy on memory obtained by ioremap, because IO memory > may have different alignment and size access restriction than the system > memory. Use memremap as phram driver operates on RAM. > > This fixes an unaligned access on ARM64, which could be triggered with > e.g. dd if=/dev/phram/by-name/testdev bs=8190 count=1 > > Unable to handle kernel paging request at virtual address ffffffc01208bfbf > Mem abort info: > ESR = 0x96000021 > EC = 0x25: DABT (current EL), IL = 32 bits > SET = 0, FnV = 0 > EA = 0, S1PTW = 0 > Data abort info: > ISV = 0, ISS = 0x00000021 > CM = 0, WnR = 0 > swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000000cd5000 > [ffffffc01208bfbf] pgd=00000002fffff003, p4d=00000002fffff003, pud=00000002fffff003, pmd=0000000100b43003, pte=0068000022221717 > Internal error: Oops: 96000021 [#1] PREEMPT SMP > CPU: 2 PID: 14768 Comm: dd Tainted: G O 5.10.116-f13ddced70 #1 > Hardware name: AXM56xx Victoria (DT) > pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--) > pc : __memcpy+0x168/0x230 > lr : phram_read+0x68/0xb0 [phram] > sp : ffffffc0138f3bd0 > x29: ffffffc0138f3bd0 x28: 0000000034a50090 > x27: 0000000000000000 x26: ffffff81176ce000 > x25: 0000000000000000 x24: 0000000000000000 > x23: ffffffc0138f3cb8 x22: ffffff8109475000 > x21: 0000000000000000 x20: ffffff81176ce000 > x19: 0000000000001fff x18: 0000000000000020 > x17: 0000000000000000 x16: 0000000000000000 > x15: ffffff8125861410 x14: 0000000000000000 > x13: 0000000000000000 x12: 0000000000000000 > x11: 0000000000000000 x10: 0000000000000000 > x9 : 0000000000000000 x8 : 0000000000000000 > x7 : 0000000000000000 x6 : 0000000000000000 > x5 : ffffff81176cffff x4 : ffffffc01208bfff > x3 : ffffff81176cff80 x2 : ffffffffffffffef > x1 : ffffffc01208bfc0 x0 : ffffff81176ce000 > Call trace: > __memcpy+0x168/0x230 > mtd_read_oob_std+0x80/0x90 > mtd_read_oob+0x8c/0x150 > mtd_read+0x54/0x80 > mtdchar_read+0xdc/0x2c0 > vfs_read+0xb8/0x1e4 
> ksys_read+0x78/0x10c > __arm64_sys_read+0x28/0x34 > do_el0_svc+0x94/0x1f0 > el0_svc+0x20/0x30 > el0_sync_handler+0x1a4/0x1c0 > el0_sync+0x180/0x1c0 > Code: a984346c a9c4342c f1010042 54fffee8 (a97c3c8e) > ---[ end trace 5707221d643416b6 ]--- > > Signed-off-by: Petr Malat <oss@malat.biz> > --- > drivers/mtd/devices/phram.c | 8 ++++---- > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c > index d503821a3e60..25d3674b4e51 100644 > --- a/drivers/mtd/devices/phram.c > +++ b/drivers/mtd/devices/phram.c > @@ -83,7 +83,7 @@ static void unregister_devices(void) > > list_for_each_entry_safe(this, safe, &phram_list, list) { > mtd_device_unregister(&this->mtd); > - iounmap(this->mtd.priv); > + memunmap(this->mtd.priv); > kfree(this->mtd.name); > kfree(this); > } > @@ -99,9 +99,9 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e > goto out0; > > ret = -EIO; > - new->mtd.priv = ioremap(start, len); > + new->mtd.priv = memremap(start, len, MEMREMAP_WB); > if (!new->mtd.priv) { > - pr_err("ioremap failed\n"); > + pr_err("memremap failed\n"); > goto out1; > } > > @@ -129,7 +129,7 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e > return 0; > > out2: > - iounmap(new->mtd.priv); > + memunmap(new->mtd.priv); > out1: > kfree(new); > out0: > -- > 2.30.2 > <formletter> This is not the correct way to submit patches for inclusion in the stable kernel tree. Please read: https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html for how to do this properly. </formletter>
From: Petr Malat > Sent: 23 May 2022 15:28 > > One can't use memcpy on memory obtained by ioremap, because IO memory > may have different alignment and size access restriction than the system > memory. Use memremap as phram driver operates on RAM. Does that actually help? The memcpy() is still likely to issue unaligned accesses that the hardware can't handle. David > > This fixes an unaligned access on ARM64, which could be triggered with > e.g. dd if=/dev/phram/by-name/testdev bs=8190 count=1 > > Unable to handle kernel paging request at virtual address ffffffc01208bfbf > Mem abort info: > ESR = 0x96000021 > EC = 0x25: DABT (current EL), IL = 32 bits > SET = 0, FnV = 0 > EA = 0, S1PTW = 0 > Data abort info: > ISV = 0, ISS = 0x00000021 > CM = 0, WnR = 0 > swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000000cd5000 > [ffffffc01208bfbf] pgd=00000002fffff003, p4d=00000002fffff003, pud=00000002fffff003, > pmd=0000000100b43003, pte=0068000022221717 > Internal error: Oops: 96000021 [#1] PREEMPT SMP > CPU: 2 PID: 14768 Comm: dd Tainted: G O 5.10.116-f13ddced70 #1 > Hardware name: AXM56xx Victoria (DT) > pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--) > pc : __memcpy+0x168/0x230 > lr : phram_read+0x68/0xb0 [phram] > sp : ffffffc0138f3bd0 > x29: ffffffc0138f3bd0 x28: 0000000034a50090 > x27: 0000000000000000 x26: ffffff81176ce000 > x25: 0000000000000000 x24: 0000000000000000 > x23: ffffffc0138f3cb8 x22: ffffff8109475000 > x21: 0000000000000000 x20: ffffff81176ce000 > x19: 0000000000001fff x18: 0000000000000020 > x17: 0000000000000000 x16: 0000000000000000 > x15: ffffff8125861410 x14: 0000000000000000 > x13: 0000000000000000 x12: 0000000000000000 > x11: 0000000000000000 x10: 0000000000000000 > x9 : 0000000000000000 x8 : 0000000000000000 > x7 : 0000000000000000 x6 : 0000000000000000 > x5 : ffffff81176cffff x4 : ffffffc01208bfff > x3 : ffffff81176cff80 x2 : ffffffffffffffef > x1 : ffffffc01208bfc0 x0 : ffffff81176ce000 > Call trace: > __memcpy+0x168/0x230 > 
mtd_read_oob_std+0x80/0x90 > mtd_read_oob+0x8c/0x150 > mtd_read+0x54/0x80 > mtdchar_read+0xdc/0x2c0 > vfs_read+0xb8/0x1e4 > ksys_read+0x78/0x10c > __arm64_sys_read+0x28/0x34 > do_el0_svc+0x94/0x1f0 > el0_svc+0x20/0x30 > el0_sync_handler+0x1a4/0x1c0 > el0_sync+0x180/0x1c0 > Code: a984346c a9c4342c f1010042 54fffee8 (a97c3c8e) > ---[ end trace 5707221d643416b6 ]--- > > Signed-off-by: Petr Malat <oss@malat.biz> > --- > drivers/mtd/devices/phram.c | 8 ++++---- > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c > index d503821a3e60..25d3674b4e51 100644 > --- a/drivers/mtd/devices/phram.c > +++ b/drivers/mtd/devices/phram.c > @@ -83,7 +83,7 @@ static void unregister_devices(void) > > list_for_each_entry_safe(this, safe, &phram_list, list) { > mtd_device_unregister(&this->mtd); > - iounmap(this->mtd.priv); > + memunmap(this->mtd.priv); > kfree(this->mtd.name); > kfree(this); > } > @@ -99,9 +99,9 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e > goto out0; > > ret = -EIO; > - new->mtd.priv = ioremap(start, len); > + new->mtd.priv = memremap(start, len, MEMREMAP_WB); > if (!new->mtd.priv) { > - pr_err("ioremap failed\n"); > + pr_err("memremap failed\n"); > goto out1; > } > > @@ -129,7 +129,7 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e > return 0; > > out2: > - iounmap(new->mtd.priv); > + memunmap(new->mtd.priv); > out1: > kfree(new); > out0: > -- > 2.30.2 - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
Hi! On Mon, May 23, 2022 at 02:51:41PM +0000, David Laight wrote: > From: Petr Malat > > Sent: 23 May 2022 15:28 > > > > One can't use memcpy on memory obtained by ioremap, because IO memory > > may have different alignment and size access restriction than the system > > memory. Use memremap as phram driver operates on RAM. > > Does that actually help? > The memcpy() is still likely to issue unaligned accesses > that the hardware can't handle. Yes, it solves the issue. memcpy() issues unaligned accesses only on platforms that can handle them, and on ARM64 they are handled only for RAM, not for device memory (__pgprot(PROT_DEVICE_*)). Petr
From: Petr Malat > Sent: 23 May 2022 16:28 > > Hi! > > On Mon, May 23, 2022 at 02:51:41PM +0000, David Laight wrote: > > From: Petr Malat > > > Sent: 23 May 2022 15:28 > > > > > > One can't use memcpy on memory obtained by ioremap, because IO memory > > > may have different alignment and size access restriction than the system > > > memory. Use memremap as phram driver operates on RAM. > > > > Does that actually help? > > The memcpy() is still likely to issue unaligned accesses > > that the hardware can't handle. > > Yes, it solves the issue. Memcpy can cause unaligned access only on > platforms, which can handle it. And on ARM64 it's handled only for > RAM and not for a device memory (__pgprot(PROT_DEVICE_*)). Does mapping it as memory cause it to be cached? So the hardware only sees cache line reads (which are aligned) and the cpu support for misaligned memory accesses then stop the faults? On x86 (which I know a lot more about) memcpy() has a nasty habit of getting implemented as 'rep movsb' relying on the cpu to speed it up. But that doesn't happen for uncached addresses - so you get very slow byte copies. OTOH misaligned PCIe transfers generate TLP that have the correct byte enables for the end words. Provided the PCIe target isn't broken they are fine. David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
On Mon, May 23, 2022 at 04:09:20PM +0000, David Laight wrote: > From: Petr Malat > > Sent: 23 May 2022 16:28 > > On Mon, May 23, 2022 at 02:51:41PM +0000, David Laight wrote: > > > From: Petr Malat > > > > Sent: 23 May 2022 15:28 > > > > > > > > One can't use memcpy on memory obtained by ioremap, because IO memory > > > > may have different alignment and size access restriction than the system > > > > memory. Use memremap as phram driver operates on RAM. > > > > > > Does that actually help? > > > The memcpy() is still likely to issue unaligned accesses > > > that the hardware can't handle. > > > > Yes, it solves the issue. Memcpy can cause unaligned access only on > > platforms, which can handle it. And on ARM64 it's handled only for > > RAM and not for a device memory (__pgprot(PROT_DEVICE_*)). > > Does mapping it as memory cause it to be cached? > So the hardware only sees cache line reads (which are aligned) > and the cpu support for misaligned memory accesses then > stop the faults? Yes, this is controlled by the MEMREMAP_WB flag, which sets up a mapping that "matches the default mapping for System RAM on the architecture. This is usually a read-allocate write-back cache." > On x86 (which I know a lot more about) memcpy() has a nasty > habit of getting implemented as 'rep movsb' relying on the > cpu to speed it up. > But that doesn't happen for uncached addresses - so you get > very slow byte copies. > OTOH misaligned PCIe transfers generate TLP that have the > correct byte enables for the end words. > Provided the PCIe target isn't broken they are fine. With memremap one should get the same behavior and performance as with the system memory, and it seems to be a good choice for the "Physical system RAM" MTD driver, but if one uses it for actual IO memory, one should use ioremap, memcpy_toio and memcpy_fromio. Using these prevents the crash on arm64 as well, but could lead to a performance degradation on some platforms. 
If you think there could be users using the driver for real IO memory, I can provide both behaviors and let the user choose with an option. Petr
Hi! On Mon, May 23, 2022 at 04:09:20PM +0000, David Laight wrote: > On x86 (which I know a lot more about) memcpy() has a nasty > habit of getting implemented as 'rep movsb' relying on the > cpu to speed it up. > But that doesn't happen for uncached addresses - so you get > very slow byte copies. I have measured the performance with (patched) and without (orig) my change. My change improves the performance on X8664 and ARM. On MIPS64 it stays the same: Tests ===== All runtimes are in milliseconds, average real-time of 3 runs, time measured with bash time built-in. The measured process ran in SCHED_FIFO with priority 99. Page cache was flushed before every run, but all involved program images were in tmpfs (no swap). - dd r512 dd if=/dev/TESTDEV of=/dev/null bs=512 - dd r1MB dd if=/dev/TESTDEV of=/dev/null bs=1M - dd w512 dd of=/dev/TESTDEV if=/tmpfs/img bs=512 - dd w1MB dd of=/dev/TESTDEV if=/tmpfs/img bs=1M - flashcp flashcp /tmpfs/img /dev/TESTDEV - flasherase flash_eraseall -q /dev/TESTDEV Results ======= All times are in ms ARCH | MIPS64 | ARM | X8664 CPU | CN6335p2.2 | v7 TI K2 | Xeon D-1548 Dev. size | 32MB | 128MB | 256MB -----------+-------+---------+-------+---------+-------+--------- in ms | Orig | Patched | Orig | Patched | Orig | Patched dd r512 | 131 | 130 | 1101 | 543 | 22906 | 281 dd r1MB | 65 | 65 | 655 | 122 | 22715 | 70 dd w512 | 1150 | 1150 | 1136 | 1042 | 28067 | 412 dd w1MB | 104 | 104 | 396 | 244 | 27761 | 122 flashcp | 100 | 99 | 1438 | 568 | 78455 | 270 flasherase | 21 | 21 | 208 | 77 | 27707 | 57 BR, Petr
© 2016 - 2024 Red Hat, Inc.