fs/pstore/inode.c | 2 +- fs/pstore/platform.c | 9 +++++---- fs/pstore/ram.c | 5 +++-- fs/pstore/ram_core.c | 3 ++- 4 files changed, 11 insertions(+), 8 deletions(-)
Current pmsg implementation is using kmalloc for pmsg record buffer,
which has max size limits based on page size. Currently even we
allocate enough space with pmsg-size, pmsg will still fail if the
file size is larger than what kmalloc allowed.
Since we don't need physical contiguous memory for pmsg buffer
, we can use kvmalloc to avoid such limitation.
Signed-off-by: Yuxiao Zhang <yuxiaozhang@google.com>
---
fs/pstore/inode.c | 2 +-
fs/pstore/platform.c | 9 +++++----
fs/pstore/ram.c | 5 +++--
fs/pstore/ram_core.c | 3 ++-
4 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ffbadb8b3032..df7fb2ad4599 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -54,7 +54,7 @@ static void free_pstore_private(struct pstore_private *private)
if (!private)
return;
if (private->record) {
- kfree(private->record->buf);
+ kvfree(private->record->buf);
kfree(private->record->priv);
kfree(private->record);
}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index cbc0b468c1ab..f51e9460ac9d 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -32,6 +32,7 @@
#include <linux/uaccess.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>
+#include <linux/mm.h>
#include "internal.h"
@@ -549,7 +550,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
if (record->buf)
return -EINVAL;
- record->buf = memdup_user(buf, record->size);
+ record->buf = vmemdup_user(buf, record->size);
if (IS_ERR(record->buf)) {
ret = PTR_ERR(record->buf);
goto out;
@@ -557,7 +558,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
ret = record->psi->write(record);
- kfree(record->buf);
+ kvfree(record->buf);
out:
record->buf = NULL;
@@ -730,7 +731,7 @@ static void decompress_record(struct pstore_record *record)
return;
/* Swap out compressed contents with decompressed contents. */
- kfree(record->buf);
+ kvfree(record->buf);
record->buf = unzipped;
record->size = unzipped_len;
record->compressed = false;
@@ -783,7 +784,7 @@ void pstore_get_backend_records(struct pstore_info *psi,
rc = pstore_mkfile(root, record);
if (rc) {
/* pstore_mkfile() did not take record, so free it. */
- kfree(record->buf);
+ kvfree(record->buf);
kfree(record->priv);
kfree(record);
if (rc != -EEXIST || !quiet)
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index ade66dbe5f39..296465b14fa9 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -20,6 +20,7 @@
#include <linux/compiler.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/mm.h>
#include "internal.h"
#include "ram_internal.h"
@@ -268,7 +269,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
/* ECC correction notice */
record->ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);
- record->buf = kmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
+ record->buf = kvmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
if (record->buf == NULL) {
size = -ENOMEM;
goto out;
@@ -282,7 +283,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
out:
if (free_prz) {
- kfree(prz->old_log);
+ kvfree(prz->old_log);
kfree(prz);
}
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 966191d3a5ba..3453d493ec27 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <asm/page.h>
#include "ram_internal.h"
@@ -385,7 +386,7 @@ void *persistent_ram_old(struct persistent_ram_zone *prz)
void persistent_ram_free_old(struct persistent_ram_zone *prz)
{
- kfree(prz->old_log);
+ kvfree(prz->old_log);
prz->old_log = NULL;
prz->old_log_size = 0;
}
--
2.41.0.162.gfafddb0af9-goog
On Tue, Jun 27, 2023 at 01:25:41PM -0700, Yuxiao Zhang wrote: > Current pmsg implementation is using kmalloc for pmsg record buffer, > which has max size limits based on page size. Currently even we > allocate enough space with pmsg-size, pmsg will still fail if the > file size is larger than what kmalloc allowed. > > Since we don't need physical contiguous memory for pmsg buffer > , we can use kvmalloc to avoid such limitation. Conceptually, I am fine with this change. I need a little time to trace down the allocations. At first glance, I thought this patch only needed to cover pstore_write_user_compat(), but I guess the read side needs to be adjusted as well? I'll double-check. And yes, Greg's questions are all good -- fixing syntax and adding size details in the commit log would be appreciated. -Kees -- Kees Cook
Thanks,
-Yuxiao
From cd3ec6155a3cf0e198afdf2d040c73ee146b696f Mon Sep 17 00:00:00 2001
From: Yuxiao Zhang <yuxiaozhang@google.com>
Date: Fri, 30 Jun 2023 10:45:21 -0700
Subject: [PATCH] pstore: ramoops: support pmsg size larger than kmalloc
limitation
Current pmsg implementation is using kmalloc for pmsg record buffer,
which has max size limits of 2^(MAX_ORDER + PAGE_SHIFT). Currently even
we allocate enough space with pmsg-size, pmsg will still fail if the
file size is larger than what kmalloc allowed.
Since we don't need physical contiguous memory for pmsg buffer,
we can use kvmalloc to avoid such limitation.
Signed-off-by: Yuxiao Zhang <yuxiaozhang@google.com>
---
fs/pstore/inode.c | 2 +-
fs/pstore/platform.c | 9 +++++----
fs/pstore/ram.c | 5 +++--
fs/pstore/ram_core.c | 3 ++-
4 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ffbadb8b3032..df7fb2ad4599 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -54,7 +54,7 @@ static void free_pstore_private(struct pstore_private *private)
if (!private)
return;
if (private->record) {
- kfree(private->record->buf);
+ kvfree(private->record->buf);
kfree(private->record->priv);
kfree(private->record);
}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index cbc0b468c1ab..f51e9460ac9d 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -32,6 +32,7 @@
#include <linux/uaccess.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>
+#include <linux/mm.h>
#include "internal.h"
@@ -549,7 +550,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
if (record->buf)
return -EINVAL;
- record->buf = memdup_user(buf, record->size);
+ record->buf = vmemdup_user(buf, record->size);
if (IS_ERR(record->buf)) {
ret = PTR_ERR(record->buf);
goto out;
@@ -557,7 +558,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
ret = record->psi->write(record);
- kfree(record->buf);
+ kvfree(record->buf);
out:
record->buf = NULL;
@@ -730,7 +731,7 @@ static void decompress_record(struct pstore_record *record)
return;
/* Swap out compressed contents with decompressed contents. */
- kfree(record->buf);
+ kvfree(record->buf);
record->buf = unzipped;
record->size = unzipped_len;
record->compressed = false;
@@ -783,7 +784,7 @@ void pstore_get_backend_records(struct pstore_info *psi,
rc = pstore_mkfile(root, record);
if (rc) {
/* pstore_mkfile() did not take record, so free it. */
- kfree(record->buf);
+ kvfree(record->buf);
kfree(record->priv);
kfree(record);
if (rc != -EEXIST || !quiet)
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index ade66dbe5f39..296465b14fa9 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -20,6 +20,7 @@
#include <linux/compiler.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/mm.h>
#include "internal.h"
#include "ram_internal.h"
@@ -268,7 +269,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
/* ECC correction notice */
record->ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);
- record->buf = kmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
+ record->buf = kvmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
if (record->buf == NULL) {
size = -ENOMEM;
goto out;
@@ -282,7 +283,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
out:
if (free_prz) {
- kfree(prz->old_log);
+ kvfree(prz->old_log);
kfree(prz);
}
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 966191d3a5ba..3453d493ec27 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <asm/page.h>
#include "ram_internal.h"
@@ -385,7 +386,7 @@ void *persistent_ram_old(struct persistent_ram_zone *prz)
void persistent_ram_free_old(struct persistent_ram_zone *prz)
{
- kfree(prz->old_log);
+ kvfree(prz->old_log);
prz->old_log = NULL;
prz->old_log_size = 0;
}
--
2.41.0.255.g8b1d071c50-goog
Sorry forgot to add subject header in msg which messed up email client,
resending it again
Added size details to commit message and fixed the format. See the new
patch below.
Thanks,
-Yuxiao
From cd3ec6155a3cf0e198afdf2d040c73ee146b696f Mon Sep 17 00:00:00 2001
From: Yuxiao Zhang <yuxiaozhang@google.com>
Date: Fri, 30 Jun 2023 10:45:21 -0700
Subject: [PATCH] pstore: ramoops: support pmsg size larger than kmalloc
limitation
Current pmsg implementation is using kmalloc for pmsg record buffer,
which has max size limits of 2^(MAX_ORDER + PAGE_SHIFT). Currently even
we allocate enough space with pmsg-size, pmsg will still fail if the
file size is larger than what kmalloc allowed.
Since we don't need physical contiguous memory for pmsg buffer,
we can use kvmalloc to avoid such limitation.
Signed-off-by: Yuxiao Zhang <yuxiaozhang@google.com>
---
fs/pstore/inode.c | 2 +-
fs/pstore/platform.c | 9 +++++----
fs/pstore/ram.c | 5 +++--
fs/pstore/ram_core.c | 3 ++-
4 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ffbadb8b3032..df7fb2ad4599 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -54,7 +54,7 @@ static void free_pstore_private(struct pstore_private *private)
if (!private)
return;
if (private->record) {
- kfree(private->record->buf);
+ kvfree(private->record->buf);
kfree(private->record->priv);
kfree(private->record);
}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index cbc0b468c1ab..f51e9460ac9d 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -32,6 +32,7 @@
#include <linux/uaccess.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>
+#include <linux/mm.h>
#include "internal.h"
@@ -549,7 +550,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
if (record->buf)
return -EINVAL;
- record->buf = memdup_user(buf, record->size);
+ record->buf = vmemdup_user(buf, record->size);
if (IS_ERR(record->buf)) {
ret = PTR_ERR(record->buf);
goto out;
@@ -557,7 +558,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
ret = record->psi->write(record);
- kfree(record->buf);
+ kvfree(record->buf);
out:
record->buf = NULL;
@@ -730,7 +731,7 @@ static void decompress_record(struct pstore_record *record)
return;
/* Swap out compressed contents with decompressed contents. */
- kfree(record->buf);
+ kvfree(record->buf);
record->buf = unzipped;
record->size = unzipped_len;
record->compressed = false;
@@ -783,7 +784,7 @@ void pstore_get_backend_records(struct pstore_info *psi,
rc = pstore_mkfile(root, record);
if (rc) {
/* pstore_mkfile() did not take record, so free it. */
- kfree(record->buf);
+ kvfree(record->buf);
kfree(record->priv);
kfree(record);
if (rc != -EEXIST || !quiet)
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index ade66dbe5f39..296465b14fa9 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -20,6 +20,7 @@
#include <linux/compiler.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/mm.h>
#include "internal.h"
#include "ram_internal.h"
@@ -268,7 +269,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
/* ECC correction notice */
record->ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);
- record->buf = kmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
+ record->buf = kvmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
if (record->buf == NULL) {
size = -ENOMEM;
goto out;
@@ -282,7 +283,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
out:
if (free_prz) {
- kfree(prz->old_log);
+ kvfree(prz->old_log);
kfree(prz);
}
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 966191d3a5ba..3453d493ec27 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <asm/page.h>
#include "ram_internal.h"
@@ -385,7 +386,7 @@ void *persistent_ram_old(struct persistent_ram_zone *prz)
void persistent_ram_free_old(struct persistent_ram_zone *prz)
{
- kfree(prz->old_log);
+ kvfree(prz->old_log);
prz->old_log = NULL;
prz->old_log_size = 0;
}
--
2.41.0.255.g8b1d071c50-goog
Friendly ping, any update on this?
On Tue, Jul 18, 2023 at 01:23:47PM -0700, Yuxiao Zhang wrote: > Friendly ping, any update on this? Hi! I finally got a chance to look this over. I added a few more kvzalloc() uses to generalize this for all records (not just pmsg), and it's testing well. Here's the resulting commit: https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git/commit/?h=for-next/pstore&id=104fd0b5e948157f8e8ac88a20b46ba8641d4e95 -- Kees Cook
On Tue, Jun 27, 2023 at 01:25:41PM -0700, Yuxiao Zhang wrote: > Current pmsg implementation is using kmalloc for pmsg record buffer, > which has max size limits based on page size. What is that max size? > Currently even we > allocate enough space with pmsg-size, pmsg will still fail if the > file size is larger than what kmalloc allowed. > > Since we don't need physical contiguous memory for pmsg buffer > , we can use kvmalloc to avoid such limitation. Odd placement of the ',' character :) Anyway, thanks for getting this sent out. But, what in-kernel user is hitting this in the pstore implementation? How big of a buffer is it trying to create? Is this a bug in older kernels with the in-kernel drivers as well? If so, should it go to stable releases and how far back? thanks, greg k-h
Thanks for reviewing the patch. On 28 Jun 2023 07:30:16 +0200, Greg KH wrote: >What is that max size? The max size is arch dependent, it should be 2^(PAGE_SIZE+MAX_ORDER). In our environment it is 4M. >what in-kernel user is hitting this in the pstore implementation? We are trying to use pmsg to hold a core dump file, so we have pmsg-size=32M and thus hit this issue. Other than us, here is one I found that trying to save dmesg beyond kmalloc limitaton: https://lore.kernel.org/lkml/b2d66d9f-15a6-415c-2485-44649027a1d5@igalia.com/T/ Thanks, -Yuxiao Zhang
On 28/06/2023 19:10, Yuxiao Zhang wrote: > Thanks for reviewing the patch. > > On 28 Jun 2023 07:30:16 +0200, Greg KH wrote: >> What is that max size? > > The max size is arch dependent, it should be 2^(PAGE_SIZE+MAX_ORDER). In our environment it is 4M. > >> what in-kernel user is hitting this in the pstore implementation? > > We are trying to use pmsg to hold a core dump file, so we have pmsg-size=32M and thus hit this issue. > > Other than us, here is one I found that trying to save dmesg beyond kmalloc limitaton: > https://lore.kernel.org/lkml/b2d66d9f-15a6-415c-2485-44649027a1d5@igalia.com/T/ Hi Yuxiao Zhang, thanks for the improvement! And also, thanks for mentioning the thread above - I tested your patch today and was soon to write you this message heh So, first of all, the patch works for the Steam Deck case - kernel 6.4 with or without your patch behaved the same, i.e., pstore/ramoops worked and it was possible to collect the dmesg. But when I tried to increase the record size in ramoops, I got this error: https://termbin.com/b12e This is the same error as mentioned in the thread above. And it happens when I try to bump the record size to 4MB, the biggest working value is still 2MB. Maybe a missing spot, which remained using kmalloc() instead of the virtual variant? Also - Kees certainly knows that way better, but - are we 100% sure that the region for pstore can be non-contiguous? For some reason, I always thought this was a requirement - grepping the code, I found this (wrong?) comment: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/pstore/zone.c#n3 Cheers, Guilherme
On Wed, Jun 28, 2023 at 03:12:10PM -0300, Guilherme G. Piccoli wrote: > Also - Kees certainly knows that way better, but - are we 100% sure that > the region for pstore can be non-contiguous? For some reason, I always > thought this was a requirement - grepping the code, I found this > (wrong?) comment: > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/pstore/zone.c#n3 The contiguous requirements are entirely for the RAM backend's storage, so these intermediate memory regions can use non-contiguous physical backing memory (i.e. vmalloc). Even the special case of crash-dumping should be fine for the large buffer used to hold the crash before doing compression. There are effectively 4 types of allocations in pstore: 1- a physical -> virtual mapping for the RAM backend 2- the allocations (if any) for non-RAM backends to hold a copy of pstore records when extracted from the backend storage (e.g NVRAM, EFI vars, etc). 3- incoming allocations that are used temporarily to hand data to the backend (e.g. the write_compat used in this patch) 4- the allocation used for holding the pstorefs data contents (which I need to double-check, but is entirely defined by the backends) Changes aren't needed for (1), it's fine on its own. This patch changes allocations for (2) and (3) for pmsg and the RAM backend, if I'm reading it correctly. I think (4) is included as part of (2), but I need to check. As long as all paths use kvfree() for the record buffers, everything should Just Work for RAM. Switching the other backends to also use kvalloc() would allow for greater flexibility, though. Anyway, it's on my list to review and test. I'm still working through other things related to the merge window opening, so I may be a bit slow for the next week. :) -Kees -- Kees Cook
On 28/06/2023 20:24, Kees Cook wrote: > On Wed, Jun 28, 2023 at 03:12:10PM -0300, Guilherme G. Piccoli wrote: >> Also - Kees certainly knows that way better, but - are we 100% sure that >> the region for pstore can be non-contiguous? For some reason, I always >> thought this was a requirement - grepping the code, I found this >> (wrong?) comment: >> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/pstore/zone.c#n3 > > The contiguous requirements are entirely for the RAM backend's storage, > so these intermediate memory regions can use non-contiguous physical > backing memory (i.e. vmalloc). > > Even the special case of crash-dumping should be fine for the large > buffer used to hold the crash before doing compression. > > There are effectively 4 types of allocations in pstore: > > 1- a physical -> virtual mapping for the RAM backend > 2- the allocations (if any) for non-RAM backends to hold a copy of pstore > records when extracted from the backend storage (e.g NVRAM, EFI vars, > etc). > 3- incoming allocations that are used temporarily to hand data to the > backend (e.g. the write_compat used in this patch) > 4- the allocation used for holding the pstorefs data contents (which I > need to double-check, but is entirely defined by the backends) > > Changes aren't needed for (1), it's fine on its own. This patch changes > allocations for (2) and (3) for pmsg and the RAM backend, if I'm reading > it correctly. I think (4) is included as part of (2), but I need to > check. As long as all paths use kvfree() for the record buffers, > everything should Just Work for RAM. Switching the other backends to > also use kvalloc() would allow for greater flexibility, though. > > Anyway, it's on my list to review and test. I'm still working through > other things related to the merge window opening, so I may be a bit slow > for the next week. :) > > -Kees > Thanks a bunch for the clarification Kees, much appreciated! Now I understand it way better =)
© 2016 - 2026 Red Hat, Inc.