From: Hou Tao <houtao1@huawei.com>
When invoking virtio_fs_enqueue_req() through kworker, both the
allocation of the sg array and the bounce buffer still use GFP_ATOMIC.
Considering the size of both the sg array and the bounce buffer may be
greater than PAGE_SIZE, use GFP_NOFS instead of GFP_ATOMIC to lower the
possibility of memory allocation failure.
Signed-off-by: Hou Tao <houtao1@huawei.com>
---
Change log:
v2:
* pass gfp_t instead of bool to virtio_fs_enqueue_req() (Suggested by Matthew)
v1: https://lore.kernel.org/linux-fsdevel/20240104015805.2103766-1-houtao@huaweicloud.com
fs/fuse/virtio_fs.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 3aac31d451985..8cf518624ce9e 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -87,7 +87,8 @@ struct virtio_fs_req_work {
};
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
- struct fuse_req *req, bool in_flight);
+ struct fuse_req *req, bool in_flight,
+ gfp_t gfp);
static const struct constant_table dax_param_enums[] = {
{"always", FUSE_DAX_ALWAYS },
@@ -383,7 +384,7 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
list_del_init(&req->list);
spin_unlock(&fsvq->lock);
- ret = virtio_fs_enqueue_req(fsvq, req, true);
+ ret = virtio_fs_enqueue_req(fsvq, req, true, GFP_NOFS);
if (ret < 0) {
if (ret == -ENOMEM || ret == -ENOSPC) {
spin_lock(&fsvq->lock);
@@ -488,7 +489,7 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
}
/* Allocate and copy args into req->argbuf */
-static int copy_args_to_argbuf(struct fuse_req *req)
+static int copy_args_to_argbuf(struct fuse_req *req, gfp_t gfp)
{
struct fuse_args *args = req->args;
unsigned int offset = 0;
@@ -502,7 +503,7 @@ static int copy_args_to_argbuf(struct fuse_req *req)
len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
fuse_len_args(num_out, args->out_args);
- req->argbuf = kmalloc(len, GFP_ATOMIC);
+ req->argbuf = kmalloc(len, gfp);
if (!req->argbuf)
return -ENOMEM;
@@ -1119,7 +1120,8 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg,
/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
- struct fuse_req *req, bool in_flight)
+ struct fuse_req *req, bool in_flight,
+ gfp_t gfp)
{
/* requests need at least 4 elements */
struct scatterlist *stack_sgs[6];
@@ -1140,8 +1142,8 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
/* Does the sglist fit on the stack? */
total_sgs = sg_count_fuse_req(req);
if (total_sgs > ARRAY_SIZE(stack_sgs)) {
- sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
- sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
+ sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), gfp);
+ sg = kmalloc_array(total_sgs, sizeof(sg[0]), gfp);
if (!sgs || !sg) {
ret = -ENOMEM;
goto out;
@@ -1149,7 +1151,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
}
/* Use a bounce buffer since stack args cannot be mapped */
- ret = copy_args_to_argbuf(req);
+ ret = copy_args_to_argbuf(req, gfp);
if (ret < 0)
goto out;
@@ -1245,7 +1247,7 @@ __releases(fiq->lock)
fuse_len_args(req->args->out_numargs, req->args->out_args));
fsvq = &fs->vqs[queue_id];
- ret = virtio_fs_enqueue_req(fsvq, req, false);
+ ret = virtio_fs_enqueue_req(fsvq, req, false, GFP_ATOMIC);
if (ret < 0) {
if (ret == -ENOMEM || ret == -ENOSPC) {
/*
--
2.29.2
On 5 Jan 2024, at 5:53, Hou Tao wrote:
> From: Hou Tao <houtao1@huawei.com>
>
> When invoking virtio_fs_enqueue_req() through kworker, both the
> allocation of the sg array and the bounce buffer still use GFP_ATOMIC.
> Considering the size of both the sg array and the bounce buffer may be
> greater than PAGE_SIZE, use GFP_NOFS instead of GFP_ATOMIC to lower the
> possibility of memory allocation failure.

Perhaps not appropriate for this case, but are you aware of
memalloc_nofs_save/restore? NFS has been converting over and cleaning out
our GFP_NOFS usage:

Documentation/core-api/gfp_mask-from-fs-io.rst

Ben
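For reference, the scoped API mentioned above is used roughly like this (a
minimal sketch; the helper below is hypothetical, only memalloc_nofs_save()
and memalloc_nofs_restore() are the real interface):

#include <linux/sched/mm.h>
#include <linux/slab.h>

/* Hypothetical helper: mark a whole scope as NOFS. Any allocation made
 * inside the section, even a plain GFP_KERNEL one in code we do not
 * control, is implicitly treated as GFP_NOFS and will not recurse into
 * filesystem reclaim.
 */
static void *alloc_in_nofs_scope(size_t len)
{
	unsigned int flags;
	void *buf;

	flags = memalloc_nofs_save();
	buf = kmalloc(len, GFP_KERNEL);	/* behaves as GFP_NOFS here */
	memalloc_nofs_restore(flags);

	return buf;
}

Compared with threading a gfp_t through every callee, the scoped form also
covers allocations made in helpers, which is the direction the referenced
documentation recommends.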
On Fri, Jan 05, 2024 at 06:53:05PM +0800, Hou Tao wrote:
> From: Hou Tao <houtao1@huawei.com>
>
> When invoking virtio_fs_enqueue_req() through kworker, both the
> allocation of the sg array and the bounce buffer still use GFP_ATOMIC.
> Considering the size of both the sg array and the bounce buffer may be
> greater than PAGE_SIZE, use GFP_NOFS instead of GFP_ATOMIC to lower the
> possibility of memory allocation failure.
>
What's the practical benefit of this patch. Looks like if memory
allocation fails, we keep retrying at interval of 1ms and don't
return error to user space.
Thanks
Vivek
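For reference, the 1ms retry behaviour described above comes from the
requeue path in virtio_fs_request_dispatch_work(); roughly (simplified
sketch, not the verbatim kernel code, shown with the new gfp argument):

	ret = virtio_fs_enqueue_req(fsvq, req, true, GFP_NOFS);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			/* Out of memory (or queue space): put the request
			 * back on the queued list and let the worker retry
			 * in about a millisecond.
			 */
			spin_lock(&fsvq->lock);
			list_add_tail(&req->list, &fsvq->queued_reqs);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
			spin_unlock(&fsvq->lock);
			return;
		}
		/* Other errors are reported back to the request. */
		req->out.h.error = ret;
		fuse_request_end(req);
	}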
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
> Change log:
> v2:
> * pass gfp_t instead of bool to virtio_fs_enqueue_req() (Suggested by Matthew)
>
> v1: https://lore.kernel.org/linux-fsdevel/20240104015805.2103766-1-houtao@huaweicloud.com
>
> fs/fuse/virtio_fs.c | 20 +++++++++++---------
> 1 file changed, 11 insertions(+), 9 deletions(-)
>
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index 3aac31d451985..8cf518624ce9e 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
> @@ -87,7 +87,8 @@ struct virtio_fs_req_work {
> };
>
> static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
> - struct fuse_req *req, bool in_flight);
> + struct fuse_req *req, bool in_flight,
> + gfp_t gfp);
>
> static const struct constant_table dax_param_enums[] = {
> {"always", FUSE_DAX_ALWAYS },
> @@ -383,7 +384,7 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
> list_del_init(&req->list);
> spin_unlock(&fsvq->lock);
>
> - ret = virtio_fs_enqueue_req(fsvq, req, true);
> + ret = virtio_fs_enqueue_req(fsvq, req, true, GFP_NOFS);
> if (ret < 0) {
> if (ret == -ENOMEM || ret == -ENOSPC) {
> spin_lock(&fsvq->lock);
> @@ -488,7 +489,7 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
> }
>
> /* Allocate and copy args into req->argbuf */
> -static int copy_args_to_argbuf(struct fuse_req *req)
> +static int copy_args_to_argbuf(struct fuse_req *req, gfp_t gfp)
> {
> struct fuse_args *args = req->args;
> unsigned int offset = 0;
> @@ -502,7 +503,7 @@ static int copy_args_to_argbuf(struct fuse_req *req)
> len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
> fuse_len_args(num_out, args->out_args);
>
> - req->argbuf = kmalloc(len, GFP_ATOMIC);
> + req->argbuf = kmalloc(len, gfp);
> if (!req->argbuf)
> return -ENOMEM;
>
> @@ -1119,7 +1120,8 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg,
>
> /* Add a request to a virtqueue and kick the device */
> static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
> - struct fuse_req *req, bool in_flight)
> + struct fuse_req *req, bool in_flight,
> + gfp_t gfp)
> {
> /* requests need at least 4 elements */
> struct scatterlist *stack_sgs[6];
> @@ -1140,8 +1142,8 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
> /* Does the sglist fit on the stack? */
> total_sgs = sg_count_fuse_req(req);
> if (total_sgs > ARRAY_SIZE(stack_sgs)) {
> - sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
> - sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
> + sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), gfp);
> + sg = kmalloc_array(total_sgs, sizeof(sg[0]), gfp);
> if (!sgs || !sg) {
> ret = -ENOMEM;
> goto out;
> @@ -1149,7 +1151,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
> }
>
> /* Use a bounce buffer since stack args cannot be mapped */
> - ret = copy_args_to_argbuf(req);
> + ret = copy_args_to_argbuf(req, gfp);
> if (ret < 0)
> goto out;
>
> @@ -1245,7 +1247,7 @@ __releases(fiq->lock)
> fuse_len_args(req->args->out_numargs, req->args->out_args));
>
> fsvq = &fs->vqs[queue_id];
> - ret = virtio_fs_enqueue_req(fsvq, req, false);
> + ret = virtio_fs_enqueue_req(fsvq, req, false, GFP_ATOMIC);
> if (ret < 0) {
> if (ret == -ENOMEM || ret == -ENOSPC) {
> /*
> --
> 2.29.2
>
On Fri, Jan 05, 2024 at 03:17:19PM -0500, Vivek Goyal wrote:
> On Fri, Jan 05, 2024 at 06:53:05PM +0800, Hou Tao wrote:
> > From: Hou Tao <houtao1@huawei.com>
> >
> > When invoking virtio_fs_enqueue_req() through kworker, both the
> > allocation of the sg array and the bounce buffer still use GFP_ATOMIC.
> > Considering the size of both the sg array and the bounce buffer may be
> > greater than PAGE_SIZE, use GFP_NOFS instead of GFP_ATOMIC to lower the
> > possibility of memory allocation failure.
> >
> What's the practical benefit of this patch. Looks like if memory
> allocation fails, we keep retrying at interval of 1ms and don't
> return error to user space.

You don't deplete the atomic reserves unnecessarily?
On 1/6/2024 4:21 AM, Matthew Wilcox wrote:
> On Fri, Jan 05, 2024 at 03:17:19PM -0500, Vivek Goyal wrote:
>> On Fri, Jan 05, 2024 at 06:53:05PM +0800, Hou Tao wrote:
>>> From: Hou Tao <houtao1@huawei.com>
>>>
>>> When invoking virtio_fs_enqueue_req() through kworker, both the
>>> allocation of the sg array and the bounce buffer still use GFP_ATOMIC.
>>> Considering the size of both the sg array and the bounce buffer may be
>>> greater than PAGE_SIZE, use GFP_NOFS instead of GFP_ATOMIC to lower the
>>> possibility of memory allocation failure.
>>>
>> What's the practical benefit of this patch. Looks like if memory
>> allocation fails, we keep retrying at interval of 1ms and don't
>> return error to user space.
> You don't deplete the atomic reserves unnecessarily?

Besides that, I think the proposed GFP_NOFS may reduce unnecessary retries.
I should mention that in the commit message. Should I post a v3 to do that?
On Fri, Jan 05, 2024 at 08:21:00PM +0000, Matthew Wilcox wrote:
> On Fri, Jan 05, 2024 at 03:17:19PM -0500, Vivek Goyal wrote:
> > On Fri, Jan 05, 2024 at 06:53:05PM +0800, Hou Tao wrote:
> > > From: Hou Tao <houtao1@huawei.com>
> > >
> > > When invoking virtio_fs_enqueue_req() through kworker, both the
> > > allocation of the sg array and the bounce buffer still use GFP_ATOMIC.
> > > Considering the size of both the sg array and the bounce buffer may be
> > > greater than PAGE_SIZE, use GFP_NOFS instead of GFP_ATOMIC to lower the
> > > possibility of memory allocation failure.
> > >
> > What's the practical benefit of this patch. Looks like if memory
> > allocation fails, we keep retrying at interval of 1ms and don't
> > return error to user space.
>
> You don't deplete the atomic reserves unnecessarily?

Sounds reasonable.

With GFP_NOFS specified, can we still get -ENOMEM? Or will this block
indefinitely until memory can be allocated?

I am trying to figure out whether, with GFP_NOFS, we still need to check
for -ENOMEM while requeuing the req and asking the worker thread to retry
after 1ms.

Thanks
Vivek
On Fri, Jan 05, 2024 at 03:41:48PM -0500, Vivek Goyal wrote:
> On Fri, Jan 05, 2024 at 08:21:00PM +0000, Matthew Wilcox wrote:
> > On Fri, Jan 05, 2024 at 03:17:19PM -0500, Vivek Goyal wrote:
> > > On Fri, Jan 05, 2024 at 06:53:05PM +0800, Hou Tao wrote:
> > > > From: Hou Tao <houtao1@huawei.com>
> > > >
> > > > When invoking virtio_fs_enqueue_req() through kworker, both the
> > > > allocation of the sg array and the bounce buffer still use GFP_ATOMIC.
> > > > Considering the size of both the sg array and the bounce buffer may be
> > > > greater than PAGE_SIZE, use GFP_NOFS instead of GFP_ATOMIC to lower the
> > > > possibility of memory allocation failure.
> > > >
> > > What's the practical benefit of this patch. Looks like if memory
> > > allocation fails, we keep retrying at interval of 1ms and don't
> > > return error to user space.
> >
> > You don't deplete the atomic reserves unnecessarily?
>
> Sounds reasonable.
>
> With GFP_NOFS specified, can we still get -ENOMEM? Or will this block
> indefinitely until memory can be allocated?

If you need the "loop indefinitely" behaviour, that's
GFP_NOFS | __GFP_NOFAIL. If you're actually doing something yourself
which can free up memory, this is a bad choice. If you're just sleeping
and retrying, you might as well have the MM do that for you.
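To make the two options above concrete, a minimal sketch (buf and len are
placeholders for whatever is being allocated):

	/* Let the MM sleep and retry internally: this never returns NULL,
	 * but it also cannot bail out early (e.g. on device removal).
	 */
	buf = kmalloc(len, GFP_NOFS | __GFP_NOFAIL);

	/* Plain GFP_NOFS: may still return NULL under memory pressure,
	 * and the caller decides what to do, e.g. requeue and retry later.
	 */
	buf = kmalloc(len, GFP_NOFS);
	if (!buf)
		return -ENOMEM;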
On Fri, Jan 05, 2024 at 08:57:55PM +0000, Matthew Wilcox wrote:
> On Fri, Jan 05, 2024 at 03:41:48PM -0500, Vivek Goyal wrote:
> > On Fri, Jan 05, 2024 at 08:21:00PM +0000, Matthew Wilcox wrote:
> > > On Fri, Jan 05, 2024 at 03:17:19PM -0500, Vivek Goyal wrote:
> > > > On Fri, Jan 05, 2024 at 06:53:05PM +0800, Hou Tao wrote:
> > > > > From: Hou Tao <houtao1@huawei.com>
> > > > >
> > > > > When invoking virtio_fs_enqueue_req() through kworker, both the
> > > > > allocation of the sg array and the bounce buffer still use GFP_ATOMIC.
> > > > > Considering the size of both the sg array and the bounce buffer may be
> > > > > greater than PAGE_SIZE, use GFP_NOFS instead of GFP_ATOMIC to lower the
> > > > > possibility of memory allocation failure.
> > > > >
> > > > What's the practical benefit of this patch. Looks like if memory
> > > > allocation fails, we keep retrying at interval of 1ms and don't
> > > > return error to user space.
> > >
> > > You don't deplete the atomic reserves unnecessarily?
> >
> > Sounds reasonable.
> >
> > With GFP_NOFS specified, can we still get -ENOMEM? Or will this block
> > indefinitely until memory can be allocated?
>
> If you need the "loop indefinitely" behaviour, that's
> GFP_NOFS | __GFP_NOFAIL. If you're actually doing something yourself
> which can free up memory, this is a bad choice. If you're just sleeping
> and retrying, you might as well have the MM do that for you.

I probably don't want to wait indefinitely. There might be some cases
where I might want to return an error to user space. For example, if the
virtiofs device has been hot-unplugged, then there is no point in waiting
indefinitely for memory allocation. Even if memory was allocated, we would
soon return an error to user space with -ENOTCONN.

We are currently not doing that check after memory allocation failure, but
we probably could as an optimization.

So this patch looks good to me as it is. Thanks, Hou Tao.

Reviewed-by: Vivek Goyal <vgoyal@redhat.com>

Thanks
Vivek
Hi Vivek,

On 1/6/2024 5:27 AM, Vivek Goyal wrote:
> On Fri, Jan 05, 2024 at 08:57:55PM +0000, Matthew Wilcox wrote:
>> On Fri, Jan 05, 2024 at 03:41:48PM -0500, Vivek Goyal wrote:
>>> On Fri, Jan 05, 2024 at 08:21:00PM +0000, Matthew Wilcox wrote:
>>>> On Fri, Jan 05, 2024 at 03:17:19PM -0500, Vivek Goyal wrote:
>>>>> On Fri, Jan 05, 2024 at 06:53:05PM +0800, Hou Tao wrote:
>>>>>> From: Hou Tao <houtao1@huawei.com>
>>>>>>
>>>>>> When invoking virtio_fs_enqueue_req() through kworker, both the
>>>>>> allocation of the sg array and the bounce buffer still use GFP_ATOMIC.
>>>>>> Considering the size of both the sg array and the bounce buffer may be
>>>>>> greater than PAGE_SIZE, use GFP_NOFS instead of GFP_ATOMIC to lower the
>>>>>> possibility of memory allocation failure.
>>>>>>
>>>>> What's the practical benefit of this patch. Looks like if memory
>>>>> allocation fails, we keep retrying at interval of 1ms and don't
>>>>> return error to user space.

The motivation for GFP_NOFS comes from another fix proposed for virtiofs [1],
in which inserting a big kernel module kept on a cache-disabled virtiofs makes
the length of the fuse args large (e.g., 10MB), and the memory allocation in
copy_args_to_argbuf() fails forever. That fix tries to solve the problem by
limiting the length of data kept in the fuse arg, but because the limit is
still large (256KB in that patch), I think using GFP_NOFS will also be helpful
for such allocations.

[1]: https://lore.kernel.org/linux-fsdevel/20240103105929.1902658-1-houtao@huaweicloud.com/

>>>> You don't deplete the atomic reserves unnecessarily?
>>> Sounds reasonable.
>>>
>>> With GFP_NOFS specified, can we still get -ENOMEM? Or will this block
>>> indefinitely until memory can be allocated?
>> If you need the "loop indefinitely" behaviour, that's
>> GFP_NOFS | __GFP_NOFAIL. If you're actually doing something yourself
>> which can free up memory, this is a bad choice. If you're just sleeping
>> and retrying, you might as well have the MM do that for you.

Even with GFP_NOFS, I think -ENOMEM is still possible, so the retry logic
is still necessary.

> I probably don't want to wait indefinitely. There might be some cases
> where I might want to return an error to user space. For example, if the
> virtiofs device has been hot-unplugged, then there is no point in waiting
> indefinitely for memory allocation. Even if memory was allocated, we would
> soon return an error to user space with -ENOTCONN.
>
> We are currently not doing that check after memory allocation failure, but
> we probably could as an optimization.

Yes. It seems virtio_fs_enqueue_req() only checks fsvq->connected before
writing the sg list to the virtqueue, so if the virtio device is
hot-unplugged and free memory is low, it may do unnecessary retries. Even
worse, it may hang. I volunteer to post a patch to check the connected
status after memory allocation fails, if you are OK with that.

>
> So this patch looks good to me as it is. Thanks, Hou Tao.
>
> Reviewed-by: Vivek Goyal <vgoyal@redhat.com>
>
> Thanks
> Vivek
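A rough sketch of the follow-up check volunteered above (hypothetical, not
the patch that was actually posted): after an allocation failure in
virtio_fs_enqueue_req(), the connected state could be consulted so that a
hot-unplugged device fails the request with -ENOTCONN instead of being
requeued forever:

	sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), gfp);
	sg = kmalloc_array(total_sgs, sizeof(sg[0]), gfp);
	if (!sgs || !sg) {
		/* Hypothetical: if the device is already disconnected,
		 * report -ENOTCONN instead of -ENOMEM so the dispatch
		 * worker does not keep retrying the allocation forever.
		 */
		spin_lock(&fsvq->lock);
		ret = fsvq->connected ? -ENOMEM : -ENOTCONN;
		spin_unlock(&fsvq->lock);
		goto out;
	}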