drivers/vdpa/mlx5/net/mlx5_vnet.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-)
In the error recovery path of mlx5_vdpa_dev_add(), the explicit cleanup
is executed and, at the end, put_device() is called, which ends up
calling mlx5_vdpa_free(). mlx5_vdpa_free() then executes the same
cleanup all over again. Most resources support being cleaned up twice,
but the recent mlx5_vdpa_destroy_mr_resources() doesn't.
This change drops the explicit cleanup from mlx5_vdpa_dev_add() and
lets mlx5_vdpa_free() do its work.
This issue was discovered while trying to add 2 vdpa devices with the
same name:
$> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.2
$> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.3
... yields the following dump:
BUG: kernel NULL pointer dereference, address: 00000000000000b8
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: Oops: 0000 [#1] SMP
CPU: 4 UID: 0 PID: 2811 Comm: vdpa Not tainted 6.12.0-rc6 #1
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
RIP: 0010:destroy_workqueue+0xe/0x2a0
Code: ...
RSP: 0018:ffff88814920b9a8 EFLAGS: 00010282
RAX: 0000000000000000 RBX: ffff888105c10000 RCX: 0000000000000000
RDX: 0000000000000001 RSI: ffff888100400168 RDI: 0000000000000000
RBP: 0000000000000000 R08: ffff888100120c00 R09: ffffffff828578c0
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: ffff888131fd99a0 R14: 0000000000000000 R15: ffff888105c10580
FS: 00007fdfa6b4f740(0000) GS:ffff88852ca00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000000000b8 CR3: 000000018db09006 CR4: 0000000000372eb0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
? __die+0x20/0x60
? page_fault_oops+0x150/0x3e0
? exc_page_fault+0x74/0x130
? asm_exc_page_fault+0x22/0x30
? destroy_workqueue+0xe/0x2a0
mlx5_vdpa_destroy_mr_resources+0x2b/0x40 [mlx5_vdpa]
mlx5_vdpa_free+0x45/0x150 [mlx5_vdpa]
vdpa_release_dev+0x1e/0x50 [vdpa]
device_release+0x31/0x90
kobject_put+0x8d/0x230
mlx5_vdpa_dev_add+0x328/0x8b0 [mlx5_vdpa]
vdpa_nl_cmd_dev_add_set_doit+0x2b8/0x4c0 [vdpa]
genl_family_rcv_msg_doit+0xd0/0x120
genl_rcv_msg+0x180/0x2b0
? __vdpa_alloc_device+0x1b0/0x1b0 [vdpa]
? genl_family_rcv_msg_dumpit+0xf0/0xf0
netlink_rcv_skb+0x54/0x100
genl_rcv+0x24/0x40
netlink_unicast+0x1fc/0x2d0
netlink_sendmsg+0x1e4/0x410
__sock_sendmsg+0x38/0x60
? sockfd_lookup_light+0x12/0x60
__sys_sendto+0x105/0x160
? __count_memcg_events+0x53/0xe0
? handle_mm_fault+0x100/0x220
? do_user_addr_fault+0x40d/0x620
__x64_sys_sendto+0x20/0x30
do_syscall_64+0x4c/0x100
entry_SYSCALL_64_after_hwframe+0x4b/0x53
RIP: 0033:0x7fdfa6c66b57
Code: ...
RSP: 002b:00007ffeace22998 EFLAGS: 00000202 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 000055a498608350 RCX: 00007fdfa6c66b57
RDX: 000000000000006c RSI: 000055a498608350 RDI: 0000000000000003
RBP: 00007ffeace229c0 R08: 00007fdfa6d35200 R09: 000000000000000c
R10: 0000000000000000 R11: 0000000000000202 R12: 000055a4986082a0
R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffeace233f3
</TASK>
Modules linked in: ...
CR2: 00000000000000b8
Fixes: 62111654481d ("vdpa/mlx5: Postpone MR deletion")
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
---
drivers/vdpa/mlx5/net/mlx5_vnet.c | 21 +++++----------------
1 file changed, 5 insertions(+), 16 deletions(-)
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index dee019977716..5f581e71e201 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -3963,28 +3963,28 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
mvdev->vdev.dma_dev = &mdev->pdev->dev;
err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
if (err)
- goto err_mpfs;
+ goto err_alloc;
err = mlx5_vdpa_init_mr_resources(mvdev);
if (err)
- goto err_res;
+ goto err_alloc;
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
err = mlx5_vdpa_create_dma_mr(mvdev);
if (err)
- goto err_mr_res;
+ goto err_alloc;
}
err = alloc_fixed_resources(ndev);
if (err)
- goto err_mr;
+ goto err_alloc;
ndev->cvq_ent.mvdev = mvdev;
INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
if (!mvdev->wq) {
err = -ENOMEM;
- goto err_res2;
+ goto err_alloc;
}
mvdev->vdev.mdev = &mgtdev->mgtdev;
@@ -4010,17 +4010,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
_vdpa_unregister_device(&mvdev->vdev);
err_reg:
destroy_workqueue(mvdev->wq);
-err_res2:
- free_fixed_resources(ndev);
-err_mr:
- mlx5_vdpa_clean_mrs(mvdev);
-err_mr_res:
- mlx5_vdpa_destroy_mr_resources(mvdev);
-err_res:
- mlx5_vdpa_free_resources(&ndev->mvdev);
-err_mpfs:
- if (!is_zero_ether_addr(config->mac))
- mlx5_mpfs_del_mac(pfmdev, config->mac);
err_alloc:
put_device(&mvdev->vdev.dev);
return err;
--
2.47.0
On Tue, Nov 5, 2024 at 7:52 PM Dragos Tatulea <dtatulea@nvidia.com> wrote: > > In the error recovery path of mlx5_vdpa_dev_add(), the cleanup is > executed and at the end put_device() is called which ends up calling > mlx5_vdpa_free(). This function will execute the same cleanup all over > again. Most resources support being cleaned up twice, but the recent > mlx5_vdpa_destroy_mr_resources() doesn't. > > This change drops the explicit cleanup from within the > mlx5_vdpa_dev_add() and lets mlx5_vdpa_free() do its work. > > This issue was discovered while trying to add 2 vdpa devices with the > same name: > $> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.2 > $> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.3 > > ... yields the following dump: > > BUG: kernel NULL pointer dereference, address: 00000000000000b8 > #PF: supervisor read access in kernel mode > #PF: error_code(0x0000) - not-present page > PGD 0 P4D 0 > Oops: Oops: 0000 [#1] SMP > CPU: 4 UID: 0 PID: 2811 Comm: vdpa Not tainted 6.12.0-rc6 #1 > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 > RIP: 0010:destroy_workqueue+0xe/0x2a0 > Code: ... > RSP: 0018:ffff88814920b9a8 EFLAGS: 00010282 > RAX: 0000000000000000 RBX: ffff888105c10000 RCX: 0000000000000000 > RDX: 0000000000000001 RSI: ffff888100400168 RDI: 0000000000000000 > RBP: 0000000000000000 R08: ffff888100120c00 R09: ffffffff828578c0 > R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 > R13: ffff888131fd99a0 R14: 0000000000000000 R15: ffff888105c10580 > FS: 00007fdfa6b4f740(0000) GS:ffff88852ca00000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 00000000000000b8 CR3: 000000018db09006 CR4: 0000000000372eb0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > Call Trace: > <TASK> > ? __die+0x20/0x60 > ? page_fault_oops+0x150/0x3e0 > ? 
exc_page_fault+0x74/0x130 > ? asm_exc_page_fault+0x22/0x30 > ? destroy_workqueue+0xe/0x2a0 > mlx5_vdpa_destroy_mr_resources+0x2b/0x40 [mlx5_vdpa] > mlx5_vdpa_free+0x45/0x150 [mlx5_vdpa] > vdpa_release_dev+0x1e/0x50 [vdpa] > device_release+0x31/0x90 > kobject_put+0x8d/0x230 > mlx5_vdpa_dev_add+0x328/0x8b0 [mlx5_vdpa] > vdpa_nl_cmd_dev_add_set_doit+0x2b8/0x4c0 [vdpa] > genl_family_rcv_msg_doit+0xd0/0x120 > genl_rcv_msg+0x180/0x2b0 > ? __vdpa_alloc_device+0x1b0/0x1b0 [vdpa] > ? genl_family_rcv_msg_dumpit+0xf0/0xf0 > netlink_rcv_skb+0x54/0x100 > genl_rcv+0x24/0x40 > netlink_unicast+0x1fc/0x2d0 > netlink_sendmsg+0x1e4/0x410 > __sock_sendmsg+0x38/0x60 > ? sockfd_lookup_light+0x12/0x60 > __sys_sendto+0x105/0x160 > ? __count_memcg_events+0x53/0xe0 > ? handle_mm_fault+0x100/0x220 > ? do_user_addr_fault+0x40d/0x620 > __x64_sys_sendto+0x20/0x30 > do_syscall_64+0x4c/0x100 > entry_SYSCALL_64_after_hwframe+0x4b/0x53 > RIP: 0033:0x7fdfa6c66b57 > Code: ... > RSP: 002b:00007ffeace22998 EFLAGS: 00000202 ORIG_RAX: 000000000000002c > RAX: ffffffffffffffda RBX: 000055a498608350 RCX: 00007fdfa6c66b57 > RDX: 000000000000006c RSI: 000055a498608350 RDI: 0000000000000003 > RBP: 00007ffeace229c0 R08: 00007fdfa6d35200 R09: 000000000000000c > R10: 0000000000000000 R11: 0000000000000202 R12: 000055a4986082a0 > R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffeace233f3 > </TASK> > Modules linked in: ... > CR2: 00000000000000b8 > > Fixes: 62111654481d ("vdpa/mlx5: Postpone MR deletion") Acked-by: Eugenio Pérez <eperezma@redhat.com> Thanks! 
> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com> > --- > drivers/vdpa/mlx5/net/mlx5_vnet.c | 21 +++++---------------- > 1 file changed, 5 insertions(+), 16 deletions(-) > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c > index dee019977716..5f581e71e201 100644 > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c > @@ -3963,28 +3963,28 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, > mvdev->vdev.dma_dev = &mdev->pdev->dev; > err = mlx5_vdpa_alloc_resources(&ndev->mvdev); > if (err) > - goto err_mpfs; > + goto err_alloc; > > err = mlx5_vdpa_init_mr_resources(mvdev); > if (err) > - goto err_res; > + goto err_alloc; > > if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { > err = mlx5_vdpa_create_dma_mr(mvdev); > if (err) > - goto err_mr_res; > + goto err_alloc; > } > > err = alloc_fixed_resources(ndev); > if (err) > - goto err_mr; > + goto err_alloc; > > ndev->cvq_ent.mvdev = mvdev; > INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler); > mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq"); > if (!mvdev->wq) { > err = -ENOMEM; > - goto err_res2; > + goto err_alloc; > } > > mvdev->vdev.mdev = &mgtdev->mgtdev; > @@ -4010,17 +4010,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, > _vdpa_unregister_device(&mvdev->vdev); > err_reg: > destroy_workqueue(mvdev->wq); > -err_res2: > - free_fixed_resources(ndev); > -err_mr: > - mlx5_vdpa_clean_mrs(mvdev); > -err_mr_res: > - mlx5_vdpa_destroy_mr_resources(mvdev); > -err_res: > - mlx5_vdpa_free_resources(&ndev->mvdev); > -err_mpfs: > - if (!is_zero_ether_addr(config->mac)) > - mlx5_mpfs_del_mac(pfmdev, config->mac); > err_alloc: > put_device(&mvdev->vdev.dev); > return err; > -- > 2.47.0 >
On Wed, Nov 6, 2024 at 2:52 AM Dragos Tatulea <dtatulea@nvidia.com> wrote: > > In the error recovery path of mlx5_vdpa_dev_add(), the cleanup is > executed and at the end put_device() is called which ends up calling > mlx5_vdpa_free(). This function will execute the same cleanup all over > again. Most resources support being cleaned up twice, but the recent > mlx5_vdpa_destroy_mr_resources() doesn't. > > This change drops the explicit cleanup from within the > mlx5_vdpa_dev_add() and lets mlx5_vdpa_free() do its work. > > This issue was discovered while trying to add 2 vdpa devices with the > same name: > $> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.2 > $> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.3 > > ... yields the following dump: > > BUG: kernel NULL pointer dereference, address: 00000000000000b8 > #PF: supervisor read access in kernel mode > #PF: error_code(0x0000) - not-present page > PGD 0 P4D 0 > Oops: Oops: 0000 [#1] SMP > CPU: 4 UID: 0 PID: 2811 Comm: vdpa Not tainted 6.12.0-rc6 #1 > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 > RIP: 0010:destroy_workqueue+0xe/0x2a0 > Code: ... > RSP: 0018:ffff88814920b9a8 EFLAGS: 00010282 > RAX: 0000000000000000 RBX: ffff888105c10000 RCX: 0000000000000000 > RDX: 0000000000000001 RSI: ffff888100400168 RDI: 0000000000000000 > RBP: 0000000000000000 R08: ffff888100120c00 R09: ffffffff828578c0 > R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 > R13: ffff888131fd99a0 R14: 0000000000000000 R15: ffff888105c10580 > FS: 00007fdfa6b4f740(0000) GS:ffff88852ca00000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 00000000000000b8 CR3: 000000018db09006 CR4: 0000000000372eb0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > Call Trace: > <TASK> > ? __die+0x20/0x60 > ? page_fault_oops+0x150/0x3e0 > ? 
exc_page_fault+0x74/0x130 > ? asm_exc_page_fault+0x22/0x30 > ? destroy_workqueue+0xe/0x2a0 > mlx5_vdpa_destroy_mr_resources+0x2b/0x40 [mlx5_vdpa] > mlx5_vdpa_free+0x45/0x150 [mlx5_vdpa] > vdpa_release_dev+0x1e/0x50 [vdpa] > device_release+0x31/0x90 > kobject_put+0x8d/0x230 > mlx5_vdpa_dev_add+0x328/0x8b0 [mlx5_vdpa] > vdpa_nl_cmd_dev_add_set_doit+0x2b8/0x4c0 [vdpa] > genl_family_rcv_msg_doit+0xd0/0x120 > genl_rcv_msg+0x180/0x2b0 > ? __vdpa_alloc_device+0x1b0/0x1b0 [vdpa] > ? genl_family_rcv_msg_dumpit+0xf0/0xf0 > netlink_rcv_skb+0x54/0x100 > genl_rcv+0x24/0x40 > netlink_unicast+0x1fc/0x2d0 > netlink_sendmsg+0x1e4/0x410 > __sock_sendmsg+0x38/0x60 > ? sockfd_lookup_light+0x12/0x60 > __sys_sendto+0x105/0x160 > ? __count_memcg_events+0x53/0xe0 > ? handle_mm_fault+0x100/0x220 > ? do_user_addr_fault+0x40d/0x620 > __x64_sys_sendto+0x20/0x30 > do_syscall_64+0x4c/0x100 > entry_SYSCALL_64_after_hwframe+0x4b/0x53 > RIP: 0033:0x7fdfa6c66b57 > Code: ... > RSP: 002b:00007ffeace22998 EFLAGS: 00000202 ORIG_RAX: 000000000000002c > RAX: ffffffffffffffda RBX: 000055a498608350 RCX: 00007fdfa6c66b57 > RDX: 000000000000006c RSI: 000055a498608350 RDI: 0000000000000003 > RBP: 00007ffeace229c0 R08: 00007fdfa6d35200 R09: 000000000000000c > R10: 0000000000000000 R11: 0000000000000202 R12: 000055a4986082a0 > R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffeace233f3 > </TASK> > Modules linked in: ... > CR2: 00000000000000b8 > > Fixes: 62111654481d ("vdpa/mlx5: Postpone MR deletion") > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com> Acked-by: Jason Wang <jasowang@redhat.com> Thanks
© 2016 - 2024 Red Hat, Inc.