vdpa/mlx5: Fix error path during device add

[PATCH] vdpa/mlx5: Fix error path during device add

Posted by Dragos Tatulea 1 year, 3 months ago

In the error recovery path of mlx5_vdpa_dev_add(), the cleanup is
executed and at the end put_device() is called, which ends up calling
mlx5_vdpa_free(). This function will execute the same cleanup all over
again. Most resources support being cleaned up twice, but the recently
introduced mlx5_vdpa_destroy_mr_resources() doesn't.

This change drops the explicit cleanup from within
mlx5_vdpa_dev_add() and lets mlx5_vdpa_free() do its work.

This issue was discovered while trying to add 2 vdpa devices with the
same name:
$> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.2
$> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.3

... yields the following dump:

  BUG: kernel NULL pointer dereference, address: 00000000000000b8
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 0 P4D 0
  Oops: Oops: 0000 [#1] SMP
  CPU: 4 UID: 0 PID: 2811 Comm: vdpa Not tainted 6.12.0-rc6 #1
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  RIP: 0010:destroy_workqueue+0xe/0x2a0
  Code: ...
  RSP: 0018:ffff88814920b9a8 EFLAGS: 00010282
  RAX: 0000000000000000 RBX: ffff888105c10000 RCX: 0000000000000000
  RDX: 0000000000000001 RSI: ffff888100400168 RDI: 0000000000000000
  RBP: 0000000000000000 R08: ffff888100120c00 R09: ffffffff828578c0
  R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
  R13: ffff888131fd99a0 R14: 0000000000000000 R15: ffff888105c10580
  FS:  00007fdfa6b4f740(0000) GS:ffff88852ca00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00000000000000b8 CR3: 000000018db09006 CR4: 0000000000372eb0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   <TASK>
   ? __die+0x20/0x60
   ? page_fault_oops+0x150/0x3e0
   ? exc_page_fault+0x74/0x130
   ? asm_exc_page_fault+0x22/0x30
   ? destroy_workqueue+0xe/0x2a0
   mlx5_vdpa_destroy_mr_resources+0x2b/0x40 [mlx5_vdpa]
   mlx5_vdpa_free+0x45/0x150 [mlx5_vdpa]
   vdpa_release_dev+0x1e/0x50 [vdpa]
   device_release+0x31/0x90
   kobject_put+0x8d/0x230
   mlx5_vdpa_dev_add+0x328/0x8b0 [mlx5_vdpa]
   vdpa_nl_cmd_dev_add_set_doit+0x2b8/0x4c0 [vdpa]
   genl_family_rcv_msg_doit+0xd0/0x120
   genl_rcv_msg+0x180/0x2b0
   ? __vdpa_alloc_device+0x1b0/0x1b0 [vdpa]
   ? genl_family_rcv_msg_dumpit+0xf0/0xf0
   netlink_rcv_skb+0x54/0x100
   genl_rcv+0x24/0x40
   netlink_unicast+0x1fc/0x2d0
   netlink_sendmsg+0x1e4/0x410
   __sock_sendmsg+0x38/0x60
   ? sockfd_lookup_light+0x12/0x60
   __sys_sendto+0x105/0x160
   ? __count_memcg_events+0x53/0xe0
   ? handle_mm_fault+0x100/0x220
   ? do_user_addr_fault+0x40d/0x620
   __x64_sys_sendto+0x20/0x30
   do_syscall_64+0x4c/0x100
   entry_SYSCALL_64_after_hwframe+0x4b/0x53
  RIP: 0033:0x7fdfa6c66b57
  Code: ...
  RSP: 002b:00007ffeace22998 EFLAGS: 00000202 ORIG_RAX: 000000000000002c
  RAX: ffffffffffffffda RBX: 000055a498608350 RCX: 00007fdfa6c66b57
  RDX: 000000000000006c RSI: 000055a498608350 RDI: 0000000000000003
  RBP: 00007ffeace229c0 R08: 00007fdfa6d35200 R09: 000000000000000c
  R10: 0000000000000000 R11: 0000000000000202 R12: 000055a4986082a0
  R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffeace233f3
   </TASK>
  Modules linked in: ...
  CR2: 00000000000000b8

Fixes: 62111654481d ("vdpa/mlx5: Postpone MR deletion")
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
---
 drivers/vdpa/mlx5/net/mlx5_vnet.c | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index dee019977716..5f581e71e201 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -3963,28 +3963,28 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
 	if (err)
-		goto err_mpfs;
+		goto err_alloc;
 
 	err = mlx5_vdpa_init_mr_resources(mvdev);
 	if (err)
-		goto err_res;
+		goto err_alloc;
 
 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
 		err = mlx5_vdpa_create_dma_mr(mvdev);
 		if (err)
-			goto err_mr_res;
+			goto err_alloc;
 	}
 
 	err = alloc_fixed_resources(ndev);
 	if (err)
-		goto err_mr;
+		goto err_alloc;
 
 	ndev->cvq_ent.mvdev = mvdev;
 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
 	if (!mvdev->wq) {
 		err = -ENOMEM;
-		goto err_res2;
+		goto err_alloc;
 	}
 
 	mvdev->vdev.mdev = &mgtdev->mgtdev;
@@ -4010,17 +4010,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
 	_vdpa_unregister_device(&mvdev->vdev);
 err_reg:
 	destroy_workqueue(mvdev->wq);
-err_res2:
-	free_fixed_resources(ndev);
-err_mr:
-	mlx5_vdpa_clean_mrs(mvdev);
-err_mr_res:
-	mlx5_vdpa_destroy_mr_resources(mvdev);
-err_res:
-	mlx5_vdpa_free_resources(&ndev->mvdev);
-err_mpfs:
-	if (!is_zero_ether_addr(config->mac))
-		mlx5_mpfs_del_mac(pfmdev, config->mac);
 err_alloc:
 	put_device(&mvdev->vdev.dev);
 	return err;
-- 
2.47.0

Re: [PATCH] vdpa/mlx5: Fix error path during device add

Posted by Eugenio Perez Martin 1 year, 3 months ago

On Tue, Nov 5, 2024 at 7:52 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>
> In the error recovery path of mlx5_vdpa_dev_add(), the cleanup is
> executed and at the end put_device() is called which ends up calling
> mlx5_vdpa_free(). This function will execute the same cleanup all over
> again. Most resources support being cleaned up twice, but the recent
> mlx5_vdpa_destroy_mr_resources() doesn't.
>
> This change drops the explicit cleanup from within the
> mlx5_vdpa_dev_add() and lets mlx5_vdpa_free() do its work.
>
> This issue was discovered while trying to add 2 vdpa devices with the
> same name:
> $> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.2
> $> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.3
>
> ... yields the following dump:
>
>   BUG: kernel NULL pointer dereference, address: 00000000000000b8
>   #PF: supervisor read access in kernel mode
>   #PF: error_code(0x0000) - not-present page
>   PGD 0 P4D 0
>   Oops: Oops: 0000 [#1] SMP
>   CPU: 4 UID: 0 PID: 2811 Comm: vdpa Not tainted 6.12.0-rc6 #1
>   Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
>   RIP: 0010:destroy_workqueue+0xe/0x2a0
>   Code: ...
>   RSP: 0018:ffff88814920b9a8 EFLAGS: 00010282
>   RAX: 0000000000000000 RBX: ffff888105c10000 RCX: 0000000000000000
>   RDX: 0000000000000001 RSI: ffff888100400168 RDI: 0000000000000000
>   RBP: 0000000000000000 R08: ffff888100120c00 R09: ffffffff828578c0
>   R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
>   R13: ffff888131fd99a0 R14: 0000000000000000 R15: ffff888105c10580
>   FS:  00007fdfa6b4f740(0000) GS:ffff88852ca00000(0000) knlGS:0000000000000000
>   CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>   CR2: 00000000000000b8 CR3: 000000018db09006 CR4: 0000000000372eb0
>   DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>   DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
>   Call Trace:
>    <TASK>
>    ? __die+0x20/0x60
>    ? page_fault_oops+0x150/0x3e0
>    ? exc_page_fault+0x74/0x130
>    ? asm_exc_page_fault+0x22/0x30
>    ? destroy_workqueue+0xe/0x2a0
>    mlx5_vdpa_destroy_mr_resources+0x2b/0x40 [mlx5_vdpa]
>    mlx5_vdpa_free+0x45/0x150 [mlx5_vdpa]
>    vdpa_release_dev+0x1e/0x50 [vdpa]
>    device_release+0x31/0x90
>    kobject_put+0x8d/0x230
>    mlx5_vdpa_dev_add+0x328/0x8b0 [mlx5_vdpa]
>    vdpa_nl_cmd_dev_add_set_doit+0x2b8/0x4c0 [vdpa]
>    genl_family_rcv_msg_doit+0xd0/0x120
>    genl_rcv_msg+0x180/0x2b0
>    ? __vdpa_alloc_device+0x1b0/0x1b0 [vdpa]
>    ? genl_family_rcv_msg_dumpit+0xf0/0xf0
>    netlink_rcv_skb+0x54/0x100
>    genl_rcv+0x24/0x40
>    netlink_unicast+0x1fc/0x2d0
>    netlink_sendmsg+0x1e4/0x410
>    __sock_sendmsg+0x38/0x60
>    ? sockfd_lookup_light+0x12/0x60
>    __sys_sendto+0x105/0x160
>    ? __count_memcg_events+0x53/0xe0
>    ? handle_mm_fault+0x100/0x220
>    ? do_user_addr_fault+0x40d/0x620
>    __x64_sys_sendto+0x20/0x30
>    do_syscall_64+0x4c/0x100
>    entry_SYSCALL_64_after_hwframe+0x4b/0x53
>   RIP: 0033:0x7fdfa6c66b57
>   Code: ...
>   RSP: 002b:00007ffeace22998 EFLAGS: 00000202 ORIG_RAX: 000000000000002c
>   RAX: ffffffffffffffda RBX: 000055a498608350 RCX: 00007fdfa6c66b57
>   RDX: 000000000000006c RSI: 000055a498608350 RDI: 0000000000000003
>   RBP: 00007ffeace229c0 R08: 00007fdfa6d35200 R09: 000000000000000c
>   R10: 0000000000000000 R11: 0000000000000202 R12: 000055a4986082a0
>   R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffeace233f3
>    </TASK>
>   Modules linked in: ...
>   CR2: 00000000000000b8
>
> Fixes: 62111654481d ("vdpa/mlx5: Postpone MR deletion")

Acked-by: Eugenio Pérez <eperezma@redhat.com>

Thanks!

> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> ---
>  drivers/vdpa/mlx5/net/mlx5_vnet.c | 21 +++++----------------
>  1 file changed, 5 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index dee019977716..5f581e71e201 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -3963,28 +3963,28 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>         mvdev->vdev.dma_dev = &mdev->pdev->dev;
>         err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
>         if (err)
> -               goto err_mpfs;
> +               goto err_alloc;
>
>         err = mlx5_vdpa_init_mr_resources(mvdev);
>         if (err)
> -               goto err_res;
> +               goto err_alloc;
>
>         if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
>                 err = mlx5_vdpa_create_dma_mr(mvdev);
>                 if (err)
> -                       goto err_mr_res;
> +                       goto err_alloc;
>         }
>
>         err = alloc_fixed_resources(ndev);
>         if (err)
> -               goto err_mr;
> +               goto err_alloc;
>
>         ndev->cvq_ent.mvdev = mvdev;
>         INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
>         mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
>         if (!mvdev->wq) {
>                 err = -ENOMEM;
> -               goto err_res2;
> +               goto err_alloc;
>         }
>
>         mvdev->vdev.mdev = &mgtdev->mgtdev;
> @@ -4010,17 +4010,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>         _vdpa_unregister_device(&mvdev->vdev);
>  err_reg:
>         destroy_workqueue(mvdev->wq);
> -err_res2:
> -       free_fixed_resources(ndev);
> -err_mr:
> -       mlx5_vdpa_clean_mrs(mvdev);
> -err_mr_res:
> -       mlx5_vdpa_destroy_mr_resources(mvdev);
> -err_res:
> -       mlx5_vdpa_free_resources(&ndev->mvdev);
> -err_mpfs:
> -       if (!is_zero_ether_addr(config->mac))
> -               mlx5_mpfs_del_mac(pfmdev, config->mac);
>  err_alloc:
>         put_device(&mvdev->vdev.dev);
>         return err;
> --
> 2.47.0
>

Re: [PATCH] vdpa/mlx5: Fix error path during device add

Posted by Jason Wang 1 year, 3 months ago

On Wed, Nov 6, 2024 at 2:52 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>
> In the error recovery path of mlx5_vdpa_dev_add(), the cleanup is
> executed and at the end put_device() is called which ends up calling
> mlx5_vdpa_free(). This function will execute the same cleanup all over
> again. Most resources support being cleaned up twice, but the recent
> mlx5_vdpa_destroy_mr_resources() doesn't.
>
> This change drops the explicit cleanup from within the
> mlx5_vdpa_dev_add() and lets mlx5_vdpa_free() do its work.
>
> This issue was discovered while trying to add 2 vdpa devices with the
> same name:
> $> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.2
> $> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.3
>
> ... yields the following dump:
>
>   BUG: kernel NULL pointer dereference, address: 00000000000000b8
>   #PF: supervisor read access in kernel mode
>   #PF: error_code(0x0000) - not-present page
>   PGD 0 P4D 0
>   Oops: Oops: 0000 [#1] SMP
>   CPU: 4 UID: 0 PID: 2811 Comm: vdpa Not tainted 6.12.0-rc6 #1
>   Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
>   RIP: 0010:destroy_workqueue+0xe/0x2a0
>   Code: ...
>   RSP: 0018:ffff88814920b9a8 EFLAGS: 00010282
>   RAX: 0000000000000000 RBX: ffff888105c10000 RCX: 0000000000000000
>   RDX: 0000000000000001 RSI: ffff888100400168 RDI: 0000000000000000
>   RBP: 0000000000000000 R08: ffff888100120c00 R09: ffffffff828578c0
>   R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
>   R13: ffff888131fd99a0 R14: 0000000000000000 R15: ffff888105c10580
>   FS:  00007fdfa6b4f740(0000) GS:ffff88852ca00000(0000) knlGS:0000000000000000
>   CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>   CR2: 00000000000000b8 CR3: 000000018db09006 CR4: 0000000000372eb0
>   DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>   DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
>   Call Trace:
>    <TASK>
>    ? __die+0x20/0x60
>    ? page_fault_oops+0x150/0x3e0
>    ? exc_page_fault+0x74/0x130
>    ? asm_exc_page_fault+0x22/0x30
>    ? destroy_workqueue+0xe/0x2a0
>    mlx5_vdpa_destroy_mr_resources+0x2b/0x40 [mlx5_vdpa]
>    mlx5_vdpa_free+0x45/0x150 [mlx5_vdpa]
>    vdpa_release_dev+0x1e/0x50 [vdpa]
>    device_release+0x31/0x90
>    kobject_put+0x8d/0x230
>    mlx5_vdpa_dev_add+0x328/0x8b0 [mlx5_vdpa]
>    vdpa_nl_cmd_dev_add_set_doit+0x2b8/0x4c0 [vdpa]
>    genl_family_rcv_msg_doit+0xd0/0x120
>    genl_rcv_msg+0x180/0x2b0
>    ? __vdpa_alloc_device+0x1b0/0x1b0 [vdpa]
>    ? genl_family_rcv_msg_dumpit+0xf0/0xf0
>    netlink_rcv_skb+0x54/0x100
>    genl_rcv+0x24/0x40
>    netlink_unicast+0x1fc/0x2d0
>    netlink_sendmsg+0x1e4/0x410
>    __sock_sendmsg+0x38/0x60
>    ? sockfd_lookup_light+0x12/0x60
>    __sys_sendto+0x105/0x160
>    ? __count_memcg_events+0x53/0xe0
>    ? handle_mm_fault+0x100/0x220
>    ? do_user_addr_fault+0x40d/0x620
>    __x64_sys_sendto+0x20/0x30
>    do_syscall_64+0x4c/0x100
>    entry_SYSCALL_64_after_hwframe+0x4b/0x53
>   RIP: 0033:0x7fdfa6c66b57
>   Code: ...
>   RSP: 002b:00007ffeace22998 EFLAGS: 00000202 ORIG_RAX: 000000000000002c
>   RAX: ffffffffffffffda RBX: 000055a498608350 RCX: 00007fdfa6c66b57
>   RDX: 000000000000006c RSI: 000055a498608350 RDI: 0000000000000003
>   RBP: 00007ffeace229c0 R08: 00007fdfa6d35200 R09: 000000000000000c
>   R10: 0000000000000000 R11: 0000000000000202 R12: 000055a4986082a0
>   R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffeace233f3
>    </TASK>
>   Modules linked in: ...
>   CR2: 00000000000000b8
>
> Fixes: 62111654481d ("vdpa/mlx5: Postpone MR deletion")
> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>

Acked-by: Jason Wang <jasowang@redhat.com>

Thanks