From: Shay Drory <shayd@nvidia.com>
When utilizing Socket-Direct single netdev functionality the driver
resolves the actual auxiliary device using mlx5_sd_get_adev(). However,
the current implementation returns the primary ETH auxiliary device
without holding the device lock, leading to a potential race condition
where the ETH device could be unbound or removed concurrently during
probe, suspend, resume, or remove operations.[1]
Fix this by introducing mlx5_sd_put_adev() and updating
mlx5_sd_get_adev() so that secondaries devices would acquire the device
lock of the returned auxiliary device. After the lock is acquired, a
second devcom check is needed[2].
In addition, update The callers to pair the get operation with the new
put operation, ensuring the lock is held while the auxiliary device is
being operated on and released afterwards.
[1]
for example:
BUG: kernel NULL pointer dereference, address: 0000000000000370
PGD 0 P4D 0
Oops: Oops: 0000 [#1] SMP
CPU: 4 UID: 0 PID: 3945 Comm: bash Not tainted 6.19.0-rc3+ #1 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
RIP: 0010:mlx5e_dcbnl_dscp_app+0x23/0x100 [mlx5_core]
Call Trace:
<TASK>
mlx5e_remove+0x82/0x12a [mlx5_core]
device_release_driver_internal+0x194/0x1f0
bus_remove_device+0xc6/0x140
device_del+0x159/0x3c0
? devl_param_driverinit_value_get+0x29/0x80
mlx5_rescan_drivers_locked+0x92/0x160 [mlx5_core]
mlx5_unregister_device+0x34/0x50 [mlx5_core]
mlx5_uninit_one+0x43/0xb0 [mlx5_core]
remove_one+0x4e/0xc0 [mlx5_core]
pci_device_remove+0x39/0xa0
device_release_driver_internal+0x194/0x1f0
unbind_store+0x99/0xa0
kernfs_fop_write_iter+0x12e/0x1e0
vfs_write+0x215/0x3d0
ksys_write+0x5f/0xd0
do_syscall_64+0x55/0xe90
entry_SYSCALL_64_after_hwframe+0x4b/0x53
[2]
CPU0 (primary) CPU1 (secondary)
==========================================================================
mlx5e_remove() (device_lock held)
mlx5e_remove() (2nd device_lock held)
mlx5_sd_get_adev()
mlx5_devcom_comp_is_ready() => true
device_lock(primary)
mlx5_sd_get_adev() ==> ret adev
_mlx5e_remove()
mlx5_sd_cleanup()
// mlx5e_remove finished
// releasing device_lock
//need another check here...
mlx5_devcom_comp_is_ready() => false
Fixes: 381978d28317 ("net/mlx5e: Create single netdev per SD group")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
.../net/ethernet/mellanox/mlx5/core/en_main.c | 18 ++++++++++++++----
.../net/ethernet/mellanox/mlx5/core/lib/sd.c | 13 +++++++++++++
.../net/ethernet/mellanox/mlx5/core/lib/sd.h | 2 ++
3 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b6c12460b54a..5761f655f488 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -6657,8 +6657,11 @@ static int mlx5e_resume(struct auxiliary_device *adev)
return err;
actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
- if (actual_adev)
- return _mlx5e_resume(actual_adev);
+ if (actual_adev) {
+ err = _mlx5e_resume(actual_adev);
+ mlx5_sd_put_adev(actual_adev, adev);
+ return err;
+ }
return 0;
}
@@ -6698,6 +6701,8 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
err = _mlx5e_suspend(actual_adev, false);
mlx5_sd_cleanup(mdev);
+ if (actual_adev)
+ mlx5_sd_put_adev(actual_adev, adev);
return err;
}
@@ -6787,8 +6792,11 @@ static int mlx5e_probe(struct auxiliary_device *adev,
return err;
actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
- if (actual_adev)
- return _mlx5e_probe(actual_adev);
+ if (actual_adev) {
+ err = _mlx5e_probe(actual_adev);
+ mlx5_sd_put_adev(actual_adev, adev);
+ return err;
+ }
return 0;
}
@@ -6841,6 +6849,8 @@ static void mlx5e_remove(struct auxiliary_device *adev)
_mlx5e_remove(actual_adev);
mlx5_sd_cleanup(mdev);
+ if (actual_adev)
+ mlx5_sd_put_adev(actual_adev, adev);
}
static const struct auxiliary_device_id mlx5e_id_table[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index 954942ad93c5..060649645012 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -528,5 +528,18 @@ struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
if (dev == primary)
return adev;
+ device_lock(&primary->priv.adev[idx]->adev.dev);
+ /* In case primary finish removing its adev */
+ if (!mlx5_devcom_comp_is_ready(sd->devcom)) {
+ device_unlock(&primary->priv.adev[idx]->adev.dev);
+ return NULL;
+ }
return &primary->priv.adev[idx]->adev;
}
+
+void mlx5_sd_put_adev(struct auxiliary_device *actual_adev,
+ struct auxiliary_device *adev)
+{
+ if (actual_adev != adev)
+ device_unlock(&actual_adev->dev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
index 137efaf9aabc..9bfd5b9756b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
@@ -15,6 +15,8 @@ struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int c
struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
struct auxiliary_device *adev,
int idx);
+void mlx5_sd_put_adev(struct auxiliary_device *actual_adev,
+ struct auxiliary_device *adev);
int mlx5_sd_init(struct mlx5_core_dev *dev);
void mlx5_sd_cleanup(struct mlx5_core_dev *dev);
--
2.44.0
© 2016 - 2026 Red Hat, Inc.