[PATCH rdma-next 4/7] RDMA/mlx5: Change default device for LAG slaves in RDMA TRANSPORT namespaces

From: Patrisious Haddad <phaddad@nvidia.com>

When a LAG is configured, change the root namespace core device of the
RDMA_TRANSPORT namespaces for all of the LAG slaves to be the core
device of the LAG master, so that all tables are created through the
master device.
Once the LAG is disabled, revert back to the native core device.

Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Edward Srouji <edwards@nvidia.com>
---
 drivers/infiniband/hw/mlx5/ib_rep.c | 74 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 2 deletions(-)
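
The new mlx5_ib_set_owner_transport() below moves the RDMA_TRANSPORT TX
root first and the RX root second, and rolls the TX root back to the
original owner if the RX step fails, so both tables always end up under
a single owner. A minimal user-space sketch of that ordering (set_root()
here is a hypothetical stand-in for mlx5_fs_set_root_dev(), and plain
ints stand in for the mlx5_core_dev pointers):

	#include <stdio.h>

	enum table { TX, RX };

	/* Hypothetical stand-in for mlx5_fs_set_root_dev(): 0 on success. */
	static int set_root(int cur_owner, int new_owner, enum table t)
	{
		printf("table %s: root %d -> %d\n", t == TX ? "TX" : "RX",
		       cur_owner, new_owner);
		return 0;
	}

	static int set_owner(int cur_owner, int new_owner)
	{
		int ret;

		ret = set_root(cur_owner, new_owner, TX);
		if (ret)
			return ret;

		ret = set_root(cur_owner, new_owner, RX);
		if (ret) {
			/* Undo the TX move so both tables keep one owner. */
			set_root(cur_owner, cur_owner, TX);
			return ret;
		}

		return 0;
	}

	int main(void)
	{
		/* Move ownership of both tables from device 1 to device 2. */
		return set_owner(1, 2);
	}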

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index cc8859d3c2f5..bbecca405171 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -44,6 +44,63 @@ static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
 	}
 }
 
+static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
+					struct mlx5_core_dev *new_owner)
+{
+	int ret;
+
+	if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
+	    !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
+		return 0;
+
+	if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
+	    !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
+		return 0;
+
+	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+				   FS_FT_RDMA_TRANSPORT_TX);
+	if (ret)
+		return ret;
+
+	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+				   FS_FT_RDMA_TRANSPORT_RX);
+	if (ret) {
+		mlx5_fs_set_root_dev(cur_owner, cur_owner,
+				     FS_FT_RDMA_TRANSPORT_TX);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_dev *peer_dev;
+	int i, ret;
+
+	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+		ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
+		WARN_ON_ONCE(ret);
+	}
+}
+
+static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_dev *peer_dev;
+	int ret;
+	int i;
+
+	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+		ret = mlx5_ib_set_owner_transport(peer_dev, dev);
+		if (ret) {
+			mlx5_ib_release_transport(dev);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
@@ -88,10 +145,18 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	else
 		return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
 
+	if (mlx5_lag_is_shared_fdb(dev)) {
+		ret = mlx5_ib_take_transport(lag_master);
+		if (ret)
+			return ret;
+	}
+
 	ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
 					 mlx5_core_net(lag_master));
-	if (!ibdev)
-		return -ENOMEM;
+	if (!ibdev) {
+		ret = -ENOMEM;
+		goto release_transport;
+	}
 
 	ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
 			      GFP_KERNEL);
@@ -127,6 +192,10 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	kfree(ibdev->port);
 fail_port:
 	ib_dealloc_device(&ibdev->ib_dev);
+release_transport:
+	if (mlx5_lag_is_shared_fdb(lag_master))
+		mlx5_ib_release_transport(lag_master);
+
 	return ret;
 }
 
@@ -182,6 +251,7 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 				esw = peer_mdev->priv.eswitch;
 				mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
 			}
+			mlx5_ib_release_transport(mdev);
 		}
 		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
 	}

-- 
2.47.1