Introduce a new function, mlx5_vdpa_exec_async_cmds(), which
wraps the mlx5_core async firmware command API so that certain
operations in this driver can be issued in parallel.

The wrapper handles the case when mlx5_cmd_exec_cb() returns
-EBUSY due to the command being throttled: when the wrapper's
own commands are still in flight it waits for the oldest one to
complete and retries, otherwise it falls back to the synchronous
command API.
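
For illustration, a minimal (hypothetical) caller could look like
the sketch below. Only the mlx5_vdpa_exec_async_cmds() signature
and the struct mlx5_vdpa_async_cmd fields come from this patch;
the helper name and the choice of QUERY_VIRTIO_NET_Q are
assumptions, and filling the command headers via MLX5_SET() is
elided:

static int query_vqs_sketch(struct mlx5_vdpa_dev *mvdev, int num_vqs)
{
	struct mlx5_vdpa_async_cmd *cmds;
	int err = 0;
	int i;

	cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL);
	if (!cmds)
		return -ENOMEM;

	for (i = 0; i < num_vqs; i++) {
		cmds[i].inlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_in);
		cmds[i].outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
		cmds[i].in = kvzalloc(cmds[i].inlen, GFP_KERNEL);
		cmds[i].out = kvzalloc(cmds[i].outlen, GFP_KERNEL);
		if (!cmds[i].in || !cmds[i].out) {
			err = -ENOMEM;
			goto out_free;
		}
	}

	/* Issues all commands asynchronously and waits for every
	 * completion; err reports the first issuing failure, while
	 * cmds[i].err carries the per-command firmware status.
	 */
	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, num_vqs);

	for (i = 0; i < num_vqs; i++)
		if (cmds[i].err)
			mlx5_vdpa_err(mvdev, "query of vq %d failed: %d\n",
				      i, cmds[i].err);

out_free:
	for (i = 0; i < num_vqs; i++) {
		kvfree(cmds[i].in);
		kvfree(cmds[i].out);
	}
	kvfree(cmds);
	return err;
}
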
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/vdpa/mlx5/core/mlx5_vdpa.h | 15 ++++++
drivers/vdpa/mlx5/core/resources.c | 73 ++++++++++++++++++++++++++++++
2 files changed, 88 insertions(+)
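
Note (illustration, not part of this patch): issue_async_cmd()
below uses mvdev->async_ctx, which must be initialized through the
mlx5_core async context API before any command is issued; where
that init lives is outside these hunks. A sketch of the assumed
lifecycle, with hypothetical helper names:

/* Hypothetical helpers illustrating the expected lifecycle. */
static void mlx5_vdpa_init_async_ctx(struct mlx5_vdpa_dev *mvdev)
{
	mlx5_cmd_init_async_ctx(mvdev->mdev, &mvdev->async_ctx);
}

static void mlx5_vdpa_cleanup_async_ctx(struct mlx5_vdpa_dev *mvdev)
{
	/* Blocks until all outstanding command callbacks have run,
	 * so completions used by the wrapper cannot outlive the ctx.
	 */
	mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx);
}
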
diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 424d445ebee4..b34e9b93d56e 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -105,6 +105,18 @@ struct mlx5_vdpa_dev {
bool suspended;
};

+struct mlx5_vdpa_async_cmd {
+ int err;
+ struct mlx5_async_work cb_work;
+ struct completion cmd_done;
+
+ void *in;
+ size_t inlen;
+
+ void *out;
+ size_t outlen;
+};
+
int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn);
void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn);
int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn);
@@ -134,6 +146,9 @@ int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
unsigned int asid);
int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev);
int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
+int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_async_cmd *cmds,
+			      int num_cmds);

#define mlx5_vdpa_err(__dev, format, ...) \
dev_err((__dev)->mdev->device, "%s:%d:(pid %d) error: " format, __func__, __LINE__, \
diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
index 5c5a41b64bfc..22ea32fe007b 100644
--- a/drivers/vdpa/mlx5/core/resources.c
+++ b/drivers/vdpa/mlx5/core/resources.c
@@ -321,3 +321,76 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
mutex_destroy(&mvdev->mr_mtx);
res->valid = false;
}
+
+static void virtqueue_cmd_callback(int status, struct mlx5_async_work *context)
+{
+ struct mlx5_vdpa_async_cmd *cmd =
+ container_of(context, struct mlx5_vdpa_async_cmd, cb_work);
+
+ cmd->err = mlx5_cmd_check(context->ctx->dev, status, cmd->in, cmd->out);
+ complete(&cmd->cmd_done);
+}
+
+static int issue_async_cmd(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_async_cmd *cmds,
+ int issued,
+ int *completed)
+
+{
+ struct mlx5_vdpa_async_cmd *cmd = &cmds[issued];
+ int err;
+
+retry:
+ err = mlx5_cmd_exec_cb(&mvdev->async_ctx,
+ cmd->in, cmd->inlen,
+ cmd->out, cmd->outlen,
+ virtqueue_cmd_callback,
+ &cmd->cb_work);
+ if (err == -EBUSY) {
+ if (*completed < issued) {
+ /* Throttled by own commands: wait for oldest completion. */
+ wait_for_completion(&cmds[*completed].cmd_done);
+ (*completed)++;
+
+ goto retry;
+ } else {
+ /* Throttled by external commands: switch to sync api. */
+ err = mlx5_cmd_exec(mvdev->mdev,
+ cmd->in, cmd->inlen,
+ cmd->out, cmd->outlen);
+ if (!err)
+ (*completed)++;
+ }
+ }
+
+ return err;
+}
+
+int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_async_cmd *cmds,
+ int num_cmds)
+{
+ int completed = 0;
+ int issued = 0;
+ int err = 0;
+
+ for (int i = 0; i < num_cmds; i++)
+ init_completion(&cmds[i].cmd_done);
+
+ while (issued < num_cmds) {
+
+ err = issue_async_cmd(mvdev, cmds, issued, &completed);
+ if (err) {
+ mlx5_vdpa_err(mvdev, "error issuing command %d of %d: %d\n",
+ issued, num_cmds, err);
+ break;
+ }
+
+ issued++;
+ }
+
+ while (completed < issued)
+ wait_for_completion(&cmds[completed++].cmd_done);
+
+ return err;
+}
--
2.45.1
On Fri, Aug 16, 2024 at 11:02 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>
> Introduce a new function, mlx5_vdpa_exec_async_cmds(), which
> wraps the mlx5_core async firmware command API so that certain
> operations in this driver can be issued in parallel.
>
> The wrapper handles the case when mlx5_cmd_exec_cb() returns
> -EBUSY due to the command being throttled: when the wrapper's
> own commands are still in flight it waits for the oldest one to
> complete and retries, otherwise it falls back to the synchronous
> command API.
>
> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Acked-by: Eugenio Pérez <eperezma@redhat.com>