[PATCH for-next] RDMA/hns: Support mmapping reset state to userspace

Junxian Huang posted 1 patch 1 month, 1 week ago
drivers/infiniband/hw/hns/hns_roce_device.h |  4 ++
drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 47 ++++++++++++++++++++-
drivers/infiniband/hw/hns/hns_roce_main.c   | 36 ++++++++++++++++
include/uapi/rdma/hns-abi.h                 |  6 +++
4 files changed, 91 insertions(+), 2 deletions(-)
[PATCH for-next] RDMA/hns: Support mmapping reset state to userspace
Posted by Junxian Huang 1 month, 1 week ago
From: Chengchang Tang <tangchengchang@huawei.com>

Mmap reset state to notify userspace about HW reset. The mmaped flag
hw_ready will be initiated to a non-zero value. When HW is reset,
the mmap page will be zapped and userspace will get a zero value of
hw_ready.

Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  4 ++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 47 ++++++++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_main.c   | 36 ++++++++++++++++
 include/uapi/rdma/hns-abi.h                 |  6 +++
 4 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 0b1e21cb6d2d..59bca8067a7f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -202,6 +202,7 @@ struct hns_roce_uar {
 enum hns_roce_mmap_type {
 	HNS_ROCE_MMAP_TYPE_DB = 1,
 	HNS_ROCE_MMAP_TYPE_DWQE,
+	HNS_ROCE_MMAP_TYPE_RESET,
 };
 
 struct hns_user_mmap_entry {
@@ -216,6 +217,7 @@ struct hns_roce_ucontext {
 	struct list_head	page_list;
 	struct mutex		page_mutex;
 	struct hns_user_mmap_entry *db_mmap_entry;
+	struct hns_user_mmap_entry *reset_mmap_entry;
 	u32			config;
 };
 
@@ -1020,6 +1022,8 @@ struct hns_roce_dev {
 	int			loop_idc;
 	u32			sdb_offset;
 	u32			odb_offset;
+	struct page		*reset_page; /* store reset state */
+	void			*reset_kaddr; /* addr of reset page */
 	const struct hns_roce_hw *hw;
 	void			*priv;
 	struct workqueue_struct *irq_workq;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index f1feaa79f78e..2f72074b7cf9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -37,6 +37,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <linux/vmalloc.h>
 #include <net/addrconf.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib_cache.h>
@@ -2865,6 +2866,36 @@ static int free_mr_init(struct hns_roce_dev *hr_dev)
 	return ret;
 }
 
+static int hns_roce_v2_get_reset_page(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_reset_state *state;
+
+	hr_dev->reset_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!hr_dev->reset_page)
+		return -ENOMEM;
+
+	hr_dev->reset_kaddr = vmap(&hr_dev->reset_page, 1, VM_MAP, PAGE_KERNEL);
+	if (!hr_dev->reset_kaddr)
+		goto err_with_vmap;
+
+	state = hr_dev->reset_kaddr;
+	state->hw_ready = 1;
+
+	return 0;
+
+err_with_vmap:
+	put_page(hr_dev->reset_page);
+	return -ENOMEM;
+}
+
+static void hns_roce_v2_put_reset_page(struct hns_roce_dev *hr_dev)
+{
+	vunmap(hr_dev->reset_kaddr);
+	hr_dev->reset_kaddr = NULL;
+	put_page(hr_dev->reset_page);
+	hr_dev->reset_page = NULL;
+}
+
 static int get_hem_table(struct hns_roce_dev *hr_dev)
 {
 	unsigned int qpc_count;
@@ -2944,14 +2975,21 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
 {
 	int ret;
 
+	ret = hns_roce_v2_get_reset_page(hr_dev);
+	if (ret) {
+		dev_err(hr_dev->dev,
+			"reset state init failed, ret = %d.\n", ret);
+		return ret;
+	}
+
 	/* The hns ROCEE requires the extdb info to be cleared before using */
 	ret = hns_roce_clear_extdb_list_info(hr_dev);
 	if (ret)
-		return ret;
+		goto err_clear_extdb_failed;
 
 	ret = get_hem_table(hr_dev);
 	if (ret)
-		return ret;
+		goto err_clear_extdb_failed;
 
 	if (hr_dev->is_vf)
 		return 0;
@@ -2967,6 +3005,9 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
 err_llm_init_failed:
 	put_hem_table(hr_dev);
 
+err_clear_extdb_failed:
+	hns_roce_v2_put_reset_page(hr_dev);
+
 	return ret;
 }
 
@@ -2980,6 +3021,8 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
 	if (!hr_dev->is_vf)
 		hns_roce_free_link_table(hr_dev);
 
+	hns_roce_v2_put_reset_page(hr_dev);
+
 	if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09)
 		free_dip_list(hr_dev);
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 49315f39361d..1620d4318480 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -324,6 +324,7 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
 				ucontext, &entry->rdma_entry, length, 0);
 		break;
 	case HNS_ROCE_MMAP_TYPE_DWQE:
+	case HNS_ROCE_MMAP_TYPE_RESET:
 		ret = rdma_user_mmap_entry_insert_range(
 				ucontext, &entry->rdma_entry, length, 1,
 				U32_MAX);
@@ -341,6 +342,20 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
 	return entry;
 }
 
+static int hns_roce_alloc_reset_entry(struct ib_ucontext *uctx)
+{
+	struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
+	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
+
+	context->reset_mmap_entry = hns_roce_user_mmap_entry_insert(
+		uctx, (u64)page_to_phys(hr_dev->reset_page), PAGE_SIZE,
+		HNS_ROCE_MMAP_TYPE_RESET);
+	if (!context->reset_mmap_entry)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static void hns_roce_dealloc_uar_entry(struct hns_roce_ucontext *context)
 {
 	if (context->db_mmap_entry)
@@ -369,6 +384,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
 	struct hns_roce_ib_alloc_ucontext_resp resp = {};
 	struct hns_roce_ib_alloc_ucontext ucmd = {};
+	struct rdma_user_mmap_entry *rdma_entry;
 	int ret = -EAGAIN;
 
 	if (!hr_dev->active)
@@ -421,6 +437,13 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 
 	resp.cqe_size = hr_dev->caps.cqe_sz;
 
+	ret = hns_roce_alloc_reset_entry(uctx);
+	if (ret)
+		goto error_fail_reset_entry;
+
+	rdma_entry = &context->reset_mmap_entry->rdma_entry;
+	resp.reset_mmap_key = rdma_user_mmap_get_offset(rdma_entry);
+
 	ret = ib_copy_to_udata(udata, &resp,
 			       min(udata->outlen, sizeof(resp)));
 	if (ret)
@@ -429,6 +452,9 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	return 0;
 
 error_fail_copy_to_udata:
+	rdma_user_mmap_entry_remove(&context->reset_mmap_entry->rdma_entry);
+
+error_fail_reset_entry:
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
 	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
 		mutex_destroy(&context->page_mutex);
@@ -448,6 +474,8 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device);
 
+	rdma_user_mmap_entry_remove(&context->reset_mmap_entry->rdma_entry);
+
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
 	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
 		mutex_destroy(&context->page_mutex);
@@ -485,6 +513,14 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
 	case HNS_ROCE_MMAP_TYPE_DWQE:
 		prot = pgprot_device(vma->vm_page_prot);
 		break;
+	case HNS_ROCE_MMAP_TYPE_RESET:
+		if (vma->vm_flags & (VM_WRITE | VM_EXEC)) {
+			ret = -EINVAL;
+			goto out;
+		}
+		vm_flags_set(vma, VM_DONTEXPAND);
+		prot = vma->vm_page_prot;
+		break;
 	default:
 		ret = -EINVAL;
 		goto out;
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 94e861870e27..065eb2e0a690 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -136,6 +136,7 @@ struct hns_roce_ib_alloc_ucontext_resp {
 	__u32	max_inline_data;
 	__u8	congest_type;
 	__u8	reserved0[7];
+	__aligned_u64 reset_mmap_key;
 };
 
 struct hns_roce_ib_alloc_ucontext {
@@ -153,4 +154,9 @@ struct hns_roce_ib_create_ah_resp {
 	__u8 tc_mode;
 };
 
+struct hns_roce_reset_state {
+	__u32 hw_ready;
+	__u32 reserved;
+};
+
 #endif /* HNS_ABI_USER_H */
-- 
2.33.0
Re: [PATCH for-next] RDMA/hns: Support mmapping reset state to userspace
Posted by Leon Romanovsky 3 weeks, 1 day ago
On Mon, Oct 14, 2024 at 09:07:31PM +0800, Junxian Huang wrote:
> From: Chengchang Tang <tangchengchang@huawei.com>
> 
> Mmap reset state to notify userspace about HW reset. The mmaped flag
> hw_ready will be initiated to a non-zero value. When HW is reset,
> the mmap page will be zapped and userspace will get a zero value of
> hw_ready.

I didn't forget that patch, but not applying now as it seems extremely
sketchy for me, so waiting for anyone to come and say their opinion too.

Thanks

> 
> Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
> Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_device.h |  4 ++
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 47 ++++++++++++++++++++-
>  drivers/infiniband/hw/hns/hns_roce_main.c   | 36 ++++++++++++++++
>  include/uapi/rdma/hns-abi.h                 |  6 +++
>  4 files changed, 91 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> index 0b1e21cb6d2d..59bca8067a7f 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> @@ -202,6 +202,7 @@ struct hns_roce_uar {
>  enum hns_roce_mmap_type {
>  	HNS_ROCE_MMAP_TYPE_DB = 1,
>  	HNS_ROCE_MMAP_TYPE_DWQE,
> +	HNS_ROCE_MMAP_TYPE_RESET,
>  };
>  
>  struct hns_user_mmap_entry {
> @@ -216,6 +217,7 @@ struct hns_roce_ucontext {
>  	struct list_head	page_list;
>  	struct mutex		page_mutex;
>  	struct hns_user_mmap_entry *db_mmap_entry;
> +	struct hns_user_mmap_entry *reset_mmap_entry;
>  	u32			config;
>  };
>  
> @@ -1020,6 +1022,8 @@ struct hns_roce_dev {
>  	int			loop_idc;
>  	u32			sdb_offset;
>  	u32			odb_offset;
> +	struct page		*reset_page; /* store reset state */
> +	void			*reset_kaddr; /* addr of reset page */
>  	const struct hns_roce_hw *hw;
>  	void			*priv;
>  	struct workqueue_struct *irq_workq;
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index f1feaa79f78e..2f72074b7cf9 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -37,6 +37,7 @@
>  #include <linux/kernel.h>
>  #include <linux/types.h>
>  #include <linux/workqueue.h>
> +#include <linux/vmalloc.h>
>  #include <net/addrconf.h>
>  #include <rdma/ib_addr.h>
>  #include <rdma/ib_cache.h>
> @@ -2865,6 +2866,36 @@ static int free_mr_init(struct hns_roce_dev *hr_dev)
>  	return ret;
>  }
>  
> +static int hns_roce_v2_get_reset_page(struct hns_roce_dev *hr_dev)
> +{
> +	struct hns_roce_reset_state *state;
> +
> +	hr_dev->reset_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +	if (!hr_dev->reset_page)
> +		return -ENOMEM;
> +
> +	hr_dev->reset_kaddr = vmap(&hr_dev->reset_page, 1, VM_MAP, PAGE_KERNEL);
> +	if (!hr_dev->reset_kaddr)
> +		goto err_with_vmap;
> +
> +	state = hr_dev->reset_kaddr;
> +	state->hw_ready = 1;
> +
> +	return 0;
> +
> +err_with_vmap:
> +	put_page(hr_dev->reset_page);
> +	return -ENOMEM;
> +}
> +
> +static void hns_roce_v2_put_reset_page(struct hns_roce_dev *hr_dev)
> +{
> +	vunmap(hr_dev->reset_kaddr);
> +	hr_dev->reset_kaddr = NULL;
> +	put_page(hr_dev->reset_page);
> +	hr_dev->reset_page = NULL;
> +}
> +
>  static int get_hem_table(struct hns_roce_dev *hr_dev)
>  {
>  	unsigned int qpc_count;
> @@ -2944,14 +2975,21 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
>  {
>  	int ret;
>  
> +	ret = hns_roce_v2_get_reset_page(hr_dev);
> +	if (ret) {
> +		dev_err(hr_dev->dev,
> +			"reset state init failed, ret = %d.\n", ret);
> +		return ret;
> +	}
> +
>  	/* The hns ROCEE requires the extdb info to be cleared before using */
>  	ret = hns_roce_clear_extdb_list_info(hr_dev);
>  	if (ret)
> -		return ret;
> +		goto err_clear_extdb_failed;
>  
>  	ret = get_hem_table(hr_dev);
>  	if (ret)
> -		return ret;
> +		goto err_clear_extdb_failed;
>  
>  	if (hr_dev->is_vf)
>  		return 0;
> @@ -2967,6 +3005,9 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
>  err_llm_init_failed:
>  	put_hem_table(hr_dev);
>  
> +err_clear_extdb_failed:
> +	hns_roce_v2_put_reset_page(hr_dev);
> +
>  	return ret;
>  }
>  
> @@ -2980,6 +3021,8 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
>  	if (!hr_dev->is_vf)
>  		hns_roce_free_link_table(hr_dev);
>  
> +	hns_roce_v2_put_reset_page(hr_dev);
> +
>  	if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09)
>  		free_dip_list(hr_dev);
>  }
> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
> index 49315f39361d..1620d4318480 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_main.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c
> @@ -324,6 +324,7 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
>  				ucontext, &entry->rdma_entry, length, 0);
>  		break;
>  	case HNS_ROCE_MMAP_TYPE_DWQE:
> +	case HNS_ROCE_MMAP_TYPE_RESET:
>  		ret = rdma_user_mmap_entry_insert_range(
>  				ucontext, &entry->rdma_entry, length, 1,
>  				U32_MAX);
> @@ -341,6 +342,20 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
>  	return entry;
>  }
>  
> +static int hns_roce_alloc_reset_entry(struct ib_ucontext *uctx)
> +{
> +	struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
> +	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
> +
> +	context->reset_mmap_entry = hns_roce_user_mmap_entry_insert(
> +		uctx, (u64)page_to_phys(hr_dev->reset_page), PAGE_SIZE,
> +		HNS_ROCE_MMAP_TYPE_RESET);
> +	if (!context->reset_mmap_entry)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
>  static void hns_roce_dealloc_uar_entry(struct hns_roce_ucontext *context)
>  {
>  	if (context->db_mmap_entry)
> @@ -369,6 +384,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
>  	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
>  	struct hns_roce_ib_alloc_ucontext_resp resp = {};
>  	struct hns_roce_ib_alloc_ucontext ucmd = {};
> +	struct rdma_user_mmap_entry *rdma_entry;
>  	int ret = -EAGAIN;
>  
>  	if (!hr_dev->active)
> @@ -421,6 +437,13 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
>  
>  	resp.cqe_size = hr_dev->caps.cqe_sz;
>  
> +	ret = hns_roce_alloc_reset_entry(uctx);
> +	if (ret)
> +		goto error_fail_reset_entry;
> +
> +	rdma_entry = &context->reset_mmap_entry->rdma_entry;
> +	resp.reset_mmap_key = rdma_user_mmap_get_offset(rdma_entry);
> +
>  	ret = ib_copy_to_udata(udata, &resp,
>  			       min(udata->outlen, sizeof(resp)));
>  	if (ret)
> @@ -429,6 +452,9 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
>  	return 0;
>  
>  error_fail_copy_to_udata:
> +	rdma_user_mmap_entry_remove(&context->reset_mmap_entry->rdma_entry);
> +
> +error_fail_reset_entry:
>  	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
>  	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
>  		mutex_destroy(&context->page_mutex);
> @@ -448,6 +474,8 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
>  	struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
>  	struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device);
>  
> +	rdma_user_mmap_entry_remove(&context->reset_mmap_entry->rdma_entry);
> +
>  	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
>  	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
>  		mutex_destroy(&context->page_mutex);
> @@ -485,6 +513,14 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
>  	case HNS_ROCE_MMAP_TYPE_DWQE:
>  		prot = pgprot_device(vma->vm_page_prot);
>  		break;
> +	case HNS_ROCE_MMAP_TYPE_RESET:
> +		if (vma->vm_flags & (VM_WRITE | VM_EXEC)) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +		vm_flags_set(vma, VM_DONTEXPAND);
> +		prot = vma->vm_page_prot;
> +		break;
>  	default:
>  		ret = -EINVAL;
>  		goto out;
> diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
> index 94e861870e27..065eb2e0a690 100644
> --- a/include/uapi/rdma/hns-abi.h
> +++ b/include/uapi/rdma/hns-abi.h
> @@ -136,6 +136,7 @@ struct hns_roce_ib_alloc_ucontext_resp {
>  	__u32	max_inline_data;
>  	__u8	congest_type;
>  	__u8	reserved0[7];
> +	__aligned_u64 reset_mmap_key;
>  };
>  
>  struct hns_roce_ib_alloc_ucontext {
> @@ -153,4 +154,9 @@ struct hns_roce_ib_create_ah_resp {
>  	__u8 tc_mode;
>  };
>  
> +struct hns_roce_reset_state {
> +	__u32 hw_ready;
> +	__u32 reserved;
> +};
> +
>  #endif /* HNS_ABI_USER_H */
> -- 
> 2.33.0
>