[v2] octeontx2: CN20K NPA Halo context support

[net-next PATCH v2 2/4] octeontx2-af: npa: cn20k: Add DPC support

Posted by Subbaraya Sundeep 2 weeks, 3 days ago

From: Linu Cherian <lcherian@marvell.com>

CN20k introduces 32 diagnostic and performance
counters that are shared across all NPA LFs.

Since the counters are shared, each PF driver needs to request
a counter with the required configuration from the AF, so that
a counter can be allocated and mapped to the respective LF with
the requested configuration.

Add new mbox messages, npa_cn20k_dpc_alloc/free, to handle this.

Also ensure all the LF to DPC counter mappings are cleared
at the time of LF free/teardown.

Signed-off-by: Linu Cherian <lcherian@marvell.com>
Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
---
 .../ethernet/marvell/octeontx2/af/cn20k/api.h |   6 +
 .../ethernet/marvell/octeontx2/af/cn20k/npa.c | 116 ++++++++++++++++++
 .../ethernet/marvell/octeontx2/af/cn20k/reg.h |   7 ++
 .../net/ethernet/marvell/octeontx2/af/mbox.h  |  19 +++
 .../net/ethernet/marvell/octeontx2/af/rvu.h   |   3 +
 .../ethernet/marvell/octeontx2/af/rvu_npa.c   |  14 ++-
 6 files changed, 164 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/api.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/api.h
index 4285b5d6a6a2..b13e7628f767 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/api.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/api.h
@@ -29,4 +29,10 @@ int cn20k_mbox_setup(struct otx2_mbox *mbox, struct pci_dev *pdev,
 		     void *reg_base, int direction, int ndevs);
 void cn20k_rvu_enable_afvf_intr(struct rvu *rvu, int vfs);
 void cn20k_rvu_disable_afvf_intr(struct rvu *rvu, int vfs);
+
+int npa_cn20k_dpc_alloc(struct rvu *rvu, struct npa_cn20k_dpc_alloc_req *req,
+			struct npa_cn20k_dpc_alloc_rsp *rsp);
+int npa_cn20k_dpc_free(struct rvu *rvu, struct npa_cn20k_dpc_free_req *req);
+void npa_cn20k_dpc_free_all(struct rvu *rvu, u16 pcifunc);
+
 #endif /* CN20K_API_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c
index c963f43dc7b0..1def2504872f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c
@@ -8,6 +8,8 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 
+#include "cn20k/api.h"
+#include "cn20k/reg.h"
 #include "struct.h"
 #include "../rvu.h"
 
@@ -46,3 +48,117 @@ int rvu_npa_halo_hwctx_disable(struct npa_aq_enq_req *req)
 
 	return 0;
 }
+
+int npa_cn20k_dpc_alloc(struct rvu *rvu, struct npa_cn20k_dpc_alloc_req *req,
+			struct npa_cn20k_dpc_alloc_rsp *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	int cntr, lf, blkaddr, ridx;
+	struct rvu_block *block;
+	struct rvu_pfvf *pfvf;
+	u64 val, lfmask;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0);
+	if (!pfvf->npalf || blkaddr < 0)
+		return NPA_AF_ERR_AF_LF_INVALID;
+
+	block = &hw->block[blkaddr];
+	lf = rvu_get_lf(rvu, block, pcifunc, 0);
+	if (lf < 0)
+		return NPA_AF_ERR_AF_LF_INVALID;
+
+	/* allocate a new counter */
+	cntr = rvu_alloc_rsrc(&rvu->npa_dpc);
+	if (cntr < 0)
+		return cntr;
+	rsp->cntr_id = cntr;
+
+	/* DPC counter config */
+	rvu_write64(rvu, blkaddr, NPA_AF_DPCX_CFG(cntr), req->dpc_conf);
+
+	/* 0 to 63 lfs -> idx 0, 64 - 127 lfs -> idx 1 */
+	ridx = lf >> 6;
+	lfmask = BIT_ULL(ridx ? lf - NPA_DPC_LFS_PER_REG : lf);
+
+	ridx = 2 * cntr + ridx;
+	/* Give permission for LF access */
+	val = rvu_read64(rvu, blkaddr, NPA_AF_DPC_PERMITX(ridx));
+	val |= lfmask;
+	rvu_write64(rvu, blkaddr, NPA_AF_DPC_PERMITX(ridx), val);
+
+	return 0;
+}
+
+int rvu_mbox_handler_npa_cn20k_dpc_alloc(struct rvu *rvu,
+					 struct npa_cn20k_dpc_alloc_req *req,
+					 struct npa_cn20k_dpc_alloc_rsp *rsp)
+{
+	return npa_cn20k_dpc_alloc(rvu, req, rsp);
+}
+
+int npa_cn20k_dpc_free(struct rvu *rvu, struct npa_cn20k_dpc_free_req *req)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	int cntr, lf, blkaddr, ridx;
+	struct rvu_block *block;
+	struct rvu_pfvf *pfvf;
+	u64 val, lfmask;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0);
+	if (!pfvf->npalf || blkaddr < 0)
+		return NPA_AF_ERR_AF_LF_INVALID;
+
+	block = &hw->block[blkaddr];
+	lf = rvu_get_lf(rvu, block, pcifunc, 0);
+	if (lf < 0)
+		return NPA_AF_ERR_AF_LF_INVALID;
+
+	if (req->cntr_id >= NPA_DPC_MAX)
+		return NPA_AF_ERR_PARAM;
+
+	/* 0 to 63 lfs -> idx 0, 64 - 127 lfs -> idx 1 */
+	ridx = lf >> 6;
+	lfmask = BIT_ULL(ridx ? lf - NPA_DPC_LFS_PER_REG : lf);
+	cntr = req->cntr_id;
+
+	ridx = 2 * cntr + ridx;
+
+	val = rvu_read64(rvu, blkaddr, NPA_AF_DPC_PERMITX(ridx));
+	/* Check if the counter is allotted to this LF */
+	if (!(val & lfmask))
+		return 0;
+
+	/* Revert permission */
+	val &= ~lfmask;
+	rvu_write64(rvu, blkaddr, NPA_AF_DPC_PERMITX(ridx), val);
+
+	/* Free this counter */
+	rvu_free_rsrc(&rvu->npa_dpc, req->cntr_id);
+
+	return 0;
+}
+
+void npa_cn20k_dpc_free_all(struct rvu *rvu, u16 pcifunc)
+{
+	struct npa_cn20k_dpc_free_req req;
+	int i;
+
+	req.hdr.pcifunc = pcifunc;
+	for (i = 0; i < NPA_DPC_MAX; i++) {
+		req.cntr_id = i;
+		npa_cn20k_dpc_free(rvu, &req);
+	}
+}
+
+int rvu_mbox_handler_npa_cn20k_dpc_free(struct rvu *rvu,
+					struct npa_cn20k_dpc_free_req *req,
+					struct msg_rsp *rsp)
+{
+	return npa_cn20k_dpc_free(rvu, req);
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h
index 8bfaa507ee50..9b49e376878e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h
@@ -143,4 +143,11 @@
 	offset = (0xb000000ull | (a) << 4 | (b) << 20);		\
 	offset; })
 
+/* NPA Registers */
+#define NPA_AF_DPCX_CFG(a)		(0x800 | (a) << 6)
+#define NPA_AF_DPC_PERMITX(a)		(0x1000 | (a) << 3)
+
+#define NPA_DPC_MAX			32
+#define NPA_DPC_LFS_PER_REG		64
+
 #endif /* RVU_MBOX_REG_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 4a97bd93d882..b29ec26b66b7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -213,6 +213,10 @@ M(NPA_AQ_ENQ,		0x402, npa_aq_enq, npa_aq_enq_req, npa_aq_enq_rsp)   \
 M(NPA_HWCTX_DISABLE,	0x403, npa_hwctx_disable, hwctx_disable_req, msg_rsp)\
 M(NPA_CN20K_AQ_ENQ,	0x404, npa_cn20k_aq_enq, npa_cn20k_aq_enq_req,	\
 				npa_cn20k_aq_enq_rsp)			\
+M(NPA_CN20K_DPC_ALLOC,	0x405, npa_cn20k_dpc_alloc, npa_cn20k_dpc_alloc_req, \
+				npa_cn20k_dpc_alloc_rsp)		\
+M(NPA_CN20K_DPC_FREE,	0x406, npa_cn20k_dpc_free, npa_cn20k_dpc_free_req, \
+				msg_rsp)				\
 /* SSO/SSOW mbox IDs (range 0x600 - 0x7FF) */				\
 /* TIM mbox IDs (range 0x800 - 0x9FF) */				\
 /* CPT mbox IDs (range 0xA00 - 0xBFF) */				\
@@ -910,6 +914,21 @@ struct npa_cn20k_aq_enq_rsp {
 	};
 };
 
+struct npa_cn20k_dpc_alloc_req {
+	struct mbox_msghdr hdr;
+	u16 dpc_conf;
+};
+
+struct npa_cn20k_dpc_alloc_rsp {
+	struct mbox_msghdr hdr;
+	u8 cntr_id;
+};
+
+struct npa_cn20k_dpc_free_req {
+	struct mbox_msghdr hdr;
+	u8 cntr_id;
+};
+
 /* Disable all contexts of type 'ctype' */
 struct hwctx_disable_req {
 	struct mbox_msghdr hdr;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 36a71d32b894..0299fa1bd3bc 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -663,6 +663,9 @@ struct rvu {
 	/* CPT interrupt lock */
 	spinlock_t		cpt_intr_lock;
 
+	/* NPA */
+	struct rsrc_bmap	npa_dpc;
+
 	struct mutex		mbox_lock; /* Serialize mbox up and down msgs */
 	u16			rep_pcifunc;
 	bool			altaf_ready;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
index 96904b8eea62..3cd24226007b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
@@ -8,6 +8,8 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 
+#include "cn20k/api.h"
+#include "cn20k/reg.h"
 #include "rvu_struct.h"
 #include "rvu_reg.h"
 #include "rvu.h"
@@ -504,6 +506,8 @@ int rvu_mbox_handler_npa_lf_free(struct rvu *rvu, struct msg_req *req,
 		return NPA_AF_ERR_LF_RESET;
 	}
 
+	if (is_cn20k(rvu->pdev))
+		npa_cn20k_dpc_free_all(rvu, pcifunc);
 	npa_ctx_free(rvu, pfvf);
 
 	return 0;
@@ -569,12 +573,17 @@ static int npa_aq_init(struct rvu *rvu, struct rvu_block *block)
 int rvu_npa_init(struct rvu *rvu)
 {
 	struct rvu_hwinfo *hw = rvu->hw;
-	int blkaddr;
+	int err, blkaddr;
 
 	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0);
 	if (blkaddr < 0)
 		return 0;
 
+	rvu->npa_dpc.max = NPA_DPC_MAX;
+	err = rvu_alloc_bitmap(&rvu->npa_dpc);
+	if (err)
+		return err;
+
 	/* Initialize admin queue */
 	return npa_aq_init(rvu, &hw->block[blkaddr]);
 }
@@ -591,6 +600,7 @@ void rvu_npa_freemem(struct rvu *rvu)
 
 	block = &hw->block[blkaddr];
 	rvu_aq_free(rvu, block->aq);
+	kfree(rvu->npa_dpc.bmap);
 }
 
 void rvu_npa_lf_teardown(struct rvu *rvu, u16 pcifunc, int npalf)
@@ -611,6 +621,8 @@ void rvu_npa_lf_teardown(struct rvu *rvu, u16 pcifunc, int npalf)
 	ctx_req.ctype = NPA_AQ_CTYPE_HALO;
 	npa_lf_hwctx_disable(rvu, &ctx_req);
 
+	if (is_cn20k(rvu->pdev))
+		npa_cn20k_dpc_free_all(rvu, pcifunc);
 	npa_ctx_free(rvu, pfvf);
 }
 
-- 
2.48.1

Re: [net-next PATCH v2 2/4] octeontx2-af: npa: cn20k: Add DPC support

Posted by Simon Horman 2 weeks, 2 days ago

This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.

For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
---
octeontx2-af: npa: cn20k: Add DPC support

CN20k introduces 32 diagnostic and performance counters that are shared
across all NPA LFs. This patch adds new mbox messages npa_dpc_alloc/free
to allocate and map counters to LFs, and ensures LF to DPC counter
mappings are cleared at LF free/teardown.

> diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c
> index c963f43dc7b0..1def2504872f 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c

[ ... ]

> @@ -46,3 +48,117 @@ int rvu_npa_halo_hwctx_disable(struct npa_aq_enq_req *req)
>
>  	return 0;
>  }
> +
> +int npa_cn20k_dpc_alloc(struct rvu *rvu, struct npa_cn20k_dpc_alloc_req *req,
> +			struct npa_cn20k_dpc_alloc_rsp *rsp)
> +{
> +	struct rvu_hwinfo *hw = rvu->hw;
> +	u16 pcifunc = req->hdr.pcifunc;
> +	int cntr, lf, blkaddr, ridx;
> +	struct rvu_block *block;
> +	struct rvu_pfvf *pfvf;
> +	u64 val, lfmask;
> +
> +	pfvf = rvu_get_pfvf(rvu, pcifunc);
> +
> +	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0);
> +	if (!pfvf->npalf || blkaddr < 0)
> +		return NPA_AF_ERR_AF_LF_INVALID;
> +
> +	block = &hw->block[blkaddr];
> +	lf = rvu_get_lf(rvu, block, pcifunc, 0);
> +	if (lf < 0)
> +		return NPA_AF_ERR_AF_LF_INVALID;
> +
> +	/* allocate a new counter */
> +	cntr = rvu_alloc_rsrc(&rvu->npa_dpc);
                ^^^^

Are rvu_alloc_rsrc() and rvu_free_rsrc() calls synchronized here?

These functions use non-atomic find_first_zero_bit() + __set_bit() and
__clear_bit() operations on the bitmap. Looking at other callers like
rvu_mbox_handler_attach_resources() in rvu.c:

int rvu_mbox_handler_attach_resources(struct rvu *rvu,
                                      struct rsrc_attach *attach,
                                      struct msg_rsp *rsp)
{
    ...
    mutex_lock(&rvu->rsrc_lock);

    for (q_cnt = 0; q_cnt < attach->npa_msixoff; q_cnt++) {
        msix = rvu_alloc_rsrc(&pfvf->msix);
        ...
    }
    mutex_unlock(&rvu->rsrc_lock);
    ...
}

Similarly, rvu_mbox_handler_nix_bandprof_alloc() and
rvu_mbox_handler_mcs_alloc_resources() also acquire rvu->rsrc_lock
before calling rvu_alloc_rsrc(). rvu_mbox_handler_npc_mcam_alloc_counter()
uses mcam->lock.

Since npa_cn20k_dpc_alloc() can be invoked by the mbox handler
rvu_mbox_handler_npa_cn20k_dpc_alloc() concurrently from different
PF/VF workqueue items in __rvu_mbox_handler(), two threads could race:

Thread 1                                 Thread 2
npa_cn20k_dpc_alloc()                    npa_cn20k_dpc_alloc()
  find_first_zero_bit(npa_dpc) = 0         find_first_zero_bit(npa_dpc) = 0
  __set_bit(0, npa_dpc)                    __set_bit(0, npa_dpc)
  return cntr = 0                          return cntr = 0

Both threads get counter 0, leading to duplicate allocation. The same
race exists in npa_cn20k_dpc_free() with rvu_free_rsrc().

Should these calls be protected with mutex_lock(&rvu->rsrc_lock) similar
to other resource allocation paths?

> +	if (cntr < 0)
> +		return cntr;
> +	rsp->cntr_id = cntr;
> +
> +	/* DPC counter config */
> +	rvu_write64(rvu, blkaddr, NPA_AF_DPCX_CFG(cntr), req->dpc_conf);
> +
> +	/* 0 to 63 lfs -> idx 0, 64 - 127 lfs -> idx 1 */
> +	ridx = lf >> 6;
> +	lfmask = BIT_ULL(ridx ? lf - NPA_DPC_LFS_PER_REG : lf);
> +
> +	ridx = 2 * cntr + ridx;
> +	/* Give permission for LF access */
> +	val = rvu_read64(rvu, blkaddr, NPA_AF_DPC_PERMITX(ridx));
> +	val |= lfmask;
> +	rvu_write64(rvu, blkaddr, NPA_AF_DPC_PERMITX(ridx), val);
> +
> +	return 0;
> +}
> +
> +int rvu_mbox_handler_npa_cn20k_dpc_alloc(struct rvu *rvu,
> +					 struct npa_cn20k_dpc_alloc_req *req,
> +					 struct npa_cn20k_dpc_alloc_rsp *rsp)
> +{
> +	return npa_cn20k_dpc_alloc(rvu, req, rsp);
> +}
> +
> +int npa_cn20k_dpc_free(struct rvu *rvu, struct npa_cn20k_dpc_free_req *req)
> +{
> +	struct rvu_hwinfo *hw = rvu->hw;
> +	u16 pcifunc = req->hdr.pcifunc;
> +	int cntr, lf, blkaddr, ridx;
> +	struct rvu_block *block;
> +	struct rvu_pfvf *pfvf;
> +	u64 val, lfmask;
> +
> +	pfvf = rvu_get_pfvf(rvu, pcifunc);
> +
> +	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0);
> +	if (!pfvf->npalf || blkaddr < 0)
> +		return NPA_AF_ERR_AF_LF_INVALID;
> +
> +	block = &hw->block[blkaddr];
> +	lf = rvu_get_lf(rvu, block, pcifunc, 0);
> +	if (lf < 0)
> +		return NPA_AF_ERR_AF_LF_INVALID;
> +
> +	if (req->cntr_id >= NPA_DPC_MAX)
> +		return NPA_AF_ERR_PARAM;
> +
> +	/* 0 to 63 lfs -> idx 0, 64 - 127 lfs -> idx 1 */
> +	ridx = lf >> 6;
> +	lfmask = BIT_ULL(ridx ? lf - NPA_DPC_LFS_PER_REG : lf);
> +	cntr = req->cntr_id;
> +
> +	ridx = 2 * cntr + ridx;
> +
> +	val = rvu_read64(rvu, blkaddr, NPA_AF_DPC_PERMITX(ridx));
> +	/* Check if the counter is allotted to this LF */
> +	if (!(val & lfmask))
> +		return 0;
> +
> +	/* Revert permission */
> +	val &= ~lfmask;
> +	rvu_write64(rvu, blkaddr, NPA_AF_DPC_PERMITX(ridx), val);
> +
> +	/* Free this counter */
> +	rvu_free_rsrc(&rvu->npa_dpc, req->cntr_id);
> +
> +	return 0;
> +}

[ ... ]

[net-next PATCH v2 1/4] octeontx2-af: npa: cn20k: Add NPA Halo support
[net-next PATCH v2 2/4] octeontx2-af: npa: cn20k: Add DPC support
[net-next PATCH v2 3/4] octeontx2-af: npa: cn20k: Add debugfs for Halo
[net-next PATCH v2 4/4] octeontx2-pf: cn20k: Use unified Halo context