[net-next PATCH v1 09/15] octeontx2-pf: ipsec: Allocate Ingress SA table

Tanmay Jagdale posted 15 patches 9 months, 1 week ago
[net-next PATCH v1 09/15] octeontx2-pf: ipsec: Allocate Ingress SA table
Posted by Tanmay Jagdale 9 months, 1 week ago
Every NIX LF has the facility to maintain a contiguous SA table that
is used by NIX RX to find the exact SA context pointer associated with
a particular flow. Allocate a 128-entry SA table where each entry is of
2048 bytes which is enough to hold the complete inbound SA context.

Add the structure definitions for SA context (cn10k_rx_sa_s) and
SA bookkeeping information (cn10k_inb_sw_ctx_info).

Also, initialize the inb_sw_ctx_list to track all the SAs and their
associated NPC rules and hash table related data.

Signed-off-by: Tanmay Jagdale <tanmay@marvell.com>
---
 .../marvell/octeontx2/nic/cn10k_ipsec.c       | 20 ++++
 .../marvell/octeontx2/nic/cn10k_ipsec.h       | 93 +++++++++++++++++++
 2 files changed, 113 insertions(+)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c
index fc59e50bafce..c435dcae4929 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c
@@ -787,6 +787,7 @@ int cn10k_ipsec_init(struct net_device *netdev)
 {
 	struct otx2_nic *pf = netdev_priv(netdev);
 	u32 sa_size;
+	int err;
 
 	if (!is_dev_support_ipsec_offload(pf->pdev))
 		return 0;
@@ -797,6 +798,22 @@ int cn10k_ipsec_init(struct net_device *netdev)
 			 OTX2_ALIGN : sizeof(struct cn10k_tx_sa_s);
 	pf->ipsec.sa_size = sa_size;
 
+	/* Set sa_tbl_entry_sz to 2048 since we are programming NIX RX
+	 * to calculate SA index as SPI * 2048. The first 1024 bytes
+	 * are used for SA context and the next half for bookkeeping data.
+	 */
+	pf->ipsec.sa_tbl_entry_sz = 2048;
+	err = qmem_alloc(pf->dev, &pf->ipsec.inb_sa, CN10K_IPSEC_INB_MAX_SA,
+			 pf->ipsec.sa_tbl_entry_sz);
+	if (err)
+		return err;
+
+	memset(pf->ipsec.inb_sa->base, 0,
+	       pf->ipsec.sa_tbl_entry_sz * CN10K_IPSEC_INB_MAX_SA);
+
+	/* List to track all ingress SAs */
+	INIT_LIST_HEAD(&pf->ipsec.inb_sw_ctx_list);
+
 	INIT_WORK(&pf->ipsec.sa_work, cn10k_ipsec_sa_wq_handler);
 	pf->ipsec.sa_workq = alloc_workqueue("cn10k_ipsec_sa_workq", 0, 0);
 	if (!pf->ipsec.sa_workq) {
@@ -828,6 +845,9 @@ void cn10k_ipsec_clean(struct otx2_nic *pf)
 	}
 
 	cn10k_outb_cpt_clean(pf);
+
+	/* Free Ingress SA table */
+	qmem_free(pf->dev, pf->ipsec.inb_sa);
 }
 EXPORT_SYMBOL(cn10k_ipsec_clean);
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h
index 9965df0faa3e..6dd6ead0b28b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h
@@ -52,10 +52,14 @@ DECLARE_STATIC_KEY_FALSE(cn10k_ipsec_sa_enabled);
 #define CN10K_CPT_LF_NQX(a)		(CPT_LFBASE | 0x400 | (a) << 3)
 #define CN10K_CPT_LF_CTX_FLUSH		(CPT_LFBASE | 0x510)
 
+/* Inbound SA*/
+#define CN10K_IPSEC_INB_MAX_SA	128
+
 /* IPSEC Instruction opcodes */
 #define CN10K_IPSEC_MAJOR_OP_WRITE_SA 0x01UL
 #define CN10K_IPSEC_MINOR_OP_WRITE_SA 0x09UL
 #define CN10K_IPSEC_MAJOR_OP_OUTB_IPSEC 0x2AUL
+#define CN10K_IPSEC_MAJOR_OP_INB_IPSEC 0x29UL
 
 enum cn10k_cpt_comp_e {
 	CN10K_CPT_COMP_E_NOTDONE = 0x00,
@@ -81,6 +85,19 @@ enum cn10k_cpt_hw_state_e {
 	CN10K_CPT_HW_IN_USE
 };
 
+struct cn10k_inb_sw_ctx_info {
+	struct list_head list;
+	struct cn10k_rx_sa_s *sa_entry;
+	struct xfrm_state *x_state;
+	dma_addr_t sa_iova;
+	u32 npc_mcam_entry;
+	u32 sa_index;
+	u32 spi;
+	u16 hash_index;	/* Hash index from SPI_TO_SA match */
+	u8 way;		/* SPI_TO_SA match table way index */
+	bool delete_npc_and_match_entry;
+};
+
 struct cn10k_ipsec {
 	/* Outbound CPT */
 	u64 io_addr;
@@ -92,6 +109,12 @@ struct cn10k_ipsec {
 	u32 outb_sa_count;
 	struct work_struct sa_work;
 	struct workqueue_struct *sa_workq;
+
+	/* For Inbound Inline IPSec flows */
+	u32 sa_tbl_entry_sz;
+	struct qmem *inb_sa;
+	struct list_head inb_sw_ctx_list;
+	DECLARE_BITMAP(inb_sa_table, CN10K_IPSEC_INB_MAX_SA);
 };
 
 /* CN10K IPSEC Security Association (SA) */
@@ -146,6 +169,76 @@ struct cn10k_tx_sa_s {
 	u64 hw_ctx[6];		/* W31 - W36 */
 };
 
+struct cn10k_rx_sa_s {
+	u64 inb_ar_win_sz	: 3; /* W0 */
+	u64 hard_life_dec	: 1;
+	u64 soft_life_dec	: 1;
+	u64 count_glb_octets	: 1;
+	u64 count_glb_pkts	: 1;
+	u64 count_mib_bytes	: 1;
+	u64 count_mib_pkts	: 1;
+	u64 hw_ctx_off		: 7;
+	u64 ctx_id		: 16;
+	u64 orig_pkt_fabs	: 1;
+	u64 orig_pkt_free	: 1;
+	u64 pkind		: 6;
+	u64 rsvd_w0_40		: 1;
+	u64 eth_ovrwr		: 1;
+	u64 pkt_output		: 2;
+	u64 pkt_format		: 1;
+	u64 defrag_opt		: 2;
+	u64 x2p_dst		: 1;
+	u64 ctx_push_size	: 7;
+	u64 rsvd_w0_55		: 1;
+	u64 ctx_hdr_size	: 2;
+	u64 aop_valid		: 1;
+	u64 rsvd_w0_59		: 1;
+	u64 ctx_size		: 4;
+
+	u64 rsvd_w1_31_0	: 32; /* W1 */
+	u64 cookie		: 32;
+
+	u64 sa_valid		: 1; /* W2 Control Word */
+	u64 sa_dir		: 1;
+	u64 rsvd_w2_2_3		: 2;
+	u64 ipsec_mode		: 1;
+	u64 ipsec_protocol	: 1;
+	u64 aes_key_len		: 2;
+	u64 enc_type		: 3;
+	u64 life_unit		: 1;
+	u64 auth_type		: 4;
+	u64 encap_type		: 2;
+	u64 et_ovrwr_ddr_en	: 1;
+	u64 esn_en		: 1;
+	u64 tport_l4_incr_csum	: 1;
+	u64 iphdr_verify	: 2;
+	u64 udp_ports_verify	: 1;
+	u64 l2_l3_hdr_on_error	: 1;
+	u64 rsvd_w25_31		: 7;
+	u64 spi			: 32;
+
+	u64 w3;			/* W3 */
+
+	u8 cipher_key[32];	/* W4 - W7 */
+	u32 rsvd_w8_0_31;	/* W8 : IV */
+	u32 iv_gcm_salt;
+	u64 rsvd_w9;		/* W9 */
+	u64 rsvd_w10;		/* W10 : UDP Encap */
+	u32 dest_ipaddr;	/* W11 - Tunnel mode: outer src and dest ipaddr */
+	u32 src_ipaddr;
+	u64 rsvd_w12_w30[19];	/* W12 - W30 */
+
+	u64 ar_base;		/* W31 */
+	u64 ar_valid_mask;	/* W32 */
+	u64 hard_sa_life;	/* W33 */
+	u64 soft_sa_life;	/* W34 */
+	u64 mib_octs;		/* W35 */
+	u64 mib_pkts;		/* W36 */
+	u64 ar_winbits;		/* W37 */
+
+	u64 rsvd_w38_w100[63];
+};
+
 /* CPT instruction parameter-1 */
 #define CN10K_IPSEC_INST_PARAM1_DIS_L4_CSUM		0x1
 #define CN10K_IPSEC_INST_PARAM1_DIS_L3_CSUM		0x2
-- 
2.43.0
Re: [net-next PATCH v1 09/15] octeontx2-pf: ipsec: Allocate Ingress SA table
Posted by Simon Horman 9 months, 1 week ago
On Fri, May 02, 2025 at 06:49:50PM +0530, Tanmay Jagdale wrote:
> Every NIX LF has the facility to maintain a contiguous SA table that
> is used by NIX RX to find the exact SA context pointer associated with
> a particular flow. Allocate a 128-entry SA table where each entry is of
> 2048 bytes which is enough to hold the complete inbound SA context.
> 
> Add the structure definitions for SA context (cn10k_rx_sa_s) and
> SA bookkeeping information (ctx_inb_ctx_info).
> 
> Also, initialize the inb_sw_ctx_list to track all the SA's and their
> associated NPC rules and hash table related data.
> 
> Signed-off-by: Tanmay Jagdale <tanmay@marvell.com>

...

> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h

...

> @@ -146,6 +169,76 @@ struct cn10k_tx_sa_s {
>  	u64 hw_ctx[6];		/* W31 - W36 */
>  };
>  
> +struct cn10k_rx_sa_s {
> +	u64 inb_ar_win_sz	: 3; /* W0 */
> +	u64 hard_life_dec	: 1;
> +	u64 soft_life_dec	: 1;
> +	u64 count_glb_octets	: 1;
> +	u64 count_glb_pkts	: 1;
> +	u64 count_mib_bytes	: 1;
> +	u64 count_mib_pkts	: 1;
> +	u64 hw_ctx_off		: 7;
> +	u64 ctx_id		: 16;
> +	u64 orig_pkt_fabs	: 1;
> +	u64 orig_pkt_free	: 1;
> +	u64 pkind		: 6;
> +	u64 rsvd_w0_40		: 1;
> +	u64 eth_ovrwr		: 1;
> +	u64 pkt_output		: 2;
> +	u64 pkt_format		: 1;
> +	u64 defrag_opt		: 2;
> +	u64 x2p_dst		: 1;
> +	u64 ctx_push_size	: 7;
> +	u64 rsvd_w0_55		: 1;
> +	u64 ctx_hdr_size	: 2;
> +	u64 aop_valid		: 1;
> +	u64 rsvd_w0_59		: 1;
> +	u64 ctx_size		: 4;
> +
> +	u64 rsvd_w1_31_0	: 32; /* W1 */
> +	u64 cookie		: 32;
> +
> +	u64 sa_valid		: 1; /* W2 Control Word */
> +	u64 sa_dir		: 1;
> +	u64 rsvd_w2_2_3		: 2;
> +	u64 ipsec_mode		: 1;
> +	u64 ipsec_protocol	: 1;
> +	u64 aes_key_len		: 2;
> +	u64 enc_type		: 3;
> +	u64 life_unit		: 1;
> +	u64 auth_type		: 4;
> +	u64 encap_type		: 2;
> +	u64 et_ovrwr_ddr_en	: 1;
> +	u64 esn_en		: 1;
> +	u64 tport_l4_incr_csum	: 1;
> +	u64 iphdr_verify	: 2;
> +	u64 udp_ports_verify	: 1;
> +	u64 l2_l3_hdr_on_error	: 1;
> +	u64 rsvd_w25_31		: 7;
> +	u64 spi			: 32;

As I understand it, this driver is only intended to run on arm64 systems.
It is, however, also possible, with COMPILE_TEST, to compile the driver
for other 64-bit systems.

So, given the first point above, this may be moot. But the above
assumes that the byte order of the host is the same as the device.
Or perhaps more to the point, it has been written for a little-endian
host and the device is expecting the data in that byte order.

But u64 is supposed to represent host byte order. And, in my understanding
of things, this is the kind of problem that FIELD_PREP and FIELD_GET are
intended to avoid, when combined with endian-specific integer types (in this
case __le64 seems appropriate).

I do hesitate in bringing this up, as the above very likely works on
all systems on which this code is intended to run. But I do so
because it is not correct on all systems for which this code can be
compiled. And thus seems somehow misleading.

> +
> +	u64 w3;			/* W3 */
> +
> +	u8 cipher_key[32];	/* W4 - W7 */
> +	u32 rsvd_w8_0_31;	/* W8 : IV */
> +	u32 iv_gcm_salt;
> +	u64 rsvd_w9;		/* W9 */
> +	u64 rsvd_w10;		/* W10 : UDP Encap */
> +	u32 dest_ipaddr;	/* W11 - Tunnel mode: outer src and dest ipaddr */
> +	u32 src_ipaddr;
> +	u64 rsvd_w12_w30[19];	/* W12 - W30 */
> +
> +	u64 ar_base;		/* W31 */
> +	u64 ar_valid_mask;	/* W32 */
> +	u64 hard_sa_life;	/* W33 */
> +	u64 soft_sa_life;	/* W34 */
> +	u64 mib_octs;		/* W35 */
> +	u64 mib_pkts;		/* W36 */
> +	u64 ar_winbits;		/* W37 */
> +
> +	u64 rsvd_w38_w100[63];
> +};
> +
>  /* CPT instruction parameter-1 */
>  #define CN10K_IPSEC_INST_PARAM1_DIS_L4_CSUM		0x1
>  #define CN10K_IPSEC_INST_PARAM1_DIS_L3_CSUM		0x2
Re: [net-next PATCH v1 09/15] octeontx2-pf: ipsec: Allocate Ingress SA table
Posted by Tanmay Jagdale 8 months, 3 weeks ago
Hi Simon,

On 2025-05-07 at 18:26:25, Simon Horman (horms@kernel.org) wrote:
> On Fri, May 02, 2025 at 06:49:50PM +0530, Tanmay Jagdale wrote:
> > Every NIX LF has the facility to maintain a contiguous SA table that
> > is used by NIX RX to find the exact SA context pointer associated with
> > a particular flow. Allocate a 128-entry SA table where each entry is of
> > 2048 bytes which is enough to hold the complete inbound SA context.
> > 
> > Add the structure definitions for SA context (cn10k_rx_sa_s) and
> > SA bookkeeping information (ctx_inb_ctx_info).
> > 
> > Also, initialize the inb_sw_ctx_list to track all the SA's and their
> > associated NPC rules and hash table related data.
> > 
> > Signed-off-by: Tanmay Jagdale <tanmay@marvell.com>
> 
> ...
> 
> > diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h
> 
> ...
> 
> > @@ -146,6 +169,76 @@ struct cn10k_tx_sa_s {
> >  	u64 hw_ctx[6];		/* W31 - W36 */
> >  };
> >  
> > +struct cn10k_rx_sa_s {
> > +	u64 inb_ar_win_sz	: 3; /* W0 */
> > +	u64 hard_life_dec	: 1;
> > +	u64 soft_life_dec	: 1;
> > +	u64 count_glb_octets	: 1;
> > +	u64 count_glb_pkts	: 1;
> > +	u64 count_mib_bytes	: 1;
> > +	u64 count_mib_pkts	: 1;
> > +	u64 hw_ctx_off		: 7;
> > +	u64 ctx_id		: 16;
> > +	u64 orig_pkt_fabs	: 1;
> > +	u64 orig_pkt_free	: 1;
> > +	u64 pkind		: 6;
> > +	u64 rsvd_w0_40		: 1;
> > +	u64 eth_ovrwr		: 1;
> > +	u64 pkt_output		: 2;
> > +	u64 pkt_format		: 1;
> > +	u64 defrag_opt		: 2;
> > +	u64 x2p_dst		: 1;
> > +	u64 ctx_push_size	: 7;
> > +	u64 rsvd_w0_55		: 1;
> > +	u64 ctx_hdr_size	: 2;
> > +	u64 aop_valid		: 1;
> > +	u64 rsvd_w0_59		: 1;
> > +	u64 ctx_size		: 4;
> > +
> > +	u64 rsvd_w1_31_0	: 32; /* W1 */
> > +	u64 cookie		: 32;
> > +
> > +	u64 sa_valid		: 1; /* W2 Control Word */
> > +	u64 sa_dir		: 1;
> > +	u64 rsvd_w2_2_3		: 2;
> > +	u64 ipsec_mode		: 1;
> > +	u64 ipsec_protocol	: 1;
> > +	u64 aes_key_len		: 2;
> > +	u64 enc_type		: 3;
> > +	u64 life_unit		: 1;
> > +	u64 auth_type		: 4;
> > +	u64 encap_type		: 2;
> > +	u64 et_ovrwr_ddr_en	: 1;
> > +	u64 esn_en		: 1;
> > +	u64 tport_l4_incr_csum	: 1;
> > +	u64 iphdr_verify	: 2;
> > +	u64 udp_ports_verify	: 1;
> > +	u64 l2_l3_hdr_on_error	: 1;
> > +	u64 rsvd_w25_31		: 7;
> > +	u64 spi			: 32;
> 
> As I understand it, this driver is only intended to run on arm64 systems.
> While it is also possible, with COMPILE_TEST test, to compile the driver
> on for 64-bit systems.
Yes, this driver works only on Marvell CN10K SoC. I have COMPILE_TESTed
on x86 and ARM64 platforms.

> 
> So, given the first point above, this may be moot. But the above
> assumes that the byte order of the host is the same as the device.
> Or perhaps more to the point, it has been written for a little-endian
> host and the device is expecting the data in that byte order.
> 
> But u64 is supposed to represent host byte order.  And, in my understanding
> of things, this is the kind of problem that FIELD_PREP and FIELD_GET are
> intended to avoid, when combined on endian-specific integer types (in this
> case __le64 seems appropriate).
> 
> I do hesitate in bringing this up, as the above very likely works on
> all systems on which this code is intended to run. But I do so
> because it is not correct on all systems for which this code can be
> compiled. And thus seems somehow misleading.
Okay. Are you referring to a case where we compile on a BE machine
and then run on an LE platform?

With Regards,
Tanmay
> 
> > +
> > +	u64 w3;			/* W3 */
> > +
> > +	u8 cipher_key[32];	/* W4 - W7 */
> > +	u32 rsvd_w8_0_31;	/* W8 : IV */
> > +	u32 iv_gcm_salt;
> > +	u64 rsvd_w9;		/* W9 */
> > +	u64 rsvd_w10;		/* W10 : UDP Encap */
> > +	u32 dest_ipaddr;	/* W11 - Tunnel mode: outer src and dest ipaddr */
> > +	u32 src_ipaddr;
> > +	u64 rsvd_w12_w30[19];	/* W12 - W30 */
> > +
> > +	u64 ar_base;		/* W31 */
> > +	u64 ar_valid_mask;	/* W32 */
> > +	u64 hard_sa_life;	/* W33 */
> > +	u64 soft_sa_life;	/* W34 */
> > +	u64 mib_octs;		/* W35 */
> > +	u64 mib_pkts;		/* W36 */
> > +	u64 ar_winbits;		/* W37 */
> > +
> > +	u64 rsvd_w38_w100[63];
> > +};
> > +
> >  /* CPT instruction parameter-1 */
> >  #define CN10K_IPSEC_INST_PARAM1_DIS_L4_CSUM		0x1
> >  #define CN10K_IPSEC_INST_PARAM1_DIS_L3_CSUM		0x2