[PATCH net-next 1/3] ppp: convert rlock to rwlock to improve RX concurrency
Posted by Qingfang Deng 3 months, 2 weeks ago
The rlock spinlock in struct ppp protects the receive path, but it is
frequently used in a read-mostly manner. Converting it to rwlock_t
allows multiple CPUs to concurrently enter the receive path (e.g.,
ppp_do_recv()), improving RX performance.

Write locking is preserved for code paths that mutate state.

Signed-off-by: Qingfang Deng <dqfext@gmail.com>
---
 drivers/net/ppp/ppp_generic.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 4cf9d1822a83..f0f8a75e3aef 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -118,7 +118,7 @@ struct ppp {
 	struct file	*owner;		/* file that owns this unit 48 */
 	struct list_head channels;	/* list of attached channels 4c */
 	int		n_channels;	/* how many channels are attached 54 */
-	spinlock_t	rlock;		/* lock for receive side 58 */
+	rwlock_t	rlock;		/* lock for receive side 58 */
 	spinlock_t	wlock;		/* lock for transmit side 5c */
 	int __percpu	*xmit_recursion; /* xmit recursion detect */
 	int		mru;		/* max receive unit 60 */
@@ -371,12 +371,14 @@ static const int npindex_to_ethertype[NUM_NP] = {
  */
 #define ppp_xmit_lock(ppp)	spin_lock_bh(&(ppp)->wlock)
 #define ppp_xmit_unlock(ppp)	spin_unlock_bh(&(ppp)->wlock)
-#define ppp_recv_lock(ppp)	spin_lock_bh(&(ppp)->rlock)
-#define ppp_recv_unlock(ppp)	spin_unlock_bh(&(ppp)->rlock)
+#define ppp_recv_lock(ppp)	write_lock_bh(&(ppp)->rlock)
+#define ppp_recv_unlock(ppp)	write_unlock_bh(&(ppp)->rlock)
 #define ppp_lock(ppp)		do { ppp_xmit_lock(ppp); \
 				     ppp_recv_lock(ppp); } while (0)
 #define ppp_unlock(ppp)		do { ppp_recv_unlock(ppp); \
 				     ppp_xmit_unlock(ppp); } while (0)
+#define ppp_recv_read_lock(ppp)		read_lock_bh(&(ppp)->rlock)
+#define ppp_recv_read_unlock(ppp)	read_unlock_bh(&(ppp)->rlock)
 
 /*
  * /dev/ppp device routines.
@@ -1246,7 +1248,7 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev,
 	for (indx = 0; indx < NUM_NP; ++indx)
 		ppp->npmode[indx] = NPMODE_PASS;
 	INIT_LIST_HEAD(&ppp->channels);
-	spin_lock_init(&ppp->rlock);
+	rwlock_init(&ppp->rlock);
 	spin_lock_init(&ppp->wlock);
 
 	ppp->xmit_recursion = alloc_percpu(int);
@@ -2193,12 +2195,12 @@ struct ppp_mp_skb_parm {
 static inline void
 ppp_do_recv(struct ppp *ppp, struct sk_buff *skb, struct channel *pch)
 {
-	ppp_recv_lock(ppp);
+	ppp_recv_read_lock(ppp);
 	if (!ppp->closing)
 		ppp_receive_frame(ppp, skb, pch);
 	else
 		kfree_skb(skb);
-	ppp_recv_unlock(ppp);
+	ppp_recv_read_unlock(ppp);
 }
 
 /**
-- 
2.43.0
Re: [PATCH net-next 1/3] ppp: convert rlock to rwlock to improve RX concurrency
Posted by Guillaume Nault 3 months, 2 weeks ago
On Wed, Jun 25, 2025 at 11:40:18AM +0800, Qingfang Deng wrote:
> The rlock spinlock in struct ppp protects the receive path, but it is
> frequently used in a read-mostly manner. Converting it to rwlock_t
> allows multiple CPUs to concurrently enter the receive path (e.g.,
> ppp_do_recv()), improving RX performance.
> 
> Write locking is preserved for code paths that mutate state.
> 
> Signed-off-by: Qingfang Deng <dqfext@gmail.com>
> ---
>  drivers/net/ppp/ppp_generic.c | 14 ++++++++------
>  1 file changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
> index 4cf9d1822a83..f0f8a75e3aef 100644
> --- a/drivers/net/ppp/ppp_generic.c
> +++ b/drivers/net/ppp/ppp_generic.c
> @@ -118,7 +118,7 @@ struct ppp {
>  	struct file	*owner;		/* file that owns this unit 48 */
>  	struct list_head channels;	/* list of attached channels 4c */
>  	int		n_channels;	/* how many channels are attached 54 */
> -	spinlock_t	rlock;		/* lock for receive side 58 */
> +	rwlock_t	rlock;		/* lock for receive side 58 */
>  	spinlock_t	wlock;		/* lock for transmit side 5c */
>  	int __percpu	*xmit_recursion; /* xmit recursion detect */
>  	int		mru;		/* max receive unit 60 */
> @@ -371,12 +371,14 @@ static const int npindex_to_ethertype[NUM_NP] = {
>   */
>  #define ppp_xmit_lock(ppp)	spin_lock_bh(&(ppp)->wlock)
>  #define ppp_xmit_unlock(ppp)	spin_unlock_bh(&(ppp)->wlock)
> -#define ppp_recv_lock(ppp)	spin_lock_bh(&(ppp)->rlock)
> -#define ppp_recv_unlock(ppp)	spin_unlock_bh(&(ppp)->rlock)
> +#define ppp_recv_lock(ppp)	write_lock_bh(&(ppp)->rlock)
> +#define ppp_recv_unlock(ppp)	write_unlock_bh(&(ppp)->rlock)
>  #define ppp_lock(ppp)		do { ppp_xmit_lock(ppp); \
>  				     ppp_recv_lock(ppp); } while (0)
>  #define ppp_unlock(ppp)		do { ppp_recv_unlock(ppp); \
>  				     ppp_xmit_unlock(ppp); } while (0)
> +#define ppp_recv_read_lock(ppp)		read_lock_bh(&(ppp)->rlock)
> +#define ppp_recv_read_unlock(ppp)	read_unlock_bh(&(ppp)->rlock)
>  
>  /*
>   * /dev/ppp device routines.
> @@ -1246,7 +1248,7 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev,
>  	for (indx = 0; indx < NUM_NP; ++indx)
>  		ppp->npmode[indx] = NPMODE_PASS;
>  	INIT_LIST_HEAD(&ppp->channels);
> -	spin_lock_init(&ppp->rlock);
> +	rwlock_init(&ppp->rlock);
>  	spin_lock_init(&ppp->wlock);
>  
>  	ppp->xmit_recursion = alloc_percpu(int);
> @@ -2193,12 +2195,12 @@ struct ppp_mp_skb_parm {
>  static inline void
>  ppp_do_recv(struct ppp *ppp, struct sk_buff *skb, struct channel *pch)
>  {
> -	ppp_recv_lock(ppp);
> +	ppp_recv_read_lock(ppp);
>  	if (!ppp->closing)
>  		ppp_receive_frame(ppp, skb, pch);

That doesn't look right. Several PPP Rx features are stateful
(multilink, compression, etc.), and the current implementations
don't take any precautions when updating the shared state.

For example, see how bsd_decompress() (in bsd_comp.c) updates db->*
fields all over the place. This db variable comes from ppp->rc_state,
which is passed as parameter of the ppp->rcomp->decompress() call in
ppp_decompress_frame().

I think a lot of work would be needed before we could allow
ppp_do_recv() to run concurrently on the same struct ppp.
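
For illustration, a simplified sketch of the call path involved (not the
exact kernel code; names follow ppp_generic.c and bsd_comp.c, buffer
offsets and error handling are elided):

	/* sketch of what ppp_decompress_frame() effectively does on Rx */
	static void rx_decompress_sketch(struct ppp *ppp, struct sk_buff *skb,
					 unsigned char *obuf, int osize)
	{
		int len;

		/*
		 * rc_state is the single per-unit receive decompressor state.
		 * bsd_decompress() casts it to struct bsd_db * and writes
		 * db->seqno, the dictionary and the statistics counters on
		 * every packet -- the "read" path mutates shared state.
		 */
		len = ppp->rcomp->decompress(ppp->rc_state, skb->data,
					     skb->len, obuf, osize);

		/*
		 * With only read_lock_bh(&ppp->rlock) held, two CPUs can
		 * reach this call for the same struct ppp and corrupt
		 * rc_state.
		 */
		(void)len;
	}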

>  	else
>  		kfree_skb(skb);
> -	ppp_recv_unlock(ppp);
> +	ppp_recv_read_unlock(ppp);
>  }
>  
>  /**
> -- 
> 2.43.0
>
Re: [PATCH net-next 1/3] ppp: convert rlock to rwlock to improve RX concurrency
Posted by Qingfang Deng 3 months, 1 week ago
On Fri, Jun 27, 2025 at 12:23 AM Guillaume Nault <gnault@redhat.com> wrote:
> That doesn't look right. Several PPP Rx features are stateful
> (multilink, compression, etc.), and the current implementations
> don't take any precautions when updating the shared state.
>
> For example, see how bsd_decompress() (in bsd_comp.c) updates db->*
> fields all over the place. This db variable comes from ppp->rc_state,
> which is passed as parameter of the ppp->rcomp->decompress() call in
> ppp_decompress_frame().
>
> I think a lot of work would be needed before we could allow
> ppp_do_recv() to run concurrently on the same struct ppp.

Right. I think we can grab a write lock where it updates struct ppp.
Re: [PATCH net-next 1/3] ppp: convert rlock to rwlock to improve RX concurrency
Posted by Eric Dumazet 3 months, 1 week ago
On Thu, Jun 26, 2025 at 9:00 PM Qingfang Deng <dqfext@gmail.com> wrote:
>
> On Fri, Jun 27, 2025 at 12:23 AM Guillaume Nault <gnault@redhat.com> wrote:
> > That doesn't look right. Several PPP Rx features are stateful
> > (multilink, compression, etc.), and the current implementations
> > don't take any precautions when updating the shared state.
> >
> > For example, see how bsd_decompress() (in bsd_comp.c) updates db->*
> > fields all over the place. This db variable comes from ppp->rc_state,
> > which is passed as parameter of the ppp->rcomp->decompress() call in
> > ppp_decompress_frame().
> >
> > I think a lot of work would be needed before we could allow
> > ppp_do_recv() to run concurrently on the same struct ppp.
>
> Right. I think we can grab a write lock where it updates struct ppp.

tldr: network maintainers do not want rwlock back.

If you really care about concurrency, do not use rwlock, because it is
more expensive than a spinlock and very problematic.

Instead use RCU for readers, and spinlock for the parts needing exclusion.

Adding rwlock in network fast path is almost always a very bad choice.

Take a look at commit 0daf07e527095e6 for gory details.
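
For context, a minimal, generic sketch of that pattern (illustration
only, not PPP code; the cfg/active_cfg names are made up): readers run
under rcu_read_lock(), while writers serialize on a spinlock, publish a
new copy, and free the old one after a grace period:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct cfg {
		int mru;			/* read-mostly field(s) */
		struct rcu_head rcu;
	};

	static struct cfg __rcu *active_cfg;
	static DEFINE_SPINLOCK(cfg_lock);	/* writer-side exclusion only */

	static int reader_path(void)
	{
		struct cfg *c;
		int mru = 0;

		rcu_read_lock();		/* no shared lock word bounced */
		c = rcu_dereference(active_cfg);
		if (c)
			mru = c->mru;
		rcu_read_unlock();
		return mru;
	}

	static void writer_path(int new_mru)
	{
		struct cfg *newc, *old;

		newc = kmalloc(sizeof(*newc), GFP_KERNEL);
		if (!newc)
			return;
		newc->mru = new_mru;

		spin_lock_bh(&cfg_lock);
		old = rcu_dereference_protected(active_cfg,
						lockdep_is_held(&cfg_lock));
		rcu_assign_pointer(active_cfg, newc);
		spin_unlock_bh(&cfg_lock);

		if (old)
			kfree_rcu(old, rcu);	/* freed after a grace period */
	}

The per-packet (reader) side never contends on a shared lock, so it
scales with the number of CPUs; only control-path updates serialize on
cfg_lock.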
Re: [PATCH net-next 1/3] ppp: convert rlock to rwlock to improve RX concurrency
Posted by Qingfang Deng 3 months, 1 week ago
On Fri, Jun 27, 2025 at 2:50 PM Eric Dumazet <edumazet@google.com> wrote:
> tldr: network maintainers do not want rwlock back.
>
> If you really care about concurrency, do not use rwlock, because it is
> more expensive than a spinlock and very problematic.
>
> Instead use RCU for readers, and spinlock for the parts needing exclusion.
>
> Adding rwlock in network fast path is almost always a very bad choice.
>
> Take a look at commit 0daf07e527095e6 for gory details.

Thanks for the info, I'll see how to refactor it using RCU instead.
Re: [PATCH net-next 1/3] ppp: convert rlock to rwlock to improve RX concurrency
Posted by Andrew Lunn 3 months, 2 weeks ago
>  #define ppp_xmit_lock(ppp)	spin_lock_bh(&(ppp)->wlock)
>  #define ppp_xmit_unlock(ppp)	spin_unlock_bh(&(ppp)->wlock)
> -#define ppp_recv_lock(ppp)	spin_lock_bh(&(ppp)->rlock)
> -#define ppp_recv_unlock(ppp)	spin_unlock_bh(&(ppp)->rlock)
> +#define ppp_recv_lock(ppp)	write_lock_bh(&(ppp)->rlock)
> +#define ppp_recv_unlock(ppp)	write_unlock_bh(&(ppp)->rlock)
>  #define ppp_lock(ppp)		do { ppp_xmit_lock(ppp); \
>  				     ppp_recv_lock(ppp); } while (0)
>  #define ppp_unlock(ppp)		do { ppp_recv_unlock(ppp); \
>  				     ppp_xmit_unlock(ppp); } while (0)
> +#define ppp_recv_read_lock(ppp)		read_lock_bh(&(ppp)->rlock)
> +#define ppp_recv_read_unlock(ppp)	read_unlock_bh(&(ppp)->rlock)

Given the _read_ in ppp_recv_read_lock(), maybe ppp_recv_lock() should
be ppp_recv_write_lock()? Makes it symmetrical, and clearer that there
is a read/write lock.

	Andrew
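
With the symmetric naming Andrew suggests, the macro block might read
(sketch only, keeping ppp_lock()/ppp_unlock() on the write-side
variants):

	#define ppp_recv_write_lock(ppp)	write_lock_bh(&(ppp)->rlock)
	#define ppp_recv_write_unlock(ppp)	write_unlock_bh(&(ppp)->rlock)
	#define ppp_recv_read_lock(ppp)		read_lock_bh(&(ppp)->rlock)
	#define ppp_recv_read_unlock(ppp)	read_unlock_bh(&(ppp)->rlock)
	#define ppp_lock(ppp)		do { ppp_xmit_lock(ppp); \
					     ppp_recv_write_lock(ppp); } while (0)
	#define ppp_unlock(ppp)		do { ppp_recv_write_unlock(ppp); \
					     ppp_xmit_unlock(ppp); } while (0)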