RFC 5925 (6.2):
> TCP-AO emulates a 64-bit sequence number space by inferring when to
> increment the high-order 32-bit portion (the SNE) based on
> transitions in the low-order portion (the TCP sequence number).
snd_sne and rcv_sne are the upper 4 bytes of extended SEQ number.
Unfortunately, reading two 4-bytes pointers can't be performed
atomically (without synchronization).
Let's keep it KISS and add an rwlock - that shouldn't create much
contention as SNE are updated every 4Gb of traffic and the atomic region
is quite small.
Fixes: 64382c71a557 ("net/tcp: Add TCP-AO SNE support")
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
include/net/tcp_ao.h | 2 +-
net/ipv4/tcp_ao.c | 34 +++++++++++++++++++++-------------
net/ipv4/tcp_input.c | 16 ++++++++++++++--
3 files changed, 36 insertions(+), 16 deletions(-)
diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
index 647781080613..beea3e6b39e2 100644
--- a/include/net/tcp_ao.h
+++ b/include/net/tcp_ao.h
@@ -123,6 +123,7 @@ struct tcp_ao_info {
*/
u32 snd_sne;
u32 rcv_sne;
+ rwlock_t sne_lock;
refcount_t refcnt; /* Protects twsk destruction */
struct rcu_head rcu;
};
@@ -212,7 +213,6 @@ enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
const struct sk_buff *skb, unsigned short int family,
const struct request_sock *req, int l3index,
const struct tcp_ao_hdr *aoh);
-u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq);
struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int l3index,
const union tcp_ao_addr *addr,
int family, int sndid, int rcvid);
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index 2d000e275ce7..74db80aeeef3 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -230,6 +230,7 @@ static struct tcp_ao_info *tcp_ao_alloc_info(gfp_t flags)
return NULL;
INIT_HLIST_HEAD(&ao->head);
refcount_set(&ao->refcnt, 1);
+ rwlock_init(&ao->sne_lock);
return ao;
}
@@ -472,10 +473,8 @@ static int tcp_ao_hash_pseudoheader(unsigned short int family,
return -EAFNOSUPPORT;
}
-u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq)
+static u32 tcp_ao_compute_sne(u32 sne, u32 next_seq, u32 seq)
{
- u32 sne = next_sne;
-
if (before(seq, next_seq)) {
if (seq > next_seq)
sne--;
@@ -483,7 +482,6 @@ u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq)
if (seq < next_seq)
sne++;
}
-
return sne;
}
@@ -763,14 +761,15 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb,
*keyid = (*key)->rcvid;
} else {
struct tcp_ao_key *rnext_key;
- u32 snd_basis;
+ const u32 *snd_basis;
+ unsigned long flags;
if (sk->sk_state == TCP_TIME_WAIT) {
ao_info = rcu_dereference(tcp_twsk(sk)->ao_info);
- snd_basis = tcp_twsk(sk)->tw_snd_nxt;
+ snd_basis = &tcp_twsk(sk)->tw_snd_nxt;
} else {
ao_info = rcu_dereference(tcp_sk(sk)->ao_info);
- snd_basis = tcp_sk(sk)->snd_una;
+ snd_basis = &tcp_sk(sk)->snd_una;
}
if (!ao_info)
return -ENOENT;
@@ -781,8 +780,10 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb,
*traffic_key = snd_other_key(*key);
rnext_key = READ_ONCE(ao_info->rnext_key);
*keyid = rnext_key->rcvid;
- *sne = tcp_ao_compute_sne(READ_ONCE(ao_info->snd_sne),
- snd_basis, seq);
+ read_lock_irqsave(&ao_info->sne_lock, flags);
+ *sne = tcp_ao_compute_sne(ao_info->snd_sne,
+ READ_ONCE(*snd_basis), seq);
+ read_unlock_irqrestore(&ao_info->sne_lock, flags);
}
return 0;
}
@@ -795,6 +796,7 @@ int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_ao_info *ao;
void *tkey_buf = NULL;
+ unsigned long flags;
u8 *traffic_key;
u32 sne;
@@ -816,8 +818,10 @@ int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
tp->af_specific->ao_calc_key_sk(key, traffic_key,
sk, ao->lisn, disn, true);
}
- sne = tcp_ao_compute_sne(READ_ONCE(ao->snd_sne), READ_ONCE(tp->snd_una),
- ntohl(th->seq));
+ read_lock_irqsave(&ao->sne_lock, flags);
+ sne = tcp_ao_compute_sne(ao->snd_sne,
+ READ_ONCE(tp->snd_una), ntohl(th->seq));
+ read_unlock_irqrestore(&ao->sne_lock, flags);
tp->af_specific->calc_ao_hash(hash_location, key, sk, skb, traffic_key,
hash_location - (u8 *)th, sne);
kfree(tkey_buf);
@@ -938,8 +942,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
/* Fast-path */
if (likely((1 << sk->sk_state) & TCP_AO_ESTABLISHED)) {
- enum skb_drop_reason err;
struct tcp_ao_key *current_key;
+ enum skb_drop_reason err;
+ unsigned long flags;
/* Check if this socket's rnext_key matches the keyid in the
* packet. If not we lookup the key based on the keyid
@@ -956,8 +961,11 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
if (unlikely(th->syn && !th->ack))
goto verify_hash;
- sne = tcp_ao_compute_sne(info->rcv_sne, tcp_sk(sk)->rcv_nxt,
+ read_lock_irqsave(&info->sne_lock, flags);
+ sne = tcp_ao_compute_sne(info->rcv_sne,
+ READ_ONCE(tcp_sk(sk)->rcv_nxt),
ntohl(th->seq));
+ read_unlock_irqrestore(&info->sne_lock, flags);
/* Established socket, traffic key are cached */
traffic_key = rcv_other_key(key);
err = tcp_ao_verify_hash(sk, skb, family, info, aoh, key,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bcb55d98004c..fc3c27ce2b73 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3582,8 +3582,14 @@ static void tcp_snd_sne_update(struct tcp_sock *tp, u32 ack)
ao = rcu_dereference_protected(tp->ao_info,
lockdep_sock_is_held((struct sock *)tp));
- if (ao && ack < tp->snd_una)
+ if (ao && ack < tp->snd_una) {
+ unsigned long flags;
+
+ write_lock_irqsave(&ao->sne_lock, flags);
ao->snd_sne++;
+ tp->snd_una = ack;
+ write_unlock_irqrestore(&ao->sne_lock, flags);
+ }
#endif
}
@@ -3608,8 +3614,14 @@ static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq)
ao = rcu_dereference_protected(tp->ao_info,
lockdep_sock_is_held((struct sock *)tp));
- if (ao && seq < tp->rcv_nxt)
+ if (ao && seq < tp->rcv_nxt) {
+ unsigned long flags;
+
+ write_lock_irqsave(&ao->sne_lock, flags);
ao->rcv_sne++;
+ WRITE_ONCE(tp->rcv_nxt, seq);
+ write_unlock_irqrestore(&ao->sne_lock, flags);
+ }
#endif
}
--
2.43.0
On Fri, 2023-11-24 at 00:27 +0000, Dmitry Safonov wrote:
> RFC 5925 (6.2):
> > TCP-AO emulates a 64-bit sequence number space by inferring when to
> > increment the high-order 32-bit portion (the SNE) based on
> > transitions in the low-order portion (the TCP sequence number).
>
> snd_sne and rcv_sne are the upper 4 bytes of extended SEQ number.
> Unfortunately, reading two 4-bytes pointers can't be performed
> atomically (without synchronization).
>
> Let's keep it KISS and add an rwlock - that shouldn't create much
> contention as SNE are updated every 4Gb of traffic and the atomic region
> is quite small.
>
> Fixes: 64382c71a557 ("net/tcp: Add TCP-AO SNE support")
> Signed-off-by: Dmitry Safonov <dima@arista.com>
> ---
> include/net/tcp_ao.h | 2 +-
> net/ipv4/tcp_ao.c | 34 +++++++++++++++++++++-------------
> net/ipv4/tcp_input.c | 16 ++++++++++++++--
> 3 files changed, 36 insertions(+), 16 deletions(-)
>
> diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
> index 647781080613..beea3e6b39e2 100644
> --- a/include/net/tcp_ao.h
> +++ b/include/net/tcp_ao.h
> @@ -123,6 +123,7 @@ struct tcp_ao_info {
> */
> u32 snd_sne;
> u32 rcv_sne;
> + rwlock_t sne_lock;
RW lock are problematic in the networking code, see commit
dbca1596bbb08318f5e3b3b99f8ca0a0d3830a65.
I think you can use a plain spinlock here, as both read and write
appears to be in the fastpath (?!?)
> @@ -781,8 +780,10 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb,
> *traffic_key = snd_other_key(*key);
> rnext_key = READ_ONCE(ao_info->rnext_key);
> *keyid = rnext_key->rcvid;
> - *sne = tcp_ao_compute_sne(READ_ONCE(ao_info->snd_sne),
> - snd_basis, seq);
> + read_lock_irqsave(&ao_info->sne_lock, flags);
> + *sne = tcp_ao_compute_sne(ao_info->snd_sne,
> + READ_ONCE(*snd_basis), seq);
> + read_unlock_irqrestore(&ao_info->sne_lock, flags);
Why are you using the irqsave variant? bh should suffice.
Cheers,
Paolo
On 11/27/23 11:41, Paolo Abeni wrote:
> On Fri, 2023-11-24 at 00:27 +0000, Dmitry Safonov wrote:
>> RFC 5925 (6.2):
>>> TCP-AO emulates a 64-bit sequence number space by inferring when to
>>> increment the high-order 32-bit portion (the SNE) based on
>>> transitions in the low-order portion (the TCP sequence number).
>>
>> snd_sne and rcv_sne are the upper 4 bytes of extended SEQ number.
>> Unfortunately, reading two 4-bytes pointers can't be performed
>> atomically (without synchronization).
>>
>> Let's keep it KISS and add an rwlock - that shouldn't create much
>> contention as SNE are updated every 4Gb of traffic and the atomic region
>> is quite small.
>>
>> Fixes: 64382c71a557 ("net/tcp: Add TCP-AO SNE support")
>> Signed-off-by: Dmitry Safonov <dima@arista.com>
>> ---
>> include/net/tcp_ao.h | 2 +-
>> net/ipv4/tcp_ao.c | 34 +++++++++++++++++++++-------------
>> net/ipv4/tcp_input.c | 16 ++++++++++++++--
>> 3 files changed, 36 insertions(+), 16 deletions(-)
>>
>> diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
>> index 647781080613..beea3e6b39e2 100644
>> --- a/include/net/tcp_ao.h
>> +++ b/include/net/tcp_ao.h
>> @@ -123,6 +123,7 @@ struct tcp_ao_info {
>> */
>> u32 snd_sne;
>> u32 rcv_sne;
>> + rwlock_t sne_lock;
>
> RW lock are problematic in the networking code, see commit
> dbca1596bbb08318f5e3b3b99f8ca0a0d3830a65.
Thanks, was not aware of this pitfall.
> I think you can use a plain spinlock here, as both read and write
> appears to be in the fastpath (?!?)
Yeah, I wanted to avoid to RX concurrency here as writing happens only
once in 4Gb. I'll take another attempt to prevent that in v3.
>> @@ -781,8 +780,10 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb,
>> *traffic_key = snd_other_key(*key);
>> rnext_key = READ_ONCE(ao_info->rnext_key);
>> *keyid = rnext_key->rcvid;
>> - *sne = tcp_ao_compute_sne(READ_ONCE(ao_info->snd_sne),
>> - snd_basis, seq);
>> + read_lock_irqsave(&ao_info->sne_lock, flags);
>> + *sne = tcp_ao_compute_sne(ao_info->snd_sne,
>> + READ_ONCE(*snd_basis), seq);
>> + read_unlock_irqrestore(&ao_info->sne_lock, flags);
>
> Why are you using the irqsave variant? bh should suffice.
It should, yes :)
>
> Cheers,
>
> Paolo
>
Thanks,
Dmitry
© 2016 - 2025 Red Hat, Inc.