A peer connected via UDP may change its IP address without reconnecting
(float).
Add support for detecting and updating the new peer IP/port in case of
floating.
Signed-off-by: Antonio Quartulli <antonio@openvpn.net>
---
drivers/net/ovpn/io.c | 8 ++
drivers/net/ovpn/peer.c | 243 ++++++++++++++++++++++++++++++++++++------------
drivers/net/ovpn/peer.h | 3 +-
3 files changed, 194 insertions(+), 60 deletions(-)
diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c
index 6ee1a40082ef637285d7f7f8183c53140583b716..5b673eae255033b9d7d6e7890a46686403d7c222 100644
--- a/drivers/net/ovpn/io.c
+++ b/drivers/net/ovpn/io.c
@@ -96,6 +96,7 @@ void ovpn_decrypt_post(void *data, int ret)
struct ovpn_crypto_key_slot *ks;
unsigned int payload_offset = 0;
struct sk_buff *skb = data;
+ struct ovpn_socket *sock;
struct ovpn_peer *peer;
__be16 proto;
__be32 *pid;
@@ -137,6 +138,13 @@ void ovpn_decrypt_post(void *data, int ret)
/* keep track of last received authenticated packet for keepalive */
WRITE_ONCE(peer->last_recv, ktime_get_real_seconds());
+ rcu_read_lock();
+ sock = rcu_dereference(peer->sock);
+ if (sock && sock->sock->sk->sk_protocol == IPPROTO_UDP)
+ /* check if this peer changed local or remote endpoint */
+ ovpn_peer_endpoints_update(peer, skb);
+ rcu_read_unlock();
+
/* point to encapsulated IP packet */
__skb_pull(skb, payload_offset);
diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
index 1803359b833d5797728566fb976e253e2a85ea75..156e45407e0a6e69f177a064053f81fe778a9dd8 100644
--- a/drivers/net/ovpn/peer.c
+++ b/drivers/net/ovpn/peer.c
@@ -127,6 +127,191 @@ struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id)
return peer;
}
+/**
+ * ovpn_peer_reset_sockaddr - recreate binding for peer
+ * @peer: peer to recreate the binding for
+ * @ss: sockaddr to use as remote endpoint for the binding
+ * @local_ip: local IP for the binding
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_peer_reset_sockaddr(struct ovpn_peer *peer,
+ const struct sockaddr_storage *ss,
+ const u8 *local_ip)
+{
+ struct ovpn_bind *bind;
+ size_t ip_len;
+
+ lockdep_assert_held(&peer->lock);
+
+ /* create new ovpn_bind object */
+ bind = ovpn_bind_from_sockaddr(ss);
+ if (IS_ERR(bind))
+ return PTR_ERR(bind);
+
+ if (local_ip) {
+ if (ss->ss_family == AF_INET) {
+ ip_len = sizeof(struct in_addr);
+ } else if (ss->ss_family == AF_INET6) {
+ ip_len = sizeof(struct in6_addr);
+ } else {
+ net_dbg_ratelimited("%s: invalid family %u for remote endpoint for peer %u\n",
+ netdev_name(peer->ovpn->dev),
+ ss->ss_family, peer->id);
+ kfree(bind);
+ return -EINVAL;
+ }
+
+ memcpy(&bind->local, local_ip, ip_len);
+ }
+
+ /* set binding */
+ ovpn_bind_reset(peer, bind);
+
+ return 0;
+}
+
+/* variable name __tbl2 needs to be different from __tbl1
+ * in the macro below to avoid confusing clang
+ */
+#define ovpn_get_hash_slot(_tbl, _key, _key_len) ({ \
+ typeof(_tbl) *__tbl2 = &(_tbl); \
+ jhash(_key, _key_len, 0) % HASH_SIZE(*__tbl2); \
+})
+
+#define ovpn_get_hash_head(_tbl, _key, _key_len) ({ \
+ typeof(_tbl) *__tbl1 = &(_tbl); \
+ &(*__tbl1)[ovpn_get_hash_slot(*__tbl1, _key, _key_len)];\
+})
+
+/**
+ * ovpn_peer_endpoints_update - update remote or local endpoint for peer
+ * @peer: peer to update the remote endpoint for
+ * @skb: incoming packet to retrieve the source/destination address from
+ */
+void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb)
+{
+ struct hlist_nulls_head *nhead;
+ struct sockaddr_storage ss;
+ const u8 *local_ip = NULL;
+ struct sockaddr_in6 *sa6;
+ struct sockaddr_in *sa;
+ struct ovpn_bind *bind;
+ size_t salen = 0;
+
+ spin_lock_bh(&peer->lock);
+ bind = rcu_dereference_protected(peer->bind,
+ lockdep_is_held(&peer->lock));
+ if (unlikely(!bind))
+ goto unlock;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ /* float check */
+ if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) {
+ if (bind->remote.in4.sin_family == AF_INET)
+ local_ip = (u8 *)&bind->local;
+ sa = (struct sockaddr_in *)&ss;
+ sa->sin_family = AF_INET;
+ sa->sin_addr.s_addr = ip_hdr(skb)->saddr;
+ sa->sin_port = udp_hdr(skb)->source;
+ salen = sizeof(*sa);
+ break;
+ }
+
+ /* local endpoint update */
+ if (unlikely(bind->local.ipv4.s_addr != ip_hdr(skb)->daddr)) {
+ net_dbg_ratelimited("%s: learning local IPv4 for peer %d (%pI4 -> %pI4)\n",
+ netdev_name(peer->ovpn->dev),
+ peer->id, &bind->local.ipv4.s_addr,
+ &ip_hdr(skb)->daddr);
+ bind->local.ipv4.s_addr = ip_hdr(skb)->daddr;
+ }
+ break;
+ case htons(ETH_P_IPV6):
+ /* float check */
+ if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) {
+ if (bind->remote.in6.sin6_family == AF_INET6)
+ local_ip = (u8 *)&bind->local;
+ sa6 = (struct sockaddr_in6 *)&ss;
+ sa6->sin6_family = AF_INET6;
+ sa6->sin6_addr = ipv6_hdr(skb)->saddr;
+ sa6->sin6_port = udp_hdr(skb)->source;
+ sa6->sin6_scope_id = ipv6_iface_scope_id(&ipv6_hdr(skb)->saddr,
+ skb->skb_iif);
+ salen = sizeof(*sa6);
+ }
+
+ /* local endpoint update */
+ if (unlikely(!ipv6_addr_equal(&bind->local.ipv6,
+ &ipv6_hdr(skb)->daddr))) {
+ net_dbg_ratelimited("%s: learning local IPv6 for peer %d (%pI6c -> %pI6c\n",
+ netdev_name(peer->ovpn->dev),
+ peer->id, &bind->local.ipv6,
+ &ipv6_hdr(skb)->daddr);
+ bind->local.ipv6 = ipv6_hdr(skb)->daddr;
+ }
+ break;
+ default:
+ goto unlock;
+ }
+
+ /* if the peer did not float, we can bail out now */
+ if (likely(!salen))
+ goto unlock;
+
+ if (unlikely(ovpn_peer_reset_sockaddr(peer,
+ (struct sockaddr_storage *)&ss,
+ local_ip) < 0))
+ goto unlock;
+
+ net_dbg_ratelimited("%s: peer %d floated to %pIScp",
+ netdev_name(peer->ovpn->dev), peer->id, &ss);
+
+ spin_unlock_bh(&peer->lock);
+
+ /* rehashing is required only in MP mode as P2P has one peer
+ * only and thus there is no hashtable
+ */
+ if (peer->ovpn->mode == OVPN_MODE_MP) {
+ spin_lock_bh(&peer->ovpn->lock);
+ spin_lock_bh(&peer->lock);
+ bind = rcu_dereference_protected(peer->bind,
+ lockdep_is_held(&peer->lock));
+ if (unlikely(!bind)) {
+ spin_unlock_bh(&peer->lock);
+ spin_unlock_bh(&peer->ovpn->lock);
+ return;
+ }
+
+ /* his function may be invoked concurrently, therefore another
+ * float may have happened in parallel: perform rehashing
+ * using the peer->bind->remote directly as key
+ */
+
+ switch (bind->remote.in4.sin_family) {
+ case AF_INET:
+ salen = sizeof(*sa);
+ break;
+ case AF_INET6:
+ salen = sizeof(*sa6);
+ break;
+ }
+
+ /* remove old hashing */
+ hlist_nulls_del_init_rcu(&peer->hash_entry_transp_addr);
+ /* re-add with new transport address */
+ nhead = ovpn_get_hash_head(peer->ovpn->peers->by_transp_addr,
+ &bind->remote, salen);
+ hlist_nulls_add_head_rcu(&peer->hash_entry_transp_addr, nhead);
+ spin_unlock_bh(&peer->lock);
+ spin_unlock_bh(&peer->ovpn->lock);
+ }
+ return;
+unlock:
+ spin_unlock_bh(&peer->lock);
+}
+
/**
* ovpn_peer_release_rcu - RCU callback performing last peer release steps
* @head: RCU member of the ovpn_peer
@@ -230,19 +415,6 @@ static struct in6_addr ovpn_nexthop_from_skb6(struct sk_buff *skb)
return rt->rt6i_gateway;
}
-/* variable name __tbl2 needs to be different from __tbl1
- * in the macro below to avoid confusing clang
- */
-#define ovpn_get_hash_slot(_tbl, _key, _key_len) ({ \
- typeof(_tbl) *__tbl2 = &(_tbl); \
- jhash(_key, _key_len, 0) % HASH_SIZE(*__tbl2); \
-})
-
-#define ovpn_get_hash_head(_tbl, _key, _key_len) ({ \
- typeof(_tbl) *__tbl1 = &(_tbl); \
- &(*__tbl1)[ovpn_get_hash_slot(*__tbl1, _key, _key_len)];\
-})
-
/**
* ovpn_peer_get_by_vpn_addr4 - retrieve peer by its VPN IPv4 address
* @ovpn: the openvpn instance to search
@@ -522,51 +694,6 @@ static void ovpn_peer_remove(struct ovpn_peer *peer,
llist_add(&peer->release_entry, release_list);
}
-/**
- * ovpn_peer_update_local_endpoint - update local endpoint for peer
- * @peer: peer to update the endpoint for
- * @skb: incoming packet to retrieve the destination address (local) from
- */
-void ovpn_peer_update_local_endpoint(struct ovpn_peer *peer,
- struct sk_buff *skb)
-{
- struct ovpn_bind *bind;
-
- rcu_read_lock();
- bind = rcu_dereference(peer->bind);
- if (unlikely(!bind))
- goto unlock;
-
- spin_lock_bh(&peer->lock);
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- if (unlikely(bind->local.ipv4.s_addr != ip_hdr(skb)->daddr)) {
- net_dbg_ratelimited("%s: learning local IPv4 for peer %d (%pI4 -> %pI4)\n",
- netdev_name(peer->ovpn->dev),
- peer->id, &bind->local.ipv4.s_addr,
- &ip_hdr(skb)->daddr);
- bind->local.ipv4.s_addr = ip_hdr(skb)->daddr;
- }
- break;
- case htons(ETH_P_IPV6):
- if (unlikely(!ipv6_addr_equal(&bind->local.ipv6,
- &ipv6_hdr(skb)->daddr))) {
- net_dbg_ratelimited("%s: learning local IPv6 for peer %d (%pI6c -> %pI6c\n",
- netdev_name(peer->ovpn->dev),
- peer->id, &bind->local.ipv6,
- &ipv6_hdr(skb)->daddr);
- bind->local.ipv6 = ipv6_hdr(skb)->daddr;
- }
- break;
- default:
- break;
- }
- spin_unlock_bh(&peer->lock);
-
-unlock:
- rcu_read_unlock();
-}
-
/**
* ovpn_peer_get_by_dst - Lookup peer to send skb to
* @ovpn: the private data representing the current VPN session
diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
index d90ccc313cc3af24ffa6df8bb41bca15fbb022ad..f1288734ff100ee76b0c41ebb6dc71725ea33261 100644
--- a/drivers/net/ovpn/peer.h
+++ b/drivers/net/ovpn/peer.h
@@ -153,7 +153,6 @@ bool ovpn_peer_check_by_src(struct ovpn_priv *ovpn, struct sk_buff *skb,
void ovpn_peer_keepalive_set(struct ovpn_peer *peer, u32 interval, u32 timeout);
void ovpn_peer_keepalive_work(struct work_struct *work);
-void ovpn_peer_update_local_endpoint(struct ovpn_peer *peer,
- struct sk_buff *skb);
+void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb);
#endif /* _NET_OVPN_OVPNPEER_H_ */
--
2.45.3
2025-03-04, 01:33:48 +0100, Antonio Quartulli wrote: > A peer connected via UDP may change its IP address without reconnecting > (float). Should that trigger a reset of the peer->dst_cache? And same when userspace updates the remote address? Otherwise it seems we could be stuck with a cached dst that cannot reach the peer. > +void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb) > +{ > + struct hlist_nulls_head *nhead; > + struct sockaddr_storage ss; > + const u8 *local_ip = NULL; > + struct sockaddr_in6 *sa6; > + struct sockaddr_in *sa; > + struct ovpn_bind *bind; > + size_t salen = 0; > + > + spin_lock_bh(&peer->lock); > + bind = rcu_dereference_protected(peer->bind, > + lockdep_is_held(&peer->lock)); > + if (unlikely(!bind)) > + goto unlock; > + > + switch (skb->protocol) { > + case htons(ETH_P_IP): > + /* float check */ > + if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) { > + if (bind->remote.in4.sin_family == AF_INET) > + local_ip = (u8 *)&bind->local; If I'm reading this correctly, we always reuse the existing local address when we have to re-create the bind, even if it doesn't match the skb? The "local endpoint update" chunk below is doing that, but only if we're keeping the same remote? It'll get updated the next time we receive a packet and call ovpn_peer_endpoints_update. That might irritate the RPF check on the other side, if we still use our "old" source to talk to the new dest? > + sa = (struct sockaddr_in *)&ss; > + sa->sin_family = AF_INET; > + sa->sin_addr.s_addr = ip_hdr(skb)->saddr; > + sa->sin_port = udp_hdr(skb)->source; > + salen = sizeof(*sa); > + break; > + } > + > + /* local endpoint update */ > + if (unlikely(bind->local.ipv4.s_addr != ip_hdr(skb)->daddr)) { > + net_dbg_ratelimited("%s: learning local IPv4 for peer %d (%pI4 -> %pI4)\n", > + netdev_name(peer->ovpn->dev), > + peer->id, &bind->local.ipv4.s_addr, > + &ip_hdr(skb)->daddr); > + bind->local.ipv4.s_addr = ip_hdr(skb)->daddr; > + } > + break; [...] > + if (peer->ovpn->mode == OVPN_MODE_MP) { > + spin_lock_bh(&peer->ovpn->lock); > + spin_lock_bh(&peer->lock); > + bind = rcu_dereference_protected(peer->bind, > + lockdep_is_held(&peer->lock)); > + if (unlikely(!bind)) { > + spin_unlock_bh(&peer->lock); > + spin_unlock_bh(&peer->ovpn->lock); > + return; > + } > + > + /* his function may be invoked concurrently, therefore another typo: ^ This [...] > -/* variable name __tbl2 needs to be different from __tbl1 > - * in the macro below to avoid confusing clang > - */ > -#define ovpn_get_hash_slot(_tbl, _key, _key_len) ({ \ > - typeof(_tbl) *__tbl2 = &(_tbl); \ > - jhash(_key, _key_len, 0) % HASH_SIZE(*__tbl2); \ > -}) > - > -#define ovpn_get_hash_head(_tbl, _key, _key_len) ({ \ > - typeof(_tbl) *__tbl1 = &(_tbl); \ > - &(*__tbl1)[ovpn_get_hash_slot(*__tbl1, _key, _key_len)];\ > -}) > - > /** > * ovpn_peer_get_by_vpn_addr4 - retrieve peer by its VPN IPv4 address > * @ovpn: the openvpn instance to search > @@ -522,51 +694,6 @@ static void ovpn_peer_remove(struct ovpn_peer *peer, > llist_add(&peer->release_entry, release_list); > } > > -/** > - * ovpn_peer_update_local_endpoint - update local endpoint for peer > - * @peer: peer to update the endpoint for > - * @skb: incoming packet to retrieve the destination address (local) from > - */ > -void ovpn_peer_update_local_endpoint(struct ovpn_peer *peer, > - struct sk_buff *skb) > -{ > - struct ovpn_bind *bind; > - > - rcu_read_lock(); > - bind = rcu_dereference(peer->bind); > - if (unlikely(!bind)) > - goto unlock; > - > - spin_lock_bh(&peer->lock); > - switch (skb->protocol) { > - case htons(ETH_P_IP): > - if (unlikely(bind->local.ipv4.s_addr != ip_hdr(skb)->daddr)) { > - net_dbg_ratelimited("%s: learning local IPv4 for peer %d (%pI4 -> %pI4)\n", > - netdev_name(peer->ovpn->dev), > - peer->id, &bind->local.ipv4.s_addr, > - &ip_hdr(skb)->daddr); > - bind->local.ipv4.s_addr = ip_hdr(skb)->daddr; > - } > - break; > - case htons(ETH_P_IPV6): > - if (unlikely(!ipv6_addr_equal(&bind->local.ipv6, > - &ipv6_hdr(skb)->daddr))) { > - net_dbg_ratelimited("%s: learning local IPv6 for peer %d (%pI6c -> %pI6c\n", > - netdev_name(peer->ovpn->dev), > - peer->id, &bind->local.ipv6, > - &ipv6_hdr(skb)->daddr); > - bind->local.ipv6 = ipv6_hdr(skb)->daddr; > - } > - break; > - default: > - break; > - } > - spin_unlock_bh(&peer->lock); > - > -unlock: > - rcu_read_unlock(); > -} I guess you could squash this and the previous patch into one to reduce the churn (and get a little bit closer to the 15-patch limit :)) -- Sabrina
On 04/03/2025 19:37, Sabrina Dubroca wrote: > 2025-03-04, 01:33:48 +0100, Antonio Quartulli wrote: >> A peer connected via UDP may change its IP address without reconnecting >> (float). > > Should that trigger a reset of the peer->dst_cache? And same when > userspace updates the remote address? Otherwise it seems we could be > stuck with a cached dst that cannot reach the peer. Yeah, that make sense, otherwise ovpn_udpX_output would just try over and over to re-use the cached source address (unless it becomes unavailable). > > >> +void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb) >> +{ >> + struct hlist_nulls_head *nhead; >> + struct sockaddr_storage ss; >> + const u8 *local_ip = NULL; >> + struct sockaddr_in6 *sa6; >> + struct sockaddr_in *sa; >> + struct ovpn_bind *bind; >> + size_t salen = 0; >> + >> + spin_lock_bh(&peer->lock); >> + bind = rcu_dereference_protected(peer->bind, >> + lockdep_is_held(&peer->lock)); >> + if (unlikely(!bind)) >> + goto unlock; >> + >> + switch (skb->protocol) { >> + case htons(ETH_P_IP): >> + /* float check */ >> + if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) { >> + if (bind->remote.in4.sin_family == AF_INET) >> + local_ip = (u8 *)&bind->local; > > If I'm reading this correctly, we always reuse the existing local > address when we have to re-create the bind, even if it doesn't match > the skb? The "local endpoint update" chunk below is doing that, but > only if we're keeping the same remote? It'll get updated the next time > we receive a packet and call ovpn_peer_endpoints_update. > > That might irritate the RPF check on the other side, if we still use > our "old" source to talk to the new dest? > >> + sa = (struct sockaddr_in *)&ss; >> + sa->sin_family = AF_INET; >> + sa->sin_addr.s_addr = ip_hdr(skb)->saddr; >> + sa->sin_port = udp_hdr(skb)->source; >> + salen = sizeof(*sa); >> + break; I think the issue is simply this 'break' above - by removing it, everything should work as expected. I thin this is a leftover from when float check and endpoint update were two different functions/switch blocks. >> + } >> + >> + /* local endpoint update */ >> + if (unlikely(bind->local.ipv4.s_addr != ip_hdr(skb)->daddr)) { >> + net_dbg_ratelimited("%s: learning local IPv4 for peer %d (%pI4 -> %pI4)\n", >> + netdev_name(peer->ovpn->dev), >> + peer->id, &bind->local.ipv4.s_addr, >> + &ip_hdr(skb)->daddr); >> + bind->local.ipv4.s_addr = ip_hdr(skb)->daddr; >> + } >> + break; > > [...] >> + if (peer->ovpn->mode == OVPN_MODE_MP) { >> + spin_lock_bh(&peer->ovpn->lock); >> + spin_lock_bh(&peer->lock); >> + bind = rcu_dereference_protected(peer->bind, >> + lockdep_is_held(&peer->lock)); >> + if (unlikely(!bind)) { >> + spin_unlock_bh(&peer->lock); >> + spin_unlock_bh(&peer->ovpn->lock); >> + return; >> + } >> + >> + /* his function may be invoked concurrently, therefore another > > typo: > ^ This ACK > > > [...] >> -/* variable name __tbl2 needs to be different from __tbl1 >> - * in the macro below to avoid confusing clang >> - */ >> -#define ovpn_get_hash_slot(_tbl, _key, _key_len) ({ \ >> - typeof(_tbl) *__tbl2 = &(_tbl); \ >> - jhash(_key, _key_len, 0) % HASH_SIZE(*__tbl2); \ >> -}) >> - >> -#define ovpn_get_hash_head(_tbl, _key, _key_len) ({ \ >> - typeof(_tbl) *__tbl1 = &(_tbl); \ >> - &(*__tbl1)[ovpn_get_hash_slot(*__tbl1, _key, _key_len)];\ >> -}) >> - >> /** >> * ovpn_peer_get_by_vpn_addr4 - retrieve peer by its VPN IPv4 address >> * @ovpn: the openvpn instance to search >> @@ -522,51 +694,6 @@ static void ovpn_peer_remove(struct ovpn_peer *peer, >> llist_add(&peer->release_entry, release_list); >> } >> >> -/** >> - * ovpn_peer_update_local_endpoint - update local endpoint for peer >> - * @peer: peer to update the endpoint for >> - * @skb: incoming packet to retrieve the destination address (local) from >> - */ >> -void ovpn_peer_update_local_endpoint(struct ovpn_peer *peer, >> - struct sk_buff *skb) >> -{ >> - struct ovpn_bind *bind; >> - >> - rcu_read_lock(); >> - bind = rcu_dereference(peer->bind); >> - if (unlikely(!bind)) >> - goto unlock; >> - >> - spin_lock_bh(&peer->lock); >> - switch (skb->protocol) { >> - case htons(ETH_P_IP): >> - if (unlikely(bind->local.ipv4.s_addr != ip_hdr(skb)->daddr)) { >> - net_dbg_ratelimited("%s: learning local IPv4 for peer %d (%pI4 -> %pI4)\n", >> - netdev_name(peer->ovpn->dev), >> - peer->id, &bind->local.ipv4.s_addr, >> - &ip_hdr(skb)->daddr); >> - bind->local.ipv4.s_addr = ip_hdr(skb)->daddr; >> - } >> - break; >> - case htons(ETH_P_IPV6): >> - if (unlikely(!ipv6_addr_equal(&bind->local.ipv6, >> - &ipv6_hdr(skb)->daddr))) { >> - net_dbg_ratelimited("%s: learning local IPv6 for peer %d (%pI6c -> %pI6c\n", >> - netdev_name(peer->ovpn->dev), >> - peer->id, &bind->local.ipv6, >> - &ipv6_hdr(skb)->daddr); >> - bind->local.ipv6 = ipv6_hdr(skb)->daddr; >> - } >> - break; >> - default: >> - break; >> - } >> - spin_unlock_bh(&peer->lock); >> - >> -unlock: >> - rcu_read_unlock(); >> -} > > I guess you could squash this and the previous patch into one to > reduce the churn (and get a little bit closer to the 15-patch limit :)) Hehe I can see you're getting familiar with the code :-) Will do! Thanks! > -- Antonio Quartulli OpenVPN Inc.
2025-03-05, 00:19:32 +0100, Antonio Quartulli wrote: > On 04/03/2025 19:37, Sabrina Dubroca wrote: > > 2025-03-04, 01:33:48 +0100, Antonio Quartulli wrote: > > > A peer connected via UDP may change its IP address without reconnecting > > > (float). > > > > Should that trigger a reset of the peer->dst_cache? And same when > > userspace updates the remote address? Otherwise it seems we could be > > stuck with a cached dst that cannot reach the peer. > > Yeah, that make sense, otherwise ovpn_udpX_output would just try over and > over to re-use the cached source address (unless it becomes unavailable). Not just the source address, the routing entry too. I'm more concerned about that: trying to reuse a a cached routing entry that was good for the previous remote address, but not for the new one. [adding your next email] > I spent some more time thinking about this. > It makes sense to reset the dst cache when the local address changes, but > not in case of float (remote address changed). > > That's because we always want to first attempt sending packets using the > address where the remote peer sent the traffic to. > Should that not work (quite rare), then we have code in ovpn_udpX_output > that will reset the cache and attempt a different address. I don't think the code in ovpn_udpX_output will reset the cache unless it was made invalid by a system-wide routing table update (see dst_cache_per_cpu_get). rt = dst_cache_get_ip4(cache, &fl.saddr); if (rt) goto transmit; ... transmit: udp_tunnel_xmit_skb(rt, sk, skb, fl.saddr, fl.daddr, 0, ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, fl.fl4_dport, false, sk->sk_no_check_tx); So it seems that as long as dst_cache_get_ip4 gets us a dst (which AFAIU will happen, unless we did a dst_cache_reset or something else made the cached dst invalid -- and ovpn's floating/endpoint update doesn't do that), we'll just use it. > > > +void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb) > > > +{ > > > + struct hlist_nulls_head *nhead; > > > + struct sockaddr_storage ss; > > > + const u8 *local_ip = NULL; > > > + struct sockaddr_in6 *sa6; > > > + struct sockaddr_in *sa; > > > + struct ovpn_bind *bind; > > > + size_t salen = 0; > > > + > > > + spin_lock_bh(&peer->lock); > > > + bind = rcu_dereference_protected(peer->bind, > > > + lockdep_is_held(&peer->lock)); > > > + if (unlikely(!bind)) > > > + goto unlock; > > > + > > > + switch (skb->protocol) { > > > + case htons(ETH_P_IP): > > > + /* float check */ > > > + if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) { > > > + if (bind->remote.in4.sin_family == AF_INET) > > > + local_ip = (u8 *)&bind->local; > > > > If I'm reading this correctly, we always reuse the existing local > > address when we have to re-create the bind, even if it doesn't match > > the skb? The "local endpoint update" chunk below is doing that, but > > only if we're keeping the same remote? It'll get updated the next time > > we receive a packet and call ovpn_peer_endpoints_update. > > > > That might irritate the RPF check on the other side, if we still use > > our "old" source to talk to the new dest? > > > > > + sa = (struct sockaddr_in *)&ss; > > > + sa->sin_family = AF_INET; > > > + sa->sin_addr.s_addr = ip_hdr(skb)->saddr; > > > + sa->sin_port = udp_hdr(skb)->source; > > > + salen = sizeof(*sa); > > > + break; > > I think the issue is simply this 'break' above - by removing it, everything > should work as expected. Only if the bind was of the correct family? Checking an IPv4 local address (in the bind) against an IPv6 source address in the packet (or the other way around) isn't going to work well. > I thin this is a leftover from when float check and endpoint update were two > different functions/switch blocks. > > > > + } > > > + > > > + /* local endpoint update */ > > > + if (unlikely(bind->local.ipv4.s_addr != ip_hdr(skb)->daddr)) { > > > + net_dbg_ratelimited("%s: learning local IPv4 for peer %d (%pI4 -> %pI4)\n", > > > + netdev_name(peer->ovpn->dev), > > > + peer->id, &bind->local.ipv4.s_addr, > > > + &ip_hdr(skb)->daddr); > > > + bind->local.ipv4.s_addr = ip_hdr(skb)->daddr; > > > + } > > > + break; -- Sabrina
On 05/03/2025 12:20, Sabrina Dubroca wrote: > 2025-03-05, 00:19:32 +0100, Antonio Quartulli wrote: >> On 04/03/2025 19:37, Sabrina Dubroca wrote: >>> 2025-03-04, 01:33:48 +0100, Antonio Quartulli wrote: >>>> A peer connected via UDP may change its IP address without reconnecting >>>> (float). >>> >>> Should that trigger a reset of the peer->dst_cache? And same when >>> userspace updates the remote address? Otherwise it seems we could be >>> stuck with a cached dst that cannot reach the peer. >> >> Yeah, that make sense, otherwise ovpn_udpX_output would just try over and >> over to re-use the cached source address (unless it becomes unavailable). > > Not just the source address, the routing entry too. I'm more concerned > about that: trying to reuse a a cached routing entry that was good for > the previous remote address, but not for the new one. > > > [adding your next email] >> I spent some more time thinking about this. >> It makes sense to reset the dst cache when the local address changes, but >> not in case of float (remote address changed). >> >> That's because we always want to first attempt sending packets using the >> address where the remote peer sent the traffic to. >> Should that not work (quite rare), then we have code in ovpn_udpX_output >> that will reset the cache and attempt a different address. > > I don't think the code in ovpn_udpX_output will reset the cache unless > it was made invalid by a system-wide routing table update (see > dst_cache_per_cpu_get). > > rt = dst_cache_get_ip4(cache, &fl.saddr); > if (rt) > goto transmit; > ... > transmit: > udp_tunnel_xmit_skb(rt, sk, skb, fl.saddr, fl.daddr, 0, > ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, > fl.fl4_dport, false, sk->sk_no_check_tx); > > > So it seems that as long as dst_cache_get_ip4 gets us a dst (which > AFAIU will happen, unless we did a dst_cache_reset or something else > made the cached dst invalid -- and ovpn's floating/endpoint update > doesn't do that), we'll just use it. Mh yeah, you're right. Then I'll reset the cache also when a float is detected. > > >>>> +void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb) >>>> +{ >>>> + struct hlist_nulls_head *nhead; >>>> + struct sockaddr_storage ss; >>>> + const u8 *local_ip = NULL; >>>> + struct sockaddr_in6 *sa6; >>>> + struct sockaddr_in *sa; >>>> + struct ovpn_bind *bind; >>>> + size_t salen = 0; >>>> + >>>> + spin_lock_bh(&peer->lock); >>>> + bind = rcu_dereference_protected(peer->bind, >>>> + lockdep_is_held(&peer->lock)); >>>> + if (unlikely(!bind)) >>>> + goto unlock; >>>> + >>>> + switch (skb->protocol) { >>>> + case htons(ETH_P_IP): >>>> + /* float check */ >>>> + if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) { >>>> + if (bind->remote.in4.sin_family == AF_INET) >>>> + local_ip = (u8 *)&bind->local; >>> >>> If I'm reading this correctly, we always reuse the existing local >>> address when we have to re-create the bind, even if it doesn't match >>> the skb? The "local endpoint update" chunk below is doing that, but >>> only if we're keeping the same remote? It'll get updated the next time >>> we receive a packet and call ovpn_peer_endpoints_update. >>> >>> That might irritate the RPF check on the other side, if we still use >>> our "old" source to talk to the new dest? >>> >>>> + sa = (struct sockaddr_in *)&ss; >>>> + sa->sin_family = AF_INET; >>>> + sa->sin_addr.s_addr = ip_hdr(skb)->saddr; >>>> + sa->sin_port = udp_hdr(skb)->source; >>>> + salen = sizeof(*sa); >>>> + break; >> >> I think the issue is simply this 'break' above - by removing it, everything >> should work as expected. > > Only if the bind was of the correct family? Checking an IPv4 local > address (in the bind) against an IPv6 source address in the packet (or > the other way around) isn't going to work well. Ah I understand what you mean. The purpose of "local_ip" is to provide a working local endpoint to be used with the new remote address. However, if the float is switching family we can't re-use the same old local endpoint (hence the check). In this case we'll learn the "new" local address later. Does it make sense? Cheers, > > >> I thin this is a leftover from when float check and endpoint update were two >> different functions/switch blocks. >> >>>> + } >>>> + >>>> + /* local endpoint update */ >>>> + if (unlikely(bind->local.ipv4.s_addr != ip_hdr(skb)->daddr)) { >>>> + net_dbg_ratelimited("%s: learning local IPv4 for peer %d (%pI4 -> %pI4)\n", >>>> + netdev_name(peer->ovpn->dev), >>>> + peer->id, &bind->local.ipv4.s_addr, >>>> + &ip_hdr(skb)->daddr); >>>> + bind->local.ipv4.s_addr = ip_hdr(skb)->daddr; >>>> + } >>>> + break; > -- Antonio Quartulli OpenVPN Inc.
2025-03-05, 14:14:36 +0100, Antonio Quartulli wrote: > On 05/03/2025 12:20, Sabrina Dubroca wrote: > > 2025-03-05, 00:19:32 +0100, Antonio Quartulli wrote: > > > On 04/03/2025 19:37, Sabrina Dubroca wrote: > > > > 2025-03-04, 01:33:48 +0100, Antonio Quartulli wrote: > > > > > A peer connected via UDP may change its IP address without reconnecting > > > > > (float). > > > > > > > > Should that trigger a reset of the peer->dst_cache? And same when > > > > userspace updates the remote address? Otherwise it seems we could be > > > > stuck with a cached dst that cannot reach the peer. > > > > > > Yeah, that make sense, otherwise ovpn_udpX_output would just try over and > > > over to re-use the cached source address (unless it becomes unavailable). > > > > Not just the source address, the routing entry too. I'm more concerned > > about that: trying to reuse a a cached routing entry that was good for > > the previous remote address, but not for the new one. > > > > > > [adding your next email] > > > I spent some more time thinking about this. > > > It makes sense to reset the dst cache when the local address changes, but > > > not in case of float (remote address changed). > > > > > > That's because we always want to first attempt sending packets using the > > > address where the remote peer sent the traffic to. > > > Should that not work (quite rare), then we have code in ovpn_udpX_output > > > that will reset the cache and attempt a different address. > > > > I don't think the code in ovpn_udpX_output will reset the cache unless > > it was made invalid by a system-wide routing table update (see > > dst_cache_per_cpu_get). > > > > rt = dst_cache_get_ip4(cache, &fl.saddr); > > if (rt) > > goto transmit; > > ... > > transmit: > > udp_tunnel_xmit_skb(rt, sk, skb, fl.saddr, fl.daddr, 0, > > ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, > > fl.fl4_dport, false, sk->sk_no_check_tx); > > > > > > So it seems that as long as dst_cache_get_ip4 gets us a dst (which > > AFAIU will happen, unless we did a dst_cache_reset or something else > > made the cached dst invalid -- and ovpn's floating/endpoint update > > doesn't do that), we'll just use it. > > Mh yeah, you're right. > Then I'll reset the cache also when a float is detected. Ok, thanks. > > > > > > > > > +void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb) > > > > > +{ > > > > > + struct hlist_nulls_head *nhead; > > > > > + struct sockaddr_storage ss; > > > > > + const u8 *local_ip = NULL; > > > > > + struct sockaddr_in6 *sa6; > > > > > + struct sockaddr_in *sa; > > > > > + struct ovpn_bind *bind; > > > > > + size_t salen = 0; > > > > > + > > > > > + spin_lock_bh(&peer->lock); > > > > > + bind = rcu_dereference_protected(peer->bind, > > > > > + lockdep_is_held(&peer->lock)); > > > > > + if (unlikely(!bind)) > > > > > + goto unlock; > > > > > + > > > > > + switch (skb->protocol) { > > > > > + case htons(ETH_P_IP): > > > > > + /* float check */ > > > > > + if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) { > > > > > + if (bind->remote.in4.sin_family == AF_INET) > > > > > + local_ip = (u8 *)&bind->local; > > > > > > > > If I'm reading this correctly, we always reuse the existing local > > > > address when we have to re-create the bind, even if it doesn't match > > > > the skb? The "local endpoint update" chunk below is doing that, but > > > > only if we're keeping the same remote? It'll get updated the next time > > > > we receive a packet and call ovpn_peer_endpoints_update. > > > > > > > > That might irritate the RPF check on the other side, if we still use > > > > our "old" source to talk to the new dest? > > > > > > > > > + sa = (struct sockaddr_in *)&ss; > > > > > + sa->sin_family = AF_INET; > > > > > + sa->sin_addr.s_addr = ip_hdr(skb)->saddr; > > > > > + sa->sin_port = udp_hdr(skb)->source; > > > > > + salen = sizeof(*sa); > > > > > + break; > > > > > > I think the issue is simply this 'break' above - by removing it, everything > > > should work as expected. > > > > Only if the bind was of the correct family? Checking an IPv4 local > > address (in the bind) against an IPv6 source address in the packet (or > > the other way around) isn't going to work well. > > Ah I understand what you mean. > > The purpose of "local_ip" is to provide a working local endpoint to be used > with the new remote address. > However, if the float is switching family we can't re-use the same old local > endpoint (hence the check). > In this case we'll learn the "new" local address later. > > Does it make sense? Sure, but we could have learned it immediately from the packet we just got, whether we're changing family or not. No need to wait for the next RX packet to also learn the new local address. But if we now do a dst_cache_reset with the peer float, ovpn_udp*_output will have to do a new route/local address lookup and I guess that should clean up the local address stored in the bind, and then update the dst_cache with the local address we just found. -- Sabrina
On 05/03/2025 17:56, Sabrina Dubroca wrote: > 2025-03-05, 14:14:36 +0100, Antonio Quartulli wrote: >> On 05/03/2025 12:20, Sabrina Dubroca wrote: >>> 2025-03-05, 00:19:32 +0100, Antonio Quartulli wrote: >>>> On 04/03/2025 19:37, Sabrina Dubroca wrote: >>>>> 2025-03-04, 01:33:48 +0100, Antonio Quartulli wrote: >>>>>> A peer connected via UDP may change its IP address without reconnecting >>>>>> (float). >>>>> >>>>> Should that trigger a reset of the peer->dst_cache? And same when >>>>> userspace updates the remote address? Otherwise it seems we could be >>>>> stuck with a cached dst that cannot reach the peer. >>>> >>>> Yeah, that make sense, otherwise ovpn_udpX_output would just try over and >>>> over to re-use the cached source address (unless it becomes unavailable). >>> >>> Not just the source address, the routing entry too. I'm more concerned >>> about that: trying to reuse a a cached routing entry that was good for >>> the previous remote address, but not for the new one. >>> >>> >>> [adding your next email] >>>> I spent some more time thinking about this. >>>> It makes sense to reset the dst cache when the local address changes, but >>>> not in case of float (remote address changed). >>>> >>>> That's because we always want to first attempt sending packets using the >>>> address where the remote peer sent the traffic to. >>>> Should that not work (quite rare), then we have code in ovpn_udpX_output >>>> that will reset the cache and attempt a different address. >>> >>> I don't think the code in ovpn_udpX_output will reset the cache unless >>> it was made invalid by a system-wide routing table update (see >>> dst_cache_per_cpu_get). >>> >>> rt = dst_cache_get_ip4(cache, &fl.saddr); >>> if (rt) >>> goto transmit; >>> ... >>> transmit: >>> udp_tunnel_xmit_skb(rt, sk, skb, fl.saddr, fl.daddr, 0, >>> ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, >>> fl.fl4_dport, false, sk->sk_no_check_tx); >>> >>> >>> So it seems that as long as dst_cache_get_ip4 gets us a dst (which >>> AFAIU will happen, unless we did a dst_cache_reset or something else >>> made the cached dst invalid -- and ovpn's floating/endpoint update >>> doesn't do that), we'll just use it. >> >> Mh yeah, you're right. >> Then I'll reset the cache also when a float is detected. > > Ok, thanks. > >>> >>> >>>>>> +void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb) >>>>>> +{ >>>>>> + struct hlist_nulls_head *nhead; >>>>>> + struct sockaddr_storage ss; >>>>>> + const u8 *local_ip = NULL; >>>>>> + struct sockaddr_in6 *sa6; >>>>>> + struct sockaddr_in *sa; >>>>>> + struct ovpn_bind *bind; >>>>>> + size_t salen = 0; >>>>>> + >>>>>> + spin_lock_bh(&peer->lock); >>>>>> + bind = rcu_dereference_protected(peer->bind, >>>>>> + lockdep_is_held(&peer->lock)); >>>>>> + if (unlikely(!bind)) >>>>>> + goto unlock; >>>>>> + >>>>>> + switch (skb->protocol) { >>>>>> + case htons(ETH_P_IP): >>>>>> + /* float check */ >>>>>> + if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) { >>>>>> + if (bind->remote.in4.sin_family == AF_INET) >>>>>> + local_ip = (u8 *)&bind->local; >>>>> >>>>> If I'm reading this correctly, we always reuse the existing local >>>>> address when we have to re-create the bind, even if it doesn't match >>>>> the skb? The "local endpoint update" chunk below is doing that, but >>>>> only if we're keeping the same remote? It'll get updated the next time >>>>> we receive a packet and call ovpn_peer_endpoints_update. >>>>> >>>>> That might irritate the RPF check on the other side, if we still use >>>>> our "old" source to talk to the new dest? >>>>> >>>>>> + sa = (struct sockaddr_in *)&ss; >>>>>> + sa->sin_family = AF_INET; >>>>>> + sa->sin_addr.s_addr = ip_hdr(skb)->saddr; >>>>>> + sa->sin_port = udp_hdr(skb)->source; >>>>>> + salen = sizeof(*sa); >>>>>> + break; >>>> >>>> I think the issue is simply this 'break' above - by removing it, everything >>>> should work as expected. >>> >>> Only if the bind was of the correct family? Checking an IPv4 local >>> address (in the bind) against an IPv6 source address in the packet (or >>> the other way around) isn't going to work well. >> >> Ah I understand what you mean. >> >> The purpose of "local_ip" is to provide a working local endpoint to be used >> with the new remote address. >> However, if the float is switching family we can't re-use the same old local >> endpoint (hence the check). >> In this case we'll learn the "new" local address later. >> >> Does it make sense? > > Sure, but we could have learned it immediately from the packet we just > got, whether we're changing family or not. No need to wait for the > next RX packet to also learn the new local address. Indeed. > > But if we now do a dst_cache_reset with the peer float, > ovpn_udp*_output will have to do a new route/local address lookup and > I guess that should clean up the local address stored in the bind, and > then update the dst_cache with the local address we just found. Right and this may not truly be what we want. If peer X is sending packets to our IP1, we should at least try to reply from the same address. If we have two IPs, IP1 and IP2, and both can be used to reach peer X, we should always try to use the one where we received traffic from X in the first place. OTOH hand it is also true that with floating detection on both sides, the situation will converge quickly, but there might be a reason why X chose IP1 as destination, therefore we should do our best to respect that. So, even in case of float, we should still store the local endpoint and attempt fetching a route that takes that into consideration. Which I think is what is happening (assuming we reset the dst_cache on float). ovpn_udpX_output() will: * get no rt from the cache * possibly confirm that saddr is ok * fetch the new rt using the provided saddr and daddr * update the cache. That makes sense to me. Would you agree? -- Antonio Quartulli OpenVPN Inc.
2025-03-06, 11:02:50 +0100, Antonio Quartulli wrote: > On 05/03/2025 17:56, Sabrina Dubroca wrote: > > 2025-03-05, 14:14:36 +0100, Antonio Quartulli wrote: > > > On 05/03/2025 12:20, Sabrina Dubroca wrote: > > > > 2025-03-05, 00:19:32 +0100, Antonio Quartulli wrote: > > > > > On 04/03/2025 19:37, Sabrina Dubroca wrote: > > > > > > 2025-03-04, 01:33:48 +0100, Antonio Quartulli wrote: > > > > > > > +void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb) > > > > > > > +{ > > > > > > > + struct hlist_nulls_head *nhead; > > > > > > > + struct sockaddr_storage ss; > > > > > > > + const u8 *local_ip = NULL; > > > > > > > + struct sockaddr_in6 *sa6; > > > > > > > + struct sockaddr_in *sa; > > > > > > > + struct ovpn_bind *bind; > > > > > > > + size_t salen = 0; > > > > > > > + > > > > > > > + spin_lock_bh(&peer->lock); > > > > > > > + bind = rcu_dereference_protected(peer->bind, > > > > > > > + lockdep_is_held(&peer->lock)); > > > > > > > + if (unlikely(!bind)) > > > > > > > + goto unlock; > > > > > > > + > > > > > > > + switch (skb->protocol) { > > > > > > > + case htons(ETH_P_IP): > > > > > > > + /* float check */ > > > > > > > + if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) { > > > > > > > + if (bind->remote.in4.sin_family == AF_INET) > > > > > > > + local_ip = (u8 *)&bind->local; > > > > > > > > > > > > If I'm reading this correctly, we always reuse the existing local > > > > > > address when we have to re-create the bind, even if it doesn't match > > > > > > the skb? The "local endpoint update" chunk below is doing that, but > > > > > > only if we're keeping the same remote? It'll get updated the next time > > > > > > we receive a packet and call ovpn_peer_endpoints_update. > > > > > > > > > > > > That might irritate the RPF check on the other side, if we still use > > > > > > our "old" source to talk to the new dest? > > > > > > > > > > > > > + sa = (struct sockaddr_in *)&ss; > > > > > > > + sa->sin_family = AF_INET; > > > > > > > + sa->sin_addr.s_addr = ip_hdr(skb)->saddr; > > > > > > > + sa->sin_port = udp_hdr(skb)->source; > > > > > > > + salen = sizeof(*sa); > > > > > > > + break; > > > > > > > > > > I think the issue is simply this 'break' above - by removing it, everything > > > > > should work as expected. > > > > > > > > Only if the bind was of the correct family? Checking an IPv4 local > > > > address (in the bind) against an IPv6 source address in the packet (or > > > > the other way around) isn't going to work well. > > > > > > Ah I understand what you mean. > > > > > > The purpose of "local_ip" is to provide a working local endpoint to be used > > > with the new remote address. > > > However, if the float is switching family we can't re-use the same old local > > > endpoint (hence the check). > > > In this case we'll learn the "new" local address later. > > > > > > Does it make sense? > > > > Sure, but we could have learned it immediately from the packet we just > > got, whether we're changing family or not. No need to wait for the > > next RX packet to also learn the new local address. > > Indeed. > > > > > But if we now do a dst_cache_reset with the peer float, > > ovpn_udp*_output will have to do a new route/local address lookup and > > I guess that should clean up the local address stored in the bind, and > > then update the dst_cache with the local address we just found. > > Right and this may not truly be what we want. > > If peer X is sending packets to our IP1, we should at least try to reply > from the same address. > > If we have two IPs, IP1 and IP2, and both can be used to reach peer X, we > should always try to use the one where we received traffic from X in the > first place. I had a thought that it might not be our prefered address to talk to X, but it would probably be, since we decided to use it (and thus X used it as remote to talk to us). > OTOH hand it is also true that with floating detection on both sides, the > situation will converge quickly, but there might be a reason why X chose IP1 > as destination, therefore we should do our best to respect that. And I guess the primary reason for X to choose IP1 would be "we sent packets to X from IP1". > So, even in case of float, we should still store the local endpoint and > attempt fetching a route that takes that into consideration. > Which I think is what is happening (assuming we reset the dst_cache on > float). Not at the same time as float, unless ovpn_peer_endpoints_update sets local_ip = ip_hdr(skb)->daddr unconditionally on float? Otherwise the next route lookup in ovpn_udpX_output will pick whatever source address it wants (which would likely match what's in the received skb during float, so probably fine anyway). > ovpn_udpX_output() will: > * get no rt from the cache > * possibly confirm that saddr is ok > * fetch the new rt using the provided saddr and daddr > * update the cache. > > That makes sense to me. > Would you agree? With dst_cache reset on float, yes. As long as we have that, the main behavior seems correct to me. (maybe some corner cases will not be handled optimally, but that can be improved later - which is most likely what I've been discussing in these emails :)) [this could be a useful counter to add in the future: number of floats and local address updates - so the user can check if that's increasing "too often", which would indicate something weird is happening] -- Sabrina
On 07/03/2025 11:12, Sabrina Dubroca wrote: > 2025-03-06, 11:02:50 +0100, Antonio Quartulli wrote: >> On 05/03/2025 17:56, Sabrina Dubroca wrote: >>> 2025-03-05, 14:14:36 +0100, Antonio Quartulli wrote: >>>> On 05/03/2025 12:20, Sabrina Dubroca wrote: >>>>> 2025-03-05, 00:19:32 +0100, Antonio Quartulli wrote: >>>>>> On 04/03/2025 19:37, Sabrina Dubroca wrote: >>>>>>> 2025-03-04, 01:33:48 +0100, Antonio Quartulli wrote: >>>>>>>> +void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb) >>>>>>>> +{ >>>>>>>> + struct hlist_nulls_head *nhead; >>>>>>>> + struct sockaddr_storage ss; >>>>>>>> + const u8 *local_ip = NULL; >>>>>>>> + struct sockaddr_in6 *sa6; >>>>>>>> + struct sockaddr_in *sa; >>>>>>>> + struct ovpn_bind *bind; >>>>>>>> + size_t salen = 0; >>>>>>>> + >>>>>>>> + spin_lock_bh(&peer->lock); >>>>>>>> + bind = rcu_dereference_protected(peer->bind, >>>>>>>> + lockdep_is_held(&peer->lock)); >>>>>>>> + if (unlikely(!bind)) >>>>>>>> + goto unlock; >>>>>>>> + >>>>>>>> + switch (skb->protocol) { >>>>>>>> + case htons(ETH_P_IP): >>>>>>>> + /* float check */ >>>>>>>> + if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) { >>>>>>>> + if (bind->remote.in4.sin_family == AF_INET) >>>>>>>> + local_ip = (u8 *)&bind->local; >>>>>>> >>>>>>> If I'm reading this correctly, we always reuse the existing local >>>>>>> address when we have to re-create the bind, even if it doesn't match >>>>>>> the skb? The "local endpoint update" chunk below is doing that, but >>>>>>> only if we're keeping the same remote? It'll get updated the next time >>>>>>> we receive a packet and call ovpn_peer_endpoints_update. >>>>>>> >>>>>>> That might irritate the RPF check on the other side, if we still use >>>>>>> our "old" source to talk to the new dest? >>>>>>> >>>>>>>> + sa = (struct sockaddr_in *)&ss; >>>>>>>> + sa->sin_family = AF_INET; >>>>>>>> + sa->sin_addr.s_addr = ip_hdr(skb)->saddr; >>>>>>>> + sa->sin_port = udp_hdr(skb)->source; >>>>>>>> + salen = sizeof(*sa); >>>>>>>> + break; >>>>>> >>>>>> I think the issue is simply this 'break' above - by removing it, everything >>>>>> should work as expected. >>>>> >>>>> Only if the bind was of the correct family? Checking an IPv4 local >>>>> address (in the bind) against an IPv6 source address in the packet (or >>>>> the other way around) isn't going to work well. >>>> >>>> Ah I understand what you mean. >>>> >>>> The purpose of "local_ip" is to provide a working local endpoint to be used >>>> with the new remote address. >>>> However, if the float is switching family we can't re-use the same old local >>>> endpoint (hence the check). >>>> In this case we'll learn the "new" local address later. >>>> >>>> Does it make sense? >>> >>> Sure, but we could have learned it immediately from the packet we just >>> got, whether we're changing family or not. No need to wait for the >>> next RX packet to also learn the new local address. >> >> Indeed. >> >>> >>> But if we now do a dst_cache_reset with the peer float, >>> ovpn_udp*_output will have to do a new route/local address lookup and >>> I guess that should clean up the local address stored in the bind, and >>> then update the dst_cache with the local address we just found. >> >> Right and this may not truly be what we want. >> >> If peer X is sending packets to our IP1, we should at least try to reply >> from the same address. >> >> If we have two IPs, IP1 and IP2, and both can be used to reach peer X, we >> should always try to use the one where we received traffic from X in the >> first place. > > I had a thought that it might not be our prefered address to talk to > X, but it would probably be, since we decided to use it (and thus X > used it as remote to talk to us). I am not sure I follow this sentence: I think you are just confirming what I said above (please correct me if I am wrong)? > >> OTOH hand it is also true that with floating detection on both sides, the >> situation will converge quickly, but there might be a reason why X chose IP1 >> as destination, therefore we should do our best to respect that. > > And I guess the primary reason for X to choose IP1 would be "we sent > packets to X from IP1". Probably. It truly depends on who initiated the connection. > >> So, even in case of float, we should still store the local endpoint and >> attempt fetching a route that takes that into consideration. >> Which I think is what is happening (assuming we reset the dst_cache on >> float). > > Not at the same time as float, unless ovpn_peer_endpoints_update sets > local_ip = ip_hdr(skb)->daddr unconditionally on float? > > Otherwise the next route lookup in ovpn_udpX_output will pick whatever > source address it wants (which would likely match what's in the > received skb during float, so probably fine anyway). > But that's what the code just below in ovpn_peer_endpoints_update() does, no? 223 /* local endpoint update */ 224 if (unlikely(bind->local.ipv4.s_addr != ip_hdr(skb)->daddr)) { ... 229 bind->local.ipv4.s_addr = ip_hdr(skb)->daddr; >> ovpn_udpX_output() will: >> * get no rt from the cache >> * possibly confirm that saddr is ok >> * fetch the new rt using the provided saddr and daddr >> * update the cache. >> >> That makes sense to me. >> Would you agree? > > With dst_cache reset on float, yes. As long as we have that, the main > behavior seems correct to me. (maybe some corner cases will not be > handled optimally, but that can be improved later - which is most > likely what I've been discussing in these emails :)) Yeah :) > > [this could be a useful counter to add in the future: number of floats > and local address updates - so the user can check if that's increasing > "too often", which would indicate something weird is happening] ACK, good idea! Thanks! Ok, I'll probably wait a little more and then prepare v22. Cheers, -- Antonio Quartulli OpenVPN Inc.
Note: as I tried to say at the end of my previous reply, feel free to ignore this or save it for later. dst_cache_reset on float and change of remote via netlink peer_modify is the important thing, and I'm not sure the "local address change on float" can or can't happen. 2025-03-10, 13:57:09 +0100, Antonio Quartulli wrote: > On 07/03/2025 11:12, Sabrina Dubroca wrote: > > 2025-03-06, 11:02:50 +0100, Antonio Quartulli wrote: > > > On 05/03/2025 17:56, Sabrina Dubroca wrote: > > > > 2025-03-05, 14:14:36 +0100, Antonio Quartulli wrote: > > > > > On 05/03/2025 12:20, Sabrina Dubroca wrote: > > > > > > 2025-03-05, 00:19:32 +0100, Antonio Quartulli wrote: > > > > > > > On 04/03/2025 19:37, Sabrina Dubroca wrote: > > > > > > > > 2025-03-04, 01:33:48 +0100, Antonio Quartulli wrote: > > > > > > > > > +void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb) > > > > > > > > > +{ > > > > > > > > > + struct hlist_nulls_head *nhead; > > > > > > > > > + struct sockaddr_storage ss; > > > > > > > > > + const u8 *local_ip = NULL; > > > > > > > > > + struct sockaddr_in6 *sa6; > > > > > > > > > + struct sockaddr_in *sa; > > > > > > > > > + struct ovpn_bind *bind; > > > > > > > > > + size_t salen = 0; > > > > > > > > > + > > > > > > > > > + spin_lock_bh(&peer->lock); > > > > > > > > > + bind = rcu_dereference_protected(peer->bind, > > > > > > > > > + lockdep_is_held(&peer->lock)); > > > > > > > > > + if (unlikely(!bind)) > > > > > > > > > + goto unlock; > > > > > > > > > + > > > > > > > > > + switch (skb->protocol) { > > > > > > > > > + case htons(ETH_P_IP): > > > > > > > > > + /* float check */ > > > > > > > > > + if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) { > > > > > > > > > + if (bind->remote.in4.sin_family == AF_INET) > > > > > > > > > + local_ip = (u8 *)&bind->local; > > > > > > > > > > > > > > > > If I'm reading this correctly, we always reuse the existing local > > > > > > > > address when we have to re-create the bind, even if it doesn't match > > > > > > > > the skb? The "local endpoint update" chunk below is doing that, but > > > > > > > > only if we're keeping the same remote? It'll get updated the next time > > > > > > > > we receive a packet and call ovpn_peer_endpoints_update. > > > > > > > > > > > > > > > > That might irritate the RPF check on the other side, if we still use > > > > > > > > our "old" source to talk to the new dest? > > > > > > > > > > > > > > > > > + sa = (struct sockaddr_in *)&ss; > > > > > > > > > + sa->sin_family = AF_INET; > > > > > > > > > + sa->sin_addr.s_addr = ip_hdr(skb)->saddr; > > > > > > > > > + sa->sin_port = udp_hdr(skb)->source; > > > > > > > > > + salen = sizeof(*sa); > > > > > > > > > + break; > > > > > > > > > > > > > > I think the issue is simply this 'break' above - by removing it, everything > > > > > > > should work as expected. > > > > > > > > > > > > Only if the bind was of the correct family? Checking an IPv4 local > > > > > > address (in the bind) against an IPv6 source address in the packet (or > > > > > > the other way around) isn't going to work well. > > > > > > > > > > Ah I understand what you mean. > > > > > > > > > > The purpose of "local_ip" is to provide a working local endpoint to be used > > > > > with the new remote address. > > > > > However, if the float is switching family we can't re-use the same old local > > > > > endpoint (hence the check). > > > > > In this case we'll learn the "new" local address later. > > > > > > > > > > Does it make sense? > > > > > > > > Sure, but we could have learned it immediately from the packet we just > > > > got, whether we're changing family or not. No need to wait for the > > > > next RX packet to also learn the new local address. > > > > > > Indeed. > > > > > > > > > > > But if we now do a dst_cache_reset with the peer float, > > > > ovpn_udp*_output will have to do a new route/local address lookup and > > > > I guess that should clean up the local address stored in the bind, and > > > > then update the dst_cache with the local address we just found. > > > > > > Right and this may not truly be what we want. > > > > > > If peer X is sending packets to our IP1, we should at least try to reply > > > from the same address. > > > > > > If we have two IPs, IP1 and IP2, and both can be used to reach peer X, we > > > should always try to use the one where we received traffic from X in the > > > first place. > > > > I had a thought that it might not be our prefered address to talk to > > X, but it would probably be, since we decided to use it (and thus X > > used it as remote to talk to us). > > I am not sure I follow this sentence: I think you are just confirming what I > said above (please correct me if I am wrong)? Yes. This may turn out to be incorrect (see the "some corner cases" bit below), but let's wait for examples of that. > > > OTOH hand it is also true that with floating detection on both sides, the > > > situation will converge quickly, but there might be a reason why X chose IP1 > > > as destination, therefore we should do our best to respect that. > > > > And I guess the primary reason for X to choose IP1 would be "we sent > > packets to X from IP1". > > Probably. It truly depends on who initiated the connection. > > > > > > So, even in case of float, we should still store the local endpoint and > > > attempt fetching a route that takes that into consideration. > > > Which I think is what is happening (assuming we reset the dst_cache on > > > float). > > > > Not at the same time as float, unless ovpn_peer_endpoints_update sets > > local_ip = ip_hdr(skb)->daddr unconditionally on float? > > > > Otherwise the next route lookup in ovpn_udpX_output will pick whatever > > source address it wants (which would likely match what's in the > > received skb during float, so probably fine anyway). > > > > But that's what the code just below in ovpn_peer_endpoints_update() does, > no? I think this is going a bit in circles :) And possibly completely irrelevant. The /* local endpoint update */ is (correctly) skipped in case of float [because the family may not be right so we can't compare addresses]. In case of float, I think setting local_ip to the packet's header would do the right thing is all cases: - if it matches what's in the old bind, great, we didn't change our local IP, just the remote and we could have kept local_ip = &bind->local - if it doesn't match, we learn it (but that brings the question: why did the peer think our address was IP2 and we thought it was IP1? -- which may be an irrelevant corner case) > > 223 /* local endpoint update */ > 224 if (unlikely(bind->local.ipv4.s_addr != > ip_hdr(skb)->daddr)) { > > ... > > 229 bind->local.ipv4.s_addr = ip_hdr(skb)->daddr; > > > > > ovpn_udpX_output() will: > > > * get no rt from the cache > > > * possibly confirm that saddr is ok > > > * fetch the new rt using the provided saddr and daddr > > > * update the cache. > > > > > > That makes sense to me. > > > Would you agree? > > > > With dst_cache reset on float, yes. As long as we have that, the main > > behavior seems correct to me. (maybe some corner cases will not be > > handled optimally, but that can be improved later - which is most > > likely what I've been discussing in these emails :)) > > Yeah :) > > > > [this could be a useful counter to add in the future: number of floats > > and local address updates - so the user can check if that's increasing > > "too often", which would indicate something weird is happening] > > ACK, good idea! > > Thanks! > > Ok, I'll probably wait a little more and then prepare v22. I won't be able to look at it until the week-end. But if you're ready, you can go ahead and post it anyway. -- Sabrina
On 05/03/2025 00:19, Antonio Quartulli wrote: > On 04/03/2025 19:37, Sabrina Dubroca wrote: >> 2025-03-04, 01:33:48 +0100, Antonio Quartulli wrote: >>> A peer connected via UDP may change its IP address without reconnecting >>> (float). >> >> Should that trigger a reset of the peer->dst_cache? And same when >> userspace updates the remote address? Otherwise it seems we could be >> stuck with a cached dst that cannot reach the peer. > > Yeah, that make sense, otherwise ovpn_udpX_output would just try over > and over to re-use the cached source address (unless it becomes > unavailable). I spent some more time thinking about this. It makes sense to reset the dst cache when the local address changes, but not in case of float (remote address changed). That's because we always want to first attempt sending packets using the address where the remote peer sent the traffic to. Should that not work (quite rare), then we have code in ovpn_udpX_output that will reset the cache and attempt a different address. Cheers, -- Antonio Quartulli OpenVPN Inc.
© 2016 - 2025 Red Hat, Inc.