[v1] virtio: introduce support for GSO over UDP tunnel

[PATCH RFC 15/16] net: implement tnl feature offloading

Posted by Paolo Abeni 8 months, 3 weeks ago

When any host or guest GSO over UDP tunnel offload is enabled the
virtio net header includes the additional tunnel-related fields,
update the size accordingly.

Push the GSO over UDP tunnel offloads all the way down to the tap
device extending the newly introduced NetFeatures struct, and
eventually enable the associated features.

As per virtio specification, to convert features bit to offload bit,
map the extended features into the reserved range.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 hw/net/virtio-net.c | 48 ++++++++++++++++++++++++++++++++++++++++-----
 include/net/net.h   |  2 ++
 net/net.c           |  7 ++++++-
 net/tap-linux.c     |  6 ++++++
 4 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 881877086e..758ceaffba 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -101,6 +101,27 @@
 #define VIRTIO_FEATURE_TO_OFFLOAD(fbit)  (fbit >= 64 ? \
                                           fbit - VIRTIO_O2F_DELTA : fbit)
 
+#ifdef CONFIG_INT128
+#define VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO \
+    VIRTIO_FEATURE_TO_OFFLOAD(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO)
+#define VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO_CSUM \
+    VIRTIO_FEATURE_TO_OFFLOAD(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM)
+
+static bool virtio_has_tnl_hdr(virtio_features_t features)
+{
+    return virtio_has_feature_ex(features, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) |
+           virtio_has_feature_ex(features, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO);
+}
+
+#else
+
+static bool virtio_has_tnl_hdr(virtio_features_t features)
+{
+    return false;
+}
+
+#endif
+
 static const VirtIOFeature feature_sizes[] = {
     {.flags = 1ULL << VIRTIO_NET_F_MAC,
      .end = endof(struct virtio_net_config, mac)},
@@ -656,7 +677,8 @@ static int peer_has_tunnel(VirtIONet *n)
 }
 
 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
-                                       int version_1, int hash_report)
+                                       int version_1, int hash_report,
+                                       int tnl)
 {
     int i;
     NetClientState *nc;
@@ -674,6 +696,9 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
             sizeof(struct virtio_net_hdr);
         n->rss_data.populate_hash = false;
     }
+    if (tnl) {
+        n->guest_hdr_len += sizeof(struct virtio_net_hdr_tunnel);
+    }
 
     for (i = 0; i < n->max_queue_pairs; i++) {
         nc = qemu_get_subqueue(n->nic, i);
@@ -890,6 +915,12 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
        .ufo  = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
        .uso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
        .uso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)),
+#ifdef CONFIG_INT128
+       .tnl  = !!(n->curr_guest_offloads &
+                  (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO)),
+       .tnl_csum = !!(n->curr_guest_offloads &
+                      (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO_CSUM)),
+#endif
     };
 
     qemu_set_offload(qemu_get_queue(n->nic)->peer, &ol);
@@ -911,7 +942,12 @@ virtio_net_guest_offloads_by_features(virtio_features_t features)
         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
         (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
         (1ULL << VIRTIO_NET_F_GUEST_USO4) |
-        (1ULL << VIRTIO_NET_F_GUEST_USO6);
+        (1ULL << VIRTIO_NET_F_GUEST_USO6)
+#ifdef CONFIG_INT128
+        | (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO)
+        | (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO_CSUM)
+#endif
+        ;
 
     return guest_offloads_mask & virtio_net_features_to_offload(features);
 }
@@ -1020,7 +1056,8 @@ static void virtio_net_set_features(VirtIODevice *vdev,
                                virtio_has_feature(features,
                                                   VIRTIO_F_VERSION_1),
                                virtio_has_feature(features,
-                                                  VIRTIO_NET_F_HASH_REPORT));
+                                                  VIRTIO_NET_F_HASH_REPORT),
+                               virtio_has_tnl_hdr(features));
 
     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
@@ -3139,7 +3176,8 @@ static int virtio_net_post_load_device(void *opaque, int version_id)
                                virtio_vdev_has_feature(vdev,
                                                        VIRTIO_F_VERSION_1),
                                virtio_vdev_has_feature(vdev,
-                                                       VIRTIO_NET_F_HASH_REPORT));
+                                                       VIRTIO_NET_F_HASH_REPORT),
+                               virtio_has_tnl_hdr(vdev->guest_features));
 
     /* MAC_TABLE_ENTRIES may be different from the saved image */
     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
@@ -3946,7 +3984,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
 
     n->vqs[0].tx_waiting = 0;
     n->tx_burst = n->net_conf.txburst;
-    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
+    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0, 0);
     n->promisc = 1; /* for compatibility */
 
     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
diff --git a/include/net/net.h b/include/net/net.h
index c71d7c6074..5049d293f2 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -43,6 +43,8 @@ typedef struct NetOffloads {
     bool ufo;
     bool uso4;
     bool uso6;
+    bool tnl;
+    bool tnl_csum;
 } NetOffloads;
 
 #define DEFINE_NIC_PROPERTIES(_state, _conf)                            \
diff --git a/net/net.c b/net/net.c
index 5a2f00c108..bd41229407 100644
--- a/net/net.c
+++ b/net/net.c
@@ -569,13 +569,18 @@ int qemu_get_vnet_hdr_len(NetClientState *nc)
 
 void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
 {
+    int len_tnl = len - sizeof(struct virtio_net_hdr_tunnel);
+
     if (!nc || !nc->info->set_vnet_hdr_len) {
         return;
     }
 
     assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
+           len_tnl == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
            len == sizeof(struct virtio_net_hdr) ||
-           len == sizeof(struct virtio_net_hdr_v1_hash));
+           len_tnl == sizeof(struct virtio_net_hdr) ||
+           len == sizeof(struct virtio_net_hdr_v1_hash) ||
+           len_tnl == sizeof(struct virtio_net_hdr_v1_hash));
 
     nc->vnet_hdr_len = len;
     nc->info->set_vnet_hdr_len(nc, len);
diff --git a/net/tap-linux.c b/net/tap-linux.c
index aa5f3a6e22..b7662ece63 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -287,6 +287,12 @@ void tap_fd_set_offload(int fd, const NetOffloads *ol)
         if (ol->uso6) {
             offload |= TUN_F_USO6;
         }
+        if ((ol->tso4 || ol->tso6 || ol->uso4 || ol->uso6) && ol->tnl) {
+            offload |= TUN_F_UDP_TUNNEL_GSO;
+        }
+        if ((offload & TUN_F_UDP_TUNNEL_GSO) && ol->tnl_csum) {
+            offload |= TUN_F_UDP_TUNNEL_GSO_CSUM;
+        }
     }
 
     if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
-- 
2.49.0

Re: [PATCH RFC 15/16] net: implement tnl feature offloading

Posted by Akihiko Odaki 8 months, 3 weeks ago

On 2025/05/21 20:34, Paolo Abeni wrote:
> When any host or guest GSO over UDP tunnel offload is enabled the
> virtio net header includes the additional tunnel-related fields,
> update the size accordingly.
> 
> Push the GSO over UDP tunnel offloads all the way down to the tap
> device extending the newly introduced NetFeatures struct, and
> eventually enable the associated features.
> 
> As per virtio specification, to convert features bit to offload bit,
> map the extended features into the reserved range.
> 
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
>   hw/net/virtio-net.c | 48 ++++++++++++++++++++++++++++++++++++++++-----
>   include/net/net.h   |  2 ++
>   net/net.c           |  7 ++++++-
>   net/tap-linux.c     |  6 ++++++
>   4 files changed, 57 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 881877086e..758ceaffba 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -101,6 +101,27 @@
>   #define VIRTIO_FEATURE_TO_OFFLOAD(fbit)  (fbit >= 64 ? \
>                                             fbit - VIRTIO_O2F_DELTA : fbit)
>   
> +#ifdef CONFIG_INT128
> +#define VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO \
> +    VIRTIO_FEATURE_TO_OFFLOAD(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO)
> +#define VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO_CSUM \
> +    VIRTIO_FEATURE_TO_OFFLOAD(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM)
> +
> +static bool virtio_has_tnl_hdr(virtio_features_t features)

"tnl" looks a bit cryptic to me and also inconsistent with everywhere 
else, which just calls it "tunnel".

> +{
> +    return virtio_has_feature_ex(features, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) |
> +           virtio_has_feature_ex(features, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO);
> +}
> +
> +#else
> +
> +static bool virtio_has_tnl_hdr(virtio_features_t features)
> +{
> +    return false;
> +}
> +
> +#endif
> +
>   static const VirtIOFeature feature_sizes[] = {
>       {.flags = 1ULL << VIRTIO_NET_F_MAC,
>        .end = endof(struct virtio_net_config, mac)},
> @@ -656,7 +677,8 @@ static int peer_has_tunnel(VirtIONet *n)
>   }
>   
>   static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
> -                                       int version_1, int hash_report)
> +                                       int version_1, int hash_report,
> +                                       int tnl)
>   {
>       int i;
>       NetClientState *nc;
> @@ -674,6 +696,9 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
>               sizeof(struct virtio_net_hdr);
>           n->rss_data.populate_hash = false;
>       }
> +    if (tnl) {
> +        n->guest_hdr_len += sizeof(struct virtio_net_hdr_tunnel);
> +    }
>   
>       for (i = 0; i < n->max_queue_pairs; i++) {
>           nc = qemu_get_subqueue(n->nic, i);
> @@ -890,6 +915,12 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
>          .ufo  = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
>          .uso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
>          .uso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)),
> +#ifdef CONFIG_INT128
> +       .tnl  = !!(n->curr_guest_offloads &
> +                  (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO)),
> +       .tnl_csum = !!(n->curr_guest_offloads &
> +                      (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO_CSUM)),

"[PATCH RFC 14/16] net: bundle all offloads in a single struct" added a 
struct for offloading, but how about passing n->curr_guest_offloads as 
is instead?

It loses some type safety and makes it prone to have unknown bits, but 
omitting duplicate these bit operations may outweigh the downside.

> +#endif
>       };
>   
>       qemu_set_offload(qemu_get_queue(n->nic)->peer, &ol);
> @@ -911,7 +942,12 @@ virtio_net_guest_offloads_by_features(virtio_features_t features)
>           (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
>           (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
>           (1ULL << VIRTIO_NET_F_GUEST_USO4) |
> -        (1ULL << VIRTIO_NET_F_GUEST_USO6);
> +        (1ULL << VIRTIO_NET_F_GUEST_USO6)
> +#ifdef CONFIG_INT128
> +        | (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO)
> +        | (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO_CSUM)
> +#endif
> +        ;
>   
>       return guest_offloads_mask & virtio_net_features_to_offload(features);
>   }
> @@ -1020,7 +1056,8 @@ static void virtio_net_set_features(VirtIODevice *vdev,
>                                  virtio_has_feature(features,
>                                                     VIRTIO_F_VERSION_1),
>                                  virtio_has_feature(features,
> -                                                  VIRTIO_NET_F_HASH_REPORT));
> +                                                  VIRTIO_NET_F_HASH_REPORT),
> +                               virtio_has_tnl_hdr(features));
>   
>       n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
>           virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
> @@ -3139,7 +3176,8 @@ static int virtio_net_post_load_device(void *opaque, int version_id)
>                                  virtio_vdev_has_feature(vdev,
>                                                          VIRTIO_F_VERSION_1),
>                                  virtio_vdev_has_feature(vdev,
> -                                                       VIRTIO_NET_F_HASH_REPORT));
> +                                                       VIRTIO_NET_F_HASH_REPORT),
> +                               virtio_has_tnl_hdr(vdev->guest_features));
>   
>       /* MAC_TABLE_ENTRIES may be different from the saved image */
>       if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
> @@ -3946,7 +3984,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>   
>       n->vqs[0].tx_waiting = 0;
>       n->tx_burst = n->net_conf.txburst;
> -    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
> +    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0, 0);
>       n->promisc = 1; /* for compatibility */
>   
>       n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
> diff --git a/include/net/net.h b/include/net/net.h
> index c71d7c6074..5049d293f2 100644
> --- a/include/net/net.h
> +++ b/include/net/net.h
> @@ -43,6 +43,8 @@ typedef struct NetOffloads {
>       bool ufo;
>       bool uso4;
>       bool uso6;
> +    bool tnl;
> +    bool tnl_csum;
>   } NetOffloads;
>   
>   #define DEFINE_NIC_PROPERTIES(_state, _conf)                            \
> diff --git a/net/net.c b/net/net.c
> index 5a2f00c108..bd41229407 100644
> --- a/net/net.c
> +++ b/net/net.c
> @@ -569,13 +569,18 @@ int qemu_get_vnet_hdr_len(NetClientState *nc)
>   
>   void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
>   {
> +    int len_tnl = len - sizeof(struct virtio_net_hdr_tunnel);
> +
>       if (!nc || !nc->info->set_vnet_hdr_len) {
>           return;
>       }
>   
>       assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
> +           len_tnl == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
>              len == sizeof(struct virtio_net_hdr) ||
> -           len == sizeof(struct virtio_net_hdr_v1_hash));
> +           len_tnl == sizeof(struct virtio_net_hdr) ||
> +           len == sizeof(struct virtio_net_hdr_v1_hash) ||
> +           len_tnl == sizeof(struct virtio_net_hdr_v1_hash));
>   
>       nc->vnet_hdr_len = len;
>       nc->info->set_vnet_hdr_len(nc, len);
> diff --git a/net/tap-linux.c b/net/tap-linux.c
> index aa5f3a6e22..b7662ece63 100644
> --- a/net/tap-linux.c
> +++ b/net/tap-linux.c
> @@ -287,6 +287,12 @@ void tap_fd_set_offload(int fd, const NetOffloads *ol)
>           if (ol->uso6) {
>               offload |= TUN_F_USO6;
>           }
> +        if ((ol->tso4 || ol->tso6 || ol->uso4 || ol->uso6) && ol->tnl) {

Is it possible to have ol->tnl without TSO or USO? If so, is ignoring 
ol->tnl really what you want?

> +            offload |= TUN_F_UDP_TUNNEL_GSO;
> +        }
> +        if ((offload & TUN_F_UDP_TUNNEL_GSO) && ol->tnl_csum) {
> +            offload |= TUN_F_UDP_TUNNEL_GSO_CSUM;
> +        }
>       }
>   
>       if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {

Re: [PATCH RFC 15/16] net: implement tnl feature offloading

Posted by Paolo Abeni 8 months, 3 weeks ago

On 5/23/25 10:16 AM, Akihiko Odaki wrote:
> On 2025/05/21 20:34, Paolo Abeni wrote:
>> @@ -890,6 +915,12 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
>>          .ufo  = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
>>          .uso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
>>          .uso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)),
>> +#ifdef CONFIG_INT128
>> +       .tnl  = !!(n->curr_guest_offloads &
>> +                  (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO)),
>> +       .tnl_csum = !!(n->curr_guest_offloads &
>> +                      (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO_CSUM)),
> 
> "[PATCH RFC 14/16] net: bundle all offloads in a single struct" added a 
> struct for offloading, but how about passing n->curr_guest_offloads as 
> is instead?
> 
> It loses some type safety and makes it prone to have unknown bits, but 
> omitting duplicate these bit operations may outweigh the downside.

I *think* that one of the relevant point about the current interface is
that qemu_set_offload() abstracts from the virtio specifics, as it's
also used by other drivers. Forcing them to covert the to-be-configured
offloads to a virtio specific bitmask sound incorrect to me. Possibly I
misread your suggestion?

[...]
>> diff --git a/net/tap-linux.c b/net/tap-linux.c
>> index aa5f3a6e22..b7662ece63 100644
>> --- a/net/tap-linux.c
>> +++ b/net/tap-linux.c
>> @@ -287,6 +287,12 @@ void tap_fd_set_offload(int fd, const NetOffloads *ol)
>>           if (ol->uso6) {
>>               offload |= TUN_F_USO6;
>>           }
>> +        if ((ol->tso4 || ol->tso6 || ol->uso4 || ol->uso6) && ol->tnl) {
> 
> Is it possible to have ol->tnl without TSO or USO? If so, is ignoring 
> ol->tnl really what you want?

The virtio specifications actually prevent setting UDP-tunnel offload
without any other "inner" offload (TSO or USO), as it makes little to no
sense (the stack can't GSO/GRO the outer header without doing the same
for the inner).

Does the above makes sense/answer your questions?

/P

Re: [PATCH RFC 15/16] net: implement tnl feature offloading

Posted by Akihiko Odaki 8 months, 3 weeks ago

On 2025/05/23 19:40, Paolo Abeni wrote:
> On 5/23/25 10:16 AM, Akihiko Odaki wrote:
>> On 2025/05/21 20:34, Paolo Abeni wrote:
>>> @@ -890,6 +915,12 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
>>>           .ufo  = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
>>>           .uso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
>>>           .uso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)),
>>> +#ifdef CONFIG_INT128
>>> +       .tnl  = !!(n->curr_guest_offloads &
>>> +                  (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO)),
>>> +       .tnl_csum = !!(n->curr_guest_offloads &
>>> +                      (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO_CSUM)),
>>
>> "[PATCH RFC 14/16] net: bundle all offloads in a single struct" added a
>> struct for offloading, but how about passing n->curr_guest_offloads as
>> is instead?
>>
>> It loses some type safety and makes it prone to have unknown bits, but
>> omitting duplicate these bit operations may outweigh the downside.
> 
> I *think* that one of the relevant point about the current interface is
> that qemu_set_offload() abstracts from the virtio specifics, as it's
> also used by other drivers. Forcing them to covert the to-be-configured
> offloads to a virtio specific bitmask sound incorrect to me. Possibly I
> misread your suggestion?
> 

virtio is also an interface, and we can reuse it for QEMU-internal 
interfaces too if it is appropriate.

That said, the feature bitmask defined by virtio is inappropriate for 
for qemu_set_offload() because it also contains other features not 
related to guest offloading. We need an alternative interface, and the 
current qemu_set_offload() just passes each flag separately.

Now, "[PATCH RFC 12/16] virtio-net: implement extended features 
support." is adding another format that derives from virtio for guest 
offloading. This format only contains bits related to guest offloading 
by definition and suits well with qemu_set_offload().

Bit names like VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO will imply that it 
derives from the virtio spec I think this is actually an improvement; 
the virtio spec have been the definitive document of the offloading 
features of tuntap, and some features even used the virtio header (so 
e1000e and igb parse and build virtio headers). These bit names make 
this relationship between tuntap and the virtio spec explicit.

Re: [PATCH RFC 15/16] net: implement tnl feature offloading

Posted by Paolo Abeni 8 months, 3 weeks ago

On 5/23/25 1:35 PM, Akihiko Odaki wrote:
> On 2025/05/23 19:40, Paolo Abeni wrote:
>> On 5/23/25 10:16 AM, Akihiko Odaki wrote:
>>> On 2025/05/21 20:34, Paolo Abeni wrote:
>>>> @@ -890,6 +915,12 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
>>>>           .ufo  = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
>>>>           .uso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
>>>>           .uso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)),
>>>> +#ifdef CONFIG_INT128
>>>> +       .tnl  = !!(n->curr_guest_offloads &
>>>> +                  (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO)),
>>>> +       .tnl_csum = !!(n->curr_guest_offloads &
>>>> +                      (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO_CSUM)),
>>>
>>> "[PATCH RFC 14/16] net: bundle all offloads in a single struct" added a
>>> struct for offloading, but how about passing n->curr_guest_offloads as
>>> is instead?
>>>
>>> It loses some type safety and makes it prone to have unknown bits, but
>>> omitting duplicate these bit operations may outweigh the downside.
>>
>> I *think* that one of the relevant point about the current interface is
>> that qemu_set_offload() abstracts from the virtio specifics, as it's
>> also used by other drivers. Forcing them to covert the to-be-configured
>> offloads to a virtio specific bitmask sound incorrect to me. Possibly I
>> misread your suggestion?
>>
> 
> virtio is also an interface, and we can reuse it for QEMU-internal 
> interfaces too if it is appropriate.
> 
> That said, the feature bitmask defined by virtio is inappropriate for 
> for qemu_set_offload() because it also contains other features not 
> related to guest offloading. We need an alternative interface, and the 
> current qemu_set_offload() just passes each flag separately.
> 
> Now, "[PATCH RFC 12/16] virtio-net: implement extended features 
> support." is adding another format that derives from virtio for guest 
> offloading. This format only contains bits related to guest offloading 
> by definition and suits well with qemu_set_offload().
> 
> Bit names like VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO will imply that it 
> derives from the virtio spec I think this is actually an improvement; 
> the virtio spec have been the definitive document of the offloading 
> features of tuntap, and some features even used the virtio header (so 
> e1000e and igb parse and build virtio headers). These bit names make 
> this relationship between tuntap and the virtio spec explicit.

Let me check we are on the same page. You are suggesting the following:

- change set_offload() signature to:
typedef void (SetOffload)(NetClientState *, uint64_t);

- define VIRTIO_NET_O_GUEST_<offload> masks for known/supported offload
in include/net/net.h (including TSO, USO, etc...)

- adapt the drivers to the above interface.

- move this patch as series pre-req.

Am I correct?

Thanks,

Paolo

Re: [PATCH RFC 15/16] net: implement tnl feature offloading

Posted by Akihiko Odaki 8 months, 2 weeks ago

On 2025/05/23 23:46, Paolo Abeni wrote:
> On 5/23/25 1:35 PM, Akihiko Odaki wrote:
>> On 2025/05/23 19:40, Paolo Abeni wrote:
>>> On 5/23/25 10:16 AM, Akihiko Odaki wrote:
>>>> On 2025/05/21 20:34, Paolo Abeni wrote:
>>>>> @@ -890,6 +915,12 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
>>>>>            .ufo  = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
>>>>>            .uso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
>>>>>            .uso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)),
>>>>> +#ifdef CONFIG_INT128
>>>>> +       .tnl  = !!(n->curr_guest_offloads &
>>>>> +                  (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO)),
>>>>> +       .tnl_csum = !!(n->curr_guest_offloads &
>>>>> +                      (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO_CSUM)),
>>>>
>>>> "[PATCH RFC 14/16] net: bundle all offloads in a single struct" added a
>>>> struct for offloading, but how about passing n->curr_guest_offloads as
>>>> is instead?
>>>>
>>>> It loses some type safety and makes it prone to have unknown bits, but
>>>> omitting duplicate these bit operations may outweigh the downside.
>>>
>>> I *think* that one of the relevant point about the current interface is
>>> that qemu_set_offload() abstracts from the virtio specifics, as it's
>>> also used by other drivers. Forcing them to covert the to-be-configured
>>> offloads to a virtio specific bitmask sound incorrect to me. Possibly I
>>> misread your suggestion?
>>>
>>
>> virtio is also an interface, and we can reuse it for QEMU-internal
>> interfaces too if it is appropriate.
>>
>> That said, the feature bitmask defined by virtio is inappropriate for
>> for qemu_set_offload() because it also contains other features not
>> related to guest offloading. We need an alternative interface, and the
>> current qemu_set_offload() just passes each flag separately.
>>
>> Now, "[PATCH RFC 12/16] virtio-net: implement extended features
>> support." is adding another format that derives from virtio for guest
>> offloading. This format only contains bits related to guest offloading
>> by definition and suits well with qemu_set_offload().
>>
>> Bit names like VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO will imply that it
>> derives from the virtio spec I think this is actually an improvement;
>> the virtio spec have been the definitive document of the offloading
>> features of tuntap, and some features even used the virtio header (so
>> e1000e and igb parse and build virtio headers). These bit names make
>> this relationship between tuntap and the virtio spec explicit.
> 
> Let me check we are on the same page. You are suggesting the following:
> 
> - change set_offload() signature to:
> typedef void (SetOffload)(NetClientState *, uint64_t);
> 
> - define VIRTIO_NET_O_GUEST_<offload> masks for known/supported offload
> in include/net/net.h (including TSO, USO, etc...)
> 
> - adapt the drivers to the above interface.
> 
> - move this patch as series pre-req.
> 
> Am I correct?

Yes, that's what I meant.

Regards,
Akihiko Odaki

Re: [PATCH RFC 15/16] net: implement tnl feature offloading

Posted by Akihiko Odaki 8 months, 3 weeks ago

On 2025/05/23 19:40, Paolo Abeni wrote:
> On 5/23/25 10:16 AM, Akihiko Odaki wrote:
>> On 2025/05/21 20:34, Paolo Abeni wrote:
>>> @@ -890,6 +915,12 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
>>>           .ufo  = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
>>>           .uso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
>>>           .uso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)),
>>> +#ifdef CONFIG_INT128
>>> +       .tnl  = !!(n->curr_guest_offloads &
>>> +                  (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO)),
>>> +       .tnl_csum = !!(n->curr_guest_offloads &
>>> +                      (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO_CSUM)),
>>
>> "[PATCH RFC 14/16] net: bundle all offloads in a single struct" added a
>> struct for offloading, but how about passing n->curr_guest_offloads as
>> is instead?
>>
>> It loses some type safety and makes it prone to have unknown bits, but
>> omitting duplicate these bit operations may outweigh the downside.
> 
> I *think* that one of the relevant point about the current interface is
> that qemu_set_offload() abstracts from the virtio specifics, as it's
> also used by other drivers. Forcing them to covert the to-be-configured
> offloads to a virtio specific bitmask sound incorrect to me. Possibly I
> misread your suggestion?
> 
> [...]
>>> diff --git a/net/tap-linux.c b/net/tap-linux.c
>>> index aa5f3a6e22..b7662ece63 100644
>>> --- a/net/tap-linux.c
>>> +++ b/net/tap-linux.c
>>> @@ -287,6 +287,12 @@ void tap_fd_set_offload(int fd, const NetOffloads *ol)
>>>            if (ol->uso6) {
>>>                offload |= TUN_F_USO6;
>>>            }
>>> +        if ((ol->tso4 || ol->tso6 || ol->uso4 || ol->uso6) && ol->tnl) {
>>
>> Is it possible to have ol->tnl without TSO or USO? If so, is ignoring
>> ol->tnl really what you want?
> 
> The virtio specifications actually prevent setting UDP-tunnel offload
> without any other "inner" offload (TSO or USO), as it makes little to no
> sense (the stack can't GSO/GRO the outer header without doing the same
> for the inner).
> 
> Does the above makes sense/answer your questions?

The code implies the following:
1a. ol->tnl may be true while TSO and USO are disabled.
2a. It is defined as no-op in such a case.

But the reality is as follows:
1b. ol->tnl being true while TSO and USO are disabled is an error.
2b. The consequence is undefined in such a case.

In that case, virtio_net_get_features() should report the error for 1b, 
which will prevent the error condition from reaching to 
tap_fd_set_offload().

Making the error condition no-op in tap_fd_set_offload() does not make 
it (more) correct as the consequence is undefined anyway (2b). It may 
simply ignore the condition under the assumption that it will never 
happen or assert that assumption.

Re: [PATCH RFC 15/16] net: implement tnl feature offloading

Posted by Paolo Abeni 8 months, 3 weeks ago

On 5/23/25 12:54 PM, Akihiko Odaki wrote:
> On 2025/05/23 19:40, Paolo Abeni wrote:
>> On 5/23/25 10:16 AM, Akihiko Odaki wrote:
>>> On 2025/05/21 20:34, Paolo Abeni wrote:
>>>> @@ -890,6 +915,12 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
>>>>           .ufo  = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
>>>>           .uso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
>>>>           .uso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)),
>>>> +#ifdef CONFIG_INT128
>>>> +       .tnl  = !!(n->curr_guest_offloads &
>>>> +                  (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO)),
>>>> +       .tnl_csum = !!(n->curr_guest_offloads &
>>>> +                      (1ULL << VIRTIO_NET_O_GUEST_UDP_TUNNEL_GSO_CSUM)),
>>>
>>> "[PATCH RFC 14/16] net: bundle all offloads in a single struct" added a
>>> struct for offloading, but how about passing n->curr_guest_offloads as
>>> is instead?
>>>
>>> It loses some type safety and makes it prone to have unknown bits, but
>>> omitting duplicate these bit operations may outweigh the downside.
>>
>> I *think* that one of the relevant point about the current interface is
>> that qemu_set_offload() abstracts from the virtio specifics, as it's
>> also used by other drivers. Forcing them to covert the to-be-configured
>> offloads to a virtio specific bitmask sound incorrect to me. Possibly I
>> misread your suggestion?
>>
>> [...]
>>>> diff --git a/net/tap-linux.c b/net/tap-linux.c
>>>> index aa5f3a6e22..b7662ece63 100644
>>>> --- a/net/tap-linux.c
>>>> +++ b/net/tap-linux.c
>>>> @@ -287,6 +287,12 @@ void tap_fd_set_offload(int fd, const NetOffloads *ol)
>>>>            if (ol->uso6) {
>>>>                offload |= TUN_F_USO6;
>>>>            }
>>>> +        if ((ol->tso4 || ol->tso6 || ol->uso4 || ol->uso6) && ol->tnl) {
>>>
>>> Is it possible to have ol->tnl without TSO or USO? If so, is ignoring
>>> ol->tnl really what you want?
>>
>> The virtio specifications actually prevent setting UDP-tunnel offload
>> without any other "inner" offload (TSO or USO), as it makes little to no
>> sense (the stack can't GSO/GRO the outer header without doing the same
>> for the inner).
>>
>> Does the above makes sense/answer your questions?
> 
> The code implies the following:
> 1a. ol->tnl may be true while TSO and USO are disabled.
> 2a. It is defined as no-op in such a case.
> 
> But the reality is as follows:
> 1b. ol->tnl being true while TSO and USO are disabled is an error.
> 2b. The consequence is undefined in such a case.
> 
> In that case, virtio_net_get_features() should report the error for 1b, 
> which will prevent the error condition from reaching to 
> tap_fd_set_offload().
> 
> Making the error condition no-op in tap_fd_set_offload() does not make 
> it (more) correct as the consequence is undefined anyway (2b). It may 
> simply ignore the condition under the assumption that it will never 
> happen or assert that assumption.

I see. I'll add the sanity check in virtio_net_get_features() and will
add an assert in the tap code.

Thanks!

/P