Allow the guest to reuse the hash value to make receive steering
consistent between the host and guest, and to save hash computation.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
Documentation/networking/tuntap.rst | 7 +++
drivers/net/Kconfig | 1 +
drivers/net/tap.c | 45 ++++++++++++++--
drivers/net/tun.c | 46 ++++++++++++----
drivers/net/tun_vnet.h | 102 +++++++++++++++++++++++++++++++-----
include/linux/if_tap.h | 2 +
include/uapi/linux/if_tun.h | 48 +++++++++++++++++
7 files changed, 223 insertions(+), 28 deletions(-)
diff --git a/Documentation/networking/tuntap.rst b/Documentation/networking/tuntap.rst
index 4d7087f727be..86b4ae8caa8a 100644
--- a/Documentation/networking/tuntap.rst
+++ b/Documentation/networking/tuntap.rst
@@ -206,6 +206,13 @@ enable is true we enable it, otherwise we disable it::
return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
}
+3.4 Reference
+-------------
+
+``linux/if_tun.h`` defines the interface described below:
+
+.. kernel-doc:: include/uapi/linux/if_tun.h
+
Universal TUN/TAP device driver Frequently Asked Question
=========================================================
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 9920b3a68ed1..e2a7bd703550 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -395,6 +395,7 @@ config TUN
tristate "Universal TUN/TAP device driver support"
depends on INET
select CRC32
+ select SKB_EXTENSIONS
help
TUN/TAP provides packet reception and transmission for user space
programs. It can be viewed as a simple Point-to-Point or Ethernet
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 9a34ceed0c2c..5e2fbe63ca47 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -179,6 +179,16 @@ static void tap_put_queue(struct tap_queue *q)
sock_put(&q->sk);
}
+static struct virtio_net_hash *tap_add_hash(struct sk_buff *skb)
+{
+ return (struct virtio_net_hash *)skb->cb;
+}
+
+static const struct virtio_net_hash *tap_find_hash(const struct sk_buff *skb)
+{
+ return (const struct virtio_net_hash *)skb->cb;
+}
+
/*
* Select a queue based on the rxq of the device on which this packet
* arrived. If the incoming device is not mq, calculate a flow hash
@@ -189,6 +199,7 @@ static void tap_put_queue(struct tap_queue *q)
static struct tap_queue *tap_get_queue(struct tap_dev *tap,
struct sk_buff *skb)
{
+ struct flow_keys_basic keys_basic;
struct tap_queue *queue = NULL;
/* Access to taps array is protected by rcu, but access to numvtaps
* isn't. Below we use it to lookup a queue, but treat it as a hint
@@ -198,15 +209,32 @@ static struct tap_queue *tap_get_queue(struct tap_dev *tap,
int numvtaps = READ_ONCE(tap->numvtaps);
__u32 rxq;
+ *tap_add_hash(skb) = (struct virtio_net_hash) { .report = VIRTIO_NET_HASH_REPORT_NONE };
+
if (!numvtaps)
goto out;
if (numvtaps == 1)
goto single;
+ if (!skb->l4_hash && !skb->sw_hash) {
+ struct flow_keys keys;
+
+ skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+ rxq = flow_hash_from_keys(&keys);
+ keys_basic = (struct flow_keys_basic) {
+ .control = keys.control,
+ .basic = keys.basic
+ };
+ } else {
+ skb_flow_dissect_flow_keys_basic(NULL, skb, &keys_basic, NULL, 0, 0, 0,
+ FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+ rxq = skb->hash;
+ }
+
/* Check if we can use flow to select a queue */
- rxq = skb_get_hash(skb);
if (rxq) {
+ tun_vnet_hash_report(&tap->vnet_hash, skb, &keys_basic, rxq, tap_add_hash);
queue = rcu_dereference(tap->taps[rxq % numvtaps]);
goto out;
}
@@ -713,15 +741,16 @@ static ssize_t tap_put_user(struct tap_queue *q,
int total;
if (q->flags & IFF_VNET_HDR) {
- struct virtio_net_hdr vnet_hdr;
+ struct virtio_net_hdr_v1_hash vnet_hdr;
vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
- ret = tun_vnet_hdr_from_skb(q->flags, NULL, skb, &vnet_hdr);
+ ret = tun_vnet_hdr_from_skb(vnet_hdr_len, q->flags, NULL, skb,
+ tap_find_hash, &vnet_hdr);
if (ret < 0)
goto done;
- ret = tun_vnet_hdr_put(vnet_hdr_len, iter, &vnet_hdr);
+ ret = tun_vnet_hdr_put(vnet_hdr_len, iter, &vnet_hdr, ret);
if (ret < 0)
goto done;
}
@@ -1025,7 +1054,13 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
return ret;
default:
- return tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags, cmd, sp);
+ rtnl_lock();
+ tap = rtnl_dereference(q->tap);
+ ret = tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags,
+ tap ? &tap->vnet_hash : NULL, -EINVAL,
+ cmd, sp);
+ rtnl_unlock();
+ return ret;
}
}
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index dd8799d19518..27308417b834 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -209,6 +209,7 @@ struct tun_struct {
struct bpf_prog __rcu *xdp_prog;
struct tun_prog __rcu *steering_prog;
struct tun_prog __rcu *filter_prog;
+ struct tun_vnet_hash vnet_hash;
struct ethtool_link_ksettings link_ksettings;
/* init args */
struct file *file;
@@ -451,6 +452,16 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
e->rps_rxhash = hash;
}
+static struct virtio_net_hash *tun_add_hash(struct sk_buff *skb)
+{
+ return skb_ext_add(skb, SKB_EXT_TUN_VNET_HASH);
+}
+
+static const struct virtio_net_hash *tun_find_hash(const struct sk_buff *skb)
+{
+ return skb_ext_find(skb, SKB_EXT_TUN_VNET_HASH);
+}
+
/* We try to identify a flow through its rxhash. The reason that
* we do not check rxq no. is because some cards(e.g 82599), chooses
* the rxq based on the txq where the last packet of the flow comes. As
@@ -459,12 +470,17 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
*/
static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
{
+ struct flow_keys keys;
+ struct flow_keys_basic keys_basic;
struct tun_flow_entry *e;
u32 txq, numqueues;
numqueues = READ_ONCE(tun->numqueues);
- txq = __skb_get_hash_symmetric(skb);
+ memset(&keys, 0, sizeof(keys));
+ skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, 0);
+
+ txq = flow_hash_from_keys(&keys);
e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
if (e) {
tun_flow_save_rps_rxhash(e, txq);
@@ -473,6 +489,13 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
txq = reciprocal_scale(txq, numqueues);
}
+ keys_basic = (struct flow_keys_basic) {
+ .control = keys.control,
+ .basic = keys.basic
+ };
+ tun_vnet_hash_report(&tun->vnet_hash, skb, &keys_basic, skb->l4_hash ? skb->hash : txq,
+ tun_add_hash);
+
return txq;
}
@@ -1990,10 +2013,8 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
size_t total;
if (tun->flags & IFF_VNET_HDR) {
- struct virtio_net_hdr gso = { 0 };
-
vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
- ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
+ ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, NULL, 0);
if (ret < 0)
return ret;
}
@@ -2018,7 +2039,6 @@ static ssize_t tun_put_user(struct tun_struct *tun,
int vlan_offset = 0;
int vlan_hlen = 0;
int vnet_hdr_sz = 0;
- int ret;
if (skb_vlan_tag_present(skb))
vlan_hlen = VLAN_HLEN;
@@ -2043,13 +2063,15 @@ static ssize_t tun_put_user(struct tun_struct *tun,
}
if (vnet_hdr_sz) {
- struct virtio_net_hdr gso;
+ struct virtio_net_hdr_v1_hash gso;
+ int ret;
- ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso);
+ ret = tun_vnet_hdr_from_skb(vnet_hdr_sz, tun->flags, tun->dev, skb,
+ tun_find_hash, &gso);
if (ret < 0)
goto done;
- ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
+ ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso, ret);
if (ret < 0)
goto done;
}
@@ -3055,9 +3077,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
goto unlock;
}
- ret = -EBADFD;
- if (!tun)
+ if (!tun) {
+ ret = tun_vnet_ioctl(NULL, NULL, NULL, -EBADFD, cmd, argp);
goto unlock;
+ }
netif_info(tun, drv, tun->dev, "tun_chr_ioctl cmd %u\n", cmd);
@@ -3256,7 +3279,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
break;
default:
- ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp);
+ ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags,
+ &tun->vnet_hash, -EINVAL, cmd, argp);
}
if (do_notify)
diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h
index c40bde0fdf8c..589a97dd7d02 100644
--- a/drivers/net/tun_vnet.h
+++ b/drivers/net/tun_vnet.h
@@ -6,6 +6,9 @@
#define TUN_VNET_LE 0x80000000
#define TUN_VNET_BE 0x40000000
+typedef struct virtio_net_hash *(*tun_vnet_hash_add)(struct sk_buff *);
+typedef const struct virtio_net_hash *(*tun_vnet_hash_find)(const struct sk_buff *);
+
static inline bool tun_vnet_legacy_is_little_endian(unsigned int flags)
{
return !(IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && (flags & TUN_VNET_BE)) &&
@@ -59,18 +62,31 @@ static inline __virtio16 cpu_to_tun_vnet16(unsigned int flags, u16 val)
}
static inline long tun_vnet_ioctl(int *sz, unsigned int *flags,
- unsigned int cmd, int __user *sp)
+ struct tun_vnet_hash *hash, long fallback,
+ unsigned int cmd, void __user *argp)
{
+ static const struct tun_vnet_hash cap = {
+ .flags = TUN_VNET_HASH_REPORT,
+ .types = VIRTIO_NET_SUPPORTED_HASH_TYPES
+ };
+ struct tun_vnet_hash hash_buf;
+ int __user *sp = argp;
int s;
switch (cmd) {
case TUNGETVNETHDRSZ:
+ if (!sz)
+ return -EBADFD;
+
s = *sz;
if (put_user(s, sp))
return -EFAULT;
return 0;
case TUNSETVNETHDRSZ:
+ if (!sz)
+ return -EBADFD;
+
if (get_user(s, sp))
return -EFAULT;
if (s < (int)sizeof(struct virtio_net_hdr))
@@ -80,12 +96,18 @@ static inline long tun_vnet_ioctl(int *sz, unsigned int *flags,
return 0;
case TUNGETVNETLE:
+ if (!flags)
+ return -EBADFD;
+
s = !!(*flags & TUN_VNET_LE);
if (put_user(s, sp))
return -EFAULT;
return 0;
case TUNSETVNETLE:
+ if (!flags)
+ return -EBADFD;
+
if (get_user(s, sp))
return -EFAULT;
if (s)
@@ -95,16 +117,56 @@ static inline long tun_vnet_ioctl(int *sz, unsigned int *flags,
return 0;
case TUNGETVNETBE:
+ if (!flags)
+ return -EBADFD;
+
return tun_vnet_get_be(*flags, sp);
case TUNSETVNETBE:
+ if (!flags)
+ return -EBADFD;
+
return tun_vnet_set_be(flags, sp);
+ case TUNGETVNETHASHCAP:
+ return copy_to_user(argp, &cap, sizeof(cap)) ? -EFAULT : 0;
+
+ case TUNSETVNETHASH:
+ if (!hash)
+ return -EBADFD;
+
+ if (copy_from_user(&hash_buf, argp, sizeof(hash_buf)))
+ return -EFAULT;
+
+ *hash = hash_buf;
+ return 0;
+
default:
- return -EINVAL;
+ return fallback;
}
}
+static inline void tun_vnet_hash_report(const struct tun_vnet_hash *hash,
+ struct sk_buff *skb,
+ const struct flow_keys_basic *keys,
+ u32 value,
+ tun_vnet_hash_add vnet_hash_add)
+{
+ struct virtio_net_hash *report;
+
+ if (!(hash->flags & TUN_VNET_HASH_REPORT))
+ return;
+
+ report = vnet_hash_add(skb);
+ if (!report)
+ return;
+
+ *report = (struct virtio_net_hash) {
+ .report = virtio_net_hash_report(hash->types, keys),
+ .value = value
+ };
+}
+
static inline int tun_vnet_hdr_get(int sz, unsigned int flags,
struct iov_iter *from,
struct virtio_net_hdr *hdr)
@@ -130,15 +192,15 @@ static inline int tun_vnet_hdr_get(int sz, unsigned int flags,
}
static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter,
- const struct virtio_net_hdr *hdr)
+ const void *hdr, int content_sz)
{
if (iov_iter_count(iter) < sz)
return -EINVAL;
- if (copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))
+ if (copy_to_iter(hdr, content_sz, iter) != content_sz)
return -EFAULT;
- if (iov_iter_zero(sz - sizeof(*hdr), iter) != sz - sizeof(*hdr))
+ if (iov_iter_zero(sz - content_sz, iter) != sz - content_sz)
return -EFAULT;
return 0;
@@ -151,32 +213,48 @@ static inline int tun_vnet_hdr_to_skb(unsigned int flags,
return virtio_net_hdr_to_skb(skb, hdr, tun_vnet_is_little_endian(flags));
}
-static inline int tun_vnet_hdr_from_skb(unsigned int flags,
+static inline int tun_vnet_hdr_from_skb(int sz, unsigned int flags,
const struct net_device *dev,
const struct sk_buff *skb,
- struct virtio_net_hdr *hdr)
+ tun_vnet_hash_find vnet_hash_find,
+ struct virtio_net_hdr_v1_hash *hdr)
{
int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0;
+ const struct virtio_net_hash *report = sz < sizeof(struct virtio_net_hdr_v1_hash) ?
+ NULL : vnet_hash_find(skb);
+ int content_sz;
+
+ if (report) {
+ content_sz = sizeof(struct virtio_net_hdr_v1_hash);
+
+ *hdr = (struct virtio_net_hdr_v1_hash) {
+ .hdr = { .num_buffers = __cpu_to_virtio16(true, 1) },
+ .hash_value = cpu_to_le32(report->value),
+ .hash_report = cpu_to_le16(report->report)
+ };
+ } else {
+ content_sz = sizeof(struct virtio_net_hdr);
+ }
- if (virtio_net_hdr_from_skb(skb, hdr,
+ if (virtio_net_hdr_from_skb(skb, (struct virtio_net_hdr *)hdr,
tun_vnet_is_little_endian(flags), true,
vlan_hlen)) {
struct skb_shared_info *sinfo = skb_shinfo(skb);
if (net_ratelimit()) {
netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
- sinfo->gso_type, tun_vnet16_to_cpu(flags, hdr->gso_size),
- tun_vnet16_to_cpu(flags, hdr->hdr_len));
+ sinfo->gso_type, tun_vnet16_to_cpu(flags, hdr->hdr.gso_size),
+ tun_vnet16_to_cpu(flags, hdr->hdr.hdr_len));
print_hex_dump(KERN_ERR, "tun: ",
DUMP_PREFIX_NONE,
16, 1, skb->head,
- min(tun_vnet16_to_cpu(flags, hdr->hdr_len), 64), true);
+ min(tun_vnet16_to_cpu(flags, hdr->hdr.hdr_len), 64), true);
}
WARN_ON_ONCE(1);
return -EINVAL;
}
- return 0;
+ return content_sz;
}
#endif /* TUN_VNET_H */
diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
index 553552fa635c..5bbb343a6dba 100644
--- a/include/linux/if_tap.h
+++ b/include/linux/if_tap.h
@@ -4,6 +4,7 @@
#include <net/sock.h>
#include <linux/skb_array.h>
+#include <uapi/linux/if_tun.h>
struct file;
struct socket;
@@ -43,6 +44,7 @@ struct tap_dev {
int numqueues;
netdev_features_t tap_features;
int minor;
+ struct tun_vnet_hash vnet_hash;
void (*update_features)(struct tap_dev *tap, netdev_features_t features);
void (*count_tx_dropped)(struct tap_dev *tap);
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 287cdc81c939..d11e79b4e0dc 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -62,6 +62,34 @@
#define TUNSETCARRIER _IOW('T', 226, int)
#define TUNGETDEVNETNS _IO('T', 227)
+/**
+ * define TUNGETVNETHASHCAP - ioctl to get virtio_net hashing capability.
+ *
+ * The argument is a pointer to &struct tun_vnet_hash which will store the
+ * maximal virtio_net hashing configuration.
+ */
+#define TUNGETVNETHASHCAP _IOR('T', 228, struct tun_vnet_hash)
+
+/**
+ * define TUNSETVNETHASH - ioctl to configure virtio_net hashing
+ *
+ * The argument is a pointer to &struct tun_vnet_hash.
+ *
+ * The %TUN_VNET_HASH_REPORT flag set with this ioctl will be effective only
+ * after calling the %TUNSETVNETHDRSZ ioctl with a number greater than or equal
+ * to the size of &struct virtio_net_hdr_v1_hash.
+ *
+ * The members added to the legacy header by %TUN_VNET_HASH_REPORT flag will
+ * always be little-endian.
+ *
+ * This ioctl results in %EBADFD if the underlying device is deleted. It affects
+ * all queues attached to the same device.
+ *
+ * This ioctl currently has no effect on XDP packets and packets with
+ * queue_mapping set by TC.
+ */
+#define TUNSETVNETHASH _IOW('T', 229, struct tun_vnet_hash)
+
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
#define IFF_TAP 0x0002
@@ -115,4 +143,24 @@ struct tun_filter {
__u8 addr[][ETH_ALEN];
};
+/**
+ * define TUN_VNET_HASH_REPORT - Request virtio_net hash reporting for vhost
+ */
+#define TUN_VNET_HASH_REPORT 0x0001
+
+/**
+ * struct tun_vnet_hash - virtio_net hashing configuration
+ * @flags:
+ * Bitmask consists of %TUN_VNET_HASH_REPORT and %TUN_VNET_HASH_RSS
+ * @pad:
+ * Should be filled with zero before passing to %TUNSETVNETHASH
+ * @types:
+ * Bitmask of allowed hash types
+ */
+struct tun_vnet_hash {
+ __u16 flags;
+ __u8 pad[2];
+ __u32 types;
+};
+
#endif /* _UAPI__IF_TUN_H */
--
2.46.2
Akihiko Odaki wrote:
> Allow the guest to reuse the hash value to make receive steering
> consistent between the host and guest, and to save hash computation.
>
> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> ---
[...]
> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
> index 9a34ceed0c2c..5e2fbe63ca47 100644

Merge the earlier tiny patch 2 into this one.
On Tue, Oct 8, 2024 at 2:55 PM Akihiko Odaki <akihiko.odaki@daynix.com> wrote:
>
> Allow the guest to reuse the hash value to make receive steering
> consistent between the host and guest, and to save hash computation.
>
> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
I wonder if this would cause overhead when hash reporting is not enabled?
> ---
> Documentation/networking/tuntap.rst | 7 +++
> drivers/net/Kconfig | 1 +
> drivers/net/tap.c | 45 ++++++++++++++--
Title should be for tap as well, or is this just for tun?
> drivers/net/tun.c | 46 ++++++++++++----
> drivers/net/tun_vnet.h | 102 +++++++++++++++++++++++++++++++-----
> include/linux/if_tap.h | 2 +
> include/uapi/linux/if_tun.h | 48 +++++++++++++++++
> 7 files changed, 223 insertions(+), 28 deletions(-)
>
> diff --git a/Documentation/networking/tuntap.rst b/Documentation/networking/tuntap.rst
> index 4d7087f727be..86b4ae8caa8a 100644
> --- a/Documentation/networking/tuntap.rst
> +++ b/Documentation/networking/tuntap.rst
> @@ -206,6 +206,13 @@ enable is true we enable it, otherwise we disable it::
> return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
> }
>
> +3.4 Reference
> +-------------
> +
> +``linux/if_tun.h`` defines the interface described below:
> +
> +.. kernel-doc:: include/uapi/linux/if_tun.h
> +
> Universal TUN/TAP device driver Frequently Asked Question
> =========================================================
>
> diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> index 9920b3a68ed1..e2a7bd703550 100644
> --- a/drivers/net/Kconfig
> +++ b/drivers/net/Kconfig
> @@ -395,6 +395,7 @@ config TUN
> tristate "Universal TUN/TAP device driver support"
> depends on INET
> select CRC32
> + select SKB_EXTENSIONS
Then we need this for macvtap at least as well?
> help
> TUN/TAP provides packet reception and transmission for user space
> programs. It can be viewed as a simple Point-to-Point or Ethernet
> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
> index 9a34ceed0c2c..5e2fbe63ca47 100644
> --- a/drivers/net/tap.c
> +++ b/drivers/net/tap.c
> @@ -179,6 +179,16 @@ static void tap_put_queue(struct tap_queue *q)
> sock_put(&q->sk);
> }
>
> +static struct virtio_net_hash *tap_add_hash(struct sk_buff *skb)
> +{
> + return (struct virtio_net_hash *)skb->cb;
Any reason that tap uses skb->cb but not skb extensions? (And is it
safe to use that without cloning?)
> +}
> +
> +static const struct virtio_net_hash *tap_find_hash(const struct sk_buff *skb)
> +{
> + return (const struct virtio_net_hash *)skb->cb;
> +}
> +
> /*
> * Select a queue based on the rxq of the device on which this packet
> * arrived. If the incoming device is not mq, calculate a flow hash
> @@ -189,6 +199,7 @@ static void tap_put_queue(struct tap_queue *q)
> static struct tap_queue *tap_get_queue(struct tap_dev *tap,
> struct sk_buff *skb)
> {
> + struct flow_keys_basic keys_basic;
> struct tap_queue *queue = NULL;
> /* Access to taps array is protected by rcu, but access to numvtaps
> * isn't. Below we use it to lookup a queue, but treat it as a hint
> @@ -198,15 +209,32 @@ static struct tap_queue *tap_get_queue(struct tap_dev *tap,
> int numvtaps = READ_ONCE(tap->numvtaps);
> __u32 rxq;
>
> + *tap_add_hash(skb) = (struct virtio_net_hash) { .report = VIRTIO_NET_HASH_REPORT_NONE };
> +
> if (!numvtaps)
> goto out;
>
> if (numvtaps == 1)
> goto single;
>
> + if (!skb->l4_hash && !skb->sw_hash) {
> + struct flow_keys keys;
> +
> + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
> + rxq = flow_hash_from_keys(&keys);
> + keys_basic = (struct flow_keys_basic) {
> + .control = keys.control,
> + .basic = keys.basic
> + };
> + } else {
> + skb_flow_dissect_flow_keys_basic(NULL, skb, &keys_basic, NULL, 0, 0, 0,
> + FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
> + rxq = skb->hash;
> + }
> +
> /* Check if we can use flow to select a queue */
> - rxq = skb_get_hash(skb);
> if (rxq) {
> + tun_vnet_hash_report(&tap->vnet_hash, skb, &keys_basic, rxq, tap_add_hash);
> queue = rcu_dereference(tap->taps[rxq % numvtaps]);
> goto out;
> }
> @@ -713,15 +741,16 @@ static ssize_t tap_put_user(struct tap_queue *q,
> int total;
>
> if (q->flags & IFF_VNET_HDR) {
> - struct virtio_net_hdr vnet_hdr;
> + struct virtio_net_hdr_v1_hash vnet_hdr;
>
> vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
>
> - ret = tun_vnet_hdr_from_skb(q->flags, NULL, skb, &vnet_hdr);
> + ret = tun_vnet_hdr_from_skb(vnet_hdr_len, q->flags, NULL, skb,
> + tap_find_hash, &vnet_hdr);
> if (ret < 0)
> goto done;
>
> - ret = tun_vnet_hdr_put(vnet_hdr_len, iter, &vnet_hdr);
> + ret = tun_vnet_hdr_put(vnet_hdr_len, iter, &vnet_hdr, ret);
> if (ret < 0)
> goto done;
> }
> @@ -1025,7 +1054,13 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
> return ret;
>
> default:
> - return tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags, cmd, sp);
> + rtnl_lock();
> + tap = rtnl_dereference(q->tap);
> + ret = tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags,
> + tap ? &tap->vnet_hash : NULL, -EINVAL,
> + cmd, sp);
> + rtnl_unlock();
> + return ret;
> }
> }
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index dd8799d19518..27308417b834 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -209,6 +209,7 @@ struct tun_struct {
> struct bpf_prog __rcu *xdp_prog;
> struct tun_prog __rcu *steering_prog;
> struct tun_prog __rcu *filter_prog;
> + struct tun_vnet_hash vnet_hash;
> struct ethtool_link_ksettings link_ksettings;
> /* init args */
> struct file *file;
> @@ -451,6 +452,16 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
> e->rps_rxhash = hash;
> }
>
> +static struct virtio_net_hash *tun_add_hash(struct sk_buff *skb)
> +{
> + return skb_ext_add(skb, SKB_EXT_TUN_VNET_HASH);
> +}
> +
> +static const struct virtio_net_hash *tun_find_hash(const struct sk_buff *skb)
> +{
> + return skb_ext_find(skb, SKB_EXT_TUN_VNET_HASH);
> +}
> +
> /* We try to identify a flow through its rxhash. The reason that
> * we do not check rxq no. is because some cards(e.g 82599), chooses
> * the rxq based on the txq where the last packet of the flow comes. As
> @@ -459,12 +470,17 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
> */
> static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
> {
> + struct flow_keys keys;
> + struct flow_keys_basic keys_basic;
> struct tun_flow_entry *e;
> u32 txq, numqueues;
>
> numqueues = READ_ONCE(tun->numqueues);
>
> - txq = __skb_get_hash_symmetric(skb);
> + memset(&keys, 0, sizeof(keys));
> + skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, 0);
> +
> + txq = flow_hash_from_keys(&keys);
> e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
> if (e) {
> tun_flow_save_rps_rxhash(e, txq);
> @@ -473,6 +489,13 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
> txq = reciprocal_scale(txq, numqueues);
> }
>
> + keys_basic = (struct flow_keys_basic) {
> + .control = keys.control,
> + .basic = keys.basic
> + };
> + tun_vnet_hash_report(&tun->vnet_hash, skb, &keys_basic, skb->l4_hash ? skb->hash : txq,
> + tun_add_hash);
Is using txq when l4_hash is not set something required by the virtio spec?
> +
> return txq;
> }
>
> @@ -1990,10 +2013,8 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
> size_t total;
>
> if (tun->flags & IFF_VNET_HDR) {
> - struct virtio_net_hdr gso = { 0 };
> -
> vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
> - ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
> + ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, NULL, 0);
> if (ret < 0)
> return ret;
> }
> @@ -2018,7 +2039,6 @@ static ssize_t tun_put_user(struct tun_struct *tun,
> int vlan_offset = 0;
> int vlan_hlen = 0;
> int vnet_hdr_sz = 0;
> - int ret;
>
> if (skb_vlan_tag_present(skb))
> vlan_hlen = VLAN_HLEN;
> @@ -2043,13 +2063,15 @@ static ssize_t tun_put_user(struct tun_struct *tun,
> }
>
> if (vnet_hdr_sz) {
> - struct virtio_net_hdr gso;
> + struct virtio_net_hdr_v1_hash gso;
> + int ret;
>
> - ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso);
> + ret = tun_vnet_hdr_from_skb(vnet_hdr_sz, tun->flags, tun->dev, skb,
> + tun_find_hash, &gso);
> if (ret < 0)
> goto done;
>
> - ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
> + ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso, ret);
> if (ret < 0)
> goto done;
> }
> @@ -3055,9 +3077,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> goto unlock;
> }
>
> - ret = -EBADFD;
> - if (!tun)
> + if (!tun) {
> + ret = tun_vnet_ioctl(NULL, NULL, NULL, -EBADFD, cmd, argp);
This does not seem elegant (passing three NULL pointers). Any reason we
can't just modify __tun_chr_ioctl() instead of introducing things like
tun_vnet_ioctl()?
> goto unlock;
> + }
>
> netif_info(tun, drv, tun->dev, "tun_chr_ioctl cmd %u\n", cmd);
>
> @@ -3256,7 +3279,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> break;
>
> default:
> - ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp);
> + ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags,
> + &tun->vnet_hash, -EINVAL, cmd, argp);
> }
>
> if (do_notify)
> diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h
> index c40bde0fdf8c..589a97dd7d02 100644
> --- a/drivers/net/tun_vnet.h
> +++ b/drivers/net/tun_vnet.h
> @@ -6,6 +6,9 @@
> #define TUN_VNET_LE 0x80000000
> #define TUN_VNET_BE 0x40000000
>
> +typedef struct virtio_net_hash *(*tun_vnet_hash_add)(struct sk_buff *);
> +typedef const struct virtio_net_hash *(*tun_vnet_hash_find)(const struct sk_buff *);
> +
> static inline bool tun_vnet_legacy_is_little_endian(unsigned int flags)
> {
> return !(IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && (flags & TUN_VNET_BE)) &&
> @@ -59,18 +62,31 @@ static inline __virtio16 cpu_to_tun_vnet16(unsigned int flags, u16 val)
> }
>
> static inline long tun_vnet_ioctl(int *sz, unsigned int *flags,
> - unsigned int cmd, int __user *sp)
> + struct tun_vnet_hash *hash, long fallback,
> + unsigned int cmd, void __user *argp)
> {
> + static const struct tun_vnet_hash cap = {
> + .flags = TUN_VNET_HASH_REPORT,
> + .types = VIRTIO_NET_SUPPORTED_HASH_TYPES
> + };
Let's find a way to reuse the virtio-net uAPI instead of introducing new
stuff that stresses the management layer.
> + struct tun_vnet_hash hash_buf;
> + int __user *sp = argp;
> int s;
>
> switch (cmd) {
> case TUNGETVNETHDRSZ:
> + if (!sz)
> + return -EBADFD;
> +
> s = *sz;
> if (put_user(s, sp))
> return -EFAULT;
> return 0;
>
> case TUNSETVNETHDRSZ:
> + if (!sz)
> + return -EBADFD;
> +
> if (get_user(s, sp))
> return -EFAULT;
> if (s < (int)sizeof(struct virtio_net_hdr))
> @@ -80,12 +96,18 @@ static inline long tun_vnet_ioctl(int *sz, unsigned int *flags,
> return 0;
>
> case TUNGETVNETLE:
> + if (!flags)
> + return -EBADFD;
> +
> s = !!(*flags & TUN_VNET_LE);
> if (put_user(s, sp))
> return -EFAULT;
> return 0;
>
> case TUNSETVNETLE:
> + if (!flags)
> + return -EBADFD;
> +
> if (get_user(s, sp))
> return -EFAULT;
> if (s)
> @@ -95,16 +117,56 @@ static inline long tun_vnet_ioctl(int *sz, unsigned int *flags,
> return 0;
>
> case TUNGETVNETBE:
> + if (!flags)
> + return -EBADFD;
> +
> return tun_vnet_get_be(*flags, sp);
>
> case TUNSETVNETBE:
> + if (!flags)
> + return -EBADFD;
> +
> return tun_vnet_set_be(flags, sp);
>
> + case TUNGETVNETHASHCAP:
> + return copy_to_user(argp, &cap, sizeof(cap)) ? -EFAULT : 0;
> +
> + case TUNSETVNETHASH:
> + if (!hash)
> + return -EBADFD;
> +
> + if (copy_from_user(&hash_buf, argp, sizeof(hash_buf)))
> + return -EFAULT;
> +
> + *hash = hash_buf;
> + return 0;
> +
> default:
> - return -EINVAL;
> + return fallback;
> }
> }
>
> +static inline void tun_vnet_hash_report(const struct tun_vnet_hash *hash,
> + struct sk_buff *skb,
> + const struct flow_keys_basic *keys,
> + u32 value,
> + tun_vnet_hash_add vnet_hash_add)
> +{
> + struct virtio_net_hash *report;
> +
> + if (!(hash->flags & TUN_VNET_HASH_REPORT))
> + return;
> +
> + report = vnet_hash_add(skb);
> + if (!report)
> + return;
> +
> + *report = (struct virtio_net_hash) {
> + .report = virtio_net_hash_report(hash->types, keys),
> + .value = value
> + };
> +}
> +
> static inline int tun_vnet_hdr_get(int sz, unsigned int flags,
> struct iov_iter *from,
> struct virtio_net_hdr *hdr)
> @@ -130,15 +192,15 @@ static inline int tun_vnet_hdr_get(int sz, unsigned int flags,
> }
>
> static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter,
> - const struct virtio_net_hdr *hdr)
> + const void *hdr, int content_sz)
> {
> if (iov_iter_count(iter) < sz)
> return -EINVAL;
>
> - if (copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))
> + if (copy_to_iter(hdr, content_sz, iter) != content_sz)
> return -EFAULT;
>
> - if (iov_iter_zero(sz - sizeof(*hdr), iter) != sz - sizeof(*hdr))
> + if (iov_iter_zero(sz - content_sz, iter) != sz - content_sz)
> return -EFAULT;
>
> return 0;
> @@ -151,32 +213,48 @@ static inline int tun_vnet_hdr_to_skb(unsigned int flags,
> return virtio_net_hdr_to_skb(skb, hdr, tun_vnet_is_little_endian(flags));
> }
>
> -static inline int tun_vnet_hdr_from_skb(unsigned int flags,
> +static inline int tun_vnet_hdr_from_skb(int sz, unsigned int flags,
> const struct net_device *dev,
> const struct sk_buff *skb,
> - struct virtio_net_hdr *hdr)
> + tun_vnet_hash_find vnet_hash_find,
> + struct virtio_net_hdr_v1_hash *hdr)
> {
> int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0;
> + const struct virtio_net_hash *report = sz < sizeof(struct virtio_net_hdr_v1_hash) ?
> + NULL : vnet_hash_find(skb);
> + int content_sz;
> +
> + if (report) {
> + content_sz = sizeof(struct virtio_net_hdr_v1_hash);
> +
> + *hdr = (struct virtio_net_hdr_v1_hash) {
> + .hdr = { .num_buffers = __cpu_to_virtio16(true, 1) },
> + .hash_value = cpu_to_le32(report->value),
> + .hash_report = cpu_to_le16(report->report)
> + };
> + } else {
> + content_sz = sizeof(struct virtio_net_hdr);
> + }
>
> - if (virtio_net_hdr_from_skb(skb, hdr,
> + if (virtio_net_hdr_from_skb(skb, (struct virtio_net_hdr *)hdr,
> tun_vnet_is_little_endian(flags), true,
> vlan_hlen)) {
> struct skb_shared_info *sinfo = skb_shinfo(skb);
>
> if (net_ratelimit()) {
> netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
> - sinfo->gso_type, tun_vnet16_to_cpu(flags, hdr->gso_size),
> - tun_vnet16_to_cpu(flags, hdr->hdr_len));
> + sinfo->gso_type, tun_vnet16_to_cpu(flags, hdr->hdr.gso_size),
> + tun_vnet16_to_cpu(flags, hdr->hdr.hdr_len));
> print_hex_dump(KERN_ERR, "tun: ",
> DUMP_PREFIX_NONE,
> 16, 1, skb->head,
> - min(tun_vnet16_to_cpu(flags, hdr->hdr_len), 64), true);
> + min(tun_vnet16_to_cpu(flags, hdr->hdr.hdr_len), 64), true);
> }
> WARN_ON_ONCE(1);
> return -EINVAL;
> }
>
> - return 0;
> + return content_sz;
> }
>
> #endif /* TUN_VNET_H */
> diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
> index 553552fa635c..5bbb343a6dba 100644
> --- a/include/linux/if_tap.h
> +++ b/include/linux/if_tap.h
> @@ -4,6 +4,7 @@
>
> #include <net/sock.h>
> #include <linux/skb_array.h>
> +#include <uapi/linux/if_tun.h>
>
> struct file;
> struct socket;
> @@ -43,6 +44,7 @@ struct tap_dev {
> int numqueues;
> netdev_features_t tap_features;
> int minor;
> + struct tun_vnet_hash vnet_hash;
>
> void (*update_features)(struct tap_dev *tap, netdev_features_t features);
> void (*count_tx_dropped)(struct tap_dev *tap);
> diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
> index 287cdc81c939..d11e79b4e0dc 100644
> --- a/include/uapi/linux/if_tun.h
> +++ b/include/uapi/linux/if_tun.h
> @@ -62,6 +62,34 @@
> #define TUNSETCARRIER _IOW('T', 226, int)
> #define TUNGETDEVNETNS _IO('T', 227)
>
> +/**
> + * define TUNGETVNETHASHCAP - ioctl to get virtio_net hashing capability.
> + *
> + * The argument is a pointer to &struct tun_vnet_hash which will store the
> + * maximal virtio_net hashing configuration.
> + */
> +#define TUNGETVNETHASHCAP _IOR('T', 228, struct tun_vnet_hash)
> +
> +/**
> + * define TUNSETVNETHASH - ioctl to configure virtio_net hashing
> + *
> + * The argument is a pointer to &struct tun_vnet_hash.
> + *
> + * The %TUN_VNET_HASH_REPORT flag set with this ioctl will be effective only
> + * after calling the %TUNSETVNETHDRSZ ioctl with a number greater than or equal
> + * to the size of &struct virtio_net_hdr_v1_hash.
I think we don't need & here.
> + *
> + * The members added to the legacy header by %TUN_VNET_HASH_REPORT flag will
> + * always be little-endian.
> + *
> + * This ioctl results in %EBADFD if the underlying device is deleted. It affects
> + * all queues attached to the same device.
> + *
> + * This ioctl currently has no effect on XDP packets and packets with
> + * queue_mapping set by TC.
This needs to be fixed?
> + */
> +#define TUNSETVNETHASH _IOW('T', 229, struct tun_vnet_hash)
> +
> /* TUNSETIFF ifr flags */
> #define IFF_TUN 0x0001
> #define IFF_TAP 0x0002
> @@ -115,4 +143,24 @@ struct tun_filter {
> __u8 addr[][ETH_ALEN];
> };
>
> +/**
> + * define TUN_VNET_HASH_REPORT - Request virtio_net hash reporting for vhost
> + */
> +#define TUN_VNET_HASH_REPORT 0x0001
> +
> +/**
> + * struct tun_vnet_hash - virtio_net hashing configuration
> + * @flags:
> + * Bitmask consists of %TUN_VNET_HASH_REPORT and %TUN_VNET_HASH_RSS
Could we reuse TUNGETIFF by introducing new IFF_XXX flags?
> + * @pad:
> + * Should be filled with zero before passing to %TUNSETVNETHASH
> + * @types:
> + * Bitmask of allowed hash types
What are they?
> + */
> +struct tun_vnet_hash {
> + __u16 flags;
> + __u8 pad[2];
> + __u32 types;
> +};
> +
> #endif /* _UAPI__IF_TUN_H */
>
> --
> 2.46.2
>
Thanks
On 2024/10/09 17:05, Jason Wang wrote:
> On Tue, Oct 8, 2024 at 2:55 PM Akihiko Odaki <akihiko.odaki@daynix.com> wrote:
>>
>> Allow the guest to reuse the hash value to make receive steering
>> consistent between the host and guest, and to save hash computation.
>>
>> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
>
> I wonder if this would cause overhead when hash reporting is not enabled?
It only adds two branches in the data path. The first one is in
tun_vnet_hash_report(), which decides whether to add the hash value to
the sk_buff. The second one is in tun_vnet_hdr_from_skb(), which decides
whether to report the added hash value.
>
>> ---
>> Documentation/networking/tuntap.rst | 7 +++
>> drivers/net/Kconfig | 1 +
>> drivers/net/tap.c | 45 ++++++++++++++--
>
> Title should be for tap as well, or is this just for tun?
It is also for tap. I will update the title in v6.
>
>> drivers/net/tun.c | 46 ++++++++++++----
>> drivers/net/tun_vnet.h | 102 +++++++++++++++++++++++++++++++-----
>> include/linux/if_tap.h | 2 +
>> include/uapi/linux/if_tun.h | 48 +++++++++++++++++
>> 7 files changed, 223 insertions(+), 28 deletions(-)
>>
>> diff --git a/Documentation/networking/tuntap.rst b/Documentation/networking/tuntap.rst
>> index 4d7087f727be..86b4ae8caa8a 100644
>> --- a/Documentation/networking/tuntap.rst
>> +++ b/Documentation/networking/tuntap.rst
>> @@ -206,6 +206,13 @@ enable is true we enable it, otherwise we disable it::
>> return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
>> }
>>
>> +3.4 Reference
>> +-------------
>> +
>> +``linux/if_tun.h`` defines the interface described below:
>> +
>> +.. kernel-doc:: include/uapi/linux/if_tun.h
>> +
>> Universal TUN/TAP device driver Frequently Asked Question
>> =========================================================
>>
>> diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
>> index 9920b3a68ed1..e2a7bd703550 100644
>> --- a/drivers/net/Kconfig
>> +++ b/drivers/net/Kconfig
>> @@ -395,6 +395,7 @@ config TUN
>> tristate "Universal TUN/TAP device driver support"
>> depends on INET
>> select CRC32
>> + select SKB_EXTENSIONS
>
> Then we need this for macvtap at least as well?
>
>> help
>> TUN/TAP provides packet reception and transmission for user space
>> programs. It can be viewed as a simple Point-to-Point or Ethernet
>> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
>> index 9a34ceed0c2c..5e2fbe63ca47 100644
>> --- a/drivers/net/tap.c
>> +++ b/drivers/net/tap.c
>> @@ -179,6 +179,16 @@ static void tap_put_queue(struct tap_queue *q)
>> sock_put(&q->sk);
>> }
>>
>> +static struct virtio_net_hash *tap_add_hash(struct sk_buff *skb)
>> +{
>> + return (struct virtio_net_hash *)skb->cb;
>
> Any reason that tap uses skb->cb but not skb extensions? (And is it
> safe to use that without cloning?)
tun adds virtio_net_hash to a skb in ndo_select_queue(), but it does not
immediately put the skb into its ptr_ring; ndo_start_xmit() does that
later. It is hard to ensure that nobody modifies skb->cb between the two
calls.
The situation is different for tap: tap_handle_frame() adds
virtio_net_hash to a skb and immediately puts the skb into its ptr_ring,
so nothing should touch skb->cb in between.
>
>> +}
>> +
>> +static const struct virtio_net_hash *tap_find_hash(const struct sk_buff *skb)
>> +{
>> + return (const struct virtio_net_hash *)skb->cb;
>> +}
>> +
>> /*
>> * Select a queue based on the rxq of the device on which this packet
>> * arrived. If the incoming device is not mq, calculate a flow hash
>> @@ -189,6 +199,7 @@ static void tap_put_queue(struct tap_queue *q)
>> static struct tap_queue *tap_get_queue(struct tap_dev *tap,
>> struct sk_buff *skb)
>> {
>> + struct flow_keys_basic keys_basic;
>> struct tap_queue *queue = NULL;
>> /* Access to taps array is protected by rcu, but access to numvtaps
>> * isn't. Below we use it to lookup a queue, but treat it as a hint
>> @@ -198,15 +209,32 @@ static struct tap_queue *tap_get_queue(struct tap_dev *tap,
>> int numvtaps = READ_ONCE(tap->numvtaps);
>> __u32 rxq;
>>
>> + *tap_add_hash(skb) = (struct virtio_net_hash) { .report = VIRTIO_NET_HASH_REPORT_NONE };
>> +
>> if (!numvtaps)
>> goto out;
>>
>> if (numvtaps == 1)
>> goto single;
>>
>> + if (!skb->l4_hash && !skb->sw_hash) {
>> + struct flow_keys keys;
>> +
>> + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
>> + rxq = flow_hash_from_keys(&keys);
>> + keys_basic = (struct flow_keys_basic) {
>> + .control = keys.control,
>> + .basic = keys.basic
>> + };
>> + } else {
>> + skb_flow_dissect_flow_keys_basic(NULL, skb, &keys_basic, NULL, 0, 0, 0,
>> + FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
>> + rxq = skb->hash;
>> + }
>> +
>> /* Check if we can use flow to select a queue */
>> - rxq = skb_get_hash(skb);
>> if (rxq) {
>> + tun_vnet_hash_report(&tap->vnet_hash, skb, &keys_basic, rxq, tap_add_hash);
>> queue = rcu_dereference(tap->taps[rxq % numvtaps]);
>> goto out;
>> }
>> @@ -713,15 +741,16 @@ static ssize_t tap_put_user(struct tap_queue *q,
>> int total;
>>
>> if (q->flags & IFF_VNET_HDR) {
>> - struct virtio_net_hdr vnet_hdr;
>> + struct virtio_net_hdr_v1_hash vnet_hdr;
>>
>> vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
>>
>> - ret = tun_vnet_hdr_from_skb(q->flags, NULL, skb, &vnet_hdr);
>> + ret = tun_vnet_hdr_from_skb(vnet_hdr_len, q->flags, NULL, skb,
>> + tap_find_hash, &vnet_hdr);
>> if (ret < 0)
>> goto done;
>>
>> - ret = tun_vnet_hdr_put(vnet_hdr_len, iter, &vnet_hdr);
>> + ret = tun_vnet_hdr_put(vnet_hdr_len, iter, &vnet_hdr, ret);
>> if (ret < 0)
>> goto done;
>> }
>> @@ -1025,7 +1054,13 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
>> return ret;
>>
>> default:
>> - return tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags, cmd, sp);
>> + rtnl_lock();
>> + tap = rtnl_dereference(q->tap);
>> + ret = tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags,
>> + tap ? &tap->vnet_hash : NULL, -EINVAL,
>> + cmd, sp);
>> + rtnl_unlock();
>> + return ret;
>> }
>> }
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index dd8799d19518..27308417b834 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -209,6 +209,7 @@ struct tun_struct {
>> struct bpf_prog __rcu *xdp_prog;
>> struct tun_prog __rcu *steering_prog;
>> struct tun_prog __rcu *filter_prog;
>> + struct tun_vnet_hash vnet_hash;
>> struct ethtool_link_ksettings link_ksettings;
>> /* init args */
>> struct file *file;
>> @@ -451,6 +452,16 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
>> e->rps_rxhash = hash;
>> }
>>
>> +static struct virtio_net_hash *tun_add_hash(struct sk_buff *skb)
>> +{
>> + return skb_ext_add(skb, SKB_EXT_TUN_VNET_HASH);
>> +}
>> +
>> +static const struct virtio_net_hash *tun_find_hash(const struct sk_buff *skb)
>> +{
>> + return skb_ext_find(skb, SKB_EXT_TUN_VNET_HASH);
>> +}
>> +
>> /* We try to identify a flow through its rxhash. The reason that
>> * we do not check rxq no. is because some cards(e.g 82599), chooses
>> * the rxq based on the txq where the last packet of the flow comes. As
>> @@ -459,12 +470,17 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
>> */
>> static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
>> {
>> + struct flow_keys keys;
>> + struct flow_keys_basic keys_basic;
>> struct tun_flow_entry *e;
>> u32 txq, numqueues;
>>
>> numqueues = READ_ONCE(tun->numqueues);
>>
>> - txq = __skb_get_hash_symmetric(skb);
>> + memset(&keys, 0, sizeof(keys));
>> + skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, 0);
>> +
>> + txq = flow_hash_from_keys(&keys);
>> e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
>> if (e) {
>> tun_flow_save_rps_rxhash(e, txq);
>> @@ -473,6 +489,13 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
>> txq = reciprocal_scale(txq, numqueues);
>> }
>>
>> + keys_basic = (struct flow_keys_basic) {
>> + .control = keys.control,
>> + .basic = keys.basic
>> + };
>> + tun_vnet_hash_report(&tun->vnet_hash, skb, &keys_basic, skb->l4_hash ? skb->hash : txq,
>> + tun_add_hash);
>
> Is using txq when l4_hash is not set something required by the virtio spec?
It is a limitation of the implementation. A hardware driver may set a
hash value with skb_set_hash(), which takes enum pkt_hash_types. The
enum is defined as follows:
enum pkt_hash_types {
PKT_HASH_TYPE_NONE, /* Undefined type */
PKT_HASH_TYPE_L2, /* Input: src_MAC, dest_MAC */
PKT_HASH_TYPE_L3, /* Input: src_IP, dst_IP */
PKT_HASH_TYPE_L4, /* Input: src_IP, dst_IP, src_port, dst_port */
};
A hash value with PKT_HASH_TYPE_L2 must be ignored as the virtio spec
does not have a corresponding hash type. The virtio spec has
corresponding hash types for PKT_HASH_TYPE_L3 and PKT_HASH_TYPE_L4 so we
should report them with the virtio_net header.
However, the skb only tells whether the hash is PKT_HASH_TYPE_L4 or not.
So tun reports skb->hash with an L4 hash type if the hash is
PKT_HASH_TYPE_L4. Otherwise it ignores skb->hash and uses a hash value it
computes on its own, because it cannot tell whether skb->hash is
PKT_HASH_TYPE_L2 or PKT_HASH_TYPE_L3.
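To make that concrete, the decision in tun_automq_select_queue() boils
down to the following (condensed from this patch for illustration):

    /* skb->l4_hash implies PKT_HASH_TYPE_L4, so skb->hash can be
     * reported as an L4 hash. Otherwise the hash type is ambiguous
     * (L2 or L3), so the locally computed flow hash (txq) is reported
     * instead, with a type derived from the dissected flow keys.
     */
    tun_vnet_hash_report(&tun->vnet_hash, skb, &keys_basic,
                         skb->l4_hash ? skb->hash : txq,
                         tun_add_hash);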
>
>> +
>> return txq;
>> }
>>
>> @@ -1990,10 +2013,8 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
>> size_t total;
>>
>> if (tun->flags & IFF_VNET_HDR) {
>> - struct virtio_net_hdr gso = { 0 };
>> -
>> vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
>> - ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
>> + ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, NULL, 0);
>> if (ret < 0)
>> return ret;
>> }
>> @@ -2018,7 +2039,6 @@ static ssize_t tun_put_user(struct tun_struct *tun,
>> int vlan_offset = 0;
>> int vlan_hlen = 0;
>> int vnet_hdr_sz = 0;
>> - int ret;
>>
>> if (skb_vlan_tag_present(skb))
>> vlan_hlen = VLAN_HLEN;
>> @@ -2043,13 +2063,15 @@ static ssize_t tun_put_user(struct tun_struct *tun,
>> }
>>
>> if (vnet_hdr_sz) {
>> - struct virtio_net_hdr gso;
>> + struct virtio_net_hdr_v1_hash gso;
>> + int ret;
>>
>> - ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso);
>> + ret = tun_vnet_hdr_from_skb(vnet_hdr_sz, tun->flags, tun->dev, skb,
>> + tun_find_hash, &gso);
>> if (ret < 0)
>> goto done;
>>
>> - ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
>> + ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso, ret);
>> if (ret < 0)
>> goto done;
>> }
>> @@ -3055,9 +3077,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>> goto unlock;
>> }
>>
>> - ret = -EBADFD;
>> - if (!tun)
>> + if (!tun) {
>> + ret = tun_vnet_ioctl(NULL, NULL, NULL, -EBADFD, cmd, argp);
>
> This does not seem elegant (passing three NULL pointers). Any reason we
> can't just modify __tun_chr_ioctl() instead of introducing things like
> tun_vnet_ioctl()?
tun_vnet_ioctl() is introduced with patch "tun: Unify vnet
implementation". We can abandon unifying the ioctl handling if the
interface looks too awkward.
>
>> goto unlock;
>> + }
>>
>> netif_info(tun, drv, tun->dev, "tun_chr_ioctl cmd %u\n", cmd);
>>
>> @@ -3256,7 +3279,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>> break;
>>
>> default:
>> - ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp);
>> + ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags,
>> + &tun->vnet_hash, -EINVAL, cmd, argp);
>> }
>>
>> if (do_notify)
>> diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h
>> index c40bde0fdf8c..589a97dd7d02 100644
>> --- a/drivers/net/tun_vnet.h
>> +++ b/drivers/net/tun_vnet.h
>> @@ -6,6 +6,9 @@
>> #define TUN_VNET_LE 0x80000000
>> #define TUN_VNET_BE 0x40000000
>>
>> +typedef struct virtio_net_hash *(*tun_vnet_hash_add)(struct sk_buff *);
>> +typedef const struct virtio_net_hash *(*tun_vnet_hash_find)(const struct sk_buff *);
>> +
>> static inline bool tun_vnet_legacy_is_little_endian(unsigned int flags)
>> {
>> return !(IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && (flags & TUN_VNET_BE)) &&
>> @@ -59,18 +62,31 @@ static inline __virtio16 cpu_to_tun_vnet16(unsigned int flags, u16 val)
>> }
>>
>> static inline long tun_vnet_ioctl(int *sz, unsigned int *flags,
>> - unsigned int cmd, int __user *sp)
>> + struct tun_vnet_hash *hash, long fallback,
>> + unsigned int cmd, void __user *argp)
>> {
>> + static const struct tun_vnet_hash cap = {
>> + .flags = TUN_VNET_HASH_REPORT,
>> + .types = VIRTIO_NET_SUPPORTED_HASH_TYPES
>> + };
>
> Let's find a way to reuse the virtio-net uAPI instead of introducing new
> stuff that stresses the management layer.
I found include/uapi/linux/virtio_net.h inappropriate for this ioctl.
It has the following structure for hash reporting:
struct virtio_net_hash_config {
__le32 hash_types;
/* for compatibility with virtio_net_rss_config */
__le16 reserved[4];
__u8 hash_key_length;
__u8 hash_key_data[/* hash_key_length */];
};
We do not care about hash_key_length and hash_key_data.
It has the following structure for RSS and hash reporting when RSS is
enabled:
struct virtio_net_rss_config {
__le32 hash_types;
__le16 indirection_table_mask;
__le16 unclassified_queue;
__le16 indirection_table[1/* + indirection_table_mask */];
__le16 max_tx_vq;
__u8 hash_key_length;
__u8 hash_key_data[/* hash_key_length */];
};
This structure is hard to use as it has data members after the
indirection_table flexible array, and max_tx_vq is not of interest to us
either.
I tested the usability of the ioctl by actually using it in QEMU. The RFC
patch series is available at:
https://patchew.org/QEMU/20240915-hash-v3-0-79cb08d28647@daynix.com/
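For reference, a minimal userspace sketch of the configuration sequence
with this uAPI looks as follows (fd is a tun/tap file descriptor already
opened with IFF_VNET_HDR; enable_hash_report() is just an illustrative
name and this snippet is untested):

    #include <errno.h>
    #include <sys/ioctl.h>
    #include <linux/if_tun.h>
    #include <linux/virtio_net.h>

    static int enable_hash_report(int fd)
    {
        struct tun_vnet_hash cap, cfg = { 0 };
        int hdr_sz = sizeof(struct virtio_net_hdr_v1_hash);

        /* Query the supported flags and hash types. */
        if (ioctl(fd, TUNGETVNETHASHCAP, &cap) < 0)
            return -errno;

        /* Reporting only takes effect with the v1_hash header size. */
        if (ioctl(fd, TUNSETVNETHDRSZ, &hdr_sz) < 0)
            return -errno;

        /* Enable hash reporting for all hash types the kernel allows. */
        cfg.flags = cap.flags & TUN_VNET_HASH_REPORT;
        cfg.types = cap.types;
        if (ioctl(fd, TUNSETVNETHASH, &cfg) < 0)
            return -errno;

        return 0;
    }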
>
>> + struct tun_vnet_hash hash_buf;
>> + int __user *sp = argp;
>> int s;
>>
>> switch (cmd) {
>> case TUNGETVNETHDRSZ:
>> + if (!sz)
>> + return -EBADFD;
>> +
>> s = *sz;
>> if (put_user(s, sp))
>> return -EFAULT;
>> return 0;
>>
>> case TUNSETVNETHDRSZ:
>> + if (!sz)
>> + return -EBADFD;
>> +
>> if (get_user(s, sp))
>> return -EFAULT;
>> if (s < (int)sizeof(struct virtio_net_hdr))
>> @@ -80,12 +96,18 @@ static inline long tun_vnet_ioctl(int *sz, unsigned int *flags,
>> return 0;
>>
>> case TUNGETVNETLE:
>> + if (!flags)
>> + return -EBADFD;
>> +
>> s = !!(*flags & TUN_VNET_LE);
>> if (put_user(s, sp))
>> return -EFAULT;
>> return 0;
>>
>> case TUNSETVNETLE:
>> + if (!flags)
>> + return -EBADFD;
>> +
>> if (get_user(s, sp))
>> return -EFAULT;
>> if (s)
>> @@ -95,16 +117,56 @@ static inline long tun_vnet_ioctl(int *sz, unsigned int *flags,
>> return 0;
>>
>> case TUNGETVNETBE:
>> + if (!flags)
>> + return -EBADFD;
>> +
>> return tun_vnet_get_be(*flags, sp);
>>
>> case TUNSETVNETBE:
>> + if (!flags)
>> + return -EBADFD;
>> +
>> return tun_vnet_set_be(flags, sp);
>>
>> + case TUNGETVNETHASHCAP:
>> + return copy_to_user(argp, &cap, sizeof(cap)) ? -EFAULT : 0;
>> +
>> + case TUNSETVNETHASH:
>> + if (!hash)
>> + return -EBADFD;
>> +
>> + if (copy_from_user(&hash_buf, argp, sizeof(hash_buf)))
>> + return -EFAULT;
>> +
>> + *hash = hash_buf;
>> + return 0;
>> +
>> default:
>> - return -EINVAL;
>> + return fallback;
>> }
>> }
>>
>> +static inline void tun_vnet_hash_report(const struct tun_vnet_hash *hash,
>> + struct sk_buff *skb,
>> + const struct flow_keys_basic *keys,
>> + u32 value,
>> + tun_vnet_hash_add vnet_hash_add)
>> +{
>> + struct virtio_net_hash *report;
>> +
>> + if (!(hash->flags & TUN_VNET_HASH_REPORT))
>> + return;
>> +
>> + report = vnet_hash_add(skb);
>> + if (!report)
>> + return;
>> +
>> + *report = (struct virtio_net_hash) {
>> + .report = virtio_net_hash_report(hash->types, keys),
>> + .value = value
>> + };
>> +}
>> +
>> static inline int tun_vnet_hdr_get(int sz, unsigned int flags,
>> struct iov_iter *from,
>> struct virtio_net_hdr *hdr)
>> @@ -130,15 +192,15 @@ static inline int tun_vnet_hdr_get(int sz, unsigned int flags,
>> }
>>
>> static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter,
>> - const struct virtio_net_hdr *hdr)
>> + const void *hdr, int content_sz)
>> {
>> if (iov_iter_count(iter) < sz)
>> return -EINVAL;
>>
>> - if (copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))
>> + if (copy_to_iter(hdr, content_sz, iter) != content_sz)
>> return -EFAULT;
>>
>> - if (iov_iter_zero(sz - sizeof(*hdr), iter) != sz - sizeof(*hdr))
>> + if (iov_iter_zero(sz - content_sz, iter) != sz - content_sz)
>> return -EFAULT;
>>
>> return 0;
>> @@ -151,32 +213,48 @@ static inline int tun_vnet_hdr_to_skb(unsigned int flags,
>> return virtio_net_hdr_to_skb(skb, hdr, tun_vnet_is_little_endian(flags));
>> }
>>
>> -static inline int tun_vnet_hdr_from_skb(unsigned int flags,
>> +static inline int tun_vnet_hdr_from_skb(int sz, unsigned int flags,
>> const struct net_device *dev,
>> const struct sk_buff *skb,
>> - struct virtio_net_hdr *hdr)
>> + tun_vnet_hash_find vnet_hash_find,
>> + struct virtio_net_hdr_v1_hash *hdr)
>> {
>> int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0;
>> + const struct virtio_net_hash *report = sz < sizeof(struct virtio_net_hdr_v1_hash) ?
>> + NULL : vnet_hash_find(skb);
>> + int content_sz;
>> +
>> + if (report) {
>> + content_sz = sizeof(struct virtio_net_hdr_v1_hash);
>> +
>> + *hdr = (struct virtio_net_hdr_v1_hash) {
>> + .hdr = { .num_buffers = __cpu_to_virtio16(true, 1) },
>> + .hash_value = cpu_to_le32(report->value),
>> + .hash_report = cpu_to_le16(report->report)
>> + };
>> + } else {
>> + content_sz = sizeof(struct virtio_net_hdr);
>> + }
>>
>> - if (virtio_net_hdr_from_skb(skb, hdr,
>> + if (virtio_net_hdr_from_skb(skb, (struct virtio_net_hdr *)hdr,
>> tun_vnet_is_little_endian(flags), true,
>> vlan_hlen)) {
>> struct skb_shared_info *sinfo = skb_shinfo(skb);
>>
>> if (net_ratelimit()) {
>> netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
>> - sinfo->gso_type, tun_vnet16_to_cpu(flags, hdr->gso_size),
>> - tun_vnet16_to_cpu(flags, hdr->hdr_len));
>> + sinfo->gso_type, tun_vnet16_to_cpu(flags, hdr->hdr.gso_size),
>> + tun_vnet16_to_cpu(flags, hdr->hdr.hdr_len));
>> print_hex_dump(KERN_ERR, "tun: ",
>> DUMP_PREFIX_NONE,
>> 16, 1, skb->head,
>> - min(tun_vnet16_to_cpu(flags, hdr->hdr_len), 64), true);
>> + min(tun_vnet16_to_cpu(flags, hdr->hdr.hdr_len), 64), true);
>> }
>> WARN_ON_ONCE(1);
>> return -EINVAL;
>> }
>>
>> - return 0;
>> + return content_sz;
>> }
>>
>> #endif /* TUN_VNET_H */
>> diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
>> index 553552fa635c..5bbb343a6dba 100644
>> --- a/include/linux/if_tap.h
>> +++ b/include/linux/if_tap.h
>> @@ -4,6 +4,7 @@
>>
>> #include <net/sock.h>
>> #include <linux/skb_array.h>
>> +#include <uapi/linux/if_tun.h>
>>
>> struct file;
>> struct socket;
>> @@ -43,6 +44,7 @@ struct tap_dev {
>> int numqueues;
>> netdev_features_t tap_features;
>> int minor;
>> + struct tun_vnet_hash vnet_hash;
>>
>> void (*update_features)(struct tap_dev *tap, netdev_features_t features);
>> void (*count_tx_dropped)(struct tap_dev *tap);
>> diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
>> index 287cdc81c939..d11e79b4e0dc 100644
>> --- a/include/uapi/linux/if_tun.h
>> +++ b/include/uapi/linux/if_tun.h
>> @@ -62,6 +62,34 @@
>> #define TUNSETCARRIER _IOW('T', 226, int)
>> #define TUNGETDEVNETNS _IO('T', 227)
>>
>> +/**
>> + * define TUNGETVNETHASHCAP - ioctl to get virtio_net hashing capability.
>> + *
>> + * The argument is a pointer to &struct tun_vnet_hash which will store the
>> + * maximal virtio_net hashing configuration.
>> + */
>> +#define TUNGETVNETHASHCAP _IOR('T', 228, struct tun_vnet_hash)
>> +
>> +/**
>> + * define TUNSETVNETHASH - ioctl to configure virtio_net hashing
>> + *
>> + * The argument is a pointer to &struct tun_vnet_hash.
>> + *
>> + * The %TUN_VNET_HASH_REPORT flag set with this ioctl will be effective only
>> + * after calling the %TUNSETVNETHDRSZ ioctl with a number greater than or equal
>> + * to the size of &struct virtio_net_hdr_v1_hash.
>
> I think we don't need & here.
Documentation/doc-guide/kernel-doc.rst says &struct is the token for a
struct cross-reference.
>
>> + *
>> + * The members added to the legacy header by %TUN_VNET_HASH_REPORT flag will
>> + * always be little-endian.
>> + *
>> + * This ioctl results in %EBADFD if the underlying device is deleted. It affects
>> + * all queues attached to the same device.
>> + *
>> + * This ioctl currently has no effect on XDP packets and packets with
>> + * queue_mapping set by TC.
>
> This needs to be fixed?
We don't use a hash value to select a queue in such a case, so there is
no point in reporting one.
>
>> + */
>> +#define TUNSETVNETHASH _IOW('T', 229, struct tun_vnet_hash)
>> +
>> /* TUNSETIFF ifr flags */
>> #define IFF_TUN 0x0001
>> #define IFF_TAP 0x0002
>> @@ -115,4 +143,24 @@ struct tun_filter {
>> __u8 addr[][ETH_ALEN];
>> };
>>
>> +/**
>> + * define TUN_VNET_HASH_REPORT - Request virtio_net hash reporting for vhost
>> + */
>> +#define TUN_VNET_HASH_REPORT 0x0001
>> +
>> +/**
>> + * struct tun_vnet_hash - virtio_net hashing configuration
>> + * @flags:
>> + * Bitmask consists of %TUN_VNET_HASH_REPORT and %TUN_VNET_HASH_RSS
>
> Could we reuse TUNGETIFF by introducing new IFF_XXX flags?
That's certainly doable, though I'm a bit worried about exhausting all
the bits of IFF_XXX.
>
>> + * @pad:
>> + * Should be filled with zero before passing to %TUNSETVNETHASH
>> + * @types:
>> + * Bitmask of allowed hash types
>
> What are they?
They are defined in the virtio spec and include/uapi/linux/virtio_net.h
contains them:
#define VIRTIO_NET_RSS_HASH_TYPE_IPv4 (1 << 0)
#define VIRTIO_NET_RSS_HASH_TYPE_TCPv4 (1 << 1)
#define VIRTIO_NET_RSS_HASH_TYPE_UDPv4 (1 << 2)
#define VIRTIO_NET_RSS_HASH_TYPE_IPv6 (1 << 3)
#define VIRTIO_NET_RSS_HASH_TYPE_TCPv6 (1 << 4)
#define VIRTIO_NET_RSS_HASH_TYPE_UDPv6 (1 << 5)
#define VIRTIO_NET_RSS_HASH_TYPE_IP_EX (1 << 6)
#define VIRTIO_NET_RSS_HASH_TYPE_TCP_EX (1 << 7)
#define VIRTIO_NET_RSS_HASH_TYPE_UDP_EX (1 << 8)
>
>> + */
>> +struct tun_vnet_hash {
>> + __u16 flags;
>> + __u8 pad[2];
>> + __u32 types;
>> +};
>> +
>> #endif /* _UAPI__IF_TUN_H */
>>
>> --
>> 2.46.2
>>
>
> Thanks
>