[PATCH v2] util: support PCI passthrough net device stats collection

zhenwei pi posted 1 patch 3 years, 7 months ago
Test syntax-check failed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20200923023254.478341-1-pizhenwei@bytedance.com
There is a newer version of this series
src/libvirt_private.syms |   1 +
src/qemu/qemu_driver.c   |   3 ++
src/util/virnetdev.c     | 137 +++++++++++++++++++++++++++++++++++++++++++++++
src/util/virnetdev.h     |   5 ++
4 files changed, 146 insertions(+)
[PATCH v2] util: support PCI passthrough net device stats collection
Posted by zhenwei pi 3 years, 7 months ago
Collect PCI passthrough net device stats from kernel by netlink
API.

Currently, libvirt can not get PCI passthrough net device stats,
run command:
 #virsh domifstat instance --interface=52:54:00:2d:b2:35
 error: Failed to get interface stats instance 52:54:00:2d:b2:35
 error: internal error: Interface name not provided

The PCI device(usually SR-IOV virtual function device) is detached
while it's used in PCI passthrough mode. And we can not parse this
device from /proc/net/dev any more.

In this patch, libvirt check net device is VF of not firstly, then
query virNetDevVFInterfaceStats(new API).
virNetDevVFInterfaceStats parses VFs info of all PFs, compares MAC
address until the two MAC addresses match.
'#ip -s link show' can get the same result. Instead of parsing the
output result, implement this feature by libnl API.

Notice that this feature deponds on driver of PF.
Test on Mellanox ConnectX-4 Lx, it works well.
Also test on Intel Corporation 82599ES, it works, but only get 0.
(ip-link command get the same result).

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
 src/libvirt_private.syms |   1 +
 src/qemu/qemu_driver.c   |   3 ++
 src/util/virnetdev.c     | 137 +++++++++++++++++++++++++++++++++++++++++++++++
 src/util/virnetdev.h     |   5 ++
 4 files changed, 146 insertions(+)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index bdbe3431b8..bcc40b8d69 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2585,6 +2585,7 @@ virNetDevSetRcvMulti;
 virNetDevSetupControl;
 virNetDevSysfsFile;
 virNetDevValidateConfig;
+virNetDevVFInterfaceStats;
 
 
 # util/virnetdevbandwidth.h
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index ae715c01d7..f554010c40 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -10196,6 +10196,9 @@ qemuDomainInterfaceStats(virDomainPtr dom,
     if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
         if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats) < 0)
             goto cleanup;
+    } else if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
+        if (virNetDevVFInterfaceStats(&net->mac, stats) < 0)
+            goto cleanup;
     } else {
         if (virNetDevTapInterfaceStats(net->ifname, stats,
                                        !virDomainNetTypeSharesHostView(net)) < 0)
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index e1a4cc2bef..377f25aae7 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -1489,6 +1489,7 @@ static struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
                             .maxlen = sizeof(struct ifla_vf_mac) },
     [IFLA_VF_VLAN]      = { .type = NLA_UNSPEC,
                             .maxlen = sizeof(struct ifla_vf_vlan) },
+    [IFLA_VF_STATS]     = { .type = NLA_NESTED },
 };
 
 
@@ -2265,6 +2266,132 @@ virNetDevSetNetConfig(const char *linkdev, int vf,
     return 0;
 }
 
+static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = {
+    [IFLA_VF_STATS_RX_PACKETS]  = { .type = NLA_U64 },
+    [IFLA_VF_STATS_TX_PACKETS]  = { .type = NLA_U64 },
+    [IFLA_VF_STATS_RX_BYTES]    = { .type = NLA_U64 },
+    [IFLA_VF_STATS_TX_BYTES]    = { .type = NLA_U64 },
+    [IFLA_VF_STATS_BROADCAST]   = { .type = NLA_U64 },
+    [IFLA_VF_STATS_MULTICAST]   = { .type = NLA_U64 },
+};
+
+static int
+virNetDevParseVfStats(struct nlattr **tb, virMacAddrPtr mac,
+                      virDomainInterfaceStatsPtr stats)
+{
+    int ret = -1, len;
+    struct ifla_vf_mac *vf_lladdr;
+    struct nlattr *nla, *t[IFLA_VF_MAX+1];
+    struct nlattr *stb[IFLA_VF_STATS_MAX+1];
+
+    if (tb == NULL || mac == NULL || stats == NULL) {
+        return -1;
+    }
+
+    if (!tb[IFLA_VFINFO_LIST])
+        return -1;
+
+    len = nla_len(tb[IFLA_VFINFO_LIST]);
+
+    for (nla = nla_data(tb[IFLA_VFINFO_LIST]); nla_ok(nla, len);
+            nla = nla_next(nla, &len)) {
+        ret = nla_parse(t, IFLA_VF_MAX, nla_data(nla), nla_len(nla),
+                ifla_vf_policy);
+        if (ret < 0)
+            return -1;
+
+        if (t[IFLA_VF_MAC] == NULL) {
+            continue;
+        }
+
+        vf_lladdr = nla_data(t[IFLA_VF_MAC]);
+        if (virMacAddrCmpRaw(mac, vf_lladdr->mac)) {
+            continue;
+        }
+
+        if (t[IFLA_VF_STATS]) {
+            ret = nla_parse_nested(stb, IFLA_VF_STATS_MAX,
+                    t[IFLA_VF_STATS],
+                    ifla_vfstats_policy);
+            if (ret < 0)
+                return -1;
+
+            stats->rx_bytes = nla_get_u64(stb[IFLA_VF_STATS_RX_BYTES]);
+            stats->tx_bytes = nla_get_u64(stb[IFLA_VF_STATS_TX_BYTES]);
+            stats->rx_packets = nla_get_u64(stb[IFLA_VF_STATS_RX_PACKETS]);
+            stats->tx_packets = nla_get_u64(stb[IFLA_VF_STATS_TX_PACKETS]);
+        }
+        return 0;
+    }
+
+    return ret;
+}
+
+/**
+ * virNetDevVFInterfaceStats:
+ * @mac: MAC address of the VF interface
+ * @stats: returns stats of the VF interface
+ *
+ * Get the VF interface from kernel by netlink.
+ * Returns 0 on success, -1 on failure.
+ */
+int
+virNetDevVFInterfaceStats(virMacAddrPtr mac,
+                     virDomainInterfaceStatsPtr stats)
+{
+    FILE *fp;
+    char line[256], *colon, *ifname;
+    int rc = -1;
+    void *nlData = NULL;
+    struct nlattr *tb[IFLA_MAX + 1] = {NULL, };
+    char *sysfsDevicePath = NULL;
+
+    fp = fopen("/proc/net/dev", "r");
+    if (!fp) {
+        virReportSystemError(errno, "%s",
+                _("Could not open /proc/net/dev"));
+        return -1;
+    }
+
+    /* get all PCI net devices, and parse VFs list from netlink API.
+     * compare MAC address, collect device stats if matching.
+     */
+    while (fgets(line, sizeof(line), fp)) {
+        /* The line looks like:
+         *   "   eth0:..."
+         * Split it at the colon. and strip blank from head.
+         */
+        colon = strchr(line, ':');
+        if (!colon)
+            continue;
+        *colon = '\0';
+        ifname = line;
+        while ((*ifname == ' ') && (ifname < colon))
+            ifname++;
+
+        if (virNetDevSysfsFile(&sysfsDevicePath, ifname, "device") < 0)
+            break;
+
+        if (virNetDevIsPCIDevice(sysfsDevicePath)) {
+            rc = virNetlinkDumpLink(ifname, -1, &nlData, tb, 0, 0);
+            if (rc < 0) {
+                rc = -1;
+                goto cleanup;
+            }
+
+            rc = virNetDevParseVfStats(tb, mac, stats);
+            VIR_FREE(nlData);
+            if (rc == 0)
+                goto cleanup;
+        }
+        VIR_FREE(sysfsDevicePath);
+    }
+
+ cleanup:
+    VIR_FREE(sysfsDevicePath);
+    VIR_FORCE_FCLOSE(fp);
+    return rc;
+}
 
 #else /* defined(__linux__) && defined(WITH_LIBNL)  && defined(IFLA_VF_MAX) */
 
@@ -2309,6 +2436,16 @@ virNetDevSetNetConfig(const char *linkdev G_GNUC_UNUSED,
 }
 
 
+int
+virNetDevVFInterfaceStats(virMacAddrPtr mac G_GNUC_UNUSED,
+                     virDomainInterfaceStatsPtr stats G_GNUC_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("Unable to get VF net device stats on this platform"));
+    return -1;
+}
+
+
 #endif /* defined(__linux__) && defined(WITH_LIBNL) && defined(IFLA_VF_MAX) */
 
 VIR_ENUM_IMPL(virNetDevIfState,
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
index 5f581323ed..ff59d9d341 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -312,4 +312,9 @@ int virNetDevSysfsFile(char **pf_sysfs_device_link,
 int virNetDevRunEthernetScript(const char *ifname, const char *script)
     G_GNUC_NO_INLINE;
 
+int virNetDevVFInterfaceStats(virMacAddrPtr mac,
+                              virDomainInterfaceStatsPtr stats)
+ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
+
+
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter, virNetDevRxFilterFree);
-- 
2.11.0

Re: [PATCH v2] util: support PCI passthrough net device stats collection
Posted by Daniel P. Berrangé 3 years, 7 months ago
On Wed, Sep 23, 2020 at 10:32:54AM +0800, zhenwei pi wrote:
> Collect PCI passthrough net device stats from kernel by netlink
> API.
> 
> Currently, libvirt can not get PCI passthrough net device stats,
> run command:
>  #virsh domifstat instance --interface=52:54:00:2d:b2:35
>  error: Failed to get interface stats instance 52:54:00:2d:b2:35
>  error: internal error: Interface name not provided
> 
> The PCI device(usually SR-IOV virtual function device) is detached
> while it's used in PCI passthrough mode. And we can not parse this
> device from /proc/net/dev any more.
> 
> In this patch, libvirt check net device is VF of not firstly, then
> query virNetDevVFInterfaceStats(new API).
> virNetDevVFInterfaceStats parses VFs info of all PFs, compares MAC
> address until the two MAC addresses match.
> '#ip -s link show' can get the same result. Instead of parsing the
> output result, implement this feature by libnl API.
> 
> Notice that this feature deponds on driver of PF.
> Test on Mellanox ConnectX-4 Lx, it works well.
> Also test on Intel Corporation 82599ES, it works, but only get 0.
> (ip-link command get the same result).
> 
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---
>  src/libvirt_private.syms |   1 +
>  src/qemu/qemu_driver.c   |   3 ++
>  src/util/virnetdev.c     | 137 +++++++++++++++++++++++++++++++++++++++++++++++
>  src/util/virnetdev.h     |   5 ++
>  4 files changed, 146 insertions(+)
> 
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index bdbe3431b8..bcc40b8d69 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -2585,6 +2585,7 @@ virNetDevSetRcvMulti;
>  virNetDevSetupControl;
>  virNetDevSysfsFile;
>  virNetDevValidateConfig;
> +virNetDevVFInterfaceStats;
>  
>  
>  # util/virnetdevbandwidth.h
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index ae715c01d7..f554010c40 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -10196,6 +10196,9 @@ qemuDomainInterfaceStats(virDomainPtr dom,
>      if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
>          if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats) < 0)
>              goto cleanup;
> +    } else if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
> +        if (virNetDevVFInterfaceStats(&net->mac, stats) < 0)
> +            goto cleanup;
>      } else {
>          if (virNetDevTapInterfaceStats(net->ifname, stats,
>                                         !virDomainNetTypeSharesHostView(net)) < 0)
> diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
> index e1a4cc2bef..377f25aae7 100644
> --- a/src/util/virnetdev.c
> +++ b/src/util/virnetdev.c
> @@ -1489,6 +1489,7 @@ static struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
>                              .maxlen = sizeof(struct ifla_vf_mac) },
>      [IFLA_VF_VLAN]      = { .type = NLA_UNSPEC,
>                              .maxlen = sizeof(struct ifla_vf_vlan) },
> +    [IFLA_VF_STATS]     = { .type = NLA_NESTED },
>  };
>  
>  
> @@ -2265,6 +2266,132 @@ virNetDevSetNetConfig(const char *linkdev, int vf,
>      return 0;
>  }
>  
> +static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = {
> +    [IFLA_VF_STATS_RX_PACKETS]  = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_TX_PACKETS]  = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_RX_BYTES]    = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_TX_BYTES]    = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_BROADCAST]   = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_MULTICAST]   = { .type = NLA_U64 },
> +};
> +
> +static int
> +virNetDevParseVfStats(struct nlattr **tb, virMacAddrPtr mac,
> +                      virDomainInterfaceStatsPtr stats)
> +{
> +    int ret = -1, len;
> +    struct ifla_vf_mac *vf_lladdr;
> +    struct nlattr *nla, *t[IFLA_VF_MAX+1];
> +    struct nlattr *stb[IFLA_VF_STATS_MAX+1];
> +
> +    if (tb == NULL || mac == NULL || stats == NULL) {
> +        return -1;
> +    }
> +
> +    if (!tb[IFLA_VFINFO_LIST])
> +        return -1;
> +
> +    len = nla_len(tb[IFLA_VFINFO_LIST]);
> +
> +    for (nla = nla_data(tb[IFLA_VFINFO_LIST]); nla_ok(nla, len);
> +            nla = nla_next(nla, &len)) {
> +        ret = nla_parse(t, IFLA_VF_MAX, nla_data(nla), nla_len(nla),
> +                ifla_vf_policy);
> +        if (ret < 0)
> +            return -1;
> +
> +        if (t[IFLA_VF_MAC] == NULL) {
> +            continue;
> +        }
> +
> +        vf_lladdr = nla_data(t[IFLA_VF_MAC]);
> +        if (virMacAddrCmpRaw(mac, vf_lladdr->mac)) {
> +            continue;
> +        }
> +
> +        if (t[IFLA_VF_STATS]) {
> +            ret = nla_parse_nested(stb, IFLA_VF_STATS_MAX,
> +                    t[IFLA_VF_STATS],
> +                    ifla_vfstats_policy);
> +            if (ret < 0)
> +                return -1;
> +
> +            stats->rx_bytes = nla_get_u64(stb[IFLA_VF_STATS_RX_BYTES]);
> +            stats->tx_bytes = nla_get_u64(stb[IFLA_VF_STATS_TX_BYTES]);
> +            stats->rx_packets = nla_get_u64(stb[IFLA_VF_STATS_RX_PACKETS]);
> +            stats->tx_packets = nla_get_u64(stb[IFLA_VF_STATS_TX_PACKETS]);
> +        }
> +        return 0;
> +    }
> +
> +    return ret;
> +}
> +
> +/**
> + * virNetDevVFInterfaceStats:
> + * @mac: MAC address of the VF interface
> + * @stats: returns stats of the VF interface
> + *
> + * Get the VF interface from kernel by netlink.
> + * Returns 0 on success, -1 on failure.
> + */
> +int
> +virNetDevVFInterfaceStats(virMacAddrPtr mac,
> +                     virDomainInterfaceStatsPtr stats)
> +{
> +    FILE *fp;
> +    char line[256], *colon, *ifname;
> +    int rc = -1;
> +    void *nlData = NULL;
> +    struct nlattr *tb[IFLA_MAX + 1] = {NULL, };
> +    char *sysfsDevicePath = NULL;
> +
> +    fp = fopen("/proc/net/dev", "r");
> +    if (!fp) {
> +        virReportSystemError(errno, "%s",
> +                _("Could not open /proc/net/dev"));
> +        return -1;
> +    }
> +
> +    /* get all PCI net devices, and parse VFs list from netlink API.
> +     * compare MAC address, collect device stats if matching.
> +     */
> +    while (fgets(line, sizeof(line), fp)) {
> +        /* The line looks like:
> +         *   "   eth0:..."
> +         * Split it at the colon. and strip blank from head.
> +         */
> +        colon = strchr(line, ':');
> +        if (!colon)
> +            continue;
> +        *colon = '\0';
> +        ifname = line;
> +        while ((*ifname == ' ') && (ifname < colon))
> +            ifname++;

It seems we're only parsing /proc/net/dev  to get a list of
device names, ignoring the rest of the data in that file.
It would be simpler if we just iterate over /sys/class/net
surely, as the filename as the nic names meaning we don't
need to parse anything.

> +
> +        if (virNetDevSysfsFile(&sysfsDevicePath, ifname, "device") < 0)
> +            break;
> +
> +        if (virNetDevIsPCIDevice(sysfsDevicePath)) {
> +            rc = virNetlinkDumpLink(ifname, -1, &nlData, tb, 0, 0);
> +            if (rc < 0) {
> +                rc = -1;
> +                goto cleanup;
> +            }
> +
> +            rc = virNetDevParseVfStats(tb, mac, stats);
> +            VIR_FREE(nlData);
> +            if (rc == 0)
> +                goto cleanup;
> +        }
> +        VIR_FREE(sysfsDevicePath);
> +    }
> +
> + cleanup:
> +    VIR_FREE(sysfsDevicePath);
> +    VIR_FORCE_FCLOSE(fp);
> +    return rc;
> +}

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|