[PATCH] util: support PCI passthrough net device stats collection

zhenwei pi posted 1 patch 3 years, 7 months ago
Test syntax-check failed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20200921122555.42826-1-pizhenwei@bytedance.com
There is a newer version of this series
meson.build              |   4 ++
src/libvirt_private.syms |   1 +
src/qemu/qemu_driver.c   |   3 +
src/util/virnetdev.c     | 158 +++++++++++++++++++++++++++++++++++++++++++++++
src/util/virnetdev.h     |   5 ++
5 files changed, 171 insertions(+)
[PATCH] util: support PCI passthrough net device stats collection
Posted by zhenwei pi 3 years, 7 months ago
Collect PCI passthrough net device stats from kernel by netlink
API.

Currently, libvirt can not get PCI passthrough net device stats,
run command:
 #virsh domifstat instance --interface=52:54:00:2d:b2:35
 error: Failed to get interface stats instance 52:54:00:2d:b2:35
 error: internal error: Interface name not provided

The PCI device(usually SR-IOV virtual function device) is detached
while it's used in PCI passthrough mode. And we can not parse this
device from /proc/net/dev any more.

In this patch, libvirt check net device is VF of not firstly, then
query virNetDevVFInterfaceStats(new API).
virNetDevVFInterfaceStats parses VFs info of all PFs, compares MAC
address until the two MAC addresses match.
'#ip -s link show' can get the same result. Instead of parsing the
output result, implement this feature by libnl API.

Notice that this feature deponds on driver of PF.
Test on Mellanox ConnectX-4 Lx, it works well.
Also test on Intel Corporation 82599ES, it works, but only get 0.
(ip-link command get the same result).

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
 meson.build              |   4 ++
 src/libvirt_private.syms |   1 +
 src/qemu/qemu_driver.c   |   3 +
 src/util/virnetdev.c     | 158 +++++++++++++++++++++++++++++++++++++++++++++++
 src/util/virnetdev.h     |   5 ++
 5 files changed, 171 insertions(+)

diff --git a/meson.build b/meson.build
index 24535a403c..e17da9e2b9 100644
--- a/meson.build
+++ b/meson.build
@@ -1392,6 +1392,10 @@ if not get_option('virtualport').disabled()
   endif
 endif
 
+if cc.has_header_symbol('linux/if_link.h', 'IFLA_VF_STATS')
+  conf.set('WITH_VF_STATS', 1)
+endif
+
 if host_machine.system() == 'windows'
   ole32_dep = cc.find_library('ole32')
   oleaut32_dep = cc.find_library('oleaut32')
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index bdbe3431b8..bcc40b8d69 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2585,6 +2585,7 @@ virNetDevSetRcvMulti;
 virNetDevSetupControl;
 virNetDevSysfsFile;
 virNetDevValidateConfig;
+virNetDevVFInterfaceStats;
 
 
 # util/virnetdevbandwidth.h
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index ae715c01d7..f554010c40 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -10196,6 +10196,9 @@ qemuDomainInterfaceStats(virDomainPtr dom,
     if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
         if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats) < 0)
             goto cleanup;
+    } else if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
+        if (virNetDevVFInterfaceStats(&net->mac, stats) < 0)
+            goto cleanup;
     } else {
         if (virNetDevTapInterfaceStats(net->ifname, stats,
                                        !virDomainNetTypeSharesHostView(net)) < 0)
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index e1a4cc2bef..be9b8ce4a9 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -1489,6 +1489,9 @@ static struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
                             .maxlen = sizeof(struct ifla_vf_mac) },
     [IFLA_VF_VLAN]      = { .type = NLA_UNSPEC,
                             .maxlen = sizeof(struct ifla_vf_vlan) },
+#if defined(WITH_VF_STATS)
+    [IFLA_VF_STATS]     = { .type = NLA_NESTED },
+#endif
 };
 
 
@@ -2265,6 +2268,151 @@ virNetDevSetNetConfig(const char *linkdev, int vf,
     return 0;
 }
 
+#if defined(WITH_VF_STATS)
+static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = {
+    [IFLA_VF_STATS_RX_PACKETS]  = { .type = NLA_U64 },
+    [IFLA_VF_STATS_TX_PACKETS]  = { .type = NLA_U64 },
+    [IFLA_VF_STATS_RX_BYTES]    = { .type = NLA_U64 },
+    [IFLA_VF_STATS_TX_BYTES]    = { .type = NLA_U64 },
+    [IFLA_VF_STATS_BROADCAST]   = { .type = NLA_U64 },
+    [IFLA_VF_STATS_MULTICAST]   = { .type = NLA_U64 },
+};
+
+static int
+virNetDevParseVfStats(struct nlattr **tb, virMacAddrPtr mac,
+                      virDomainInterfaceStatsPtr stats)
+{
+    int ret = -1, len;
+    struct ifla_vf_mac *vf_lladdr;
+    struct nlattr *nla, *t[IFLA_VF_MAX+1];
+    struct nlattr *stb[IFLA_VF_STATS_MAX+1];
+
+    if (tb == NULL || mac == NULL || stats == NULL) {
+        return -1;
+    }
+
+    if (!tb[IFLA_VFINFO_LIST])
+        return -1;
+
+    len = nla_len(tb[IFLA_VFINFO_LIST]);
+
+    for (nla = nla_data(tb[IFLA_VFINFO_LIST]); nla_ok(nla, len);
+            nla = nla_next(nla, &len)) {
+        ret = nla_parse(t, IFLA_VF_MAX, nla_data(nla), nla_len(nla),
+                ifla_vf_policy);
+        if (ret < 0)
+            return -1;
+
+        if (t[IFLA_VF_MAC] == NULL) {
+            continue;
+        }
+
+        vf_lladdr = nla_data(t[IFLA_VF_MAC]);
+        if(virMacAddrCmpRaw(mac, vf_lladdr->mac)) {
+            continue;
+        }
+
+        if (t[IFLA_VF_STATS]) {
+            ret = nla_parse_nested(stb, IFLA_VF_STATS_MAX,
+                    t[IFLA_VF_STATS],
+                    ifla_vfstats_policy);
+            if (ret < 0)
+                return -1;
+
+            stats->rx_bytes = nla_get_u64(stb[IFLA_VF_STATS_RX_BYTES]);
+            stats->tx_bytes = nla_get_u64(stb[IFLA_VF_STATS_TX_BYTES]);
+            stats->rx_packets = nla_get_u64(stb[IFLA_VF_STATS_RX_PACKETS]);
+            stats->tx_packets = nla_get_u64(stb[IFLA_VF_STATS_TX_PACKETS]);
+        }
+        return 0;
+    }
+
+    return ret;
+}
+
+/**
+ * virNetDevVFInterfaceStats:
+ * @mac: MAC address of the VF interface
+ * @stats: returns stats of the VF interface
+ *
+ * Get the VF interface from kernel by netlink.
+ * Returns 0 on success, -1 on failure.
+ */
+int
+virNetDevVFInterfaceStats(virMacAddrPtr mac,
+                     virDomainInterfaceStatsPtr stats)
+{
+    FILE *fp;
+    char line[256], *colon, *ifname;
+    int rc = -1;
+    void *nlData = NULL;
+    struct nlattr *tb[IFLA_MAX + 1] = {NULL, };
+    char *sysfsDevicePath = NULL;
+
+    fp = fopen("/proc/net/dev", "r");
+    if (!fp) {
+        virReportSystemError(errno, "%s",
+                _("Could not open /proc/net/dev"));
+        return -1;
+    }
+
+    /* get all PCI net devices, and parse VFs list from netlink API.
+     * compare MAC address, collect device stats if matching.
+     */
+    while (fgets(line, sizeof(line), fp)) {
+        /* The line looks like:
+         *   "   eth0:..."
+         * Split it at the colon. and strip blank from head.
+         */
+        colon = strchr(line, ':');
+        if (!colon)
+            continue;
+        *colon = '\0';
+        ifname = line;
+        while((*ifname == ' ') && (ifname < colon))
+            ifname++;
+
+        if (virNetDevSysfsFile(&sysfsDevicePath, ifname, "device") < 0)
+            break;
+
+        if (virNetDevIsPCIDevice(sysfsDevicePath)) {
+            rc = virNetlinkDumpLink(ifname, -1, &nlData, tb, 0, 0);
+            if (rc < 0) {
+                rc = -1;
+                goto cleanup;
+            }
+
+            rc = virNetDevParseVfStats(tb, mac, stats);
+            VIR_FREE(nlData);
+            if (rc == 0)
+                goto cleanup;
+        }
+        VIR_FREE(sysfsDevicePath);
+    }
+
+cleanup:
+    VIR_FREE(sysfsDevicePath);
+    VIR_FORCE_FCLOSE(fp);
+    return rc;
+}
+
+#else	/* #if defined(WITH_VF_STATS) */
+
+int
+virNetDevVFInterfaceStats(virMacAddrPtr mac,
+                     virDomainInterfaceStatsPtr stats)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("Unable to get VF net device stats on this kernel, please upgrade kernel at lease linux-4.2"));
+
+    /* no need to do anything, just fix compling error here */
+    if (mac == NULL || stats == NULL) {
+        return -1;
+    }
+
+    return -1;
+}
+#endif	 /* #if defined(WITH_VF_STATS) */
 
 #else /* defined(__linux__) && defined(WITH_LIBNL)  && defined(IFLA_VF_MAX) */
 
@@ -2309,6 +2457,16 @@ virNetDevSetNetConfig(const char *linkdev G_GNUC_UNUSED,
 }
 
 
+int
+virNetDevVFInterfaceStats(virMacAddrPtr mac,
+                     virDomainInterfaceStatsPtr stats)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("Unable to get VF net device stats on this platform"));
+    return -1;
+}
+
+
 #endif /* defined(__linux__) && defined(WITH_LIBNL) && defined(IFLA_VF_MAX) */
 
 VIR_ENUM_IMPL(virNetDevIfState,
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
index 5f581323ed..ff59d9d341 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -312,4 +312,9 @@ int virNetDevSysfsFile(char **pf_sysfs_device_link,
 int virNetDevRunEthernetScript(const char *ifname, const char *script)
     G_GNUC_NO_INLINE;
 
+int virNetDevVFInterfaceStats(virMacAddrPtr mac,
+                              virDomainInterfaceStatsPtr stats)
+ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
+
+
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter, virNetDevRxFilterFree);
-- 
2.11.0

Re: [PATCH] util: support PCI passthrough net device stats collection
Posted by Laine Stump 3 years, 7 months ago
(Please don't Cc individual developers in patch submissions unless 
they've specifically asked you to do so)

On 9/21/20 8:25 AM, zhenwei pi wrote:
> Collect PCI passthrough net device stats from kernel by netlink
> API.
>
> Currently, libvirt can not get PCI passthrough net device stats,
> run command:
>   #virsh domifstat instance --interface=52:54:00:2d:b2:35
>   error: Failed to get interface stats instance 52:54:00:2d:b2:35
>   error: internal error: Interface name not provided
>
> The PCI device(usually SR-IOV virtual function device) is detached
> while it's used in PCI passthrough mode. And we can not parse this
> device from /proc/net/dev any more.
>
> In this patch, libvirt check net device is VF of not firstly, then
> query virNetDevVFInterfaceStats(new API).
> virNetDevVFInterfaceStats parses VFs info of all PFs, compares MAC
> address until the two MAC addresses match.
> '#ip -s link show' can get the same result. Instead of parsing the
> output result, implement this feature by libnl API.
>
> Notice that this feature deponds on driver of PF.
> Test on Mellanox ConnectX-4 Lx, it works well.
> Also test on Intel Corporation 82599ES, it works, but only get 0.
> (ip-link command get the same result).
>
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---
>   meson.build              |   4 ++
>   src/libvirt_private.syms |   1 +
>   src/qemu/qemu_driver.c   |   3 +
>   src/util/virnetdev.c     | 158 +++++++++++++++++++++++++++++++++++++++++++++++
>   src/util/virnetdev.h     |   5 ++
>   5 files changed, 171 insertions(+)
>
> diff --git a/meson.build b/meson.build
> index 24535a403c..e17da9e2b9 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1392,6 +1392,10 @@ if not get_option('virtualport').disabled()
>     endif
>   endif
>   
> +if cc.has_header_symbol('linux/if_link.h', 'IFLA_VF_STATS')
> +  conf.set('WITH_VF_STATS', 1)
> +endif
> +


(a bit of digression here...)


I understand why you put this chunk in, but I'm fairly certain that it 
isn't needed.


In the case of the few other things from if_link.h that have their own 
"WITH_BLAH" compile-time flag (e.g. WITH_VIRTUALPORT, which checks for 
existence of IFLA_PORT_MAX in if_link.h), they were added in the before 
before time, when the feature in question had been recently added to the 
kernel, and so there were some supported distro versions that had the 
new feature, and some that didn't yet. In order to compile properly on 
all supported platforms (though lacking these new-at-the-time features 
on some) we had to gate them on the presence of some sentinel in 
if_link.h. Those WITH_BLAH flags were then forgotten about, even though 
the list of supported platform/versions has changed over the years, and 
they will now work on *any* supported Linux platform.


Now that this patch has pointed them out, I think it's time for some 
house cleaning. IFLA_PORT_MAX, for example, has been in the upstream 
kernel since 2.6.35, and was even backported to the RHEL*6* 2.6.32 
kernel. Even router firmware these days has a newer kernel than that, so 
it's a safe bet that any system with __linux_ and WITH_LIBNL can enable 
all of that code, i.e. we can just get rid of the extra 
WITH_VIRTUALPORT. I just added this to my personal todo list; let's see 
how long it takes until I actually accomplish it (betting starts now!)


Back to the topic of *this* patch: I looked up IFLA_VF_STATS for the 
upstream kernel, and it first appeared in kernel 4.2. RHEL7 (which is 
the oldest RHEL now supported by libvirt) is based on kernel 3.10, but 
has *a lot* of backports, and so when I checked I found that it also 
supports IFLA_VF_STATS. I don't have the details handy for the oldest 
version of other Linuxes we support, but I tried eliminating the extra 
check for IFLA_VF_STATS, and the full CI suite on gitlab builds without 
error (there is an unrelated error in ubuntu-18.04 in libxl, and 
different errors in the freebsd, mingw, and macos builds that *are* 
caused by different aspects of this patch, but IFLA_VF_STATS does exist 
on all Linux platforms included in the libvirt upstream CI testing).


So support for this feature can be gated on "defined(__linux__) && 
WITH_LIBNL (as should just about anything else using netlink)


>   if host_machine.system() == 'windows'
>     ole32_dep = cc.find_library('ole32')
>     oleaut32_dep = cc.find_library('oleaut32')
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index bdbe3431b8..bcc40b8d69 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -2585,6 +2585,7 @@ virNetDevSetRcvMulti;
>   virNetDevSetupControl;
>   virNetDevSysfsFile;
>   virNetDevValidateConfig;
> +virNetDevVFInterfaceStats;
>   
>   
>   # util/virnetdevbandwidth.h
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index ae715c01d7..f554010c40 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -10196,6 +10196,9 @@ qemuDomainInterfaceStats(virDomainPtr dom,
>       if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
>           if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats) < 0)
>               goto cleanup;
> +    } else if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
> +        if (virNetDevVFInterfaceStats(&net->mac, stats) < 0)
> +            goto cleanup;
>       } else {
>           if (virNetDevTapInterfaceStats(net->ifname, stats,
>                                          !virDomainNetTypeSharesHostView(net)) < 0)
> diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
> index e1a4cc2bef..be9b8ce4a9 100644
> --- a/src/util/virnetdev.c
> +++ b/src/util/virnetdev.c
> @@ -1489,6 +1489,9 @@ static struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
>                               .maxlen = sizeof(struct ifla_vf_mac) },
>       [IFLA_VF_VLAN]      = { .type = NLA_UNSPEC,
>                               .maxlen = sizeof(struct ifla_vf_vlan) },
> +#if defined(WITH_VF_STATS)


Due to being a nested #if, the above should have been "# if defined...." 
(note the space after #). Of course that #if will be removed, but to 
catch things like this in the future, you should install the cppi 
package (which is still optional for libvirt, since it is (was?) 
unavailable on some supported platforms) and run "ninja -C build test", 
which will run all the tests, including syntax / code style checks.


> +    [IFLA_VF_STATS]     = { .type = NLA_NESTED },
> +#endif
>   };
>   
>   
> @@ -2265,6 +2268,151 @@ virNetDevSetNetConfig(const char *linkdev, int vf,
>       return 0;
>   }
>   
> +#if defined(WITH_VF_STATS)
> +static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = {
> +    [IFLA_VF_STATS_RX_PACKETS]  = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_TX_PACKETS]  = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_RX_BYTES]    = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_TX_BYTES]    = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_BROADCAST]   = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_MULTICAST]   = { .type = NLA_U64 },
> +};
> +
> +static int
> +virNetDevParseVfStats(struct nlattr **tb, virMacAddrPtr mac,
> +                      virDomainInterfaceStatsPtr stats)
> +{
> +    int ret = -1, len;
> +    struct ifla_vf_mac *vf_lladdr;
> +    struct nlattr *nla, *t[IFLA_VF_MAX+1];
> +    struct nlattr *stb[IFLA_VF_STATS_MAX+1];
> +
> +    if (tb == NULL || mac == NULL || stats == NULL) {
> +        return -1;
> +    }
> +
> +    if (!tb[IFLA_VFINFO_LIST])
> +        return -1;
> +
> +    len = nla_len(tb[IFLA_VFINFO_LIST]);
> +
> +    for (nla = nla_data(tb[IFLA_VFINFO_LIST]); nla_ok(nla, len);
> +            nla = nla_next(nla, &len)) {
> +        ret = nla_parse(t, IFLA_VF_MAX, nla_data(nla), nla_len(nla),
> +                ifla_vf_policy);
> +        if (ret < 0)
> +            return -1;
> +
> +        if (t[IFLA_VF_MAC] == NULL) {
> +            continue;
> +        }
> +
> +        vf_lladdr = nla_data(t[IFLA_VF_MAC]);
> +        if(virMacAddrCmpRaw(mac, vf_lladdr->mac)) {


The above line failed syntax checks because there must be a space after 
"if" before "("


> +            continue;
> +        }
> +
> +        if (t[IFLA_VF_STATS]) {
> +            ret = nla_parse_nested(stb, IFLA_VF_STATS_MAX,
> +                    t[IFLA_VF_STATS],
> +                    ifla_vfstats_policy);
> +            if (ret < 0)
> +                return -1;
> +
> +            stats->rx_bytes = nla_get_u64(stb[IFLA_VF_STATS_RX_BYTES]);
> +            stats->tx_bytes = nla_get_u64(stb[IFLA_VF_STATS_TX_BYTES]);
> +            stats->rx_packets = nla_get_u64(stb[IFLA_VF_STATS_RX_PACKETS]);
> +            stats->tx_packets = nla_get_u64(stb[IFLA_VF_STATS_TX_PACKETS]);
> +        }
> +        return 0;
> +    }
> +
> +    return ret;
> +}
> +
> +/**
> + * virNetDevVFInterfaceStats:
> + * @mac: MAC address of the VF interface
> + * @stats: returns stats of the VF interface
> + *
> + * Get the VF interface from kernel by netlink.
> + * Returns 0 on success, -1 on failure.
> + */
> +int
> +virNetDevVFInterfaceStats(virMacAddrPtr mac,
> +                     virDomainInterfaceStatsPtr stats)
> +{
> +    FILE *fp;
> +    char line[256], *colon, *ifname;
> +    int rc = -1;
> +    void *nlData = NULL;
> +    struct nlattr *tb[IFLA_MAX + 1] = {NULL, };
> +    char *sysfsDevicePath = NULL;
> +
> +    fp = fopen("/proc/net/dev", "r");
> +    if (!fp) {
> +        virReportSystemError(errno, "%s",
> +                _("Could not open /proc/net/dev"));
> +        return -1;
> +    }
> +
> +    /* get all PCI net devices, and parse VFs list from netlink API.
> +     * compare MAC address, collect device stats if matching.
> +     */
> +    while (fgets(line, sizeof(line), fp)) {
> +        /* The line looks like:
> +         *   "   eth0:..."
> +         * Split it at the colon. and strip blank from head.
> +         */
> +        colon = strchr(line, ':');
> +        if (!colon)
> +            continue;
> +        *colon = '\0';
> +        ifname = line;
> +        while((*ifname == ' ') && (ifname < colon))


This line also failed syntax check - needs a space after "while"


> +            ifname++;
> +
> +        if (virNetDevSysfsFile(&sysfsDevicePath, ifname, "device") < 0)
> +            break;
> +
> +        if (virNetDevIsPCIDevice(sysfsDevicePath)) {
> +            rc = virNetlinkDumpLink(ifname, -1, &nlData, tb, 0, 0);
> +            if (rc < 0) {
> +                rc = -1;
> +                goto cleanup;
> +            }
> +
> +            rc = virNetDevParseVfStats(tb, mac, stats);
> +            VIR_FREE(nlData);
> +            if (rc == 0)
> +                goto cleanup;
> +        }
> +        VIR_FREE(sysfsDevicePath);
> +    }
> +
> +cleanup:


This also failed syntax checks - labels must start in column 2, not 
column 1 (apparently this makes some simple-minded text editors happier 
or something...)


> +    VIR_FREE(sysfsDevicePath);
> +    VIR_FORCE_FCLOSE(fp);
> +    return rc;
> +}
> +
> +#else	/* #if defined(WITH_VF_STATS) */
> +
> +int
> +virNetDevVFInterfaceStats(virMacAddrPtr mac,
> +                     virDomainInterfaceStatsPtr stats)
> +{
> +    virReportSystemError(ENOSYS, "%s",
> +                         _("Unable to get VF net device stats on this kernel, please upgrade kernel at lease linux-4.2"));
> +
> +    /* no need to do anything, just fix compling error here */
> +    if (mac == NULL || stats == NULL) {
> +        return -1;
> +    }


Follow the pattern of other platforms to eliminate build error - declare 
the parameters with G_GNUC_UNUSED.

> +
> +    return -1;
> +}
> +#endif	 /* #if defined(WITH_VF_STATS) */
>   
>   #else /* defined(__linux__) && defined(WITH_LIBNL)  && defined(IFLA_VF_MAX) */
>   
> @@ -2309,6 +2457,16 @@ virNetDevSetNetConfig(const char *linkdev G_GNUC_UNUSED,
>   }
>   
>   
> +int
> +virNetDevVFInterfaceStats(virMacAddrPtr mac,
> +                     virDomainInterfaceStatsPtr stats)
> +{
> +    virReportSystemError(ENOSYS, "%s",
> +                         _("Unable to get VF net device stats on this platform"));
> +    return -1;


Again, you need G_GNUC_UNUSED on the parameters. (This failed the CI 
tests for FreeBSD and MacOS).



> +}
> +
> +
>   #endif /* defined(__linux__) && defined(WITH_LIBNL) && defined(IFLA_VF_MAX) */
>   
>   VIR_ENUM_IMPL(virNetDevIfState,
> diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
> index 5f581323ed..ff59d9d341 100644
> --- a/src/util/virnetdev.h
> +++ b/src/util/virnetdev.h
> @@ -312,4 +312,9 @@ int virNetDevSysfsFile(char **pf_sysfs_device_link,
>   int virNetDevRunEthernetScript(const char *ifname, const char *script)
>       G_GNUC_NO_INLINE;
>   
> +int virNetDevVFInterfaceStats(virMacAddrPtr mac,
> +                              virDomainInterfaceStatsPtr stats)
> +ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
> +
> +
>   G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter, virNetDevRxFilterFree);


I didn't have time to look at the code itself yet. In the meantime, 
fixing it up so that this is just another one of the "#if 
defined(__linux__) && WITH_LIBNL" functions, and eliminating the CI 
build/test errors would be useful; possibly that's all that will be 
needed, and even if not you'll have a head start :-)


Re: [External] Re: [PATCH] util: support PCI passthrough net device stats collection
Posted by zhenwei pi 3 years, 7 months ago
On 9/22/20 9:48 AM, Laine Stump wrote:
> (Please don't Cc individual developers in patch submissions unless 
> they've specifically asked you to do so)
> 
> On 9/21/20 8:25 AM, zhenwei pi wrote:
>> Collect PCI passthrough net device stats from kernel by netlink
>> API.
>>
>> Currently, libvirt can not get PCI passthrough net device stats,
>> run command:
>>   #virsh domifstat instance --interface=52:54:00:2d:b2:35
>>   error: Failed to get interface stats instance 52:54:00:2d:b2:35
>>   error: internal error: Interface name not provided
>>
>> The PCI device(usually SR-IOV virtual function device) is detached
>> while it's used in PCI passthrough mode. And we can not parse this
>> device from /proc/net/dev any more.
>>
>> In this patch, libvirt check net device is VF of not firstly, then
>> query virNetDevVFInterfaceStats(new API).
>> virNetDevVFInterfaceStats parses VFs info of all PFs, compares MAC
>> address until the two MAC addresses match.
>> '#ip -s link show' can get the same result. Instead of parsing the
>> output result, implement this feature by libnl API.
>>
>> Notice that this feature deponds on driver of PF.
>> Test on Mellanox ConnectX-4 Lx, it works well.
>> Also test on Intel Corporation 82599ES, it works, but only get 0.
>> (ip-link command get the same result).
>>
>> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
>> ---
>>   meson.build              |   4 ++
>>   src/libvirt_private.syms |   1 +
>>   src/qemu/qemu_driver.c   |   3 +
>>   src/util/virnetdev.c     | 158 
>> +++++++++++++++++++++++++++++++++++++++++++++++
>>   src/util/virnetdev.h     |   5 ++
>>   5 files changed, 171 insertions(+)
>>
>> diff --git a/meson.build b/meson.build
>> index 24535a403c..e17da9e2b9 100644
>> --- a/meson.build
>> +++ b/meson.build
>> @@ -1392,6 +1392,10 @@ if not get_option('virtualport').disabled()
>>     endif
>>   endif
>> +if cc.has_header_symbol('linux/if_link.h', 'IFLA_VF_STATS')
>> +  conf.set('WITH_VF_STATS', 1)
>> +endif
>> +
> 
> 
> (a bit of digression here...)
> 
> 
> I understand why you put this chunk in, but I'm fairly certain that it 
> isn't needed.
> 
> 
> In the case of the few other things from if_link.h that have their own 
> "WITH_BLAH" compile-time flag (e.g. WITH_VIRTUALPORT, which checks for 
> existence of IFLA_PORT_MAX in if_link.h), they were added in the before 
> before time, when the feature in question had been recently added to the 
> kernel, and so there were some supported distro versions that had the 
> new feature, and some that didn't yet. In order to compile properly on 
> all supported platforms (though lacking these new-at-the-time features 
> on some) we had to gate them on the presence of some sentinel in 
> if_link.h. Those WITH_BLAH flags were then forgotten about, even though 
> the list of supported platform/versions has changed over the years, and 
> they will now work on *any* supported Linux platform.
> 
> 
> Now that this patch has pointed them out, I think it's time for some 
> house cleaning. IFLA_PORT_MAX, for example, has been in the upstream 
> kernel since 2.6.35, and was even backported to the RHEL*6* 2.6.32 
> kernel. Even router firmware these days has a newer kernel than that, so 
> it's a safe bet that any system with __linux_ and WITH_LIBNL can enable 
> all of that code, i.e. we can just get rid of the extra 
> WITH_VIRTUALPORT. I just added this to my personal todo list; let's see 
> how long it takes until I actually accomplish it (betting starts now!)
> 
> 
> Back to the topic of *this* patch: I looked up IFLA_VF_STATS for the 
> upstream kernel, and it first appeared in kernel 4.2. RHEL7 (which is 
> the oldest RHEL now supported by libvirt) is based on kernel 3.10, but 
> has *a lot* of backports, and so when I checked I found that it also 
> supports IFLA_VF_STATS. I don't have the details handy for the oldest 
> version of other Linuxes we support, but I tried eliminating the extra 
> check for IFLA_VF_STATS, and the full CI suite on gitlab builds without 
> error (there is an unrelated error in ubuntu-18.04 in libxl, and 
> different errors in the freebsd, mingw, and macos builds that *are* 
> caused by different aspects of this patch, but IFLA_VF_STATS does exist 
> on all Linux platforms included in the libvirt upstream CI testing).
> 
> 
> So support for this feature can be gated on "defined(__linux__) && 
> WITH_LIBNL (as should just about anything else using netlink)
> 
> 
>>   if host_machine.system() == 'windows'
>>     ole32_dep = cc.find_library('ole32')
>>     oleaut32_dep = cc.find_library('oleaut32')
>> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
>> index bdbe3431b8..bcc40b8d69 100644
>> --- a/src/libvirt_private.syms
>> +++ b/src/libvirt_private.syms
>> @@ -2585,6 +2585,7 @@ virNetDevSetRcvMulti;
>>   virNetDevSetupControl;
>>   virNetDevSysfsFile;
>>   virNetDevValidateConfig;
>> +virNetDevVFInterfaceStats;
>>   # util/virnetdevbandwidth.h
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index ae715c01d7..f554010c40 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -10196,6 +10196,9 @@ qemuDomainInterfaceStats(virDomainPtr dom,
>>       if (virDomainNetGetActualType(net) == 
>> VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
>>           if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats) < 0)
>>               goto cleanup;
>> +    } else if (virDomainNetGetActualType(net) == 
>> VIR_DOMAIN_NET_TYPE_HOSTDEV) {
>> +        if (virNetDevVFInterfaceStats(&net->mac, stats) < 0)
>> +            goto cleanup;
>>       } else {
>>           if (virNetDevTapInterfaceStats(net->ifname, stats,
>>                                          
>> !virDomainNetTypeSharesHostView(net)) < 0)
>> diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
>> index e1a4cc2bef..be9b8ce4a9 100644
>> --- a/src/util/virnetdev.c
>> +++ b/src/util/virnetdev.c
>> @@ -1489,6 +1489,9 @@ static struct nla_policy 
>> ifla_vf_policy[IFLA_VF_MAX+1] = {
>>                               .maxlen = sizeof(struct ifla_vf_mac) },
>>       [IFLA_VF_VLAN]      = { .type = NLA_UNSPEC,
>>                               .maxlen = sizeof(struct ifla_vf_vlan) },
>> +#if defined(WITH_VF_STATS)
> 
> 
> Due to being a nested #if, the above should have been "# if defined...." 
> (note the space after #). Of course that #if will be removed, but to 
> catch things like this in the future, you should install the cppi 
> package (which is still optional for libvirt, since it is (was?) 
> unavailable on some supported platforms) and run "ninja -C build test", 
> which will run all the tests, including syntax / code style checks.
> 
> 
>> +    [IFLA_VF_STATS]     = { .type = NLA_NESTED },
>> +#endif
>>   };
>> @@ -2265,6 +2268,151 @@ virNetDevSetNetConfig(const char *linkdev, int 
>> vf,
>>       return 0;
>>   }
>> +#if defined(WITH_VF_STATS)
>> +static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = {
>> +    [IFLA_VF_STATS_RX_PACKETS]  = { .type = NLA_U64 },
>> +    [IFLA_VF_STATS_TX_PACKETS]  = { .type = NLA_U64 },
>> +    [IFLA_VF_STATS_RX_BYTES]    = { .type = NLA_U64 },
>> +    [IFLA_VF_STATS_TX_BYTES]    = { .type = NLA_U64 },
>> +    [IFLA_VF_STATS_BROADCAST]   = { .type = NLA_U64 },
>> +    [IFLA_VF_STATS_MULTICAST]   = { .type = NLA_U64 },
>> +};
>> +
>> +static int
>> +virNetDevParseVfStats(struct nlattr **tb, virMacAddrPtr mac,
>> +                      virDomainInterfaceStatsPtr stats)
>> +{
>> +    int ret = -1, len;
>> +    struct ifla_vf_mac *vf_lladdr;
>> +    struct nlattr *nla, *t[IFLA_VF_MAX+1];
>> +    struct nlattr *stb[IFLA_VF_STATS_MAX+1];
>> +
>> +    if (tb == NULL || mac == NULL || stats == NULL) {
>> +        return -1;
>> +    }
>> +
>> +    if (!tb[IFLA_VFINFO_LIST])
>> +        return -1;
>> +
>> +    len = nla_len(tb[IFLA_VFINFO_LIST]);
>> +
>> +    for (nla = nla_data(tb[IFLA_VFINFO_LIST]); nla_ok(nla, len);
>> +            nla = nla_next(nla, &len)) {
>> +        ret = nla_parse(t, IFLA_VF_MAX, nla_data(nla), nla_len(nla),
>> +                ifla_vf_policy);
>> +        if (ret < 0)
>> +            return -1;
>> +
>> +        if (t[IFLA_VF_MAC] == NULL) {
>> +            continue;
>> +        }
>> +
>> +        vf_lladdr = nla_data(t[IFLA_VF_MAC]);
>> +        if(virMacAddrCmpRaw(mac, vf_lladdr->mac)) {
> 
> 
> The above line failed syntax checks because there must be a space after 
> "if" before "("
> 
> 
>> +            continue;
>> +        }
>> +
>> +        if (t[IFLA_VF_STATS]) {
>> +            ret = nla_parse_nested(stb, IFLA_VF_STATS_MAX,
>> +                    t[IFLA_VF_STATS],
>> +                    ifla_vfstats_policy);
>> +            if (ret < 0)
>> +                return -1;
>> +
>> +            stats->rx_bytes = nla_get_u64(stb[IFLA_VF_STATS_RX_BYTES]);
>> +            stats->tx_bytes = nla_get_u64(stb[IFLA_VF_STATS_TX_BYTES]);
>> +            stats->rx_packets = 
>> nla_get_u64(stb[IFLA_VF_STATS_RX_PACKETS]);
>> +            stats->tx_packets = 
>> nla_get_u64(stb[IFLA_VF_STATS_TX_PACKETS]);
>> +        }
>> +        return 0;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +/**
>> + * virNetDevVFInterfaceStats:
>> + * @mac: MAC address of the VF interface
>> + * @stats: returns stats of the VF interface
>> + *
>> + * Get the VF interface from kernel by netlink.
>> + * Returns 0 on success, -1 on failure.
>> + */
>> +int
>> +virNetDevVFInterfaceStats(virMacAddrPtr mac,
>> +                     virDomainInterfaceStatsPtr stats)
>> +{
>> +    FILE *fp;
>> +    char line[256], *colon, *ifname;
>> +    int rc = -1;
>> +    void *nlData = NULL;
>> +    struct nlattr *tb[IFLA_MAX + 1] = {NULL, };
>> +    char *sysfsDevicePath = NULL;
>> +
>> +    fp = fopen("/proc/net/dev", "r");
>> +    if (!fp) {
>> +        virReportSystemError(errno, "%s",
>> +                _("Could not open /proc/net/dev"));
>> +        return -1;
>> +    }
>> +
>> +    /* get all PCI net devices, and parse VFs list from netlink API.
>> +     * compare MAC address, collect device stats if matching.
>> +     */
>> +    while (fgets(line, sizeof(line), fp)) {
>> +        /* The line looks like:
>> +         *   "   eth0:..."
>> +         * Split it at the colon. and strip blank from head.
>> +         */
>> +        colon = strchr(line, ':');
>> +        if (!colon)
>> +            continue;
>> +        *colon = '\0';
>> +        ifname = line;
>> +        while((*ifname == ' ') && (ifname < colon))
> 
> 
> This line also failed syntax check - needs a space after "while"
> 
> 
>> +            ifname++;
>> +
>> +        if (virNetDevSysfsFile(&sysfsDevicePath, ifname, "device") < 0)
>> +            break;
>> +
>> +        if (virNetDevIsPCIDevice(sysfsDevicePath)) {
>> +            rc = virNetlinkDumpLink(ifname, -1, &nlData, tb, 0, 0);
>> +            if (rc < 0) {
>> +                rc = -1;
>> +                goto cleanup;
>> +            }
>> +
>> +            rc = virNetDevParseVfStats(tb, mac, stats);
>> +            VIR_FREE(nlData);
>> +            if (rc == 0)
>> +                goto cleanup;
>> +        }
>> +        VIR_FREE(sysfsDevicePath);
>> +    }
>> +
>> +cleanup:
> 
> 
> This also failed syntax checks - labels must start in column 2, not 
> column 1 (apparently this makes some simple-minded text editors happier 
> or something...)
> 
> 
>> +    VIR_FREE(sysfsDevicePath);
>> +    VIR_FORCE_FCLOSE(fp);
>> +    return rc;
>> +}
>> +
>> +#else    /* #if defined(WITH_VF_STATS) */
>> +
>> +int
>> +virNetDevVFInterfaceStats(virMacAddrPtr mac,
>> +                     virDomainInterfaceStatsPtr stats)
>> +{
>> +    virReportSystemError(ENOSYS, "%s",
>> +                         _("Unable to get VF net device stats on this 
>> kernel, please upgrade kernel at lease linux-4.2"));
>> +
>> +    /* no need to do anything, just fix compling error here */
>> +    if (mac == NULL || stats == NULL) {
>> +        return -1;
>> +    }
> 
> 
> Follow the pattern of other platforms to eliminate build error - declare 
> the parameters with G_GNUC_UNUSED.
> 
>> +
>> +    return -1;
>> +}
>> +#endif     /* #if defined(WITH_VF_STATS) */
>>   #else /* defined(__linux__) && defined(WITH_LIBNL)  && 
>> defined(IFLA_VF_MAX) */
>> @@ -2309,6 +2457,16 @@ virNetDevSetNetConfig(const char *linkdev 
>> G_GNUC_UNUSED,
>>   }
>> +int
>> +virNetDevVFInterfaceStats(virMacAddrPtr mac,
>> +                     virDomainInterfaceStatsPtr stats)
>> +{
>> +    virReportSystemError(ENOSYS, "%s",
>> +                         _("Unable to get VF net device stats on this 
>> platform"));
>> +    return -1;
> 
> 
> Again, you need G_GNUC_UNUSED on the parameters. (This failed the CI 
> tests for FreeBSD and MacOS).
> 
> 
> 
>> +}
>> +
>> +
>>   #endif /* defined(__linux__) && defined(WITH_LIBNL) && 
>> defined(IFLA_VF_MAX) */
>>   VIR_ENUM_IMPL(virNetDevIfState,
>> diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
>> index 5f581323ed..ff59d9d341 100644
>> --- a/src/util/virnetdev.h
>> +++ b/src/util/virnetdev.h
>> @@ -312,4 +312,9 @@ int virNetDevSysfsFile(char **pf_sysfs_device_link,
>>   int virNetDevRunEthernetScript(const char *ifname, const char *script)
>>       G_GNUC_NO_INLINE;
>> +int virNetDevVFInterfaceStats(virMacAddrPtr mac,
>> +                              virDomainInterfaceStatsPtr stats)
>> +ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
>> +
>> +
>>   G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter, 
>> virNetDevRxFilterFree);
> 
> 
> I didn't have time to look at the code itself yet. In the meantime, 
> fixing it up so that this is just another one of the "#if 
> defined(__linux__) && WITH_LIBNL" functions, and eliminating the CI 
> build/test errors would be useful; possibly that's all that will be 
> needed, and even if not you'll have a head start :-)
> 
> 

Thanks a lot, I'll fix all the above problems and push a v2 version.

-- 
zhenwei pi