[PATCH v3] util: support PCI passthrough net device stats collection

zhenwei pi posted 1 patch 3 years, 7 months ago
Test syntax-check failed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20200924081830.544759-1-pizhenwei@bytedance.com
src/libvirt_private.syms |   1 +
src/qemu/qemu_driver.c   |   3 ++
src/util/virnetdev.c     | 121 +++++++++++++++++++++++++++++++++++++++++++++++
src/util/virnetdev.h     |   5 ++
4 files changed, 130 insertions(+)
[PATCH v3] util: support PCI passthrough net device stats collection
Posted by zhenwei pi 3 years, 7 months ago
Collect PCI passthrough net device stats from kernel by netlink
API.

Currently, libvirt can not get PCI passthrough net device stats,
run command:
 #virsh domifstat instance --interface=52:54:00:2d:b2:35
 error: Failed to get interface stats instance 52:54:00:2d:b2:35
 error: internal error: Interface name not provided

The PCI device(usually SR-IOV virtual function device) is detached
while it's used in PCI passthrough mode. And we can not parse this
device from /proc/net/dev any more.

In this patch, libvirt check net device is VF of not firstly, then
query virNetDevVFInterfaceStats(new API).
virNetDevVFInterfaceStats parses VFs info of all PFs, compares MAC
address until the two MAC addresses match.
'#ip -s link show' can get the same result. Instead of parsing the
output result, implement this feature by libnl API.

Notice that this feature deponds on driver of PF.
Test on Mellanox ConnectX-4 Lx, it works well.
Also test on Intel Corporation 82599ES, it works, but only get 0.
(ip-link command get the same result).

IFLA_VF_STATS is supported since Linux-4.2, suggested by Laine,
just using defined(__linux__) && WITH_LIBNL is enough.

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
 src/libvirt_private.syms |   1 +
 src/qemu/qemu_driver.c   |   3 ++
 src/util/virnetdev.c     | 121 +++++++++++++++++++++++++++++++++++++++++++++++
 src/util/virnetdev.h     |   5 ++
 4 files changed, 130 insertions(+)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index bdbe3431b8..bcc40b8d69 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2585,6 +2585,7 @@ virNetDevSetRcvMulti;
 virNetDevSetupControl;
 virNetDevSysfsFile;
 virNetDevValidateConfig;
+virNetDevVFInterfaceStats;
 
 
 # util/virnetdevbandwidth.h
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index ae715c01d7..f554010c40 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -10196,6 +10196,9 @@ qemuDomainInterfaceStats(virDomainPtr dom,
     if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
         if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats) < 0)
             goto cleanup;
+    } else if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
+        if (virNetDevVFInterfaceStats(&net->mac, stats) < 0)
+            goto cleanup;
     } else {
         if (virNetDevTapInterfaceStats(net->ifname, stats,
                                        !virDomainNetTypeSharesHostView(net)) < 0)
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index e1a4cc2bef..3d54d07606 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -1489,6 +1489,7 @@ static struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
                             .maxlen = sizeof(struct ifla_vf_mac) },
     [IFLA_VF_VLAN]      = { .type = NLA_UNSPEC,
                             .maxlen = sizeof(struct ifla_vf_vlan) },
+    [IFLA_VF_STATS]     = { .type = NLA_NESTED },
 };
 
 
@@ -2265,6 +2266,116 @@ virNetDevSetNetConfig(const char *linkdev, int vf,
     return 0;
 }
 
+static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = {
+    [IFLA_VF_STATS_RX_PACKETS]  = { .type = NLA_U64 },
+    [IFLA_VF_STATS_TX_PACKETS]  = { .type = NLA_U64 },
+    [IFLA_VF_STATS_RX_BYTES]    = { .type = NLA_U64 },
+    [IFLA_VF_STATS_TX_BYTES]    = { .type = NLA_U64 },
+    [IFLA_VF_STATS_BROADCAST]   = { .type = NLA_U64 },
+    [IFLA_VF_STATS_MULTICAST]   = { .type = NLA_U64 },
+};
+
+static int
+virNetDevParseVfStats(struct nlattr **tb, virMacAddrPtr mac,
+                      virDomainInterfaceStatsPtr stats)
+{
+    int ret = -1, len;
+    struct ifla_vf_mac *vf_lladdr;
+    struct nlattr *nla, *t[IFLA_VF_MAX+1];
+    struct nlattr *stb[IFLA_VF_STATS_MAX+1];
+
+    if (tb == NULL || mac == NULL || stats == NULL) {
+        return -1;
+    }
+
+    if (!tb[IFLA_VFINFO_LIST])
+        return -1;
+
+    len = nla_len(tb[IFLA_VFINFO_LIST]);
+
+    for (nla = nla_data(tb[IFLA_VFINFO_LIST]); nla_ok(nla, len);
+            nla = nla_next(nla, &len)) {
+        ret = nla_parse(t, IFLA_VF_MAX, nla_data(nla), nla_len(nla),
+                ifla_vf_policy);
+        if (ret < 0)
+            return -1;
+
+        if (t[IFLA_VF_MAC] == NULL) {
+            continue;
+        }
+
+        vf_lladdr = nla_data(t[IFLA_VF_MAC]);
+        if (virMacAddrCmpRaw(mac, vf_lladdr->mac)) {
+            continue;
+        }
+
+        if (t[IFLA_VF_STATS]) {
+            ret = nla_parse_nested(stb, IFLA_VF_STATS_MAX,
+                    t[IFLA_VF_STATS],
+                    ifla_vfstats_policy);
+            if (ret < 0)
+                return -1;
+
+            stats->rx_bytes = nla_get_u64(stb[IFLA_VF_STATS_RX_BYTES]);
+            stats->tx_bytes = nla_get_u64(stb[IFLA_VF_STATS_TX_BYTES]);
+            stats->rx_packets = nla_get_u64(stb[IFLA_VF_STATS_RX_PACKETS]);
+            stats->tx_packets = nla_get_u64(stb[IFLA_VF_STATS_TX_PACKETS]);
+        }
+        return 0;
+    }
+
+    return ret;
+}
+
+/**
+ * virNetDevVFInterfaceStats:
+ * @mac: MAC address of the VF interface
+ * @stats: returns stats of the VF interface
+ *
+ * Get the VF interface from kernel by netlink.
+ * Returns 0 on success, -1 on failure.
+ */
+int
+virNetDevVFInterfaceStats(virMacAddrPtr mac,
+                          virDomainInterfaceStatsPtr stats)
+{
+    int rc = -1;
+    void *nlData = NULL;
+    struct nlattr *tb[IFLA_MAX + 1] = {NULL, };
+    char *sysfsDevicePath = NULL;
+    DIR *dirp = NULL;
+    struct dirent *dp;
+
+    if (virDirOpen(&dirp, SYSFS_NET_DIR) < 0)
+        return -1;
+
+    /* get all PCI net devices, and parse VFs list from netlink API.
+     * compare MAC address, collect device stats if matching.
+     */
+    while (virDirRead(dirp, &dp, SYSFS_NET_DIR) > 0) {
+        if (virNetDevSysfsFile(&sysfsDevicePath, dp->d_name, "device") < 0)
+            break;
+
+        if (virNetDevIsPCIDevice(sysfsDevicePath)) {
+            rc = virNetlinkDumpLink(dp->d_name, -1, &nlData, tb, 0, 0);
+            if (rc < 0) {
+                rc = -1;
+                goto cleanup;
+            }
+
+            rc = virNetDevParseVfStats(tb, mac, stats);
+            VIR_FREE(nlData);
+            if (rc == 0)
+                goto cleanup;
+        }
+        VIR_FREE(sysfsDevicePath);
+    }
+
+ cleanup:
+    VIR_FREE(sysfsDevicePath);
+    VIR_DIR_CLOSE(dirp);
+    return rc;
+}
 
 #else /* defined(__linux__) && defined(WITH_LIBNL)  && defined(IFLA_VF_MAX) */
 
@@ -2309,6 +2420,16 @@ virNetDevSetNetConfig(const char *linkdev G_GNUC_UNUSED,
 }
 
 
+int
+virNetDevVFInterfaceStats(virMacAddrPtr mac G_GNUC_UNUSED,
+                     virDomainInterfaceStatsPtr stats G_GNUC_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("Unable to get VF net device stats on this platform"));
+    return -1;
+}
+
+
 #endif /* defined(__linux__) && defined(WITH_LIBNL) && defined(IFLA_VF_MAX) */
 
 VIR_ENUM_IMPL(virNetDevIfState,
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
index 5f581323ed..ff59d9d341 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -312,4 +312,9 @@ int virNetDevSysfsFile(char **pf_sysfs_device_link,
 int virNetDevRunEthernetScript(const char *ifname, const char *script)
     G_GNUC_NO_INLINE;
 
+int virNetDevVFInterfaceStats(virMacAddrPtr mac,
+                              virDomainInterfaceStatsPtr stats)
+ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
+
+
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter, virNetDevRxFilterFree);
-- 
2.11.0

Re: [PATCH v3] util: support PCI passthrough net device stats collection
Posted by Daniel Henrique Barboza 3 years, 7 months ago

On 9/24/20 5:18 AM, zhenwei pi wrote:
> Collect PCI passthrough net device stats from kernel by netlink
> API.
> 
> Currently, libvirt can not get PCI passthrough net device stats,
> run command:
>   #virsh domifstat instance --interface=52:54:00:2d:b2:35
>   error: Failed to get interface stats instance 52:54:00:2d:b2:35
>   error: internal error: Interface name not provided
> 
> The PCI device(usually SR-IOV virtual function device) is detached
> while it's used in PCI passthrough mode. And we can not parse this
> device from /proc/net/dev any more.
> 
> In this patch, libvirt check net device is VF of not firstly, then
> query virNetDevVFInterfaceStats(new API).
> virNetDevVFInterfaceStats parses VFs info of all PFs, compares MAC
> address until the two MAC addresses match.
> '#ip -s link show' can get the same result. Instead of parsing the
> output result, implement this feature by libnl API.
> 
> Notice that this feature deponds on driver of PF.

s/deponds/depends

> Test on Mellanox ConnectX-4 Lx, it works well.
> Also test on Intel Corporation 82599ES, it works, but only get 0.
> (ip-link command get the same result).
> 
> IFLA_VF_STATS is supported since Linux-4.2, suggested by Laine,
> just using defined(__linux__) && WITH_LIBNL is enough.
> 
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---
>   src/libvirt_private.syms |   1 +
>   src/qemu/qemu_driver.c   |   3 ++
>   src/util/virnetdev.c     | 121 +++++++++++++++++++++++++++++++++++++++++++++++
>   src/util/virnetdev.h     |   5 ++
>   4 files changed, 130 insertions(+)
> 

[...]

> +
> +/**
> + * virNetDevVFInterfaceStats:
> + * @mac: MAC address of the VF interface
> + * @stats: returns stats of the VF interface
> + *
> + * Get the VF interface from kernel by netlink.
> + * Returns 0 on success, -1 on failure.
> + */
> +int
> +virNetDevVFInterfaceStats(virMacAddrPtr mac,
> +                          virDomainInterfaceStatsPtr stats)
> +{
> +    int rc = -1;
> +    void *nlData = NULL;
> +    struct nlattr *tb[IFLA_MAX + 1] = {NULL, };
> +    char *sysfsDevicePath = NULL;

This variable is used only in the while() loop down there and you're freeing
it after each loop iteration, and then you need to clean it in the function wide
'cleanup' label as well.

Yon can move it with autocleanup enabled down there ....


> +    DIR *dirp = NULL;
> +    struct dirent *dp;
> +
> +    if (virDirOpen(&dirp, SYSFS_NET_DIR) < 0)
> +        return -1;
> +
> +    /* get all PCI net devices, and parse VFs list from netlink API.
> +     * compare MAC address, collect device stats if matching.
> +     */
> +    while (virDirRead(dirp, &dp, SYSFS_NET_DIR) > 0) {

Here:
            g_autofree char *sysfsDevicePath = NULL;
               

> +        if (virNetDevSysfsFile(&sysfsDevicePath, dp->d_name, "device") < 0)
> +            break;
> +
> +        if (virNetDevIsPCIDevice(sysfsDevicePath)) {
> +            rc = virNetlinkDumpLink(dp->d_name, -1, &nlData, tb, 0, 0);
> +            if (rc < 0) {
> +                rc = -1;
> +                goto cleanup;
> +            }
> +
> +            rc = virNetDevParseVfStats(tb, mac, stats);
> +            VIR_FREE(nlData);
> +            if (rc == 0)
> +                goto cleanup;
> +        }
> +        VIR_FREE(sysfsDevicePath);

And then you don't need this VIR_FREE() call because sysfsDevicePath will be auto-freed
after each iteration ....


> +    }
> +
> + cleanup:
> +    VIR_FREE(sysfsDevicePath);

And this VIR_FREE() call becomes obsolete because the variable is not on function
scope any longer.



Thanks,


DHB


> +    VIR_DIR_CLOSE(dirp);
> +    return rc;
> +}
>   
>   #else /* defined(__linux__) && defined(WITH_LIBNL)  && defined(IFLA_VF_MAX) */
>   
> @@ -2309,6 +2420,16 @@ virNetDevSetNetConfig(const char *linkdev G_GNUC_UNUSED,
>   }
>   
>   
> +int
> +virNetDevVFInterfaceStats(virMacAddrPtr mac G_GNUC_UNUSED,
> +                     virDomainInterfaceStatsPtr stats G_GNUC_UNUSED)
> +{
> +    virReportSystemError(ENOSYS, "%s",
> +                         _("Unable to get VF net device stats on this platform"));
> +    return -1;
> +}
> +
> +
>   #endif /* defined(__linux__) && defined(WITH_LIBNL) && defined(IFLA_VF_MAX) */
>   
>   VIR_ENUM_IMPL(virNetDevIfState,
> diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
> index 5f581323ed..ff59d9d341 100644
> --- a/src/util/virnetdev.h
> +++ b/src/util/virnetdev.h
> @@ -312,4 +312,9 @@ int virNetDevSysfsFile(char **pf_sysfs_device_link,
>   int virNetDevRunEthernetScript(const char *ifname, const char *script)
>       G_GNUC_NO_INLINE;
>   
> +int virNetDevVFInterfaceStats(virMacAddrPtr mac,
> +                              virDomainInterfaceStatsPtr stats)
> +ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
> +
> +
>   G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter, virNetDevRxFilterFree);
> 

Re: [PATCH v3] util: support PCI passthrough net device stats collection
Posted by Laine Stump 3 years, 6 months ago
On 9/24/20 4:18 AM, zhenwei pi wrote:
> Collect PCI passthrough net device stats from kernel by netlink
> API.
> 
> Currently, libvirt can not get PCI passthrough net device stats,
> run command:
>   #virsh domifstat instance --interface=52:54:00:2d:b2:35
>   error: Failed to get interface stats instance 52:54:00:2d:b2:35
>   error: internal error: Interface name not provided
> 
> The PCI device(usually SR-IOV virtual function device) is detached
> while it's used in PCI passthrough mode. And we can not parse this
> device from /proc/net/dev any more.
> 
> In this patch, libvirt check net device is VF of not firstly, then
> query virNetDevVFInterfaceStats(new API).
> virNetDevVFInterfaceStats parses VFs info of all PFs, compares MAC
> address until the two MAC addresses match.
> '#ip -s link show' can get the same result. Instead of parsing the
> output result, implement this feature by libnl API.
> 
> Notice that this feature deponds on driver of PF.
> Test on Mellanox ConnectX-4 Lx, it works well.
> Also test on Intel Corporation 82599ES, it works, but only get 0.
> (ip-link command get the same result).
> 
> IFLA_VF_STATS is supported since Linux-4.2, suggested by Laine,
> just using defined(__linux__) && WITH_LIBNL is enough.
> 
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---
>   src/libvirt_private.syms |   1 +
>   src/qemu/qemu_driver.c   |   3 ++
>   src/util/virnetdev.c     | 121 +++++++++++++++++++++++++++++++++++++++++++++++
>   src/util/virnetdev.h     |   5 ++
>   4 files changed, 130 insertions(+)
> 
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index bdbe3431b8..bcc40b8d69 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -2585,6 +2585,7 @@ virNetDevSetRcvMulti;
>   virNetDevSetupControl;
>   virNetDevSysfsFile;
>   virNetDevValidateConfig;
> +virNetDevVFInterfaceStats;
>   
>   
>   # util/virnetdevbandwidth.h
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index ae715c01d7..f554010c40 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -10196,6 +10196,9 @@ qemuDomainInterfaceStats(virDomainPtr dom,
>       if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
>           if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats) < 0)
>               goto cleanup;
> +    } else if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
> +        if (virNetDevVFInterfaceStats(&net->mac, stats) < 0)
> +            goto cleanup;
>       } else {
>           if (virNetDevTapInterfaceStats(net->ifname, stats,
>                                          !virDomainNetTypeSharesHostView(net)) < 0)
> diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
> index e1a4cc2bef..3d54d07606 100644
> --- a/src/util/virnetdev.c
> +++ b/src/util/virnetdev.c
> @@ -1489,6 +1489,7 @@ static struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
>                               .maxlen = sizeof(struct ifla_vf_mac) },
>       [IFLA_VF_VLAN]      = { .type = NLA_UNSPEC,
>                               .maxlen = sizeof(struct ifla_vf_vlan) },
> +    [IFLA_VF_STATS]     = { .type = NLA_NESTED },
>   };
>   
>   
> @@ -2265,6 +2266,116 @@ virNetDevSetNetConfig(const char *linkdev, int vf,
>       return 0;
>   }
>   
> +static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = {
> +    [IFLA_VF_STATS_RX_PACKETS]  = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_TX_PACKETS]  = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_RX_BYTES]    = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_TX_BYTES]    = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_BROADCAST]   = { .type = NLA_U64 },
> +    [IFLA_VF_STATS_MULTICAST]   = { .type = NLA_U64 },
> +};
> +
> +static int
> +virNetDevParseVfStats(struct nlattr **tb, virMacAddrPtr mac,
> +                      virDomainInterfaceStatsPtr stats)
> +{
> +    int ret = -1, len;
> +    struct ifla_vf_mac *vf_lladdr;
> +    struct nlattr *nla, *t[IFLA_VF_MAX+1];
> +    struct nlattr *stb[IFLA_VF_STATS_MAX+1];
> +
> +    if (tb == NULL || mac == NULL || stats == NULL) {
> +        return -1;
> +    }
> +
> +    if (!tb[IFLA_VFINFO_LIST])
> +        return -1;
> +
> +    len = nla_len(tb[IFLA_VFINFO_LIST]);
> +
> +    for (nla = nla_data(tb[IFLA_VFINFO_LIST]); nla_ok(nla, len);
> +            nla = nla_next(nla, &len)) {
> +        ret = nla_parse(t, IFLA_VF_MAX, nla_data(nla), nla_len(nla),
> +                ifla_vf_policy);
> +        if (ret < 0)
> +            return -1;
> +
> +        if (t[IFLA_VF_MAC] == NULL) {
> +            continue;
> +        }
> +
> +        vf_lladdr = nla_data(t[IFLA_VF_MAC]);
> +        if (virMacAddrCmpRaw(mac, vf_lladdr->mac)) {
> +            continue;
> +        }
> +
> +        if (t[IFLA_VF_STATS]) {
> +            ret = nla_parse_nested(stb, IFLA_VF_STATS_MAX,
> +                    t[IFLA_VF_STATS],
> +                    ifla_vfstats_policy);
> +            if (ret < 0)
> +                return -1;
> +
> +            stats->rx_bytes = nla_get_u64(stb[IFLA_VF_STATS_RX_BYTES]);
> +            stats->tx_bytes = nla_get_u64(stb[IFLA_VF_STATS_TX_BYTES]);
> +            stats->rx_packets = nla_get_u64(stb[IFLA_VF_STATS_RX_PACKETS]);
> +            stats->tx_packets = nla_get_u64(stb[IFLA_VF_STATS_TX_PACKETS]);
> +        }
> +        return 0;
> +    }
> +
> +    return ret;
> +}
> +
> +/**
> + * virNetDevVFInterfaceStats:
> + * @mac: MAC address of the VF interface
> + * @stats: returns stats of the VF interface
> + *
> + * Get the VF interface from kernel by netlink.
> + * Returns 0 on success, -1 on failure.
> + */
> +int
> +virNetDevVFInterfaceStats(virMacAddrPtr mac,
> +                          virDomainInterfaceStatsPtr stats)
> +{
> +    int rc = -1;
> +    void *nlData = NULL;
> +    struct nlattr *tb[IFLA_MAX + 1] = {NULL, };
> +    char *sysfsDevicePath = NULL;
> +    DIR *dirp = NULL;
> +    struct dirent *dp;
> +
> +    if (virDirOpen(&dirp, SYSFS_NET_DIR) < 0)
> +        return -1;
> +
> +    /* get all PCI net devices, and parse VFs list from netlink API.
> +     * compare MAC address, collect device stats if matching.
> +     */


Sorry, I hate to bring this up only on v3 of the patches (In the initial 
version I hadn't looked at implementation details, only at build CI, and 
hadn't gotten back to look at the later versions before other people 
commented on also-macro-level issues), but we shouldn't be determining 
which VF stats of which PF to report by scanning through all the PFs 
until we find a VF with a matching MAC address. Aside from being really 
inefficient (you shouldn't need to get a dump of all interfaces, just of 
the one interface you're interested in), also the MAC address of a 
device isn't unique (although it usually is), so you can't be guaranteed 
this is the correct VF.

Instead we need to start with the PCI address in the virDomainHostdevDef 
associated with the VF (which can be retrieved with 
virDomainNetDefActualHostdev(net)) and then learn the name of the PF, 
and what the VF# is for this VF using virPCIGetVirtualFunctionInfo(), 
(*or even better* - that function refactored to be more like 
virHostdevNetDevice() - it provides exactly what you need, but is just 
located in a file (virhostdev.c) that shouldn't be referenced from 
virnetdev.c).

Once we know the PF device name and which VF, we can get the NetlinkDump 
*only* for that device, and then directly grab the details from the 
particular VFINFO in the list, rather than relying on a match of the 
(unreliable) MAC address.

Since I already have an idea of the change I'd like to see in 
virPCIGetVirtualFunctionInfo(), let me refactor that function (and a few 
surrounding) so that they're more generally useful, and I'll Cc the 
patch for it to you so that you can use the updated function. I'll try 
to do it in the next couple days.


> +    while (virDirRead(dirp, &dp, SYSFS_NET_DIR) > 0) {
> +        if (virNetDevSysfsFile(&sysfsDevicePath, dp->d_name, "device") < 0)
> +            break;
> +
> +        if (virNetDevIsPCIDevice(sysfsDevicePath)) {
> +            rc = virNetlinkDumpLink(dp->d_name, -1, &nlData, tb, 0, 0);
> +            if (rc < 0) {
> +                rc = -1;
> +                goto cleanup;
> +            }
> +
> +            rc = virNetDevParseVfStats(tb, mac, stats);
> +            VIR_FREE(nlData);
> +            if (rc == 0)
> +                goto cleanup;
> +        }
> +        VIR_FREE(sysfsDevicePath);
> +    }
> +
> + cleanup:
> +    VIR_FREE(sysfsDevicePath);
> +    VIR_DIR_CLOSE(dirp);
> +    return rc;
> +}
>   
>   #else /* defined(__linux__) && defined(WITH_LIBNL)  && defined(IFLA_VF_MAX) */
>   
> @@ -2309,6 +2420,16 @@ virNetDevSetNetConfig(const char *linkdev G_GNUC_UNUSED,
>   }
>   
>   
> +int
> +virNetDevVFInterfaceStats(virMacAddrPtr mac G_GNUC_UNUSED,
> +                     virDomainInterfaceStatsPtr stats G_GNUC_UNUSED)
> +{
> +    virReportSystemError(ENOSYS, "%s",
> +                         _("Unable to get VF net device stats on this platform"));
> +    return -1;
> +}
> +
> +
>   #endif /* defined(__linux__) && defined(WITH_LIBNL) && defined(IFLA_VF_MAX) */
>   
>   VIR_ENUM_IMPL(virNetDevIfState,
> diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
> index 5f581323ed..ff59d9d341 100644
> --- a/src/util/virnetdev.h
> +++ b/src/util/virnetdev.h
> @@ -312,4 +312,9 @@ int virNetDevSysfsFile(char **pf_sysfs_device_link,
>   int virNetDevRunEthernetScript(const char *ifname, const char *script)
>       G_GNUC_NO_INLINE;
>   
> +int virNetDevVFInterfaceStats(virMacAddrPtr mac,
> +                              virDomainInterfaceStatsPtr stats)
> +ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
> +
> +
>   G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter, virNetDevRxFilterFree);
> 

Re: [PATCH v3] util: support PCI passthrough net device stats collection
Posted by Laine Stump 3 years, 6 months ago
(After playing around with virPCIDeviceGetVirtualFunctionInfo() for a 
few days, I came to a point where I decided it was best to leave its API 
as it is (taking a path to the device in sysfs rather than a 
virPCIDeviceAddress as the first argument). And *THEN* I took a close 
look at virNetDevParseVfStats() and finally realized that it is nearly 
identical to virNetDevParseVfConfig(), and that 
virNetDevVFInterfaceStats() is just a wrapper around the same code that 
is in VirNetDevGetVfConfig() (just searching through all the host 
interfaces looking for a VF with matching MAC address.

So I've further changed my mind about the form this patch should take. 
Instead of "almost duplicating" so much code, what about this:


0) the changes you've made to the ifla_vf_policy struct are correct and 
should remain.


1) Instead of duplicating all that code from virNetDevParseVfConfig() 
into a new function (virNetDevParseVfStats()), just expand 
virNetDevParseVfConfig() itself with a new arg for stats, and add in the 
small bit of new code that retrieves the stats *iff the stats arg is 
non-NULL. This way it can still be used for its original purpose, but 
also for the new purpose, leaving us less "almost identical" code to 
maintain:


static int

virNetDevParseVfConfig(struct nlattr **tb, int32_t vf,

                        virMacAddrPtr mac,

                        int *vlanid,

                        virDomainInterfaceStatsPtr stats) <=== new


...

         if (vlanid && tb[IFLA_VF_VLAN]) {

             [...]

         }

         /* new code starts here */

         if (stats && tb[IFLA_VF_STATS] && tb[IFLA_VF_MAC]) {

             vf_mac = RTA_DATA(tb_vf[IFLA_VF_MAC]);

             if (vf_mac && vf_mac->vf == vf) {

                 ret = nla_parse_nested(stb, IFLA_VF_STATS_MAX, 
t[IFLA_VF_STATS], ifla_vfstats_policy);
                 if (ret < 0) {
                     virReportError(VIR_ERR_INTERNAL_ERROR, "%s", 
_("error parsing IFLA_VF_STATS"));

                     return -1;

                 }

                 stats->rx_bytes = 
nla_get_u64(stb[IFLA_VF_STATS_RX_BYTES]);
                 stats->tx_bytes = 
nla_get_u64(stb[IFLA_VF_STATS_TX_BYTES]);
                 stats->rx_packets = 
nla_get_u64(stb[IFLA_VF_STATS_RX_PACKETS]);
                 stats->tx_packets = 
nla_get_u64(stb[IFLA_VF_STATS_TX_PACKETS]);
             }


(bonus points for a separate preliminary patch to rename 
virNetDevParseVfConfig() to virNetDevParseVFInfo())

2) virNetDevVFInterfaceStats() should take the PCI address of the VF *on 
the host* as its first argument, rather than the MAC address. If will 
then use that to determine the netdev name of the PF, and the VF# of the 
VF, using the pfName to call virNetlinkDumpLink() to get the vfinfo_list 
for the correct PF, and then the vf# when calling 
virNetDevParseVfConfig() to get the stats from the correct vfinfo entry 
in the list, something like this:

     int
     virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr,
                               virDomainInterfaceStatsPtr stats)
     {
     g_autofree char *vfSysFsPath = NULL;
     g_autofree char *pfName = NULL;
     int vf = -1;

     if (virPCIDeviceAddressGetSysfsFile(vfAddr, &vfSysfsPath) < 0)
         return -1;

     if (!virPCIIsVirtualFunction(vfSysfsPath)) {
virReportError(VIR_ERR_INTERNAL_ERROR, _("'%s' is not a VF device"), 
vfSysfsPath);
        return -1;
     }

     if (virPCIGetVirtualFunctionInfo(vsSysfsPath, -1, &pfName, &vf) < 0)
         return -1;

     /* we know the pfname and vf# - use that to retrieve the correct 
vfinfo */

     if (virNetlinkDumpLink(pfname, -1, &nlData, tb, 0, 0) < 0)
         return -1;

     return virNetDevParseVfConfig(tb, vf, NULL, NULL, stats);
     }

... or something like that.


3) When calling virNetDevVFInterfaceStats() (from 
qemuDomainInterfaceStats()), send it the PCI address of the device like 
this:

     [...]

     } else if (virDomainNetGetActualType(net) == 
VIR_DOMAIN_NET_TYPE_HOSTDEV) {

         virDomainHostdevDef const *hostdev = 
virDomainNetGetActualHostdev(net);

         if (!hostdev) {

             /* some things shouldn't happen. This *definitely* should 
never ever be possible */

             virReportError(VIR_ERR_INTERNAL_ERROR,

                            "%s", _("hostdev interface missing hostdev 
data"));

             goto cleanup;

          }

          if 
virNetDevVFInterfaceStats(&hostdev->source.subsys.u.pci.addr, stats) < 0)

             goto cleanup;

          ...


Does all of that make sense? (Hopefully I haven't skipped over something 
crucial).

On 9/30/20 11:06 AM, Laine Stump wrote:
> On 9/24/20 4:18 AM, zhenwei pi wrote:
>> Collect PCI passthrough net device stats from kernel by netlink
>> API.
>>
>> Currently, libvirt can not get PCI passthrough net device stats,
>> run command:
>>   #virsh domifstat instance --interface=52:54:00:2d:b2:35
>>   error: Failed to get interface stats instance 52:54:00:2d:b2:35
>>   error: internal error: Interface name not provided
>>
>> The PCI device(usually SR-IOV virtual function device) is detached
>> while it's used in PCI passthrough mode. And we can not parse this
>> device from /proc/net/dev any more.
>>
>> In this patch, libvirt check net device is VF of not firstly, then
>> query virNetDevVFInterfaceStats(new API).
>> virNetDevVFInterfaceStats parses VFs info of all PFs, compares MAC
>> address until the two MAC addresses match.
>> '#ip -s link show' can get the same result. Instead of parsing the
>> output result, implement this feature by libnl API.
>>
>> Notice that this feature deponds on driver of PF.
>> Test on Mellanox ConnectX-4 Lx, it works well.
>> Also test on Intel Corporation 82599ES, it works, but only get 0.
>> (ip-link command get the same result).
>>
>> IFLA_VF_STATS is supported since Linux-4.2, suggested by Laine,
>> just using defined(__linux__) && WITH_LIBNL is enough.
>>
>> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
>> ---
>>   src/libvirt_private.syms |   1 +
>>   src/qemu/qemu_driver.c   |   3 ++
>>   src/util/virnetdev.c     | 121 
>> +++++++++++++++++++++++++++++++++++++++++++++++
>>   src/util/virnetdev.h     |   5 ++
>>   4 files changed, 130 insertions(+)
>>
>> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
>> index bdbe3431b8..bcc40b8d69 100644
>> --- a/src/libvirt_private.syms
>> +++ b/src/libvirt_private.syms
>> @@ -2585,6 +2585,7 @@ virNetDevSetRcvMulti;
>>   virNetDevSetupControl;
>>   virNetDevSysfsFile;
>>   virNetDevValidateConfig;
>> +virNetDevVFInterfaceStats;
>>       # util/virnetdevbandwidth.h
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index ae715c01d7..f554010c40 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -10196,6 +10196,9 @@ qemuDomainInterfaceStats(virDomainPtr dom,
>>       if (virDomainNetGetActualType(net) == 
>> VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
>>           if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats) 
>> < 0)
>>               goto cleanup;
>> +    } else if (virDomainNetGetActualType(net) == 
>> VIR_DOMAIN_NET_TYPE_HOSTDEV) {
>> +        if (virNetDevVFInterfaceStats(&net->mac, stats) < 0)


1) this should be:



>> +            goto cleanup;
>>       } else {
>>           if (virNetDevTapInterfaceStats(net->ifname, stats,
>> !virDomainNetTypeSharesHostView(net)) < 0)
>> diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
>> index e1a4cc2bef..3d54d07606 100644
>> --- a/src/util/virnetdev.c
>> +++ b/src/util/virnetdev.c
>> @@ -1489,6 +1489,7 @@ static struct nla_policy 
>> ifla_vf_policy[IFLA_VF_MAX+1] = {
>>                               .maxlen = sizeof(struct ifla_vf_mac) },
>>       [IFLA_VF_VLAN]      = { .type = NLA_UNSPEC,
>>                               .maxlen = sizeof(struct ifla_vf_vlan) },
>> +    [IFLA_VF_STATS]     = { .type = NLA_NESTED },
>>   };
>>     @@ -2265,6 +2266,116 @@ virNetDevSetNetConfig(const char 
>> *linkdev, int vf,
>>       return 0;
>>   }
>>   +static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = {
>> +    [IFLA_VF_STATS_RX_PACKETS]  = { .type = NLA_U64 },
>> +    [IFLA_VF_STATS_TX_PACKETS]  = { .type = NLA_U64 },
>> +    [IFLA_VF_STATS_RX_BYTES]    = { .type = NLA_U64 },
>> +    [IFLA_VF_STATS_TX_BYTES]    = { .type = NLA_U64 },
>> +    [IFLA_VF_STATS_BROADCAST]   = { .type = NLA_U64 },
>> +    [IFLA_VF_STATS_MULTICAST]   = { .type = NLA_U64 },
>> +};
>> +
>> +static int
>> +virNetDevParseVfStats(struct nlattr **tb, virMacAddrPtr mac,
>> +                      virDomainInterfaceStatsPtr stats)
>> +{
>> +    int ret = -1, len;
>> +    struct ifla_vf_mac *vf_lladdr;
>> +    struct nlattr *nla, *t[IFLA_VF_MAX+1];
>> +    struct nlattr *stb[IFLA_VF_STATS_MAX+1];
>> +
>> +    if (tb == NULL || mac == NULL || stats == NULL) {
>> +        return -1;
>> +    }
>> +
>> +    if (!tb[IFLA_VFINFO_LIST])
>> +        return -1;
>> +
>> +    len = nla_len(tb[IFLA_VFINFO_LIST]);
>> +
>> +    for (nla = nla_data(tb[IFLA_VFINFO_LIST]); nla_ok(nla, len);
>> +            nla = nla_next(nla, &len)) {
>> +        ret = nla_parse(t, IFLA_VF_MAX, nla_data(nla), nla_len(nla),
>> +                ifla_vf_policy);
>> +        if (ret < 0)
>> +            return -1;
>> +
>> +        if (t[IFLA_VF_MAC] == NULL) {
>> +            continue;
>> +        }
>> +
>> +        vf_lladdr = nla_data(t[IFLA_VF_MAC]);
>> +        if (virMacAddrCmpRaw(mac, vf_lladdr->mac)) {
>> +            continue;
>> +        }
>> +
>> +        if (t[IFLA_VF_STATS]) {
>> +            ret = nla_parse_nested(stb, IFLA_VF_STATS_MAX,
>> +                    t[IFLA_VF_STATS],
>> +                    ifla_vfstats_policy);
>> +            if (ret < 0)
>> +                return -1;
>> +
>> +            stats->rx_bytes = nla_get_u64(stb[IFLA_VF_STATS_RX_BYTES]);
>> +            stats->tx_bytes = nla_get_u64(stb[IFLA_VF_STATS_TX_BYTES]);
>> +            stats->rx_packets = 
>> nla_get_u64(stb[IFLA_VF_STATS_RX_PACKETS]);
>> +            stats->tx_packets = 
>> nla_get_u64(stb[IFLA_VF_STATS_TX_PACKETS]);
>> +        }
>> +        return 0;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +/**
>> + * virNetDevVFInterfaceStats:
>> + * @mac: MAC address of the VF interface
>> + * @stats: returns stats of the VF interface
>> + *
>> + * Get the VF interface from kernel by netlink.
>> + * Returns 0 on success, -1 on failure.
>> + */
>> +int
>> +virNetDevVFInterfaceStats(virMacAddrPtr mac,
>> +                          virDomainInterfaceStatsPtr stats)
>> +{
>> +    int rc = -1;
>> +    void *nlData = NULL;
>> +    struct nlattr *tb[IFLA_MAX + 1] = {NULL, };
>> +    char *sysfsDevicePath = NULL;
>> +    DIR *dirp = NULL;
>> +    struct dirent *dp;
>> +
>> +    if (virDirOpen(&dirp, SYSFS_NET_DIR) < 0)
>> +        return -1;
>> +
>> +    /* get all PCI net devices, and parse VFs list from netlink API.
>> +     * compare MAC address, collect device stats if matching.
>> +     */
>
>
> Sorry, I hate to bring this up only on v3 of the patches (In the 
> initial version I hadn't looked at implementation details, only at 
> build CI, and hadn't gotten back to look at the later versions before 
> other people commented on also-macro-level issues), but we shouldn't 
> be determining which VF stats of which PF to report by scanning 
> through all the PFs until we find a VF with a matching MAC address. 
> Aside from being really inefficient (you shouldn't need to get a dump 
> of all interfaces, just of the one interface you're interested in), 
> also the MAC address of a device isn't unique (although it usually 
> is), so you can't be guaranteed this is the correct VF.
>
> Instead we need to start with the PCI address in the 
> virDomainHostdevDef associated with the VF (which can be retrieved 
> with virDomainNetDefActualHostdev(net)) and then learn the name of the 
> PF, and what the VF# is for this VF using 
> virPCIGetVirtualFunctionInfo(), (*or even better* - that function 
> refactored to be more like virHostdevNetDevice() - it provides exactly 
> what you need, but is just located in a file (virhostdev.c) that 
> shouldn't be referenced from virnetdev.c).
>
> Once we know the PF device name and which VF, we can get the 
> NetlinkDump *only* for that device, and then directly grab the details 
> from the particular VFINFO in the list, rather than relying on a match 
> of the (unreliable) MAC address.
>
> Since I already have an idea of the change I'd like to see in 
> virPCIGetVirtualFunctionInfo(), let me refactor that function (and a 
> few surrounding) so that they're more generally useful, and I'll Cc 
> the patch for it to you so that you can use the updated function. I'll 
> try to do it in the next couple days.
>
>
>> +    while (virDirRead(dirp, &dp, SYSFS_NET_DIR) > 0) {
>> +        if (virNetDevSysfsFile(&sysfsDevicePath, dp->d_name, 
>> "device") < 0)
>> +            break;
>> +
>> +        if (virNetDevIsPCIDevice(sysfsDevicePath)) {
>> +            rc = virNetlinkDumpLink(dp->d_name, -1, &nlData, tb, 0, 0);
>> +            if (rc < 0) {
>> +                rc = -1;
>> +                goto cleanup;
>> +            }
>> +
>> +            rc = virNetDevParseVfStats(tb, mac, stats);
>> +            VIR_FREE(nlData);
>> +            if (rc == 0)
>> +                goto cleanup;
>> +        }
>> +        VIR_FREE(sysfsDevicePath);
>> +    }
>> +
>> + cleanup:
>> +    VIR_FREE(sysfsDevicePath);
>> +    VIR_DIR_CLOSE(dirp);
>> +    return rc;
>> +}
>>     #else /* defined(__linux__) && defined(WITH_LIBNL) && 
>> defined(IFLA_VF_MAX) */
>>   @@ -2309,6 +2420,16 @@ virNetDevSetNetConfig(const char *linkdev 
>> G_GNUC_UNUSED,
>>   }
>>     +int
>> +virNetDevVFInterfaceStats(virMacAddrPtr mac G_GNUC_UNUSED,
>> +                     virDomainInterfaceStatsPtr stats G_GNUC_UNUSED)
>> +{
>> +    virReportSystemError(ENOSYS, "%s",
>> +                         _("Unable to get VF net device stats on 
>> this platform"));
>> +    return -1;
>> +}
>> +
>> +
>>   #endif /* defined(__linux__) && defined(WITH_LIBNL) && 
>> defined(IFLA_VF_MAX) */
>>     VIR_ENUM_IMPL(virNetDevIfState,
>> diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
>> index 5f581323ed..ff59d9d341 100644
>> --- a/src/util/virnetdev.h
>> +++ b/src/util/virnetdev.h
>> @@ -312,4 +312,9 @@ int virNetDevSysfsFile(char **pf_sysfs_device_link,
>>   int virNetDevRunEthernetScript(const char *ifname, const char *script)
>>       G_GNUC_NO_INLINE;
>>   +int virNetDevVFInterfaceStats(virMacAddrPtr mac,
>> +                              virDomainInterfaceStatsPtr stats)
>> +ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
>> +
>> +
>>   G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter, 
>> virNetDevRxFilterFree);
>>
>