[libvirt PATCH] nodedev: fix race in API usage vs initial device enumeration

Daniel P. Berrangé posted 1 patch 4 years ago
Test syntax-check failed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20200313120035.1180068-1-berrange@redhat.com
src/conf/virnodedeviceobj.h          |  2 ++
src/node_device/node_device_driver.c | 44 ++++++++++++++++++++++++++++
src/node_device/node_device_hal.c    | 15 ++++++++++
src/node_device/node_device_udev.c   | 13 ++++++++
4 files changed, 74 insertions(+)
[libvirt PATCH] nodedev: fix race in API usage vs initial device enumeration
Posted by Daniel P. Berrangé 4 years ago
During startup the udev node device driver impl uses a background thread
to populate the list of devices to avoid blocking the daemon startup
entirely. There is no synchronization to the public APIs, so it is
possible for an application to start calling APIs before the device
initialization is complete.

This was not a problem in the old approach where libvirtd was started
on boot, as initialization would easily complete before any APIs were
called.

With the use of socket activation, however, APIs are invoked from the
very moment the daemon starts. This is easily seen by doing a

  'virsh -c nodedev:///system list'

the first time it runs it will only show one or two devices. The second
time it runs it will show all devices. The solution is to introduce a
flag and condition variable for APIs to synchronize against before
returning any data.

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 src/conf/virnodedeviceobj.h          |  2 ++
 src/node_device/node_device_driver.c | 44 ++++++++++++++++++++++++++++
 src/node_device/node_device_hal.c    | 15 ++++++++++
 src/node_device/node_device_udev.c   | 13 ++++++++
 4 files changed, 74 insertions(+)

diff --git a/src/conf/virnodedeviceobj.h b/src/conf/virnodedeviceobj.h
index c4d3c55d73..c9df8dedab 100644
--- a/src/conf/virnodedeviceobj.h
+++ b/src/conf/virnodedeviceobj.h
@@ -36,6 +36,8 @@ typedef struct _virNodeDeviceDriverState virNodeDeviceDriverState;
 typedef virNodeDeviceDriverState *virNodeDeviceDriverStatePtr;
 struct _virNodeDeviceDriverState {
     virMutex lock;
+    virCond initCond;
+    bool initialized;
 
     /* pid file FD, ensures two copies of the driver can't use the same root */
     int lockFD;
diff --git a/src/node_device/node_device_driver.c b/src/node_device/node_device_driver.c
index da92a4cf94..ee175e1095 100644
--- a/src/node_device/node_device_driver.c
+++ b/src/node_device/node_device_driver.c
@@ -156,6 +156,22 @@ nodeDeviceUnlock(void)
 }
 
 
+static int
+nodeDeviceWaitInit(void)
+{
+    nodeDeviceLock();
+    while (!driver->initialized) {
+        if (virCondWait(&driver->initCond, &driver->lock) < 0) {
+            virReportSystemError(errno, "%s",
+                                 _("failed to wait on condition"));
+            nodeDeviceUnlock();
+            return -1;
+        }
+    }
+    nodeDeviceUnlock();
+    return 0;
+}
+
 int
 nodeNumOfDevices(virConnectPtr conn,
                  const char *cap,
@@ -166,6 +182,9 @@ nodeNumOfDevices(virConnectPtr conn,
 
     virCheckFlags(0, -1);
 
+    if (nodeDeviceWaitInit() < 0)
+        return -1;
+
     return virNodeDeviceObjListNumOfDevices(driver->devs, conn, cap,
                                             virNodeNumOfDevicesCheckACL);
 }
@@ -183,6 +202,9 @@ nodeListDevices(virConnectPtr conn,
 
     virCheckFlags(0, -1);
 
+    if (nodeDeviceWaitInit() < 0)
+        return -1;
+
     return virNodeDeviceObjListGetNames(driver->devs, conn,
                                         virNodeListDevicesCheckACL,
                                         cap, names, maxnames);
@@ -199,6 +221,9 @@ nodeConnectListAllNodeDevices(virConnectPtr conn,
     if (virConnectListAllNodeDevicesEnsureACL(conn) < 0)
         return -1;
 
+    if (nodeDeviceWaitInit() < 0)
+        return -1;
+
     return virNodeDeviceObjListExport(conn, driver->devs, devices,
                                       virConnectListAllNodeDevicesCheckACL,
                                       flags);
@@ -228,6 +253,9 @@ nodeDeviceLookupByName(virConnectPtr conn,
     virNodeDeviceDefPtr def;
     virNodeDevicePtr device = NULL;
 
+    if (nodeDeviceWaitInit() < 0)
+        return NULL;
+
     if (!(obj = nodeDeviceObjFindByName(name)))
         return NULL;
     def = virNodeDeviceObjGetDef(obj);
@@ -256,6 +284,9 @@ nodeDeviceLookupSCSIHostByWWN(virConnectPtr conn,
 
     virCheckFlags(0, NULL);
 
+    if (nodeDeviceWaitInit() < 0)
+        return NULL;
+
     if (!(obj = virNodeDeviceObjListFindSCSIHostByWWNs(driver->devs,
                                                        wwnn, wwpn)))
         return NULL;
@@ -470,6 +501,10 @@ nodeDeviceCreateXML(virConnectPtr conn,
     const char *virt_type = NULL;
 
     virCheckFlags(0, NULL);
+
+    if (nodeDeviceWaitInit() < 0)
+        return NULL;
+
     virt_type  = virConnectGetType(conn);
 
     if (!(def = virNodeDeviceDefParseString(xmlDesc, CREATE_DEVICE, virt_type)))
@@ -512,6 +547,9 @@ nodeDeviceDestroy(virNodeDevicePtr device)
     g_autofree char *wwpn = NULL;
     unsigned int parent_host;
 
+    if (nodeDeviceWaitInit() < 0)
+        return -1;
+
     if (!(obj = nodeDeviceObjFindByName(device->name)))
         return -1;
     def = virNodeDeviceObjGetDef(obj);
@@ -564,6 +602,9 @@ nodeConnectNodeDeviceEventRegisterAny(virConnectPtr conn,
     if (virConnectNodeDeviceEventRegisterAnyEnsureACL(conn) < 0)
         return -1;
 
+    if (nodeDeviceWaitInit() < 0)
+        return -1;
+
     if (virNodeDeviceEventStateRegisterID(conn, driver->nodeDeviceEventState,
                                           device, eventID, callback,
                                           opaque, freecb, &callbackID) < 0)
@@ -580,6 +621,9 @@ nodeConnectNodeDeviceEventDeregisterAny(virConnectPtr conn,
     if (virConnectNodeDeviceEventDeregisterAnyEnsureACL(conn) < 0)
         return -1;
 
+    if (nodeDeviceWaitInit() < 0)
+        return -1;
+
     if (virObjectEventStateDeregisterID(conn,
                                         driver->nodeDeviceEventState,
                                         callbackID, true) < 0)
diff --git a/src/node_device/node_device_hal.c b/src/node_device/node_device_hal.c
index a48b4ffcd1..653f54ceca 100644
--- a/src/node_device/node_device_hal.c
+++ b/src/node_device/node_device_hal.c
@@ -611,6 +611,15 @@ nodeStateInitialize(bool privileged G_GNUC_UNUSED,
         VIR_FREE(driver);
         return VIR_DRV_STATE_INIT_ERROR;
     }
+
+    if (virCondInit(&driver->initCond) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Unable to initialize condition variable"));
+        virMutexDestroy(&driver->lock);
+        VIR_FREE(driver);
+        return VIR_DRV_STATE_INIT_ERROR;
+    }
+
     nodeDeviceLock();
 
     if (privileged) {
@@ -701,6 +710,11 @@ nodeStateInitialize(bool privileged G_GNUC_UNUSED,
     }
     VIR_FREE(udi);
 
+    nodeDeviceLock();
+    driver->initialized = true;
+    nodeDeviceUnlock();
+    virCondBroadcast(&driver->initCond);
+
     return VIR_DRV_STATE_INIT_COMPLETE;
 
  failure:
@@ -733,6 +747,7 @@ nodeStateCleanup(void)
 
         VIR_FREE(driver->stateDir);
         nodeDeviceUnlock();
+        virCondDestroy(&driver->initCond);
         virMutexDestroy(&driver->lock);
         VIR_FREE(driver);
         return 0;
diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
index 536cae6c30..87bdbc5a0b 100644
--- a/src/node_device/node_device_udev.c
+++ b/src/node_device/node_device_udev.c
@@ -1477,6 +1477,7 @@ nodeStateCleanup(void)
         virPidFileRelease(driver->stateDir, "driver", driver->lockFD);
 
     VIR_FREE(driver->stateDir);
+    virCondDestroy(&driver->initCond);
     virMutexDestroy(&driver->lock);
     VIR_FREE(driver);
 
@@ -1744,6 +1745,11 @@ nodeStateInitializeEnumerate(void *opaque)
     if (udevEnumerateDevices(udev) != 0)
         goto error;
 
+    nodeDeviceLock();
+    driver->initialized = true;
+    nodeDeviceUnlock();
+    virCondBroadcast(&driver->initCond);
+
     return;
 
  error:
@@ -1806,6 +1812,13 @@ nodeStateInitialize(bool privileged,
         VIR_FREE(driver);
         return VIR_DRV_STATE_INIT_ERROR;
     }
+    if (virCondInit(&driver->initCond) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Unable to initialize condition variable"));
+        virMutexDestroy(&driver->lock);
+        VIR_FREE(driver);
+        return VIR_DRV_STATE_INIT_ERROR;
+    }
 
     driver->privileged = privileged;
 
-- 
2.24.1

Re: [libvirt PATCH] nodedev: fix race in API usage vs initial device enumeration
Posted by Michal Prívozník 4 years ago
On 13. 3. 2020 13:00, Daniel P. Berrangé wrote:
> During startup the udev node device driver impl uses a background thread
> to populate the list of devices to avoid blocking the daemon startup
> entirely. There is no synchronization to the public APIs, so it is
> possible for an application to start calling APIs before the device
> initialization is complete.
> 
> This was not a problem in the old approach where libvirtd was started
> on boot, as initialization would easily complete before any APIs were
> called.
> 
> With the use of socket activation, however, APIs are invoked from the
> very moment the daemon starts. This is easily seen by doing a
> 
>   'virsh -c nodedev:///system list'
> 
> the first time it runs it will only show one or two devices. The second
> time it runs it will show all devices. The solution is to introduce a
> flag and condition variable for APIs to synchronize against before
> returning any data.
> 
> Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
> ---
>  src/conf/virnodedeviceobj.h          |  2 ++
>  src/node_device/node_device_driver.c | 44 ++++++++++++++++++++++++++++
>  src/node_device/node_device_hal.c    | 15 ++++++++++
>  src/node_device/node_device_udev.c   | 13 ++++++++
>  4 files changed, 74 insertions(+)
> 

For both drivers:

> @@ -1744,6 +1745,11 @@ nodeStateInitializeEnumerate(void *opaque)
>      if (udevEnumerateDevices(udev) != 0)
>          goto error;
>  
> +    nodeDeviceLock();
> +    driver->initialized = true;
> +    nodeDeviceUnlock();
> +    virCondBroadcast(&driver->initCond);

Should this broadcast be in the critical section? Just asking, for this
simple case it may be okay.

> +
>      return;
>  
>   error:
> @@ -1806,6 +1812,13 @@ nodeStateInitialize(bool privileged,
>          VIR_FREE(driver);
>          return VIR_DRV_STATE_INIT_ERROR;
>      }
> +    if (virCondInit(&driver->initCond) < 0) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                       _("Unable to initialize condition variable"));

And perhaps, virReportSystemError() would be nicer.

> +        virMutexDestroy(&driver->lock);
> +        VIR_FREE(driver);
> +        return VIR_DRV_STATE_INIT_ERROR;
> +    }
>  
>      driver->privileged = privileged;
>  
> 

Reviewed-by: Michal Privoznik <mprivozn@redhat.com>

Michal

Re: [libvirt PATCH] nodedev: fix race in API usage vs initial device enumeration
Posted by Daniel P. Berrangé 4 years ago
On Mon, Mar 16, 2020 at 05:03:59PM +0100, Michal Prívozník wrote:
> On 13. 3. 2020 13:00, Daniel P. Berrangé wrote:
> > During startup the udev node device driver impl uses a background thread
> > to populate the list of devices to avoid blocking the daemon startup
> > entirely. There is no synchronization to the public APIs, so it is
> > possible for an application to start calling APIs before the device
> > initialization is complete.
> > 
> > This was not a problem in the old approach where libvirtd was started
> > on boot, as initialization would easily complete before any APIs were
> > called.
> > 
> > With the use of socket activation, however, APIs are invoked from the
> > very moment the daemon starts. This is easily seen by doing a
> > 
> >   'virsh -c nodedev:///system list'
> > 
> > the first time it runs it will only show one or two devices. The second
> > time it runs it will show all devices. The solution is to introduce a
> > flag and condition variable for APIs to synchronize against before
> > returning any data.
> > 
> > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
> > ---
> >  src/conf/virnodedeviceobj.h          |  2 ++
> >  src/node_device/node_device_driver.c | 44 ++++++++++++++++++++++++++++
> >  src/node_device/node_device_hal.c    | 15 ++++++++++
> >  src/node_device/node_device_udev.c   | 13 ++++++++
> >  4 files changed, 74 insertions(+)
> > 
> 
> For both drivers:
> 
> > @@ -1744,6 +1745,11 @@ nodeStateInitializeEnumerate(void *opaque)
> >      if (udevEnumerateDevices(udev) != 0)
> >          goto error;
> >  
> > +    nodeDeviceLock();
> > +    driver->initialized = true;
> > +    nodeDeviceUnlock();
> > +    virCondBroadcast(&driver->initCond);
> 
> Should this broadcast be in the critical section? Just asking, for this
> simple case it may be okay.

The critical section protects the updating of the master variable,
"initialized" in this case.

The condition variable signalling just wakes up the other thread
which then acquires this same mutex to read "initialized". Spurious
wake ups are permitted with condition variables - that's why the
waiter side of a condition variable always uses a while() loop
to check the variable & optionally go back to sleep

IOW, condition variable signalling does not need to be part of
any critical section. It merely a means to kick another thread.

> > @@ -1806,6 +1812,13 @@ nodeStateInitialize(bool privileged,
> >          VIR_FREE(driver);
> >          return VIR_DRV_STATE_INIT_ERROR;
> >      }
> > +    if (virCondInit(&driver->initCond) < 0) {
> > +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> > +                       _("Unable to initialize condition variable"));
> 
> And perhaps, virReportSystemError() would be nicer.

Yeah, we really ought to just push the error reporting down a level
into the method, as every caller repeats this.
> 
> Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
> 
> Michal

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|