[PATCH 05/10] rpc: finish all threads before exiting main loop

Nikolay Shirokovskiy posted 10 patches 5 years, 7 months ago
There is a newer version of this series
[PATCH 05/10] rpc: finish all threads before exiting main loop
Posted by Nikolay Shirokovskiy 5 years, 7 months ago
Currently we have issues like [1] on libvirtd shutdown as we cleanup while RPC
and other threads are still running. Let's finish all threads other then main
before cleanup.

The approach to finish threads is suggested in [2]. In order to finish RPC
threads serving API calls we let the event loop run but stop accepting new API
calls and block processing any pending API calls. We also inform all drivers of
shutdown so they can prepare for shutdown too. Then we wait for all RPC threads
and driver's background thread to finish. If finishing takes more then 15s we
just exit as we can't safely cleanup in time.

[1] https://bugzilla.redhat.com/show_bug.cgi?id=1828207
[2] https://www.redhat.com/archives/libvir-list/2020-April/msg01328.html

Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com>
---
 src/remote/remote_daemon.c |  3 --
 src/rpc/virnetdaemon.c     | 82 +++++++++++++++++++++++++++++++++++++++++++++-
 src/rpc/virnetserver.c     |  8 +++++
 src/rpc/virnetserver.h     |  1 +
 4 files changed, 90 insertions(+), 4 deletions(-)

diff --git a/src/remote/remote_daemon.c b/src/remote/remote_daemon.c
index 1aa9bfc..222bb5f 100644
--- a/src/remote/remote_daemon.c
+++ b/src/remote/remote_daemon.c
@@ -1201,9 +1201,6 @@ int main(int argc, char **argv) {
                 0, "shutdown", NULL, NULL);
 
  cleanup:
-    /* Keep cleanup order in inverse order of startup */
-    virNetDaemonClose(dmn);
-
     virNetlinkEventServiceStopAll();
 
     if (driversInitialized) {
diff --git a/src/rpc/virnetdaemon.c b/src/rpc/virnetdaemon.c
index bb81a43..c4b31c6 100644
--- a/src/rpc/virnetdaemon.c
+++ b/src/rpc/virnetdaemon.c
@@ -23,6 +23,7 @@
 #include <unistd.h>
 #include <fcntl.h>
 
+#include "libvirt_internal.h"
 #include "virnetdaemon.h"
 #include "virlog.h"
 #include "viralloc.h"
@@ -69,7 +70,10 @@ struct _virNetDaemon {
     virHashTablePtr servers;
     virJSONValuePtr srvObject;
 
+    int finishTimer;
     bool quit;
+    bool finished;
+    bool graceful;
 
     unsigned int autoShutdownTimeout;
     size_t autoShutdownInhibitions;
@@ -80,6 +84,11 @@ struct _virNetDaemon {
 
 static virClassPtr virNetDaemonClass;
 
+static int
+daemonServerClose(void *payload,
+                  const void *key G_GNUC_UNUSED,
+                  void *opaque G_GNUC_UNUSED);
+
 static void
 virNetDaemonDispose(void *obj)
 {
@@ -796,11 +805,53 @@ daemonServerProcessClients(void *payload,
     return 0;
 }
 
+static int
+daemonServerShutdownWait(void *payload,
+                         const void *key G_GNUC_UNUSED,
+                         void *opaque G_GNUC_UNUSED)
+{
+    virNetServerPtr srv = payload;
+
+    virNetServerShutdownWait(srv);
+    return 0;
+}
+
+static void
+daemonShutdownWait(void *opaque)
+{
+    virNetDaemonPtr dmn = opaque;
+    bool graceful = false;
+
+    virHashForEach(dmn->servers, daemonServerShutdownWait, NULL);
+    if (virStateShutdownWait() < 0)
+        goto finish;
+
+    graceful = true;
+
+ finish:
+    virObjectLock(dmn);
+    dmn->graceful = graceful;
+    virEventUpdateTimeout(dmn->finishTimer, 0);
+    virObjectUnlock(dmn);
+}
+
+static void
+virNetDaemonFinishTimer(int timerid G_GNUC_UNUSED,
+                        void *opaque)
+{
+    virNetDaemonPtr dmn = opaque;
+
+    virObjectLock(dmn);
+    dmn->finished = true;
+    virObjectUnlock(dmn);
+}
+
 void
 virNetDaemonRun(virNetDaemonPtr dmn)
 {
     int timerid = -1;
     bool timerActive = false;
+    virThread shutdownThread;
 
     virObjectLock(dmn);
 
@@ -811,6 +862,9 @@ virNetDaemonRun(virNetDaemonPtr dmn)
     }
 
     dmn->quit = false;
+    dmn->finishTimer = -1;
+    dmn->finished = false;
+    dmn->graceful = false;
 
     if (dmn->autoShutdownTimeout &&
         (timerid = virEventAddTimeout(-1,
@@ -826,7 +880,7 @@ virNetDaemonRun(virNetDaemonPtr dmn)
     virSystemdNotifyStartup();
 
     VIR_DEBUG("dmn=%p quit=%d", dmn, dmn->quit);
-    while (!dmn->quit) {
+    while (!dmn->finished) {
         /* A shutdown timeout is specified, so check
          * if any drivers have active state, if not
          * shutdown after timeout seconds
@@ -857,6 +911,32 @@ virNetDaemonRun(virNetDaemonPtr dmn)
         virObjectLock(dmn);
 
         virHashForEach(dmn->servers, daemonServerProcessClients, NULL);
+
+        if (dmn->quit && dmn->finishTimer == -1) {
+            virHashForEach(dmn->servers, daemonServerClose, NULL);
+            if (virStateShutdown() < 0)
+                break;
+
+            if ((dmn->finishTimer = virEventAddTimeout(15 * 1000,
+                                                       virNetDaemonFinishTimer,
+                                                       dmn, NULL)) < 0) {
+                VIR_WARN("Failed to register finish timer.");
+                break;
+            }
+
+            if (virThreadCreateFull(&shutdownThread, true, daemonShutdownWait,
+                                   "daemon-shutdown", false, dmn) < 0) {
+                VIR_WARN("Failed to register join thread.");
+                break;
+            }
+        }
+    }
+
+    if (dmn->graceful) {
+        virThreadJoin(&shutdownThread);
+    } else {
+        VIR_WARN("Make forcefull daemon shutdown");
+        exit(EXIT_FAILURE);
     }
 
  cleanup:
diff --git a/src/rpc/virnetserver.c b/src/rpc/virnetserver.c
index e0a2386..79ea9f6 100644
--- a/src/rpc/virnetserver.c
+++ b/src/rpc/virnetserver.c
@@ -942,9 +942,17 @@ void virNetServerClose(virNetServerPtr srv)
     for (i = 0; i < srv->nclients; i++)
         virNetServerClientClose(srv->clients[i]);
 
+    virThreadPoolStop(srv->workers);
+
     virObjectUnlock(srv);
 }
 
+void
+virNetServerShutdownWait(virNetServerPtr srv)
+{
+    virThreadPoolDrain(srv->workers);
+}
+
 static inline size_t
 virNetServerTrackPendingAuthLocked(virNetServerPtr srv)
 {
diff --git a/src/rpc/virnetserver.h b/src/rpc/virnetserver.h
index 1c6a2ef..112a51d 100644
--- a/src/rpc/virnetserver.h
+++ b/src/rpc/virnetserver.h
@@ -56,6 +56,7 @@ virNetServerPtr virNetServerNewPostExecRestart(virJSONValuePtr object,
     ATTRIBUTE_NONNULL(4) ATTRIBUTE_NONNULL(5) ATTRIBUTE_NONNULL(6);
 
 void virNetServerClose(virNetServerPtr srv);
+void virNetServerShutdownWait(virNetServerPtr srv);
 
 virJSONValuePtr virNetServerPreExecRestart(virNetServerPtr srv);
 
-- 
1.8.3.1

Re: [PATCH 05/10] rpc: finish all threads before exiting main loop
Posted by Daniel P. Berrangé 5 years, 6 months ago
On Tue, Jul 14, 2020 at 12:32:56PM +0300, Nikolay Shirokovskiy wrote:
> Currently we have issues like [1] on libvirtd shutdown as we cleanup while RPC
> and other threads are still running. Let's finish all threads other then main
> before cleanup.
> 
> The approach to finish threads is suggested in [2]. In order to finish RPC
> threads serving API calls we let the event loop run but stop accepting new API
> calls and block processing any pending API calls. We also inform all drivers of
> shutdown so they can prepare for shutdown too. Then we wait for all RPC threads
> and driver's background thread to finish. If finishing takes more then 15s we
> just exit as we can't safely cleanup in time.
> 
> [1] https://bugzilla.redhat.com/show_bug.cgi?id=1828207
> [2] https://www.redhat.com/archives/libvir-list/2020-April/msg01328.html
> 
> Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com>
> ---
>  src/remote/remote_daemon.c |  3 --
>  src/rpc/virnetdaemon.c     | 82 +++++++++++++++++++++++++++++++++++++++++++++-
>  src/rpc/virnetserver.c     |  8 +++++
>  src/rpc/virnetserver.h     |  1 +
>  4 files changed, 90 insertions(+), 4 deletions(-)
> 
> diff --git a/src/remote/remote_daemon.c b/src/remote/remote_daemon.c
> index 1aa9bfc..222bb5f 100644
> --- a/src/remote/remote_daemon.c
> +++ b/src/remote/remote_daemon.c
> @@ -1201,9 +1201,6 @@ int main(int argc, char **argv) {
>                  0, "shutdown", NULL, NULL);
>  
>   cleanup:
> -    /* Keep cleanup order in inverse order of startup */
> -    virNetDaemonClose(dmn);
> -
>      virNetlinkEventServiceStopAll();
>  
>      if (driversInitialized) {
> diff --git a/src/rpc/virnetdaemon.c b/src/rpc/virnetdaemon.c
> index bb81a43..c4b31c6 100644
> --- a/src/rpc/virnetdaemon.c
> +++ b/src/rpc/virnetdaemon.c
> @@ -23,6 +23,7 @@
>  #include <unistd.h>
>  #include <fcntl.h>
>  
> +#include "libvirt_internal.h"
>  #include "virnetdaemon.h"
>  #include "virlog.h"
>  #include "viralloc.h"
> @@ -69,7 +70,10 @@ struct _virNetDaemon {
>      virHashTablePtr servers;
>      virJSONValuePtr srvObject;
>  
> +    int finishTimer;
>      bool quit;
> +    bool finished;
> +    bool graceful;
>  
>      unsigned int autoShutdownTimeout;
>      size_t autoShutdownInhibitions;
> @@ -80,6 +84,11 @@ struct _virNetDaemon {
>  
>  static virClassPtr virNetDaemonClass;
>  
> +static int
> +daemonServerClose(void *payload,
> +                  const void *key G_GNUC_UNUSED,
> +                  void *opaque G_GNUC_UNUSED);
> +
>  static void
>  virNetDaemonDispose(void *obj)
>  {
> @@ -796,11 +805,53 @@ daemonServerProcessClients(void *payload,
>      return 0;
>  }
>  
> +static int
> +daemonServerShutdownWait(void *payload,
> +                         const void *key G_GNUC_UNUSED,
> +                         void *opaque G_GNUC_UNUSED)
> +{
> +    virNetServerPtr srv = payload;
> +
> +    virNetServerShutdownWait(srv);
> +    return 0;
> +}
> +
> +static void
> +daemonShutdownWait(void *opaque)
> +{
> +    virNetDaemonPtr dmn = opaque;
> +    bool graceful = false;
> +
> +    virHashForEach(dmn->servers, daemonServerShutdownWait, NULL);
> +    if (virStateShutdownWait() < 0)

This code can't have any dependancy on virStateShutdownWait because
it is used in virtlockd and virtlogd, neither of which use the
virState infrastructure.

> +        goto finish;
> +
> +    graceful = true;
> +
> + finish:
> +    virObjectLock(dmn);
> +    dmn->graceful = graceful;
> +    virEventUpdateTimeout(dmn->finishTimer, 0);
> +    virObjectUnlock(dmn);
> +}
> +
> +static void
> +virNetDaemonFinishTimer(int timerid G_GNUC_UNUSED,
> +                        void *opaque)
> +{
> +    virNetDaemonPtr dmn = opaque;
> +
> +    virObjectLock(dmn);
> +    dmn->finished = true;
> +    virObjectUnlock(dmn);
> +}
> +
>  void
>  virNetDaemonRun(virNetDaemonPtr dmn)
>  {
>      int timerid = -1;
>      bool timerActive = false;
> +    virThread shutdownThread;
>  
>      virObjectLock(dmn);
>  
> @@ -811,6 +862,9 @@ virNetDaemonRun(virNetDaemonPtr dmn)
>      }
>  
>      dmn->quit = false;
> +    dmn->finishTimer = -1;
> +    dmn->finished = false;
> +    dmn->graceful = false;
>  
>      if (dmn->autoShutdownTimeout &&
>          (timerid = virEventAddTimeout(-1,
> @@ -826,7 +880,7 @@ virNetDaemonRun(virNetDaemonPtr dmn)
>      virSystemdNotifyStartup();
>  
>      VIR_DEBUG("dmn=%p quit=%d", dmn, dmn->quit);
> -    while (!dmn->quit) {
> +    while (!dmn->finished) {
>          /* A shutdown timeout is specified, so check
>           * if any drivers have active state, if not
>           * shutdown after timeout seconds
> @@ -857,6 +911,32 @@ virNetDaemonRun(virNetDaemonPtr dmn)
>          virObjectLock(dmn);
>  
>          virHashForEach(dmn->servers, daemonServerProcessClients, NULL);
> +
> +        if (dmn->quit && dmn->finishTimer == -1) {
> +            virHashForEach(dmn->servers, daemonServerClose, NULL);
> +            if (virStateShutdown() < 0)
> +                break;

Again, we cna't have this direct call here.


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|