[PATCH] rpc: Temporarily stop accept()-ing new clients on EMFILE

Michal Privoznik posted 1 patch 1 week, 3 days ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/2857a75e9f234eeff9d535af0f8ba7a76e7adefe.1634042491.git.mprivozn@redhat.com
src/libvirt_remote.syms       |  1 +
src/rpc/virnetserver.c        |  9 ++++++
src/rpc/virnetserverservice.c | 53 ++++++++++++++++++++++++++++++++++-
src/rpc/virnetserverservice.h |  2 ++
src/rpc/virnetsocket.c        | 15 ++++++++++
5 files changed, 79 insertions(+), 1 deletion(-)

[PATCH] rpc: Temporarily stop accept()-ing new clients on EMFILE

Posted by Michal Privoznik 1 week, 3 days ago
This commit is related to 5de203f879 which I pushed a few days
ago. While that commit prioritized closing clients socket over
the rest of I/O process, this one goes one step further and
temporarily suspends processing new connection requests.

A brief recapitulation of the problem:

1) assume that libvirt is at the top of RLIMIT_NOFILE (that is no
   new FDs can be opened).

2) we have a client trying to connect to a UNIX/TCP socket

Because of 2) our event loop sees POLLIN on the socket and thus
calls virNetServerServiceAccept(). But since no new FDs can be
opened (because of 1)) the request is not handled and we will get
the same event on next iteration. The poll() will exit
immediately because there is an event on the socket.  Thus we end
up in an endless loop.

To break the loop and stop burning CPU cycles we can stop
listening for events on the socket and set up a timer tho enable
listening again after some time (I chose 5 seconds because of no
obvious reason).

There's another area where we play with temporarily suspending
accept() of new clients - when a client disconnects and we check
max_clients against number of current clients. Problem here is
that max_clients can be orders of magnitude larger than
RLIMIT_NOFILE but more importantly, what this code considers
client disconnect is not equal to closing client's FD.
A client disconnecting means that the corresponding client
structure is removed from the internal list of clients. Closing
of the client's FD is done from event loop - asynchronously.

To avoid this part stepping on the toes of my fix, let's make the
code NOP if socket timer (as described above) is active.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---
 src/libvirt_remote.syms       |  1 +
 src/rpc/virnetserver.c        |  9 ++++++
 src/rpc/virnetserverservice.c | 53 ++++++++++++++++++++++++++++++++++-
 src/rpc/virnetserverservice.h |  2 ++
 src/rpc/virnetsocket.c        | 15 ++++++++++
 5 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/src/libvirt_remote.syms b/src/libvirt_remote.syms
index b4265adf2e..942e1013a6 100644
--- a/src/libvirt_remote.syms
+++ b/src/libvirt_remote.syms
@@ -221,6 +221,7 @@ virNetServerServiceNewTCP;
 virNetServerServiceNewUNIX;
 virNetServerServicePreExecRestart;
 virNetServerServiceSetDispatcher;
+virNetServerServiceTimerActive;
 virNetServerServiceToggle;
 
 
diff --git a/src/rpc/virnetserver.c b/src/rpc/virnetserver.c
index b3214883ee..dc8f32b095 100644
--- a/src/rpc/virnetserver.c
+++ b/src/rpc/virnetserver.c
@@ -252,6 +252,15 @@ virNetServerDispatchNewMessage(virNetServerClient *client,
 static void
 virNetServerCheckLimits(virNetServer *srv)
 {
+    size_t i;
+
+    for (i = 0; i < srv->nservices; i++) {
+        if (virNetServerServiceTimerActive(srv->services[i])) {
+            VIR_DEBUG("Skipping client-related limits evaluation");
+            return;
+        }
+    }
+
     VIR_DEBUG("Checking client-related limits to re-enable or temporarily "
               "suspend services: nclients=%zu nclients_max=%zu "
               "nclients_unauth=%zu nclients_unauth_max=%zu",
diff --git a/src/rpc/virnetserverservice.c b/src/rpc/virnetserverservice.c
index 0c4c437a49..214eae1acb 100644
--- a/src/rpc/virnetserverservice.c
+++ b/src/rpc/virnetserverservice.c
@@ -43,6 +43,8 @@ struct _virNetServerService {
     int auth;
     bool readonly;
     size_t nrequests_client_max;
+    int timer;
+    bool timerActive;
 
     virNetTLSContext *tls;
 
@@ -71,9 +73,25 @@ static void virNetServerServiceAccept(virNetSocket *sock,
 {
     virNetServerService *svc = opaque;
     virNetSocket *clientsock = NULL;
+    int rc;
 
-    if (virNetSocketAccept(sock, &clientsock) < 0)
+    rc = virNetSocketAccept(sock, &clientsock);
+    if (rc < 0) {
+        if (rc == -2) {
+            /* Could not accept new client due to EMFILE. Suspend listening on
+             * the socket and set up a timer to enable it later. Hopefully,
+             * some FDs will be closed meanwhile. */
+            VIR_DEBUG("Temporarily suspending listening on svc=%p because accept() on sock=%p failed (errno=%d)",
+                      svc, sock, errno);
+
+            virNetServerServiceToggle(svc, false);
+
+            svc->timerActive = true;
+            /* Retry in 5 seconds. */
+            virEventUpdateTimeout(svc->timer, 5 * 1000);
+        }
         goto cleanup;
+    }
 
     if (!clientsock) /* Connection already went away */
         goto cleanup;
@@ -88,6 +106,21 @@ static void virNetServerServiceAccept(virNetSocket *sock,
 }
 
 
+static void
+virNetServerServiceTimerFunc(int timer,
+                             void *opaque)
+{
+    virNetServerService *svc = opaque;
+
+    VIR_DEBUG("Resuming listening on service svc=%p after previous suspend", svc);
+
+    virNetServerServiceToggle(svc, true);
+
+    virEventUpdateTimeout(timer, -1);
+    svc->timerActive = false;
+}
+
+
 static virNetServerService *
 virNetServerServiceNewSocket(virNetSocket **socks,
                              size_t nsocks,
@@ -117,6 +150,14 @@ virNetServerServiceNewSocket(virNetSocket **socks,
     svc->nrequests_client_max = nrequests_client_max;
     svc->tls = virObjectRef(tls);
 
+    virObjectRef(svc);
+    svc->timer = virEventAddTimeout(-1, virNetServerServiceTimerFunc,
+                                    svc, virObjectFreeCallback);
+    if (svc->timer < 0) {
+        virObjectUnref(svc);
+        goto error;
+    }
+
     for (i = 0; i < svc->nsocks; i++) {
         if (virNetSocketListen(svc->socks[i], max_queued_clients) < 0)
             goto error;
@@ -407,6 +448,9 @@ void virNetServerServiceDispose(void *obj)
     virNetServerService *svc = obj;
     size_t i;
 
+    if (svc->timer >= 0)
+        virEventRemoveTimeout(svc->timer);
+
     for (i = 0; i < svc->nsocks; i++)
        virObjectUnref(svc->socks[i]);
     g_free(svc->socks);
@@ -438,3 +482,10 @@ void virNetServerServiceClose(virNetServerService *svc)
         virNetSocketClose(svc->socks[i]);
     }
 }
+
+
+bool
+virNetServerServiceTimerActive(virNetServerService *svc)
+{
+    return svc->timerActive;
+}
diff --git a/src/rpc/virnetserverservice.h b/src/rpc/virnetserverservice.h
index ab5798938e..f3d55a9cc0 100644
--- a/src/rpc/virnetserverservice.h
+++ b/src/rpc/virnetserverservice.h
@@ -78,3 +78,5 @@ void virNetServerServiceToggle(virNetServerService *svc,
                                bool enabled);
 
 void virNetServerServiceClose(virNetServerService *svc);
+
+bool virNetServerServiceTimerActive(virNetServerService *svc);
diff --git a/src/rpc/virnetsocket.c b/src/rpc/virnetsocket.c
index 212089520d..60dff71718 100644
--- a/src/rpc/virnetsocket.c
+++ b/src/rpc/virnetsocket.c
@@ -2067,6 +2067,19 @@ int virNetSocketListen(virNetSocket *sock, int backlog)
     return 0;
 }
 
+
+/**
+ * virNetSocketAccept:
+ * @sock: socket to accept connection on
+ * @clientsock: returned client socket
+ *
+ * For given socket @sock accept incoming connection and create
+ * @clientsock representation of the new accepted connection.
+ *
+ * Returns: 0 on success,
+ *         -2 if accepting failed due to EMFILE error,
+ *         -1 otherwise.
+ */
 int virNetSocketAccept(virNetSocket *sock, virNetSocket **clientsock)
 {
     int fd = -1;
@@ -2087,6 +2100,8 @@ int virNetSocketAccept(virNetSocket *sock, virNetSocket **clientsock)
             errno == EAGAIN) {
             ret = 0;
             goto cleanup;
+        } else if (errno == EMFILE) {
+            ret = -2;
         }
 
         virReportSystemError(errno, "%s",
-- 
2.32.0

Re: [PATCH] rpc: Temporarily stop accept()-ing new clients on EMFILE

Posted by Daniel P. Berrangé 2 days, 22 hours ago
On Tue, Oct 12, 2021 at 02:41:39PM +0200, Michal Privoznik wrote:
> This commit is related to 5de203f879 which I pushed a few days
> ago. While that commit prioritized closing clients socket over
> the rest of I/O process, this one goes one step further and
> temporarily suspends processing new connection requests.
> 
> A brief recapitulation of the problem:
> 
> 1) assume that libvirt is at the top of RLIMIT_NOFILE (that is no
>    new FDs can be opened).
> 
> 2) we have a client trying to connect to a UNIX/TCP socket
> 
> Because of 2) our event loop sees POLLIN on the socket and thus
> calls virNetServerServiceAccept(). But since no new FDs can be
> opened (because of 1)) the request is not handled and we will get
> the same event on next iteration. The poll() will exit
> immediately because there is an event on the socket.  Thus we end
> up in an endless loop.
> 
> To break the loop and stop burning CPU cycles we can stop
> listening for events on the socket and set up a timer tho enable
> listening again after some time (I chose 5 seconds because of no
> obvious reason).
> 
> There's another area where we play with temporarily suspending
> accept() of new clients - when a client disconnects and we check
> max_clients against number of current clients. Problem here is
> that max_clients can be orders of magnitude larger than
> RLIMIT_NOFILE but more importantly, what this code considers
> client disconnect is not equal to closing client's FD.
> A client disconnecting means that the corresponding client
> structure is removed from the internal list of clients. Closing
> of the client's FD is done from event loop - asynchronously.
> 
> To avoid this part stepping on the toes of my fix, let's make the
> code NOP if socket timer (as described above) is active.
> 
> Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
> ---
>  src/libvirt_remote.syms       |  1 +
>  src/rpc/virnetserver.c        |  9 ++++++
>  src/rpc/virnetserverservice.c | 53 ++++++++++++++++++++++++++++++++++-
>  src/rpc/virnetserverservice.h |  2 ++
>  src/rpc/virnetsocket.c        | 15 ++++++++++
>  5 files changed, 79 insertions(+), 1 deletion(-)

Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|