[PATCH] qemu: Do not report eof when processing monitor IO

Jim Fehlig posted 1 patch 2 years, 5 months ago
Test syntax-check failed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20211014235308.8661-1-jfehlig@suse.com
src/qemu/qemu_monitor.c | 28 +++++++++++++++-------------
1 file changed, 15 insertions(+), 13 deletions(-)
[PATCH] qemu: Do not report eof when processing monitor IO
Posted by Jim Fehlig 2 years, 5 months ago
There have been countless reports from users concerned about the following
error reported by libvirtd when qemu domains are shut down

internal error: End of file from qemu monitor

While the error is harmless, users often mistake it for a real problem with
their deployments. EOF from the monitor can't be entirely ignored since
other threads may be using the monitor and must be able to detect the EOF
condition.

One potential fix is to delay reporting EOF until the monitor is used
after EOF is detected. This patch adds a 'goteof' member to the
qemuMonitor structure, which is set when EOF is detected on the monitor
socket. If another thread later tries to send data on the monitor, the
EOF error is reported.

Signed-off-by: Jim Fehlig <jfehlig@suse.com>
---

First non-RFC version of the patch. RFC version can be found here

https://listman.redhat.com/archives/libvir-list/2021-October/msg00484.html

Tests mentioned in the below post are running on this version of the
patch and have completed 12 iterations thus far

https://listman.redhat.com/archives/libvir-list/2021-October/msg00351.html

 src/qemu/qemu_monitor.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index 7ff6a1161f..434cc26c10 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -113,6 +113,7 @@ struct _qemuMonitor {
 
     /* true if qemu no longer wants 'props' sub-object of object-add */
     bool objectAddNoWrap;
+    bool goteof;
 };
 
 /**
@@ -526,7 +527,6 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
 {
     qemuMonitor *mon = opaque;
     bool error = false;
-    bool eof = false;
     bool hangup = false;
 
     virObjectRef(mon);
@@ -544,7 +544,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
 
     if (mon->lastError.code != VIR_ERR_OK) {
         if (cond & (G_IO_HUP | G_IO_ERR))
-            eof = true;
+            mon->goteof = true;
         error = true;
     } else {
         if (cond & G_IO_OUT) {
@@ -562,7 +562,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
                 if (errno == ECONNRESET)
                     hangup = true;
             } else if (got == 0) {
-                eof = true;
+                mon->goteof = true;
             } else {
                 /* Ignore hangup/error cond if we read some data, to
                  * give time for that data to be consumed */
@@ -575,22 +575,19 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
 
         if (cond & G_IO_HUP) {
             hangup = true;
-            if (!error) {
-                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-                               _("End of file from qemu monitor"));
-                eof = true;
-            }
+            if (!error)
+                mon->goteof = true;
         }
 
-        if (!error && !eof &&
+        if (!error && !mon->goteof &&
             cond & G_IO_ERR) {
             virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                            _("Invalid file descriptor while waiting for monitor"));
-            eof = true;
+            mon->goteof = true;
         }
     }
 
-    if (error || eof) {
+    if (error || mon->goteof) {
         if (hangup && mon->logFunc != NULL) {
             /* Check if an error message from qemu is available and if so, use
              * it to overwrite the actual message. It's done only in early
@@ -609,7 +606,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
             /* Already have an error, so clear any new error */
             virResetLastError();
         } else {
-            if (virGetLastErrorCode() == VIR_ERR_OK)
+            if (virGetLastErrorCode() == VIR_ERR_OK && !mon->goteof)
                 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                                _("Error while processing monitor IO"));
             virCopyLastError(&mon->lastError);
@@ -630,7 +627,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
     /* We have to unlock to avoid deadlock against command thread,
      * but is this safe ?  I think it is, because the callback
      * will try to acquire the virDomainObj *mutex next */
-    if (eof) {
+    if (mon->goteof) {
         qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify;
         virDomainObj *vm = mon->vm;
 
@@ -949,6 +946,11 @@ qemuMonitorSend(qemuMonitor *mon,
         virSetError(&mon->lastError);
         return -1;
     }
+    if (mon->goteof) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("End of file from qemu monitor"));
+        return -1;
+    }
 
     mon->msg = msg;
     qemuMonitorUpdateWatch(mon);
-- 
2.33.0


Re: [PATCH] qemu: Do not report eof when processing monitor IO
Posted by Michal Prívozník 2 years, 5 months ago
On 10/15/21 1:53 AM, Jim Fehlig wrote:
> There have been countless reports from users concerned about the following
> error reported by libvirtd when qemu domains are shutdown
> 
> internal error: End of file from qemu monitor
> 
> While the error is harmless, users often mistaken it for real problem with
> their deployments. EOF from the monitor can't be entirely ignored since
> other threads may be using the monitor and must be able to detect the EOF
> condition.
> 
> One potential fix is to delay reporting EOF until the monitor is used
> after EOF is detected. This patch adds a 'goteof' member to the
> qemuMonitor structure, which is set when EOF is detected on the monitor
> socket. If another thread later tries to send data on the monitor, the
> EOF error is reported.
> 
> Signed-off-by: Jim Fehlig <jfehlig@suse.com>
> ---
> 
> First non-RFC version of the patch. RFC version can be found here
> 
> https://listman.redhat.com/archives/libvir-list/2021-October/msg00484.html
> 
> Tests mentioned in the below post are running on this version of the
> patch and have completed 12 iterations thus far
> 
> https://listman.redhat.com/archives/libvir-list/2021-October/msg00351.html
> 
>  src/qemu/qemu_monitor.c | 28 +++++++++++++++-------------
>  1 file changed, 15 insertions(+), 13 deletions(-)
> 
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index 7ff6a1161f..434cc26c10 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -113,6 +113,7 @@ struct _qemuMonitor {
>  
>      /* true if qemu no longer wants 'props' sub-object of object-add */
>      bool objectAddNoWrap;
> +    bool goteof;

Nitpick - sorry for not raising this earlier, but this placement feels
a bit weird. The goteof has nothing to do with objectAddNoWrap member.
Can you please move it a few lines up? Somewhere around lastError or
waitGreeting - that place looks better.

>  };
>  

Reviewed-by: Michal Privoznik <mprivozn@redhat.com>

Michal

Re: [PATCH] qemu: Do not report eof when processing monitor IO
Posted by Jim Fehlig 2 years, 5 months ago
On 10/15/21 01:22, Michal Prívozník wrote:
> On 10/15/21 1:53 AM, Jim Fehlig wrote:
>> There have been countless reports from users concerned about the following
>> error reported by libvirtd when qemu domains are shutdown
>>
>> internal error: End of file from qemu monitor
>>
>> While the error is harmless, users often mistaken it for real problem with
>> their deployments. EOF from the monitor can't be entirely ignored since
>> other threads may be using the monitor and must be able to detect the EOF
>> condition.
>>
>> One potential fix is to delay reporting EOF until the monitor is used
>> after EOF is detected. This patch adds a 'goteof' member to the
>> qemuMonitor structure, which is set when EOF is detected on the monitor
>> socket. If another thread later tries to send data on the monitor, the
>> EOF error is reported.
>>
>> Signed-off-by: Jim Fehlig <jfehlig@suse.com>
>> ---
>>
>> First non-RFC version of the patch. RFC version can be found here
>>
>> https://listman.redhat.com/archives/libvir-list/2021-October/msg00484.html
>>
>> Tests mentioned in the below post are running on this version of the
>> patch and have completed 12 iterations thus far
>>
>> https://listman.redhat.com/archives/libvir-list/2021-October/msg00351.html
>>
>>   src/qemu/qemu_monitor.c | 28 +++++++++++++++-------------
>>   1 file changed, 15 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
>> index 7ff6a1161f..434cc26c10 100644
>> --- a/src/qemu/qemu_monitor.c
>> +++ b/src/qemu/qemu_monitor.c
>> @@ -113,6 +113,7 @@ struct _qemuMonitor {
>>   
>>       /* true if qemu no longer wants 'props' sub-object of object-add */
>>       bool objectAddNoWrap;
>> +    bool goteof;
> 
> Nit pick - sorry for not raising this earlier, but this placement feels
> a bit weird. The goteof has nothing to do with objectAddNoWrap member.
> Can you please move it a few lines up? Somewhere around lastError or
> waitGreeting - that place looks better.

No problem. I added the below diff before pushing. Thanks for helping get this small
yet annoying cosmetic issue resolved!

Cheers,
Jim

diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index 434cc26c10..e8accaf2b0 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -98,6 +98,9 @@ struct _qemuMonitor {
       * the next monitor msg */
      virError lastError;

+    /* Set to true when EOF is detected on the monitor */
+    bool goteof;
+
      int nextSerial;

      bool waitGreeting;
@@ -113,7 +116,6 @@ struct _qemuMonitor {

      /* true if qemu no longer wants 'props' sub-object of object-add */
      bool objectAddNoWrap;
-    bool goteof;
  };

  /**