[PATCH v2 06/13] scripts/qmp_helper: add support for a timeout logic

Mauro Carvalho Chehab posted 13 patches 1 day, 20 hours ago
Maintainers: John Snow <jsnow@redhat.com>, Cleber Rosa <crosa@redhat.com>, Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
[PATCH v2 06/13] scripts/qmp_helper: add support for a timeout logic
Posted by Mauro Carvalho Chehab 1 day, 20 hours ago
We can't inject a new GHES record into the same source before
the previous one has been acked. There is an async mechanism to
verify when the kernel is ready, which is implemented in QEMU's
GHES driver.

If errors are injected too quickly, QEMU may return an error.
Add a retry mechanism, bounded by a maximum timeout, to handle
such errors.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 scripts/qmp_helper.py | 47 +++++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/scripts/qmp_helper.py b/scripts/qmp_helper.py
index 1587492807fe..d5ffd51f161e 100755
--- a/scripts/qmp_helper.py
+++ b/scripts/qmp_helper.py
@@ -14,6 +14,7 @@
 
 from datetime import datetime
 from os import path as os_path
+from time import sleep
 
 try:
     qemu_dir = os_path.abspath(os_path.dirname(os_path.dirname(__file__)))
@@ -324,7 +325,8 @@ class qmp:
     Opens a connection and send/receive QMP commands.
     """
 
-    def send_cmd(self, command, args=None, may_open=False, return_error=True):
+    def send_cmd(self, command, args=None, may_open=False, return_error=True,
+                 timeout=None):
         """Send a command to QMP, optinally opening a connection"""
 
         if may_open:
@@ -336,12 +338,31 @@ def send_cmd(self, command, args=None, may_open=False, return_error=True):
         if args:
             msg['arguments'] = args
 
-        try:
-            obj = self.qmp_monitor.cmd_obj(msg)
-        # Can we use some other exception class here?
-        except Exception as e:                         # pylint: disable=W0718
-            print(f"Command: {command}")
-            print(f"Failed to inject error: {e}.")
+        if timeout and timeout > 0:
+            attempts = int(timeout * 10)
+        else:
+            attempts = 1
+
+        # Try up to attempts
+        for i in range(0, attempts):
+            try:
+                obj = self.qmp_monitor.cmd_obj(msg)
+
+                if obj and "return" in obj and not obj["return"]:
+                    break
+
+            except Exception as e:                     # pylint: disable=W0718
+                print(f"Command: {command}")
+                print(f"Failed to inject error: {e}.")
+                obj = None
+
+            if attempts > 1:
+                print(f"Error inject attempt {i + 1}/{attempts} failed.")
+
+            if i + 1 < attempts:
+                sleep(0.1)
+
+        if not obj:
             return None
 
         if "return" in obj:
@@ -531,7 +552,7 @@ def __init__(self, host, port, debug=False):
     #
     # Socket QMP send command
     #
-    def send_cper_raw(self, cper_data):
+    def send_cper_raw(self, cper_data, timeout=None):
         """
         Send a raw CPER data to QEMU though QMP TCP socket.
 
@@ -546,11 +567,11 @@ def send_cper_raw(self, cper_data):
 
         self._connect()
 
-        if self.send_cmd("inject-ghes-v2-error", cmd_arg):
+        ret = self.send_cmd("inject-ghes-v2-error", cmd_arg, timeout=timeout)
+        if ret:
             print("Error injected.")
-            return True
 
-        return False
+        return ret
 
     def get_gede(self, notif_type, payload_length):
         """
@@ -597,7 +618,7 @@ def get_gebs(self, payload_length):
         return gebs
 
     def send_cper(self, notif_type, payload,
-                  gede=None, gebs=None, raw_data=None):
+                  gede=None, gebs=None, raw_data=None, timeout=None):
         """
         Send commands to QEMU though QMP TCP socket.
 
@@ -656,7 +677,7 @@ def send_cper(self, notif_type, payload,
 
             util.dump_bytearray("Payload", payload)
 
-        return self.send_cper_raw(cper_data)
+        return self.send_cper_raw(cper_data, timeout=timeout)
 
     def search_qom(self, path, prop, regex):
         """
-- 
2.52.0
Re: [PATCH v2 06/13] scripts/qmp_helper: add support for a timeout logic
Posted by Jonathan Cameron via qemu development 1 day, 17 hours ago
On Fri, 23 Jan 2026 14:35:20 +0100
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:

> We can't inject a new GHES record to the same source before
> it has been acked. There is an async mechanism to verify when
> the Kernel is ready, which is implemented at QEMU's ghes
> driver.
> 
> If error inject is too fast, QEMU may return an error. When
> such errors occur, implement a retry mechanism, based on a
> maximum timeout.
> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
One comment where I think you intended to make a comment clearer.

Not important though
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>

> ---
>  scripts/qmp_helper.py | 47 +++++++++++++++++++++++++++++++------------
>  1 file changed, 34 insertions(+), 13 deletions(-)
> 
> diff --git a/scripts/qmp_helper.py b/scripts/qmp_helper.py
> index 1587492807fe..d5ffd51f161e 100755
> --- a/scripts/qmp_helper.py
> +++ b/scripts/qmp_helper.py
> @@ -14,6 +14,7 @@
>  
>  from datetime import datetime
>  from os import path as os_path
> +from time import sleep
>  
>  try:
>      qemu_dir = os_path.abspath(os_path.dirname(os_path.dirname(__file__)))
> @@ -324,7 +325,8 @@ class qmp:
>      Opens a connection and send/receive QMP commands.
>      """
>  
> -    def send_cmd(self, command, args=None, may_open=False, return_error=True):
> +    def send_cmd(self, command, args=None, may_open=False, return_error=True,
> +                 timeout=None):
>          """Send a command to QMP, optinally opening a connection"""
>  
>          if may_open:
> @@ -336,12 +338,31 @@ def send_cmd(self, command, args=None, may_open=False, return_error=True):
>          if args:
>              msg['arguments'] = args
>  
> -        try:
> -            obj = self.qmp_monitor.cmd_obj(msg)
> -        # Can we use some other exception class here?
> -        except Exception as e:                         # pylint: disable=W0718
> -            print(f"Command: {command}")
> -            print(f"Failed to inject error: {e}.")
> +        if timeout and timeout > 0:
> +            attempts = int(timeout * 10)
> +        else:
> +            attempts = 1
> +
> +        # Try up to attempts
From v1 thread:
https://lore.kernel.org/qemu-devel/20260122171357.00000747@huawei.com/T/#m5e70aed9fca30a24106ca39196bcf935da745722

I thought the plan was to update this message?

> +        for i in range(0, attempts):
> +            try:
> +                obj = self.qmp_monitor.cmd_obj(msg)