[PATCH v13 03/13] virtio-gpu: Handle virtio_gpu_virgl_init() failure

Dmitry Osipenko posted 13 patches 6 months ago
Maintainers: Gerd Hoffmann <kraxel@redhat.com>, "Michael S. Tsirkin" <mst@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>, Thomas Huth <thuth@redhat.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>
There is a newer version of this series
[PATCH v13 03/13] virtio-gpu: Handle virtio_gpu_virgl_init() failure
Posted by Dmitry Osipenko 6 months ago
virtio_gpu_virgl_init() may fail, leading to a further Qemu crash
because Qemu assumes it never fails. Check virtio_gpu_virgl_init()
return code and don't execute virtio commands on error. Failed
virtio_gpu_virgl_init() will result in a timed out virtio commands
for a guest OS.

Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
---
 hw/display/virtio-gpu-gl.c     | 29 +++++++++++++++++++++--------
 include/hw/virtio/virtio-gpu.h | 11 +++++++++--
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
index e06be60dfbfc..38a2b1bd3916 100644
--- a/hw/display/virtio-gpu-gl.c
+++ b/hw/display/virtio-gpu-gl.c
@@ -29,9 +29,14 @@ static void virtio_gpu_gl_update_cursor_data(VirtIOGPU *g,
                                              struct virtio_gpu_scanout *s,
                                              uint32_t resource_id)
 {
+    VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
     uint32_t width, height;
     uint32_t pixels, *data;
 
+    if (gl->renderer_state != RS_INITED) {
+        return;
+    }
+
     data = virgl_renderer_get_cursor_data(resource_id, &width, &height);
     if (!data) {
         return;
@@ -65,13 +70,21 @@ static void virtio_gpu_gl_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
         return;
     }
 
-    if (!gl->renderer_inited) {
-        virtio_gpu_virgl_init(g);
-        gl->renderer_inited = true;
-    }
-    if (gl->renderer_reset) {
-        gl->renderer_reset = false;
+    switch (gl->renderer_state) {
+    case RS_RESET:
         virtio_gpu_virgl_reset(g);
+        /* fallthrough */
+    case RS_START:
+        if (virtio_gpu_virgl_init(g)) {
+            gl->renderer_state = RS_INIT_FAILED;
+        } else {
+            gl->renderer_state = RS_INITED;
+        }
+        break;
+    case RS_INIT_FAILED:
+        return;
+    case RS_INITED:
+        break;
     }
 
     cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command));
@@ -98,9 +111,9 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
      * GL functions must be called with the associated GL context in main
      * thread, and when the renderer is unblocked.
      */
-    if (gl->renderer_inited && !gl->renderer_reset) {
+    if (gl->renderer_state == RS_INITED) {
         virtio_gpu_virgl_reset_scanout(g);
-        gl->renderer_reset = true;
+        gl->renderer_state = RS_RESET;
     }
 }
 
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 7ff989a45a5c..6e71d799e5da 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -224,11 +224,18 @@ struct VirtIOGPUClass {
                              Error **errp);
 };
 
+/* VirtIOGPUGL renderer states */
+typedef enum {
+    RS_START,       /* starting state */
+    RS_INIT_FAILED, /* failed initialisation */
+    RS_INITED,      /* initialised and working */
+    RS_RESET,       /* inited and reset pending, moves to start after reset */
+} RenderState;
+
 struct VirtIOGPUGL {
     struct VirtIOGPU parent_obj;
 
-    bool renderer_inited;
-    bool renderer_reset;
+    RenderState renderer_state;
 
     QEMUTimer *fence_poll;
     QEMUTimer *print_stats;
-- 
2.44.0
Re: [PATCH v13 03/13] virtio-gpu: Handle virtio_gpu_virgl_init() failure
Posted by Marc-André Lureau 5 months, 3 weeks ago
Hi

On Mon, May 27, 2024 at 7:03 AM Dmitry Osipenko <
dmitry.osipenko@collabora.com> wrote:

> virtio_gpu_virgl_init() may fail, leading to a further Qemu crash
> because Qemu assumes it never fails. Check virtio_gpu_virgl_init()
> return code and don't execute virtio commands on error. Failed
> virtio_gpu_virgl_init() will result in a timed out virtio commands
> for a guest OS.
>
> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> ---
>  hw/display/virtio-gpu-gl.c     | 29 +++++++++++++++++++++--------
>  include/hw/virtio/virtio-gpu.h | 11 +++++++++--
>  2 files changed, 30 insertions(+), 10 deletions(-)
>
> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
> index e06be60dfbfc..38a2b1bd3916 100644
> --- a/hw/display/virtio-gpu-gl.c
> +++ b/hw/display/virtio-gpu-gl.c
> @@ -29,9 +29,14 @@ static void virtio_gpu_gl_update_cursor_data(VirtIOGPU
> *g,
>                                               struct virtio_gpu_scanout *s,
>                                               uint32_t resource_id)
>  {
> +    VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
>      uint32_t width, height;
>      uint32_t pixels, *data;
>
> +    if (gl->renderer_state != RS_INITED) {
> +        return;
> +    }
> +
>      data = virgl_renderer_get_cursor_data(resource_id, &width, &height);
>      if (!data) {
>          return;
> @@ -65,13 +70,21 @@ static void virtio_gpu_gl_handle_ctrl(VirtIODevice
> *vdev, VirtQueue *vq)
>          return;
>      }
>
> -    if (!gl->renderer_inited) {
> -        virtio_gpu_virgl_init(g);
> -        gl->renderer_inited = true;
> -    }
> -    if (gl->renderer_reset) {
> -        gl->renderer_reset = false;
> +    switch (gl->renderer_state) {
> +    case RS_RESET:
>          virtio_gpu_virgl_reset(g);
> +        /* fallthrough */
> +    case RS_START:
> +        if (virtio_gpu_virgl_init(g)) {
> +            gl->renderer_state = RS_INIT_FAILED;
> +        } else {
> +            gl->renderer_state = RS_INITED;
> +        }
> +        break;
> +    case RS_INIT_FAILED:
> +        return;
> +    case RS_INITED:
> +        break;
>      }
>
>
This still lets it go through the cmd processing after setting
gl->renderer_state = RS_INIT_FAILED, the first time.


>      cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command));
> @@ -98,9 +111,9 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
>       * GL functions must be called with the associated GL context in main
>       * thread, and when the renderer is unblocked.
>       */
> -    if (gl->renderer_inited && !gl->renderer_reset) {
> +    if (gl->renderer_state == RS_INITED) {
>          virtio_gpu_virgl_reset_scanout(g);
> -        gl->renderer_reset = true;
> +        gl->renderer_state = RS_RESET;
>      }
>  }
>
> diff --git a/include/hw/virtio/virtio-gpu.h
> b/include/hw/virtio/virtio-gpu.h
> index 7ff989a45a5c..6e71d799e5da 100644
> --- a/include/hw/virtio/virtio-gpu.h
> +++ b/include/hw/virtio/virtio-gpu.h
> @@ -224,11 +224,18 @@ struct VirtIOGPUClass {
>                               Error **errp);
>  };
>
> +/* VirtIOGPUGL renderer states */
> +typedef enum {
> +    RS_START,       /* starting state */
> +    RS_INIT_FAILED, /* failed initialisation */
> +    RS_INITED,      /* initialised and working */
> +    RS_RESET,       /* inited and reset pending, moves to start after
> reset */
> +} RenderState;
> +
>  struct VirtIOGPUGL {
>      struct VirtIOGPU parent_obj;
>
> -    bool renderer_inited;
> -    bool renderer_reset;
> +    RenderState renderer_state;
>
>      QEMUTimer *fence_poll;
>      QEMUTimer *print_stats;
> --
> 2.44.0
>
>

-- 
Marc-André Lureau
Re: [PATCH v13 03/13] virtio-gpu: Handle virtio_gpu_virgl_init() failure
Posted by Dmitry Osipenko 5 months, 2 weeks ago
On 6/4/24 17:21, Marc-André Lureau wrote:
>> @@ -65,13 +70,21 @@ static void virtio_gpu_gl_handle_ctrl(VirtIODevice
>> *vdev, VirtQueue *vq)
>>          return;
>>      }
>>
>> -    if (!gl->renderer_inited) {
>> -        virtio_gpu_virgl_init(g);
>> -        gl->renderer_inited = true;
>> -    }
>> -    if (gl->renderer_reset) {
>> -        gl->renderer_reset = false;
>> +    switch (gl->renderer_state) {
>> +    case RS_RESET:
>>          virtio_gpu_virgl_reset(g);
>> +        /* fallthrough */
>> +    case RS_START:
>> +        if (virtio_gpu_virgl_init(g)) {
>> +            gl->renderer_state = RS_INIT_FAILED;
>> +        } else {
>> +            gl->renderer_state = RS_INITED;
>> +        }
>> +        break;
>> +    case RS_INIT_FAILED:
>> +        return;
>> +    case RS_INITED:
>> +        break;
>>      }
>>
>>
> This still lets it go through the cmd processing after setting
> gl->renderer_state = RS_INIT_FAILED, the first time.

Good catch, thanks!

-- 
Best regards,
Dmitry


Re: [PATCH v13 03/13] virtio-gpu: Handle virtio_gpu_virgl_init() failure
Posted by Akihiko Odaki 5 months, 3 weeks ago
On 2024/05/27 12:02, Dmitry Osipenko wrote:
> virtio_gpu_virgl_init() may fail, leading to a further Qemu crash
> because Qemu assumes it never fails. Check virtio_gpu_virgl_init()
> return code and don't execute virtio commands on error. Failed
> virtio_gpu_virgl_init() will result in a timed out virtio commands
> for a guest OS.
> 
> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> ---
>   hw/display/virtio-gpu-gl.c     | 29 +++++++++++++++++++++--------
>   include/hw/virtio/virtio-gpu.h | 11 +++++++++--
>   2 files changed, 30 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
> index e06be60dfbfc..38a2b1bd3916 100644
> --- a/hw/display/virtio-gpu-gl.c
> +++ b/hw/display/virtio-gpu-gl.c
> @@ -29,9 +29,14 @@ static void virtio_gpu_gl_update_cursor_data(VirtIOGPU *g,
>                                                struct virtio_gpu_scanout *s,
>                                                uint32_t resource_id)
>   {
> +    VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
>       uint32_t width, height;
>       uint32_t pixels, *data;
>   
> +    if (gl->renderer_state != RS_INITED) {
> +        return;
> +    }
> +
>       data = virgl_renderer_get_cursor_data(resource_id, &width, &height);
>       if (!data) {
>           return;
> @@ -65,13 +70,21 @@ static void virtio_gpu_gl_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>           return;
>       }
>   
> -    if (!gl->renderer_inited) {
> -        virtio_gpu_virgl_init(g);
> -        gl->renderer_inited = true;
> -    }
> -    if (gl->renderer_reset) {
> -        gl->renderer_reset = false;
> +    switch (gl->renderer_state) {
> +    case RS_RESET:
>           virtio_gpu_virgl_reset(g);
> +        /* fallthrough */
> +    case RS_START:
> +        if (virtio_gpu_virgl_init(g)) {
> +            gl->renderer_state = RS_INIT_FAILED;
> +        } else {
> +            gl->renderer_state = RS_INITED;
> +        }
> +        break;
> +    case RS_INIT_FAILED:
> +        return;
> +    case RS_INITED:
> +        break;
>       }
>   
>       cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command));
> @@ -98,9 +111,9 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
>        * GL functions must be called with the associated GL context in main
>        * thread, and when the renderer is unblocked.
>        */
> -    if (gl->renderer_inited && !gl->renderer_reset) {
> +    if (gl->renderer_state == RS_INITED) {
>           virtio_gpu_virgl_reset_scanout(g);
> -        gl->renderer_reset = true;
> +        gl->renderer_state = RS_RESET;
>       }
>   }
>   
> diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
> index 7ff989a45a5c..6e71d799e5da 100644
> --- a/include/hw/virtio/virtio-gpu.h
> +++ b/include/hw/virtio/virtio-gpu.h
> @@ -224,11 +224,18 @@ struct VirtIOGPUClass {
>                                Error **errp);
>   };
>   
> +/* VirtIOGPUGL renderer states */
> +typedef enum {
> +    RS_START,       /* starting state */
> +    RS_INIT_FAILED, /* failed initialisation */

Is the distinction between RS_START and RS_INIT_FAILED really necessary?
Re: [PATCH v13 03/13] virtio-gpu: Handle virtio_gpu_virgl_init() failure
Posted by Dmitry Osipenko 5 months, 3 weeks ago
On 6/2/24 08:34, Akihiko Odaki wrote:
>> +typedef enum {
>> +    RS_START,       /* starting state */
>> +    RS_INIT_FAILED, /* failed initialisation */
> 
> Is the distinction between RS_START and RS_INIT_FAILED really necessary?

The state stays in RS_INIT_FAILED once was failed until virtio-gpu is
reset, re-initializing virglrenderer isn't allowed in this state.

The RS_START state allows to initialize virglrenderer and moves to
either RS_INITED or RS_INIT_FAILED state after initialization.

The distinction is necessary

-- 
Best regards,
Dmitry


Re: [PATCH v13 03/13] virtio-gpu: Handle virtio_gpu_virgl_init() failure
Posted by Akihiko Odaki 5 months, 3 weeks ago
On 2024/06/03 14:26, Dmitry Osipenko wrote:
> On 6/2/24 08:34, Akihiko Odaki wrote:
>>> +typedef enum {
>>> +    RS_START,       /* starting state */
>>> +    RS_INIT_FAILED, /* failed initialisation */
>>
>> Is the distinction between RS_START and RS_INIT_FAILED really necessary?
> 
> The state stays in RS_INIT_FAILED once was failed until virtio-gpu is
> reset, re-initializing virglrenderer isn't allowed in this state.

Can you elaborate more? Why isn't re-initializing allowed?

Re: [PATCH v13 03/13] virtio-gpu: Handle virtio_gpu_virgl_init() failure
Posted by Dmitry Osipenko 5 months, 2 weeks ago
On 6/3/24 08:44, Akihiko Odaki wrote:
> On 2024/06/03 14:26, Dmitry Osipenko wrote:
>> On 6/2/24 08:34, Akihiko Odaki wrote:
>>>> +typedef enum {
>>>> +    RS_START,       /* starting state */
>>>> +    RS_INIT_FAILED, /* failed initialisation */
>>>
>>> Is the distinction between RS_START and RS_INIT_FAILED really necessary?
>>
>> The state stays in RS_INIT_FAILED once was failed until virtio-gpu is
>> reset, re-initializing virglrenderer isn't allowed in this state.
> 
> Can you elaborate more? Why isn't re-initializing allowed?

In practice, if virglrenderer initialization failed once, it will fail
second time. Otherwise we will be retrying to init endlessly because
guest won't stop sending virgl commands even if they all are timing out.
Each initialization failure produces a error msg.

-- 
Best regards,
Dmitry


Re: [PATCH v13 03/13] virtio-gpu: Handle virtio_gpu_virgl_init() failure
Posted by Akihiko Odaki 5 months, 2 weeks ago
On 2024/06/10 4:02, Dmitry Osipenko wrote:
> On 6/3/24 08:44, Akihiko Odaki wrote:
>> On 2024/06/03 14:26, Dmitry Osipenko wrote:
>>> On 6/2/24 08:34, Akihiko Odaki wrote:
>>>>> +typedef enum {
>>>>> +    RS_START,       /* starting state */
>>>>> +    RS_INIT_FAILED, /* failed initialisation */
>>>>
>>>> Is the distinction between RS_START and RS_INIT_FAILED really necessary?
>>>
>>> The state stays in RS_INIT_FAILED once was failed until virtio-gpu is
>>> reset, re-initializing virglrenderer isn't allowed in this state.
>>
>> Can you elaborate more? Why isn't re-initializing allowed?
> 
> In practice, if virglrenderer initialization failed once, it will fail
> second time. Otherwise we will be retrying to init endlessly because
> guest won't stop sending virgl commands even if they all are timing out.
> Each initialization failure produces a error msg.
>
I see.

A better solution is to add a new function to GraphicHwOps to call back 
after initializating the displays and before starting the guest. You can 
do virgl initialization in such a function, and exit(1) if the 
initialization fails because the guest has not started yet, saving this 
enum. I don't require you to make such a change however; this is not a 
regression of your patches so you have no obligation to fix it.

Re: [PATCH v13 03/13] virtio-gpu: Handle virtio_gpu_virgl_init() failure
Posted by Dmitry Osipenko 5 months, 1 week ago
On 6/10/24 06:38, Akihiko Odaki wrote:
> On 2024/06/10 4:02, Dmitry Osipenko wrote:
>> On 6/3/24 08:44, Akihiko Odaki wrote:
>>> On 2024/06/03 14:26, Dmitry Osipenko wrote:
>>>> On 6/2/24 08:34, Akihiko Odaki wrote:
>>>>>> +typedef enum {
>>>>>> +    RS_START,       /* starting state */
>>>>>> +    RS_INIT_FAILED, /* failed initialisation */
>>>>>
>>>>> Is the distinction between RS_START and RS_INIT_FAILED really
>>>>> necessary?
>>>>
>>>> The state stays in RS_INIT_FAILED once was failed until virtio-gpu is
>>>> reset, re-initializing virglrenderer isn't allowed in this state.
>>>
>>> Can you elaborate more? Why isn't re-initializing allowed?
>>
>> In practice, if virglrenderer initialization failed once, it will fail
>> second time. Otherwise we will be retrying to init endlessly because
>> guest won't stop sending virgl commands even if they all are timing out.
>> Each initialization failure produces a error msg.
>>
> I see.
> 
> A better solution is to add a new function to GraphicHwOps to call back
> after initializating the displays and before starting the guest. You can
> do virgl initialization in such a function, and exit(1) if the
> initialization fails because the guest has not started yet, saving this
> enum. I don't require you to make such a change however; this is not a
> regression of your patches so you have no obligation to fix it.

I'll keep this idea for a follow up patch, thanks! It will take me some
extra time to look through the display code, making sure that callback
is added properly and nothing is missed.

-- 
Best regards,
Dmitry