[PATCH v11 04/10] virtio-gpu: Support asynchronous fencing

Dmitry Osipenko posted 10 patches 11 months ago
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>
There is a newer version of this series
[PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
Posted by Dmitry Osipenko 11 months ago
Support the asynchronous fencing feature of virglrenderer. It allows QEMU
to handle a fence as soon as it is signalled instead of periodically
polling the fence status. This feature is required for enabling DRM
context support in QEMU because the legacy fencing mode isn't supported
for DRM contexts in virglrenderer.

Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
---
 hw/display/virtio-gpu-gl.c     |   3 +
 hw/display/virtio-gpu-virgl.c  | 147 +++++++++++++++++++++++++++++++--
 include/hw/virtio/virtio-gpu.h |  13 +++
 3 files changed, 154 insertions(+), 9 deletions(-)

diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
index 683fad3bf8a8..d9bb50ac1d4a 100644
--- a/hw/display/virtio-gpu-gl.c
+++ b/hw/display/virtio-gpu-gl.c
@@ -169,6 +169,9 @@ static void virtio_gpu_gl_device_unrealize(DeviceState *qdev)
     if (gl->renderer_state >= RS_INITED) {
 #if VIRGL_VERSION_MAJOR >= 1
         qemu_bh_delete(gl->cmdq_resume_bh);
+
+        virtio_gpu_virgl_reset_async_fences(g);
+        qemu_bh_delete(gl->async_fence_bh);
 #endif
         if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
             timer_free(gl->print_stats);
diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
index 2eb6aaab4e84..ee896eced67c 100644
--- a/hw/display/virtio-gpu-virgl.c
+++ b/hw/display/virtio-gpu-virgl.c
@@ -871,6 +871,7 @@ static void virgl_cmd_set_scanout_blob(VirtIOGPU *g,
 void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
                                       struct virtio_gpu_ctrl_command *cmd)
 {
+    VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
     bool cmd_suspended = false;
     int ret;
 
@@ -972,15 +973,34 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
 
     trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
 
-    /*
-     * Unlike other virglrenderer functions, this one returns a positive
-     * error code.
-     */
-    ret = virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, 0);
-    if (ret) {
-        qemu_log_mask(LOG_GUEST_ERROR,
-                      "%s: virgl_renderer_create_fence error: %s",
-                      __func__, strerror(ret));
+    if (gl->context_fence_enabled &&
+        (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX)) {
+#if VIRGL_VERSION_MAJOR >= 1
+        uint32_t flags = 0;
+
+        ret = virgl_renderer_context_create_fence(cmd->cmd_hdr.ctx_id, flags,
+                                                  cmd->cmd_hdr.ring_idx,
+                                                  cmd->cmd_hdr.fence_id);
+        if (ret) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: virgl_renderer_context_create_fence error: %s",
+                          __func__, strerror(-ret));
+        }
+#else
+        /* gl->context_fence_enabled cannot be set with older virglrenderer */
+        g_assert_not_reached();
+#endif
+    } else {
+        /*
+         * Unlike other virglrenderer functions, this one returns a positive
+         * error code.
+         */
+        ret = virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, 0);
+        if (ret) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: virgl_renderer_create_fence error: %s",
+                          __func__, strerror(ret));
+        }
     }
 }
 
@@ -1008,6 +1028,102 @@ static void virgl_write_fence(void *opaque, uint32_t fence)
     }
 }
 
+void virtio_gpu_virgl_reset_async_fences(VirtIOGPU *g)
+{
+    struct virtio_gpu_virgl_context_fence *f;
+    VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
+
+    while (!QSLIST_EMPTY(&gl->async_fenceq)) {
+        f = QSLIST_FIRST(&gl->async_fenceq);
+
+        QSLIST_REMOVE_HEAD(&gl->async_fenceq, next);
+
+        g_free(f);
+    }
+}
+
+#if VIRGL_VERSION_MAJOR >= 1
+static void virtio_gpu_virgl_async_fence_bh(void *opaque)
+{
+    QSLIST_HEAD(, virtio_gpu_virgl_context_fence) async_fenceq;
+    struct virtio_gpu_ctrl_command *cmd, *tmp;
+    struct virtio_gpu_virgl_context_fence *f;
+    VirtIOGPU *g = opaque;
+    VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
+
+    QSLIST_MOVE_ATOMIC(&async_fenceq, &gl->async_fenceq);
+
+    while (!QSLIST_EMPTY(&async_fenceq)) {
+        f = QSLIST_FIRST(&async_fenceq);
+
+        QSLIST_REMOVE_HEAD(&async_fenceq, next);
+
+        QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) {
+            /*
+             * the guest can end up emitting fences out of order
+             * so we should check all fenced cmds not just the first one.
+             */
+            if (cmd->cmd_hdr.fence_id > f->fence_id) {
+                continue;
+            }
+            if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) {
+                if (cmd->cmd_hdr.ring_idx != f->ring_idx) {
+                    continue;
+                }
+                if (cmd->cmd_hdr.ctx_id != f->ctx_id) {
+                    continue;
+                }
+            } else if (f->ring_idx >= 0) {
+                /* ctx0 GL-query fences don't have ring info */
+                continue;
+            }
+            virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
+            QTAILQ_REMOVE(&g->fenceq, cmd, next);
+            g_free(cmd);
+        }
+
+        trace_virtio_gpu_fence_resp(f->fence_id);
+        g_free(f);
+        g->inflight--;
+        if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
+            trace_virtio_gpu_dec_inflight_fences(g->inflight);
+        }
+    }
+}
+
+static void
+virtio_gpu_virgl_push_async_fence(VirtIOGPU *g, uint32_t ctx_id,
+                                  int64_t ring_idx, uint64_t fence_id)
+{
+    struct virtio_gpu_virgl_context_fence *f;
+    VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
+
+    f = g_new(struct virtio_gpu_virgl_context_fence, 1);
+    f->ctx_id = ctx_id;
+    f->ring_idx = ring_idx;
+    f->fence_id = fence_id;
+
+    QSLIST_INSERT_HEAD_ATOMIC(&gl->async_fenceq, f, next);
+
+    qemu_bh_schedule(gl->async_fence_bh);
+}
+
+static void virgl_write_async_fence(void *opaque, uint32_t fence)
+{
+    VirtIOGPU *g = opaque;
+
+    virtio_gpu_virgl_push_async_fence(g, 0, -1, fence);
+}
+
+static void virgl_write_async_context_fence(void *opaque, uint32_t ctx_id,
+                                            uint32_t ring_idx, uint64_t fence)
+{
+    VirtIOGPU *g = opaque;
+
+    virtio_gpu_virgl_push_async_fence(g, ctx_id, ring_idx, fence);
+}
+#endif
+
 static virgl_renderer_gl_context
 virgl_create_context(void *opaque, int scanout_idx,
                      struct virgl_renderer_gl_ctx_param *params)
@@ -1095,6 +1211,8 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g)
         dpy_gfx_replace_surface(g->parent_obj.scanout[i].con, NULL);
         dpy_gl_scanout_disable(g->parent_obj.scanout[i].con);
     }
+
+    virtio_gpu_virgl_reset_async_fences(g);
 }
 
 void virtio_gpu_virgl_reset(VirtIOGPU *g)
@@ -1112,6 +1230,13 @@ int virtio_gpu_virgl_init(VirtIOGPU *g)
     if (qemu_egl_display) {
         virtio_gpu_3d_cbs.version = 4;
         virtio_gpu_3d_cbs.get_egl_display = virgl_get_egl_display;
+#if VIRGL_VERSION_MAJOR >= 1
+        virtio_gpu_3d_cbs.write_fence         = virgl_write_async_fence;
+        virtio_gpu_3d_cbs.write_context_fence = virgl_write_async_context_fence;
+        flags |= VIRGL_RENDERER_ASYNC_FENCE_CB;
+        flags |= VIRGL_RENDERER_THREAD_SYNC;
+        gl->context_fence_enabled = true;
+#endif
     }
 #endif
 #ifdef VIRGL_RENDERER_D3D11_SHARE_TEXTURE
@@ -1145,6 +1270,10 @@ int virtio_gpu_virgl_init(VirtIOGPU *g)
     gl->cmdq_resume_bh = aio_bh_new(qemu_get_aio_context(),
                                     virtio_gpu_virgl_resume_cmdq_bh,
                                     g);
+
+    gl->async_fence_bh = aio_bh_new(qemu_get_aio_context(),
+                                    virtio_gpu_virgl_async_fence_bh,
+                                    g);
 #endif
 
     return 0;
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index a42957c4e2cc..bd2cccdc60d7 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -230,6 +230,13 @@ struct VirtIOGPUClass {
                              Error **errp);
 };
 
+struct virtio_gpu_virgl_context_fence {
+    uint32_t ctx_id;
+    int64_t ring_idx;
+    uint64_t fence_id;
+    QSLIST_ENTRY(virtio_gpu_virgl_context_fence) next;
+};
+
 /* VirtIOGPUGL renderer states */
 typedef enum {
     RS_START,       /* starting state */
@@ -247,6 +254,11 @@ struct VirtIOGPUGL {
     QEMUTimer *print_stats;
 
     QEMUBH *cmdq_resume_bh;
+
+    QEMUBH *async_fence_bh;
+    QSLIST_HEAD(, virtio_gpu_virgl_context_fence) async_fenceq;
+
+    bool context_fence_enabled;
 };
 
 struct VhostUserGPU {
@@ -376,5 +388,6 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
 void virtio_gpu_virgl_reset(VirtIOGPU *g);
 int virtio_gpu_virgl_init(VirtIOGPU *g);
 GArray *virtio_gpu_virgl_get_capsets(VirtIOGPU *g);
+void virtio_gpu_virgl_reset_async_fences(VirtIOGPU *g);
 
 #endif
-- 
2.48.1


Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
Posted by Cong Liu 10 months ago
I discovered that on an ARM64 environment, the 'virtio-gpu: Support asynchronous fencing' patch causes the virtual machine GUI to fail to display. Rolling back this patch and using virgl allows the virtual machine to start normally. When the VM screen is black, I can see some errors in QEMU. I used QEMU's -serial stdio to enter the virtual machine's command line console but didn't see any errors inside the VM - the graphical interface seems to be stuck. I would greatly appreciate any suggestions regarding effective troubleshooting methods or specific areas I should investigate to resolve this issue.

Here's my software and hardware environment:
- host and guest are ubuntu 24.04
- QEMU: https://gitlab.freedesktop.org/digetx/qemu.git native-context-v11 branch
- virglrenderer: latest main branch 08eb12d00711370002e8f8fa6d620df9b79f9e27
- Mesa: Mesa 25.0~git2504031308.ff386e~oibaf~n (git-ff386eb 2025-04-03 noble-oibaf-ppa)
- Kernel: Linux d3000 6.14.1-061401-generic #202504071048
- GPU: Radeon RX 6600/6600 XT/6600M
- CPU: phytium D3000 aarch64

Here's the command I'm using to run the virtual machine, which displays a black frame with "Display output is not active" and fails to start the graphical interface normally:

    phytium@d3000:~/working/qemu$ /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl -display gtk,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0

    (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
    (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
    (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
    (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
    (qemu:46029): Gdk-WARNING **: 16:43:53.716: eglMakeCurrent failed

When using SDL, the error messages are slightly different:

    phytium@d3000:~/working/qemu$ /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl -display sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0

    vrend_renderer_fill_caps: Entering with stale GL error: 1286
Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
Posted by Dmitry Osipenko 10 months ago
10.04.2025 12:54, Cong Liu пишет:
> I discovered that on an ARM64 environment, the 'virtio-gpu: Support asynchronous fencing' patch causes the virtual machine GUI to fail to display. Rolling back this patch and using virgl allows the virtual machine to start normally. When the VM screen is black, I can see some errors in QEMU. I used QEMU's -serial stdio to enter the virtual machine's command line console but didn't see any errors inside the VM - the graphical interface seems to be stuck. I would greatly appreciate any suggestions regarding effective troubleshooting methods or specific areas I should investigate to resolve this issue.
> 
> Here's my software and hardware environment:
> - host and guest are ubuntu 24.04
> - QEMU: https://gitlab.freedesktop.org/digetx/qemu.git native-context-v11 branch
> - virglrender: latest main branch 08eb12d00711370002e8f8fa6d620df9b79f9e27
> - Mesa: Mesa 25.0~git2504031308.ff386e~oibaf~n (git-ff386eb 2025-04-03 noble-oibaf-ppa)
> - Kernel: Linux d3000 6.14.1-061401-generic #202504071048
> - GPU: Radeon RX 6600/6600 XT/6600M
> - CPU: phytium D3000 aarch64
> 
> Here's the command I'm using to run the virtual machine, which displays a black frame with "Display output is not active" and fails to start the graphical interface normally:
> 
>     phytium@d3000:~/working/qemu$ /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl -display gtk,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
> 
>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>     (qemu:46029): Gdk-WARNING **: 16:43:53.716: eglMakeCurrent failed
> 
> When using SDL, the error messages are slightly different:
> 
>     phytium@d3000:~/working/qemu$ /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl -display sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
> 
>     vrend_renderer_fill_caps: Entering with stale GL error: 1286
> 

Hi,

1. Please make sure that you're not only building QEMU against your
virglrenderer version, but also setting LD_LIBRARY_PATH properly at
runtime. It is best to remove the system version of virglrenderer if unsure.

2. Can you reproduce this problem using tcg instead of kvm?

-- 
Best regards,
Dmitry

Re: Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
Posted by 刘聪 10 months ago


> -----Original Messages-----
> From: "Dmitry Osipenko" <dmitry.osipenko@collabora.com>
> Send time:Friday, 04/11/2025 05:59:11
> To: "Cong Liu" <liucong2565@phytium.com.cn>
> Cc: Jiqian.Chen@amd.com, akihiko.odaki@daynix.com, alex.bennee@linaro.org, alexander.deucher@amd.com, christian.koenig@amd.com, gert.wollny@collabora.com, gurchetansingh@chromium.org, hi@alyssa.is, honglei1.huang@amd.com, julia.zhang@amd.com, kraxel@redhat.com, marcandre.lureau@redhat.com, mst@redhat.com, pbonzini@redhat.com, philmd@linaro.org, pierre-eric.pelloux-prayer@amd.com, qemu-devel@nongnu.org, ray.huang@amd.com, robdclark@gmail.com, roger.pau@citrix.com, slp@redhat.com, stefano.stabellini@amd.com, xenia.ragiadakou@amd.com, zzyiwei@chromium.org
> Subject: Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
> 
> 10.04.2025 12:54, Cong Liu пишет:
> > I discovered that on an ARM64 environment, the 'virtio-gpu: Support asynchronous fencing' patch causes the virtual machine GUI to fail to display. Rolling back this patch and using virgl allows the virtual machine to start normally. When the VM screen is black, I can see some errors in QEMU. I used QEMU's -serial stdio to enter the virtual machine's command line console but didn't see any errors inside the VM - the graphical interface seems to be stuck. I would greatly appreciate any suggestions regarding effective troubleshooting methods or specific areas I should investigate to resolve this issue.
> > 
> > Here's my software and hardware environment:
> > - host and guest are ubuntu 24.04
> > - QEMU: https://gitlab.freedesktop.org/digetx/qemu.git native-context-v11 branch
> > - virglrender: latest main branch 08eb12d00711370002e8f8fa6d620df9b79f9e27
> > - Mesa: Mesa 25.0~git2504031308.ff386e~oibaf~n (git-ff386eb 2025-04-03 noble-oibaf-ppa)
> > - Kernel: Linux d3000 6.14.1-061401-generic #202504071048
> > - GPU: Radeon RX 6600/6600 XT/6600M
> > - CPU: phytium D3000 aarch64
> > 
> > Here's the command I'm using to run the virtual machine, which displays a black frame with "Display output is not active" and fails to start the graphical interface normally:
> > 
> >     phytium@d3000:~/working/qemu$ /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl -display gtk,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
> > 
> >     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
> >     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
> >     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
> >     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
> >     (qemu:46029): Gdk-WARNING **: 16:43:53.716: eglMakeCurrent failed
> > 
> > When using SDL, the error messages are slightly different:
> > 
> >     phytium@d3000:~/working/qemu$ /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl -display sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
> > 
> >     vrend_renderer_fill_caps: Entering with stale GL error: 1286
> > 
> 
> Hi,
> 
> 1. Please make sure that you're not only building QEMU against your
> virglrenderer version, but also setting LD_LIBRARY_PATH properly at
> runtime. Best to remove system version of virglrenderer if unsure,

I built and installed virglrenderer with the --prefix=/usr option, so
 it replaces the system version as expected.

> 
> 2. Can you reproduce this problem using tcg instead of kvm?
> 

Yes, changing the QEMU command from '--machine virt,accel=kvm -cpu host' to
'--machine virt -cpu max' still reproduces this problem.
> -- 
> Best regards,
> Dmitry

diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
index f6df9dcb..f6e06842 100644
--- a/src/vrend_renderer.c
+++ b/src/vrend_renderer.c
@@ -12808,7 +12808,7 @@ void vrend_renderer_fill_caps(uint32_t set, uint32_t version,
                               union virgl_caps *caps)
 {
    int gl_ver, gles_ver;
-   GLenum err;
+   GLenum err = GL_NO_ERROR;
    bool fill_capset2 = false;
 
    if (!caps)

phytium@d3000:~/working/qemu$ git log --oneline  -n 10
e0286f56c8 (HEAD -> native-context-v11, origin/native-context-v11) Revert "amd_iommu: Add support for pass though mode"
d6e9eb0f0d docs/system: virtio-gpu: Document host/guest requirements
55db821ea5 docs/system: virtio-gpu: Update Venus link
003940db9a docs/system: virtio-gpu: Add link to Mesa VirGL doc
7674e82755 ui/gtk: Don't disable scanout when display is refreshed
712fd024e3 ui/sdl2: Don't disable scanout when display is refreshed
9003da356f virtio-gpu: Support DRM native context
e2ff4f4a48 virtio-gpu: Support asynchronous fencing
25458c7625 virtio-gpu: Handle virgl fence creation errors

I tried initializing GLenum err = GL_NO_ERROR in vrend_renderer_fill_caps, but it doesn’t seem to resolve the “Entering with stale GL error: 1286” message. However, this error might not be directly related to the VM black screen issue. I noticed that even when the VM was working correctly—specifically when I reset to commit 25458c7625—the same GL error still appeared.

Best regards,
liucong


信息安全声明:本邮件包含信息归发件人所在组织所有,发件人所在组织对该邮件拥有所有权利。请接收者注意保密,未经发件人书面许可,不得向任何第三方组织和个人透露本邮件所含信息。
Information Security Notice: The information contained in this mail is solely property of the sender's organization.This mail communication is confidential.Recipients named above are obligated to maintain secrecy and are not permitted to disclose the contents of this communication to others.
Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
Posted by Dmitry Osipenko 10 months ago
On 4/11/25 04:42, 刘聪 wrote:
> 
> 
> 
>> -----Original Messages-----
>> From: "Dmitry Osipenko" <dmitry.osipenko@collabora.com>
>> Send time:Friday, 04/11/2025 05:59:11
>> To: "Cong Liu" <liucong2565@phytium.com.cn>
>> Cc: Jiqian.Chen@amd.com, akihiko.odaki@daynix.com, alex.bennee@linaro.org, alexander.deucher@amd.com, christian.koenig@amd.com, gert.wollny@collabora.com, gurchetansingh@chromium.org, hi@alyssa.is, honglei1.huang@amd.com, julia.zhang@amd.com, kraxel@redhat.com, marcandre.lureau@redhat.com, mst@redhat.com, pbonzini@redhat.com, philmd@linaro.org, pierre-eric.pelloux-prayer@amd.com, qemu-devel@nongnu.org, ray.huang@amd.com, robdclark@gmail.com, roger.pau@citrix.com, slp@redhat.com, stefano.stabellini@amd.com, xenia.ragiadakou@amd.com, zzyiwei@chromium.org
>> Subject: Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
>>
>> 10.04.2025 12:54, Cong Liu пишет:
>>> I discovered that on an ARM64 environment, the 'virtio-gpu: Support asynchronous fencing' patch causes the virtual machine GUI to fail to display. Rolling back this patch and using virgl allows the virtual machine to start normally. When the VM screen is black, I can see some errors in QEMU. I used QEMU's -serial stdio to enter the virtual machine's command line console but didn't see any errors inside the VM - the graphical interface seems to be stuck. I would greatly appreciate any suggestions regarding effective troubleshooting methods or specific areas I should investigate to resolve this issue.
>>>
>>> Here's my software and hardware environment:
>>> - host and guest are ubuntu 24.04
>>> - QEMU: https://gitlab.freedesktop.org/digetx/qemu.git native-context-v11 branch
>>> - virglrender: latest main branch 08eb12d00711370002e8f8fa6d620df9b79f9e27
>>> - Mesa: Mesa 25.0~git2504031308.ff386e~oibaf~n (git-ff386eb 2025-04-03 noble-oibaf-ppa)
>>> - Kernel: Linux d3000 6.14.1-061401-generic #202504071048
>>> - GPU: Radeon RX 6600/6600 XT/6600M
>>> - CPU: phytium D3000 aarch64
>>>
>>> Here's the command I'm using to run the virtual machine, which displays a black frame with "Display output is not active" and fails to start the graphical interface normally:
>>>
>>>     phytium@d3000:~/working/qemu$ /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl -display gtk,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
>>>
>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.716: eglMakeCurrent failed
>>>
>>> When using SDL, the error messages are slightly different:
>>>
>>>     phytium@d3000:~/working/qemu$ /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl -display sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
>>>
>>>     vrend_renderer_fill_caps: Entering with stale GL error: 1286
>>>
>>
>> Hi,
>>
>> 1. Please make sure that you're not only building QEMU against your
>> virglrenderer version, but also setting LD_LIBRARY_PATH properly at
>> runtime. Best to remove system version of virglrenderer if unsure,
> 
> I built and installed virglrenderer with the --prefix=/usr option, so
>  it replaces the system version as expected.
> 
>>
>> 2. Can you reproduce this problem using tcg instead of kvm?
>>
> 
>  yes, change qemu command '--machine virt,accel=kvm -cpu host' to
> '--machine virt -cpu max' can reproduce this problem. 
>> -- 
>> Best regards,
>> Dmitry
> 
> diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
> index f6df9dcb..f6e06842 100644
> --- a/src/vrend_renderer.c
> +++ b/src/vrend_renderer.c
> @@ -12808,7 +12808,7 @@ void vrend_renderer_fill_caps(uint32_t set, uint32_t version,
>                                union virgl_caps *caps)
>  {
>     int gl_ver, gles_ver;
> -   GLenum err;
> +   GLenum err = GL_NO_ERROR;
>     bool fill_capset2 = false;
>  
>     if (!caps)
> 
> phytium@d3000:~/working/qemu$ git log --oneline  -n 10
> e0286f56c8 (HEAD -> native-context-v11, origin/native-context-v11) Revert "amd_iommu: Add support for pass though mode"
> d6e9eb0f0d docs/system: virtio-gpu: Document host/guest requirements
> 55db821ea5 docs/system: virtio-gpu: Update Venus link
> 003940db9a docs/system: virtio-gpu: Add link to Mesa VirGL doc
> 7674e82755 ui/gtk: Don't disable scanout when display is refreshed
> 712fd024e3 ui/sdl2: Don't disable scanout when display is refreshed
> 9003da356f virtio-gpu: Support DRM native context
> e2ff4f4a48 virtio-gpu: Support asynchronous fencing
> 25458c7625 virtio-gpu: Handle virgl fence creation errors
> 
> I tried initializing GLenum err = GL_NO_ERROR in vrend_renderer_fill_caps, but it doesn’t seem to resolve the “Entering with stale GL error: 1286” message. However, this error might not be directly related to the VM black screen issue. I noticed that even when the VM was working correctly—specifically when I reset to commit 25458c7625—the same GL error still appeared.

Thanks for the report. I confirm that something is wrong with virgl when
async fencing is used. I don't see this GL 1286 error, but I am getting a
lockup on an ARM VM and with one of my new x64 VM setups. I will
investigate further and report back here.

-- 
Best regards,
Dmitry

Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
Posted by Dmitry Osipenko 9 months, 2 weeks ago
On 4/14/25 17:47, Dmitry Osipenko wrote:
> On 4/11/25 04:42, 刘聪 wrote:
>>
>>
>>
>>> -----Original Messages-----
>>> From: "Dmitry Osipenko" <dmitry.osipenko@collabora.com>
>>> Send time:Friday, 04/11/2025 05:59:11
>>> To: "Cong Liu" <liucong2565@phytium.com.cn>
>>> Cc: Jiqian.Chen@amd.com, akihiko.odaki@daynix.com, alex.bennee@linaro.org, alexander.deucher@amd.com, christian.koenig@amd.com, gert.wollny@collabora.com, gurchetansingh@chromium.org, hi@alyssa.is, honglei1.huang@amd.com, julia.zhang@amd.com, kraxel@redhat.com, marcandre.lureau@redhat.com, mst@redhat.com, pbonzini@redhat.com, philmd@linaro.org, pierre-eric.pelloux-prayer@amd.com, qemu-devel@nongnu.org, ray.huang@amd.com, robdclark@gmail.com, roger.pau@citrix.com, slp@redhat.com, stefano.stabellini@amd.com, xenia.ragiadakou@amd.com, zzyiwei@chromium.org
>>> Subject: Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
>>>
>>> 10.04.2025 12:54, Cong Liu пишет:
>>>> I discovered that on an ARM64 environment, the 'virtio-gpu: Support asynchronous fencing' patch causes the virtual machine GUI to fail to display. Rolling back this patch and using virgl allows the virtual machine to start normally. When the VM screen is black, I can see some errors in QEMU. I used QEMU's -serial stdio to enter the virtual machine's command line console but didn't see any errors inside the VM - the graphical interface seems to be stuck. I would greatly appreciate any suggestions regarding effective troubleshooting methods or specific areas I should investigate to resolve this issue.
>>>>
>>>> Here's my software and hardware environment:
>>>> - host and guest are ubuntu 24.04
>>>> - QEMU: https://gitlab.freedesktop.org/digetx/qemu.git native-context-v11 branch
>>>> - virglrender: latest main branch 08eb12d00711370002e8f8fa6d620df9b79f9e27
>>>> - Mesa: Mesa 25.0~git2504031308.ff386e~oibaf~n (git-ff386eb 2025-04-03 noble-oibaf-ppa)
>>>> - Kernel: Linux d3000 6.14.1-061401-generic #202504071048
>>>> - GPU: Radeon RX 6600/6600 XT/6600M
>>>> - CPU: phytium D3000 aarch64
>>>>
>>>> Here's the command I'm using to run the virtual machine, which displays a black frame with "Display output is not active" and fails to start the graphical interface normally:
>>>>
>>>>     phytium@d3000:~/working/qemu$ /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl -display gtk,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
>>>>
>>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.716: eglMakeCurrent failed
>>>>
>>>> When using SDL, the error messages are slightly different:
>>>>
>>>>     phytium@d3000:~/working/qemu$ /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl -display sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
>>>>
>>>>     vrend_renderer_fill_caps: Entering with stale GL error: 1286
>>>>
>>>
>>> Hi,
>>>
>>> 1. Please make sure that you're not only building QEMU against your
>>> virglrenderer version, but also setting LD_LIBRARY_PATH properly at
>>> runtime. Best to remove system version of virglrenderer if unsure,
>>
>> I built and installed virglrenderer with the --prefix=/usr option, so
>>  it replaces the system version as expected.
>>
>>>
>>> 2. Can you reproduce this problem using tcg instead of kvm?
>>>
>>
>>  yes, change qemu command '--machine virt,accel=kvm -cpu host' to
>> '--machine virt -cpu max' can reproduce this problem. 
>>> -- 
>>> Best regards,
>>> Dmitry
>>
>> diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
>> index f6df9dcb..f6e06842 100644
>> --- a/src/vrend_renderer.c
>> +++ b/src/vrend_renderer.c
>> @@ -12808,7 +12808,7 @@ void vrend_renderer_fill_caps(uint32_t set, uint32_t version,
>>                                union virgl_caps *caps)
>>  {
>>     int gl_ver, gles_ver;
>> -   GLenum err;
>> +   GLenum err = GL_NO_ERROR;
>>     bool fill_capset2 = false;
>>  
>>     if (!caps)
>>
>> phytium@d3000:~/working/qemu$ git log --oneline  -n 10
>> e0286f56c8 (HEAD -> native-context-v11, origin/native-context-v11) Revert "amd_iommu: Add support for pass though mode"
>> d6e9eb0f0d docs/system: virtio-gpu: Document host/guest requirements
>> 55db821ea5 docs/system: virtio-gpu: Update Venus link
>> 003940db9a docs/system: virtio-gpu: Add link to Mesa VirGL doc
>> 7674e82755 ui/gtk: Don't disable scanout when display is refreshed
>> 712fd024e3 ui/sdl2: Don't disable scanout when display is refreshed
>> 9003da356f virtio-gpu: Support DRM native context
>> e2ff4f4a48 virtio-gpu: Support asynchronous fencing
>> 25458c7625 virtio-gpu: Handle virgl fence creation errors
>>
>> I tried initializing GLenum err = GL_NO_ERROR in vrend_renderer_fill_caps, but it doesn’t seem to resolve the “Entering with stale GL error: 1286” message. However, this error might not be directly related to the VM black screen issue. I noticed that even when the VM was working correctly—specifically when I reset to commit 25458c7625—the same GL error still appeared.
> 
> Thanks for the report. I confirm that something is wrong with virgl when
> async fencing is used. Don't have this GL 1286 error, but getting a
> lockup on ARM VM and with one of my new x64 VM setups. Will investigate
> further and report back here.

Hi, Cong. Please give [1] a test. It fixes the problem for me.

[1] https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1518

-- 
Best regards,
Dmitry

Re: Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
Posted by 刘聪 9 months, 2 weeks ago
Hi Dmitry,

The virglrenderer patch fixes the virgl issue, but the native context still fails to run on my machine.
I'm not sure if anyone has successfully run it on an ARM64 machine before.

When running with Venus, the virtual machine can successfully run vkcube. However, when using the native context, a KVM error is triggered. Both my guest and host kernels are already updated to version 6.14.

Here are the commands and error messages I encountered:

```
phytium@ubuntu:~/working/virglrenderer$ /opt/native-context-v11/bin/qemu-system-aarch64 --machine virt,accel=kvm,memory-backend=mem1 -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl,hostmem=4G,blob=on,venus=on -object memory-backend-memfd,id=mem1,size=4G  -display sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
phytium@ubuntu:~/working/virglrenderer$ 
phytium@ubuntu:~/working/virglrenderer$ /opt/native-context-v11/bin/qemu-system-aarch64 --machine virt,accel=kvm,memory-backend=mem1 -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl,hostmem=4G,blob=on,drm_native_context=on -object memory-backend-memfd,id=mem1,size=4G  -display sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
error: kvm run failed Bad address
 PC=0000e2bcbbf31ab0 X00=0000e2bc9c3ae060 X01=0000e2bc7c02af00
X02=0000000000000014 X03=0000e2bc9c3ae000 X04=0000e2bc7c02af14
X05=0000e2bc9c3ae074 X06=0000000000000200 X07=0000e2bc7c02a8f8
X08=00000000000000de X09=0000000000000200 X10=0000000000001000
X11=0000000000000004 X12=0000e2bc7c0000b0 X13=0000000000000001
X14=0000000000000020 X15=0000e2bc9e465f93 X16=0000e2bcad6a01f0
X17=0000e2bcbbf31a80 X18=0000000000000093 X19=0000000000000060
X20=0000000000000074 X21=0000e2bc9e46c5f0 X22=0000e2bc9c3ae000
X23=0000000000000074 X24=0000c02241da83b0 X25=0000c02241da85a0
X26=0000c02241da85a0 X27=0000000000000014 X28=0000e2bc9e46c5f0
X29=0000e2bc9e46c610 X30=0000e2bcac809c38  SP=0000e2bc9e46c510
PSTATE=20001000 --C- EL0t
phytium@ubuntu:~/working/virglrenderer$ uname -a
Linux ubuntu 6.14.1-061401-generic #202504071048 SMP PREEMPT_DYNAMIC Mon Apr  7 11:34:37 UTC 2025 aarch64 aarch64 aarch64 GNU/Linux
```

Best regards,
Cong

> -----Original Messages-----
> From: "Dmitry Osipenko" <dmitry.osipenko@collabora.com>
> Send time:Sunday, 04/27/2025 06:27:39
> To: 刘聪 <liucong2565@phytium.com.cn>
> Cc: Jiqian.Chen@amd.com, akihiko.odaki@daynix.com, alex.bennee@linaro.org, alexander.deucher@amd.com, christian.koenig@amd.com, gert.wollny@collabora.com, gurchetansingh@chromium.org, hi@alyssa.is, honglei1.huang@amd.com, julia.zhang@amd.com, kraxel@redhat.com, marcandre.lureau@redhat.com, mst@redhat.com, pbonzini@redhat.com, philmd@linaro.org, pierre-eric.pelloux-prayer@amd.com, qemu-devel@nongnu.org, ray.huang@amd.com, robdclark@gmail.com, roger.pau@citrix.com, slp@redhat.com, stefano.stabellini@amd.com, xenia.ragiadakou@amd.com, zzyiwei@chromium.org
> Subject: Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
> 
> On 4/14/25 17:47, Dmitry Osipenko wrote:
> > On 4/11/25 04:42, 刘聪 wrote:
> >>
> >>
> >>
> >>> -----Original Messages-----
> >>> From: "Dmitry Osipenko" <dmitry.osipenko@collabora.com>
> >>> Send time:Friday, 04/11/2025 05:59:11
> >>> To: "Cong Liu" <liucong2565@phytium.com.cn>
> >>> Cc: Jiqian.Chen@amd.com, akihiko.odaki@daynix.com, alex.bennee@linaro.org, alexander.deucher@amd.com, christian.koenig@amd.com, gert.wollny@collabora.com, gurchetansingh@chromium.org, hi@alyssa.is, honglei1.huang@amd.com, julia.zhang@amd.com, kraxel@redhat.com, marcandre.lureau@redhat.com, mst@redhat.com, pbonzini@redhat.com, philmd@linaro.org, pierre-eric.pelloux-prayer@amd.com, qemu-devel@nongnu.org, ray.huang@amd.com, robdclark@gmail.com, roger.pau@citrix.com, slp@redhat.com, stefano.stabellini@amd.com, xenia.ragiadakou@amd.com, zzyiwei@chromium.org
> >>> Subject: Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
> >>>
> >>> 10.04.2025 12:54, Cong Liu пишет:
> >>>> I discovered that on an ARM64 environment, the 'virtio-gpu: Support asynchronous fencing' patch causes the virtual machine GUI to fail to display. Rolling back this patch and using virgl allows the virtual machine to start normally. When the VM screen is black, I can see some errors in QEMU. I used QEMU's -serial stdio to enter the virtual machine's command line console but didn't see any errors inside the VM - the graphical interface seems to be stuck. I would greatly appreciate any suggestions regarding effective troubleshooting methods or specific areas I should investigate to resolve this issue.
> >>>>
> >>>> Here's my software and hardware environment:
> >>>> - host and guest are ubuntu 24.04
> >>>> - QEMU: https://gitlab.freedesktop.org/digetx/qemu.git native-context-v11 branch
> >>>> - virglrender: latest main branch 08eb12d00711370002e8f8fa6d620df9b79f9e27
> >>>> - Mesa: Mesa 25.0~git2504031308.ff386e~oibaf~n (git-ff386eb 2025-04-03 noble-oibaf-ppa)
> >>>> - Kernel: Linux d3000 6.14.1-061401-generic #202504071048
> >>>> - GPU: Radeon RX 6600/6600 XT/6600M
> >>>> - CPU: phytium D3000 aarch64
> >>>>
> >>>> Here's the command I'm using to run the virtual machine, which displays a black frame with "Display output is not active" and fails to start the graphical interface normally:
> >>>>
> >>>>     phytium@d3000:~/working/qemu$ /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl -display gtk,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
> >>>>
> >>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
> >>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
> >>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
> >>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
> >>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.716: eglMakeCurrent failed
> >>>>
> >>>> When using SDL, the error messages are slightly different:
> >>>>
> >>>>     phytium@d3000:~/working/qemu$ /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl -display sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
> >>>>
> >>>>     vrend_renderer_fill_caps: Entering with stale GL error: 1286
> >>>>
> >>>
> >>> Hi,
> >>>
> >>> 1. Please make sure that you're not only building QEMU against your
> >>> virglrenderer version, but also setting LD_LIBRARY_PATH properly at
> >>> runtime. Best to remove system version of virglrenderer if unsure,
> >>
> >> I built and installed virglrenderer with the --prefix=/usr option, so
> >>  it replaces the system version as expected.
> >>
> >>>
> >>> 2. Can you reproduce this problem using tcg instead of kvm?
> >>>
> >>
> >>  yes, change qemu command '--machine virt,accel=kvm -cpu host' to
> >> '--machine virt -cpu max' can reproduce this problem. 
> >>> -- 
> >>> Best regards,
> >>> Dmitry
> >>
> >> diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
> >> index f6df9dcb..f6e06842 100644
> >> --- a/src/vrend_renderer.c
> >> +++ b/src/vrend_renderer.c
> >> @@ -12808,7 +12808,7 @@ void vrend_renderer_fill_caps(uint32_t set, uint32_t version,
> >>                                union virgl_caps *caps)
> >>  {
> >>     int gl_ver, gles_ver;
> >> -   GLenum err;
> >> +   GLenum err = GL_NO_ERROR;
> >>     bool fill_capset2 = false;
> >>  
> >>     if (!caps)
> >>
> >> phytium@d3000:~/working/qemu$ git log --oneline  -n 10
> >> e0286f56c8 (HEAD -> native-context-v11, origin/native-context-v11) Revert "amd_iommu: Add support for pass though mode"
> >> d6e9eb0f0d docs/system: virtio-gpu: Document host/guest requirements
> >> 55db821ea5 docs/system: virtio-gpu: Update Venus link
> >> 003940db9a docs/system: virtio-gpu: Add link to Mesa VirGL doc
> >> 7674e82755 ui/gtk: Don't disable scanout when display is refreshed
> >> 712fd024e3 ui/sdl2: Don't disable scanout when display is refreshed
> >> 9003da356f virtio-gpu: Support DRM native context
> >> e2ff4f4a48 virtio-gpu: Support asynchronous fencing
> >> 25458c7625 virtio-gpu: Handle virgl fence creation errors
> >>
> >> I tried initializing GLenum err = GL_NO_ERROR in vrend_renderer_fill_caps, but it doesn’t seem to resolve the “Entering with stale GL error: 1286” message. However, this error might not be directly related to the VM black screen issue. I noticed that even when the VM was working correctly—specifically when I reset to commit 25458c7625—the same GL error still appeared.
> > 
> > Thanks for the report. I confirm that something is wrong with virgl when
> > async fencing is used. Don't have this GL 1286 error, but getting a
> > lockup on ARM VM and with one of my new x64 VM setups. Will investigate
> > further and report back here.
> 
> Hi, Cong. Please give a test to [1]. It fixes the problem for me.
> 
> [1] https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1518
> 
> -- 
> Best regards,
> Dmitry


信息安全声明:本邮件包含信息归发件人所在组织所有,发件人所在组织对该邮件拥有所有权利。请接收者注意保密,未经发件人书面许可,不得向任何第三方组织和个人透露本邮件所含信息。
Information Security Notice: The information contained in this mail is solely property of the sender's organization.This mail communication is confidential.Recipients named above are obligated to maintain secrecy and are not permitted to disclose the contents of this communication to others.
Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
Posted by Dmitry Osipenko 9 months, 2 weeks ago
On 4/27/25 14:53, 刘聪 wrote:
> Hi Dmitry,
> 
> The virglrender patch can fix the virgl issue, but the native context still fails to run on my machine.
> I'm not sure if anyone has successfully run it on an ARM64 machine before.

Thanks for the testing!

> When running with Venus, the virtual machine can successfully run vkcube. However, when using the native context, a KVM error is triggered. Both my guest and host kernels are already updated to version 6.14.
> 
> Here are the commands and error messages I encountered:
> 
> ```
> phytium@ubuntu:~/working/virglrenderer$ /opt/native-context-v11/bin/qemu-system-aarch64 --machine virt,accel=kvm,memory-backend=mem1 -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl,hostmem=4G,blob=on,venus=on -object memory-backend-memfd,id=mem1,size=4G  -display sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
> phytium@ubuntu:~/working/virglrenderer$ 
> phytium@ubuntu:~/working/virglrenderer$ /opt/native-context-v11/bin/qemu-system-aarch64 --machine virt,accel=kvm,memory-backend=mem1 -cpu host -smp 4 -m 4G -drive file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl,hostmem=4G,blob=on,drm_native_context=on -object memory-backend-memfd,id=mem1,size=4G  -display sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
> error: kvm run failed Bad address
>  PC=0000e2bcbbf31ab0 X00=0000e2bc9c3ae060 X01=0000e2bc7c02af00
> X02=0000000000000014 X03=0000e2bc9c3ae000 X04=0000e2bc7c02af14
> X05=0000e2bc9c3ae074 X06=0000000000000200 X07=0000e2bc7c02a8f8
> X08=00000000000000de X09=0000000000000200 X10=0000000000001000
> X11=0000000000000004 X12=0000e2bc7c0000b0 X13=0000000000000001
> X14=0000000000000020 X15=0000e2bc9e465f93 X16=0000e2bcad6a01f0
> X17=0000e2bcbbf31a80 X18=0000000000000093 X19=0000000000000060
> X20=0000000000000074 X21=0000e2bc9e46c5f0 X22=0000e2bc9c3ae000
> X23=0000000000000074 X24=0000c02241da83b0 X25=0000c02241da85a0
> X26=0000c02241da85a0 X27=0000000000000014 X28=0000e2bc9e46c5f0
> X29=0000e2bc9e46c610 X30=0000e2bcac809c38  SP=0000e2bc9e46c510
> PSTATE=20001000 --C- EL0t
> phytium@ubuntu:~/working/virglrenderer$ uname -a
> Linux ubuntu 6.14.1-061401-generic #202504071048 SMP PREEMPT_DYNAMIC Mon Apr  7 11:34:37 UTC 2025 aarch64 aarch64 aarch64 GNU/Linux
> ```

Alex Bennée reported the very same problem with KVM on ARM + native ctx
AMD dGPU in the past. You may try to add error messages to
virt/kvm/kvm_main.c of the host Linux kernel to find where the KVM error
originates. Sounds like page refcounting may not be working properly on ARM.

+CC: Sean Christopherson

-- 
Best regards,
Dmitry

Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
Posted by Alex Bennée 9 months, 2 weeks ago
Dmitry Osipenko <dmitry.osipenko@collabora.com> writes:

> On 4/27/25 14:53, 刘聪 wrote:
>> Hi Dmitry,
>> 
>> The virglrender patch can fix the virgl issue, but the native context still fails to run on my machine.
>> I'm not sure if anyone has successfully run it on an ARM64 machine before.
>
> Thanks for the testing!
>
>> When running with Venus, the virtual machine can successfully run vkcube. However, when using the native context, a KVM error is triggered. Both my guest and host kernels are already updated to version 6.14.
>> 
>> Here are the commands and error messages I encountered:
>> 
>> ```
>> phytium@ubuntu:~/working/virglrenderer$
>> /opt/native-context-v11/bin/qemu-system-aarch64 --machine
>> virt,accel=kvm,memory-backend=mem1 -cpu host -smp 4 -m 4G -drive
>> file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio
>> -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device
>> virtio-net-pci,netdev=net0 -device
>> virtio-gpu-gl,hostmem=4G,blob=on,venus=on -object
>> memory-backend-memfd,id=mem1,size=4G -display
>> sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device
>> usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
>> phytium@ubuntu:~/working/virglrenderer$ 
>> phytium@ubuntu:~/working/virglrenderer$
>> /opt/native-context-v11/bin/qemu-system-aarch64 --machine
>> virt,accel=kvm,memory-backend=mem1 -cpu host -smp 4 -m 4G -drive
>> file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio
>> -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device
>> virtio-net-pci,netdev=net0 -device
>> virtio-gpu-gl,hostmem=4G,blob=on,drm_native_context=on -object
>> memory-backend-memfd,id=mem1,size=4G -display
>> sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device
>> usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
>> error: kvm run failed Bad address
>>  PC=0000e2bcbbf31ab0 X00=0000e2bc9c3ae060 X01=0000e2bc7c02af00
>> X02=0000000000000014 X03=0000e2bc9c3ae000 X04=0000e2bc7c02af14
>> X05=0000e2bc9c3ae074 X06=0000000000000200 X07=0000e2bc7c02a8f8
>> X08=00000000000000de X09=0000000000000200 X10=0000000000001000
>> X11=0000000000000004 X12=0000e2bc7c0000b0 X13=0000000000000001
>> X14=0000000000000020 X15=0000e2bc9e465f93 X16=0000e2bcad6a01f0
>> X17=0000e2bcbbf31a80 X18=0000000000000093 X19=0000000000000060
>> X20=0000000000000074 X21=0000e2bc9e46c5f0 X22=0000e2bc9c3ae000
>> X23=0000000000000074 X24=0000c02241da83b0 X25=0000c02241da85a0
>> X26=0000c02241da85a0 X27=0000000000000014 X28=0000e2bc9e46c5f0
>> X29=0000e2bc9e46c610 X30=0000e2bcac809c38  SP=0000e2bc9e46c510
>> PSTATE=20001000 --C- EL0t
>> phytium@ubuntu:~/working/virglrenderer$ uname -a
>> Linux ubuntu 6.14.1-061401-generic #202504071048 SMP PREEMPT_DYNAMIC Mon Apr  7 11:34:37 UTC 2025 aarch64 aarch64 aarch64 GNU/Linux
>> ```
>
> Alex Bennée reported the very same problem with KVM on ARM + native ctx
> AMD dGPU in the past. You may try to add error messages to
> virt/kvm/kvm_main.c of host Linux kernel to find from where KVM error
> originates. Sounds like page refcounting may be not working properly
> on ARM.

Also what hardware is the machine? The AVA (and most things with the
same chipset) have a broken PCI which needs a workaround for unaligned
SIMD access:

  https://github.com/stsquad/linux/tree/testing/altra-tweaks-for-gpu

>
> +CC: Sean Christopherson

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro
Re: Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
Posted by liucong2565@phytium.com.cn 9 months, 2 weeks ago
I use a Phytium D3000 8 Core, and I am not sure whether it has a broken PCI.

https://www.cpubenchmark.net/cpu.php?cpu=ARM+Phytium+D3000+8+Core+2500+MHz


Regards,
cong


> -----Original Messages-----
> From: "Alex Bennée" <alex.bennee@linaro.org>
> Send time:Monday, 04/28/2025 18:07:11
> To: "Dmitry Osipenko" <dmitry.osipenko@collabora.com>
> Cc: 刘聪 <liucong2565@phytium.com.cn>, "Sean Christopherson" <seanjc@google.com>, Jiqian.Chen@amd.com, akihiko.odaki@daynix.com, alexander.deucher@amd.com, christian.koenig@amd.com, gert.wollny@collabora.com, gurchetansingh@chromium.org, hi@alyssa.is, honglei1.huang@amd.com, julia.zhang@amd.com, kraxel@redhat.com, marcandre.lureau@redhat.com, mst@redhat.com, pbonzini@redhat.com, philmd@linaro.org, pierre-eric.pelloux-prayer@amd.com, qemu-devel@nongnu.org, ray.huang@amd.com, robdclark@gmail.com, roger.pau@citrix.com, slp@redhat.com, stefano.stabellini@amd.com, xenia.ragiadakou@amd.com, zzyiwei@chromium.org
> Subject: Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
> 
> Dmitry Osipenko <dmitry.osipenko@collabora.com> writes:
> 
> > On 4/27/25 14:53, 刘聪 wrote:
> >> Hi Dmitry,
> >> 
> >> The virglrender patch can fix the virgl issue, but the native context still fails to run on my machine.
> >> I'm not sure if anyone has successfully run it on an ARM64 machine before.
> >
> > Thanks for the testing!
> >
> >> When running with Venus, the virtual machine can successfully run vkcube. However, when using the native context, a KVM error is triggered. Both my guest and host kernels are already updated to version 6.14.
> >> 
> >> Here are the commands and error messages I encountered:
> >> 
> >> ```
> >> phytium@ubuntu:~/working/virglrenderer$
> >> /opt/native-context-v11/bin/qemu-system-aarch64 --machine
> >> virt,accel=kvm,memory-backend=mem1 -cpu host -smp 4 -m 4G -drive
> >> file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio
> >> -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device
> >> virtio-net-pci,netdev=net0 -device
> >> virtio-gpu-gl,hostmem=4G,blob=on,venus=on -object
> >> memory-backend-memfd,id=mem1,size=4G -display
> >> sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device
> >> usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
> >> phytium@ubuntu:~/working/virglrenderer$ 
> >> phytium@ubuntu:~/working/virglrenderer$
> >> /opt/native-context-v11/bin/qemu-system-aarch64 --machine
> >> virt,accel=kvm,memory-backend=mem1 -cpu host -smp 4 -m 4G -drive
> >> file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio
> >> -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device
> >> virtio-net-pci,netdev=net0 -device
> >> virtio-gpu-gl,hostmem=4G,blob=on,drm_native_context=on -object
> >> memory-backend-memfd,id=mem1,size=4G -display
> >> sdl,gl=on,show-cursor=on -device usb-ehci,id=usb -device
> >> usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
> >> error: kvm run failed Bad address
> >>  PC=0000e2bcbbf31ab0 X00=0000e2bc9c3ae060 X01=0000e2bc7c02af00
> >> X02=0000000000000014 X03=0000e2bc9c3ae000 X04=0000e2bc7c02af14
> >> X05=0000e2bc9c3ae074 X06=0000000000000200 X07=0000e2bc7c02a8f8
> >> X08=00000000000000de X09=0000000000000200 X10=0000000000001000
> >> X11=0000000000000004 X12=0000e2bc7c0000b0 X13=0000000000000001
> >> X14=0000000000000020 X15=0000e2bc9e465f93 X16=0000e2bcad6a01f0
> >> X17=0000e2bcbbf31a80 X18=0000000000000093 X19=0000000000000060
> >> X20=0000000000000074 X21=0000e2bc9e46c5f0 X22=0000e2bc9c3ae000
> >> X23=0000000000000074 X24=0000c02241da83b0 X25=0000c02241da85a0
> >> X26=0000c02241da85a0 X27=0000000000000014 X28=0000e2bc9e46c5f0
> >> X29=0000e2bc9e46c610 X30=0000e2bcac809c38  SP=0000e2bc9e46c510
> >> PSTATE=20001000 --C- EL0t
> >> phytium@ubuntu:~/working/virglrenderer$ uname -a
> >> Linux ubuntu 6.14.1-061401-generic #202504071048 SMP PREEMPT_DYNAMIC Mon Apr  7 11:34:37 UTC 2025 aarch64 aarch64 aarch64 GNU/Linux
> >> ```
> >
> > Alex Bennée reported the very same problem with KVM on ARM + native ctx
> > AMD dGPU in the past. You may try to add error messages to
> > virt/kvm/kvm_main.c of host Linux kernel to find from where KVM error
> > originates. Sounds like page refcounting may be not working properly
> > on ARM.
> 
> Also what hardware is the machine? The AVA (and most things with the
> same chipset) have a broken PCI which needs a workaround for unaligned
> SIMD access:
> 
>   https://github.com/stsquad/linux/tree/testing/altra-tweaks-for-gpu
> 
> >
> > +CC: Sean Christopherson
> 
> -- 
> Alex Bennée
> Virtualisation Tech Lead @ Linaro


信息安全声明:本邮件包含信息归发件人所在组织所有,发件人所在组织对该邮件拥有所有权利。请接收者注意保密,未经发件人书面许可,不得向任何第三方组织和个人透露本邮件所含信息。
Information Security Notice: The information contained in this mail is solely property of the sender's organization.This mail communication is confidential.Recipients named above are obligated to maintain secrecy and are not permitted to disclose the contents of this communication to others.
Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
Posted by Alex Bennée 9 months, 2 weeks ago
liucong2565@phytium.com.cn writes:

> I user Phytium D3000 8 Core, and I am not sure if it has a broken PCI.
>
> https://www.cpubenchmark.net/cpu.php?cpu=ARM+Phytium+D3000+8+Core+2500+MHz

Ahh - looks totally unrelated to the Altra platform so hopefully that
isn't an issue. Apparently a lot of the PCIe implementations are based
off the same underlying IP but without details it's hard to check.

I assume everything runs fine directly when not virtualised? Without a
patched kernel on the AVA you would see corruption for X11 systems
(although not Wayland). e.g.:

  https://gitlab.freedesktop.org/mesa/mesa/-/issues/9100


If you are happy your Arm can drive the AMD GPU ok from the host system
you should focus on verifying the page locking is working as intended
for the guests.

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro
Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
Posted by Alex Bennée 9 months, 2 weeks ago
刘聪 <liucong2565@phytium.com.cn> writes:

> Hi Dmitry,
>
> The virglrender patch can fix the virgl issue, but the native context still fails to run on my machine.
> I'm not sure if anyone has successfully run it on an ARM64 machine before.
>
> When running with Venus, the virtual machine can successfully run vkcube. However, when using the native context, a KVM error is triggered. Both my guest and host kernels are already updated to version 6.14.
>
> Here are the commands and error messages I encountered:
>
> ```
> phytium@ubuntu:~/working/virglrenderer$
> /opt/native-context-v11/bin/qemu-system-aarch64 --machine
> virt,accel=kvm,memory-backend=mem1 -cpu host -smp 4 -m 4G -drive
> file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio
> -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device
> virtio-net-pci,netdev=net0 -device
> virtio-gpu-gl,hostmem=4G,blob=on,venus=on -object
> memory-backend-memfd,id=mem1,size=4G -display sdl,gl=on,show-cursor=on
> -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device
> usb-kbd,bus=usb.0
> phytium@ubuntu:~/working/virglrenderer$ 
> phytium@ubuntu:~/working/virglrenderer$
> /opt/native-context-v11/bin/qemu-system-aarch64 --machine
> virt,accel=kvm,memory-backend=mem1 -cpu host -smp 4 -m 4G -drive
> file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio
> -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device
> virtio-net-pci,netdev=net0 -device
> virtio-gpu-gl,hostmem=4G,blob=on,drm_native_context=on -object
> memory-backend-memfd,id=mem1,size=4G -display sdl,gl=on,show-cursor=on
> -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device
> usb-kbd,bus=usb.0
> error: kvm run failed Bad address

That very much looks like a page not being accessible when trying to do
something. Do we know the page address? Can we dump the current state of
the page table for that? Is the page locked?

>  PC=0000e2bcbbf31ab0 X00=0000e2bc9c3ae060 X01=0000e2bc7c02af00
> X02=0000000000000014 X03=0000e2bc9c3ae000 X04=0000e2bc7c02af14
> X05=0000e2bc9c3ae074 X06=0000000000000200 X07=0000e2bc7c02a8f8
> X08=00000000000000de X09=0000000000000200 X10=0000000000001000
> X11=0000000000000004 X12=0000e2bc7c0000b0 X13=0000000000000001
> X14=0000000000000020 X15=0000e2bc9e465f93 X16=0000e2bcad6a01f0
> X17=0000e2bcbbf31a80 X18=0000000000000093 X19=0000000000000060
> X20=0000000000000074 X21=0000e2bc9e46c5f0 X22=0000e2bc9c3ae000
> X23=0000000000000074 X24=0000c02241da83b0 X25=0000c02241da85a0
> X26=0000c02241da85a0 X27=0000000000000014 X28=0000e2bc9e46c5f0
> X29=0000e2bc9e46c610 X30=0000e2bcac809c38  SP=0000e2bc9e46c510
> PSTATE=20001000 --C- EL0t
> phytium@ubuntu:~/working/virglrenderer$ uname -a
> Linux ubuntu 6.14.1-061401-generic #202504071048 SMP PREEMPT_DYNAMIC Mon Apr  7 11:34:37 UTC 2025 aarch64 aarch64 aarch64 GNU/Linux
> ```
>
> Best regards,
> Cong
>
>> -----Original Messages-----
>> From: "Dmitry Osipenko" <dmitry.osipenko@collabora.com>
>> Send time:Sunday, 04/27/2025 06:27:39
>> To: 刘聪 <liucong2565@phytium.com.cn>
>> Cc: Jiqian.Chen@amd.com, akihiko.odaki@daynix.com,
>> alex.bennee@linaro.org, alexander.deucher@amd.com,
>> christian.koenig@amd.com, gert.wollny@collabora.com,
>> gurchetansingh@chromium.org, hi@alyssa.is, honglei1.huang@amd.com,
>> julia.zhang@amd.com, kraxel@redhat.com, marcandre.lureau@redhat.com,
>> mst@redhat.com, pbonzini@redhat.com, philmd@linaro.org,
>> pierre-eric.pelloux-prayer@amd.com, qemu-devel@nongnu.org,
>> ray.huang@amd.com, robdclark@gmail.com, roger.pau@citrix.com,
>> slp@redhat.com, stefano.stabellini@amd.com,
>> xenia.ragiadakou@amd.com, zzyiwei@chromium.org
>> Subject: Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
>> 
>> On 4/14/25 17:47, Dmitry Osipenko wrote:
>> > On 4/11/25 04:42, 刘聪 wrote:
>> >>
>> >>
>> >>
>> >>> -----Original Messages-----
>> >>> From: "Dmitry Osipenko" <dmitry.osipenko@collabora.com>
>> >>> Send time:Friday, 04/11/2025 05:59:11
>> >>> To: "Cong Liu" <liucong2565@phytium.com.cn>
>> >>> Cc: Jiqian.Chen@amd.com, akihiko.odaki@daynix.com,
>> >>> alex.bennee@linaro.org, alexander.deucher@amd.com,
>> >>> christian.koenig@amd.com, gert.wollny@collabora.com,
>> >>> gurchetansingh@chromium.org, hi@alyssa.is,
>> >>> honglei1.huang@amd.com, julia.zhang@amd.com, kraxel@redhat.com,
>> >>> marcandre.lureau@redhat.com, mst@redhat.com,
>> >>> pbonzini@redhat.com, philmd@linaro.org,
>> >>> pierre-eric.pelloux-prayer@amd.com, qemu-devel@nongnu.org,
>> >>> ray.huang@amd.com, robdclark@gmail.com, roger.pau@citrix.com,
>> >>> slp@redhat.com, stefano.stabellini@amd.com,
>> >>> xenia.ragiadakou@amd.com, zzyiwei@chromium.org
>> >>> Subject: Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
>> >>>
>> >>> 10.04.2025 12:54, Cong Liu пишет:
>> >>>> I discovered that on an ARM64 environment, the 'virtio-gpu:
>> >>>> Support asynchronous fencing' patch causes the virtual machine
>> >>>> GUI to fail to display. Rolling back this patch and using virgl
>> >>>> allows the virtual machine to start normally. When the VM
>> >>>> screen is black, I can see some errors in QEMU. I used QEMU's
>> >>>> -serial stdio to enter the virtual machine's command line
>> >>>> console but didn't see any errors inside the VM - the graphical
>> >>>> interface seems to be stuck. I would greatly appreciate any
>> >>>> suggestions regarding effective troubleshooting methods or
>> >>>> specific areas I should investigate to resolve this issue.
>> >>>>
>> >>>> Here's my software and hardware environment:
>> >>>> - host and guest are ubuntu 24.04
>> >>>> - QEMU: https://gitlab.freedesktop.org/digetx/qemu.git native-context-v11 branch
>> >>>> - virglrenderer: latest main branch 08eb12d00711370002e8f8fa6d620df9b79f9e27
>> >>>> - Mesa: Mesa 25.0~git2504031308.ff386e~oibaf~n (git-ff386eb 2025-04-03 noble-oibaf-ppa)
>> >>>> - Kernel: Linux d3000 6.14.1-061401-generic #202504071048
>> >>>> - GPU: Radeon RX 6600/6600 XT/6600M
>> >>>> - CPU: phytium D3000 aarch64
>> >>>>
>> >>>> Here's the command I'm using to run the virtual machine, which displays a black frame with "Display output is not active" and fails to start the graphical interface normally:
>> >>>>
>> >>>>     phytium@d3000:~/working/qemu$
>> >>>> /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm
>> >>>> -cpu host -smp 4 -m 4G -drive
>> >>>> file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio
>> >>>> -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0
>> >>>> -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl
>> >>>> -display gtk,gl=on,show-cursor=on -device usb-ehci,id=usb
>> >>>> -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
>> >>>>
>> >>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>> >>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>> >>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>> >>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.715: eglMakeCurrent failed
>> >>>>     (qemu:46029): Gdk-WARNING **: 16:43:53.716: eglMakeCurrent failed
>> >>>>
>> >>>> When using SDL, the error messages are slightly different:
>> >>>>
>> >>>>     phytium@d3000:~/working/qemu$
>> >>>> /usr/local/bin/qemu-system-aarch64 --machine virt,accel=kvm
>> >>>> -cpu host -smp 4 -m 4G -drive
>> >>>> file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio
>> >>>> -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0
>> >>>> -device virtio-net-pci,netdev=net0 -device virtio-gpu-gl
>> >>>> -display sdl,gl=on,show-cursor=on -device usb-ehci,id=usb
>> >>>> -device usb-mouse,bus=usb.0 -device usb-kbd,bus=usb.0
>> >>>>
>> >>>>     vrend_renderer_fill_caps: Entering with stale GL error: 1286
>> >>>>
>> >>>
>> >>> Hi,
>> >>>
>> >>> 1. Please make sure that you're not only building QEMU against your
>> >>> virglrenderer version, but also setting LD_LIBRARY_PATH properly at
>> >>> runtime. Best to remove system version of virglrenderer if unsure,
>> >>
>> >> I built and installed virglrenderer with the --prefix=/usr option, so
>> >>  it replaces the system version as expected.
>> >>
>> >>>
>> >>> 2. Can you reproduce this problem using tcg instead of kvm?
>> >>>
>> >>
>> >>  Yes, changing the QEMU command from '--machine virt,accel=kvm -cpu host' to
>> >> '--machine virt -cpu max' still reproduces this problem.
>> >>> -- 
>> >>> Best regards,
>> >>> Dmitry
>> >>
>> >> diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
>> >> index f6df9dcb..f6e06842 100644
>> >> --- a/src/vrend_renderer.c
>> >> +++ b/src/vrend_renderer.c
>> >> @@ -12808,7 +12808,7 @@ void vrend_renderer_fill_caps(uint32_t set, uint32_t version,
>> >>                                union virgl_caps *caps)
>> >>  {
>> >>     int gl_ver, gles_ver;
>> >> -   GLenum err;
>> >> +   GLenum err = GL_NO_ERROR;
>> >>     bool fill_capset2 = false;
>> >>  
>> >>     if (!caps)
>> >>
>> >> phytium@d3000:~/working/qemu$ git log --oneline  -n 10
>> >> e0286f56c8 (HEAD -> native-context-v11, origin/native-context-v11) Revert "amd_iommu: Add support for pass though mode"
>> >> d6e9eb0f0d docs/system: virtio-gpu: Document host/guest requirements
>> >> 55db821ea5 docs/system: virtio-gpu: Update Venus link
>> >> 003940db9a docs/system: virtio-gpu: Add link to Mesa VirGL doc
>> >> 7674e82755 ui/gtk: Don't disable scanout when display is refreshed
>> >> 712fd024e3 ui/sdl2: Don't disable scanout when display is refreshed
>> >> 9003da356f virtio-gpu: Support DRM native context
>> >> e2ff4f4a48 virtio-gpu: Support asynchronous fencing
>> >> 25458c7625 virtio-gpu: Handle virgl fence creation errors
>> >>
>> >> I tried initializing GLenum err = GL_NO_ERROR in vrend_renderer_fill_caps, but it doesn’t seem to resolve the “Entering with stale GL error: 1286” message. However, this error might not be directly related to the VM black screen issue. I noticed that even when the VM was working correctly—specifically when I reset to commit 25458c7625—the same GL error still appeared.
>> > 
>> > Thanks for the report. I confirm that something is wrong with virgl when
>> > async fencing is used. Don't have this GL 1286 error, but getting a
>> > lockup on ARM VM and with one of my new x64 VM setups. Will investigate
>> > further and report back here.
>> 
>> Hi, Cong. Please test [1]. It fixes the problem for me.
>> 
>> [1] https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1518
>> 
>> -- 
>> Best regards,
>> Dmitry
>
>
> 信息安全声明:本邮件包含信息归发件人所在组织所有,发件人所在组织对该邮
> 件拥有所有权利。请接收者注意保密,未经发件人书面许可,不得向任何第三方组
> 织和个人透露本邮件所含信息。Information Security Notice: The
> information contained in this mail is solely property of the sender's
> organization. This mail communication is confidential. Recipients named
> above are obligated to maintain secrecy and are not permitted to
> disclose the contents of this communication to others.

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro
Re: [PATCH v11 04/10] virtio-gpu: Support asynchronous fencing
Posted by Sean Christopherson 9 months, 2 weeks ago
On Sun, Apr 27, 2025, Alex Bennée wrote:
> 刘聪 <liucong2565@phytium.com.cn> writes:
> 
> > Hi Dmitry,
> >
> > The virglrenderer patch can fix the virgl issue, but the native context still
> > fails to run on my machine.  I'm not sure if anyone has successfully run it
> > on an ARM64 machine before.
> >
> > When running with Venus, the virtual machine can successfully run vkcube.
> > However, when using the native context, a KVM error is triggered. Both my
> > guest and host kernels are already updated to version 6.14.
> >
> > Here are the commands and error messages I encountered:
> >
> > ```
> > phytium@ubuntu:~/working/virglrenderer$
> > /opt/native-context-v11/bin/qemu-system-aarch64 --machine
> > virt,accel=kvm,memory-backend=mem1 -cpu host -smp 4 -m 4G -drive
> > file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio
> > -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device
> > virtio-net-pci,netdev=net0 -device
> > virtio-gpu-gl,hostmem=4G,blob=on,venus=on -object
> > memory-backend-memfd,id=mem1,size=4G -display sdl,gl=on,show-cursor=on
> > -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device
> > usb-kbd,bus=usb.0
> > phytium@ubuntu:~/working/virglrenderer$ 
> > phytium@ubuntu:~/working/virglrenderer$
> > /opt/native-context-v11/bin/qemu-system-aarch64 --machine
> > virt,accel=kvm,memory-backend=mem1 -cpu host -smp 4 -m 4G -drive
> > file=/home/phytium/working/ubuntu24.04-aarch64-native-context,format=raw,if=virtio
> > -bios /usr/share/AAVMF/AAVMF_CODE.ms.fd -netdev user,id=net0 -device
> > virtio-net-pci,netdev=net0 -device
> > virtio-gpu-gl,hostmem=4G,blob=on,drm_native_context=on -object
> > memory-backend-memfd,id=mem1,size=4G -display sdl,gl=on,show-cursor=on
> > -device usb-ehci,id=usb -device usb-mouse,bus=usb.0 -device
> > usb-kbd,bus=usb.0
> > error: kvm run failed Bad address
> 
> That very much looks like a page not being accessible when trying to do
> something.

Yep.  The most likely scenario where KVM_RUN returns -EFAULT is when KVM can't
obtain a PFN for the faulting GPA.  Odds are good it's this code:

	pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0,
				&writable, &page);
	if (pfn == KVM_PFN_ERR_HWPOISON) {
		kvm_send_hwpoison_signal(hva, vma_shift);
		return 0;
	}
	if (is_error_noslot_pfn(pfn))   <==========
		return -EFAULT;

where under the hood, __kvm_faultin_pfn() is a wrapper to gup() and
follow_pfnmap_start() (and some other things).

If you can figure out which GPA is failing, then it's "just" a matter of figuring
out why KVM doesn't find a valid mapping.