From nobody Sun Dec 22 15:03:41 2024 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id D0CC3E95A69 for ; Sat, 7 Oct 2023 19:49:49 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1344146AbjJGTts (ORCPT ); Sat, 7 Oct 2023 15:49:48 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57806 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1344079AbjJGTtr (ORCPT ); Sat, 7 Oct 2023 15:49:47 -0400 Received: from madras.collabora.co.uk (madras.collabora.co.uk [IPv6:2a00:1098:0:82:1000:25:2eeb:e5ab]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 95BA6BC for ; Sat, 7 Oct 2023 12:49:44 -0700 (PDT) Received: from workpc.. (109-252-153-31.dynamic.spd-mgts.ru [109.252.153.31]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) (Authenticated sender: dmitry.osipenko) by madras.collabora.co.uk (Postfix) with ESMTPSA id 7CB7C66072F7; Sat, 7 Oct 2023 20:49:42 +0100 (BST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=collabora.com; s=mail; t=1696708183; bh=QkG+4yqcf7f4GTT/p4sdMS0CGEUP0WepcZIWVD0r598=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=KODcYnt2BGZeotFxeDNzIdIxrU/Xg1Cvlilyw0XlMVa6DnT3S2EOw1jA/ts6Fwuya 8c9CLM60Zg4CVSn9kCVf2OR9k5WkP6cLUN7gzvwgbn6kUI2qdZO2oGmYhr+6Kx5CsE fXzZ8qquLLGorXOs00yi5hGje2e9pX346guqNOx+FqzwDPax6afKtJQH0gp7Nbxflg vTfeNuPJNLwL/4O4OKXTcMIjBEVWlS5xdrYX/T2GSQAebZMmzxJNW0OktZ7EktmMJP TwtKLUDoAwsxyT5SGFy6nwg8PlKcnSgbpTfxU3Wg6gwpCFlSGc/MkcMvPJMfSAU0rB 3UbyTyKF2mSGw== From: Dmitry Osipenko To: David Airlie , Gerd Hoffmann , Gurchetan Singh , Chia-I Wu , Rob Clark , Pierre-Eric Pelloux-Prayer Cc: dri-devel@lists.freedesktop.org, 
linux-kernel@vger.kernel.org, kernel@collabora.com, virtualization@lists.linux-foundation.org Subject: [RFC PATCH v1 1/1] drm/virtio: Support fence-passing feature Date: Sat, 7 Oct 2023 22:47:47 +0300 Message-ID: <20231007194747.788934-2-dmitry.osipenko@collabora.com> X-Mailer: git-send-email 2.41.0 In-Reply-To: <20231007194747.788934-1-dmitry.osipenko@collabora.com> References: <20231007194747.788934-1-dmitry.osipenko@collabora.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Support an extended version of the VIRTIO_GPU_CMD_SUBMIT_3D command that allows passing in-fence IDs to the host for waiting, removing the need for expensive host-guest roundtrips when waiting for fences on the guest side. Guest userspace must enable the new VIRTGPU_CONTEXT_PARAM_FENCE_PASSING flag and the host must support the new VIRTIO_GPU_F_FENCE_PASSING feature in order to activate fence passing for a given virtio-gpu context. The array of in-fence IDs is then prepended to the VIRTIO_GPU_CMD_SUBMIT_3D data, and the previously unused padding field of the command is reused for the number of in-fences. A new VIRTGPU_EXECBUF_SHARED_FENCE flag is added to the job submission UAPI and must be set by userspace if it wants to make the fence shareable with/on the host. Certain jobs won't want to share their fence; Venus in particular will benefit from this flag. 
Link: https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1138 Link: https://gitlab.freedesktop.org/digetx/qemu/-/commits/native-context-iris Link: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/4679609 Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/virtio/virtgpu_drv.c | 1 + drivers/gpu/drm/virtio/virtgpu_drv.h | 11 ++- drivers/gpu/drm/virtio/virtgpu_fence.c | 15 +++- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 11 ++- drivers/gpu/drm/virtio/virtgpu_kms.c | 8 +- drivers/gpu/drm/virtio/virtgpu_submit.c | 99 ++++++++++++++++++++++++- drivers/gpu/drm/virtio/virtgpu_vq.c | 7 +- include/uapi/drm/virtgpu_drm.h | 3 + include/uapi/linux/virtio_gpu.h | 11 ++- 9 files changed, 152 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/= virtgpu_drv.c index 644b8ee51009..544918bd38e9 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -148,6 +148,7 @@ static unsigned int features[] =3D { VIRTIO_GPU_F_RESOURCE_UUID, VIRTIO_GPU_F_RESOURCE_BLOB, VIRTIO_GPU_F_CONTEXT_INIT, + VIRTIO_GPU_F_FENCE_PASSING, }; static struct virtio_driver virtio_gpu_driver =3D { .feature_table =3D features, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/= virtgpu_drv.h index 8513b671f871..1dc503cb53de 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -149,6 +149,7 @@ struct virtio_gpu_fence { struct virtio_gpu_fence_event *e; struct virtio_gpu_fence_driver *drv; struct list_head node; + bool host_shareable; }; =20 struct virtio_gpu_vbuffer { @@ -246,6 +247,7 @@ struct virtio_gpu_device { bool has_resource_blob; bool has_host_visible; bool has_context_init; + bool has_fence_passing; struct virtio_shm_region host_visible_region; struct drm_mm host_visible_mm; =20 @@ -273,6 +275,7 @@ struct virtio_gpu_fpriv { uint32_t num_rings; uint64_t base_fence_ctx; uint64_t ring_idx_mask; + bool fence_passing_enabled; 
struct mutex context_lock; }; =20 @@ -367,7 +370,9 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *vg= dev, void *data, uint32_t data_size, uint32_t ctx_id, struct virtio_gpu_object_array *objs, - struct virtio_gpu_fence *fence); + struct virtio_gpu_fence *fence, + uint32_t cmd_size, + unsigned int num_in_fences); void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev, uint32_t ctx_id, uint64_t offset, uint32_t level, @@ -420,6 +425,9 @@ virtio_gpu_cmd_set_scanout_blob(struct virtio_gpu_devic= e *vgdev, uint32_t width, uint32_t height, uint32_t x, uint32_t y); =20 +void virtio_gpu_cmd_in_fence(struct virtio_gpu_device *vgdev, + uint32_t ctx_id, uint64_t fence_id); + /* virtgpu_display.c */ int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev); void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev); @@ -439,6 +447,7 @@ void virtio_gpu_fence_emit(struct virtio_gpu_device *vg= dev, struct virtio_gpu_fence *fence); void virtio_gpu_fence_event_process(struct virtio_gpu_device *vdev, u64 fence_id); +struct virtio_gpu_fence *to_virtio_gpu_fence(struct dma_fence *dma_fence); =20 /* virtgpu_object.c */ void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo); diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virti= o/virtgpu_fence.c index f28357dbde35..1fd3cfeca2f5 100644 --- a/drivers/gpu/drm/virtio/virtgpu_fence.c +++ b/drivers/gpu/drm/virtio/virtgpu_fence.c @@ -27,9 +27,6 @@ =20 #include "virtgpu_drv.h" =20 -#define to_virtio_gpu_fence(x) \ - container_of(x, struct virtio_gpu_fence, f) - static const char *virtio_gpu_get_driver_name(struct dma_fence *f) { return "virtio_gpu"; @@ -71,6 +68,14 @@ static const struct dma_fence_ops virtio_gpu_fence_ops = =3D { .timeline_value_str =3D virtio_gpu_timeline_value_str, }; =20 +struct virtio_gpu_fence *to_virtio_gpu_fence(struct dma_fence *dma_fence) +{ + if (dma_fence->ops !=3D &virtio_gpu_fence_ops) + return NULL; + + return container_of(dma_fence, struct 
virtio_gpu_fence, f); +} + struct virtio_gpu_fence *virtio_gpu_fence_alloc(struct virtio_gpu_device *= vgdev, uint64_t base_fence_ctx, uint32_t ring_idx) @@ -122,6 +127,10 @@ void virtio_gpu_fence_emit(struct virtio_gpu_device *v= gdev, cpu_to_le32(VIRTIO_GPU_FLAG_INFO_RING_IDX); cmd_hdr->ring_idx =3D (u8)fence->ring_idx; } + + if (fence->host_shareable) + cmd_hdr->flags |=3D + cpu_to_le32(VIRTIO_GPU_FLAG_FENCE_SHAREABLE); } =20 void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev, diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virti= o/virtgpu_ioctl.c index b24b11f25197..3028786c59cd 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -514,7 +514,8 @@ static int virtio_gpu_resource_create_blob_ioctl(struct= drm_device *dev, return PTR_ERR(buf); =20 virtio_gpu_cmd_submit(vgdev, buf, rc_blob->cmd_size, - vfpriv->ctx_id, NULL, NULL); + vfpriv->ctx_id, NULL, NULL, + rc_blob->cmd_size, 0); } =20 if (guest_blob) @@ -642,6 +643,14 @@ static int virtio_gpu_context_init_ioctl(struct drm_de= vice *dev, =20 vfpriv->ring_idx_mask =3D value; break; + case VIRTGPU_CONTEXT_PARAM_FENCE_PASSING: + if (!vgdev->has_fence_passing && value) { + ret =3D -EINVAL; + goto out_unlock; + } + + vfpriv->fence_passing_enabled =3D !!value; + break; default: ret =3D -EINVAL; goto out_unlock; diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/= virtgpu_kms.c index 5a3b5aaed1f3..9f4617a75edd 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -197,12 +197,16 @@ int virtio_gpu_init(struct virtio_device *vdev, struc= t drm_device *dev) if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_CONTEXT_INIT)) { vgdev->has_context_init =3D true; } + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_FENCE_PASSING)) { + vgdev->has_fence_passing =3D true; + } =20 - DRM_INFO("features: %cvirgl %cedid %cresource_blob %chost_visible", + DRM_INFO("features: %cvirgl 
%cedid %cresource_blob %chost_visible %cfence= _passing", vgdev->has_virgl_3d ? '+' : '-', vgdev->has_edid ? '+' : '-', vgdev->has_resource_blob ? '+' : '-', - vgdev->has_host_visible ? '+' : '-'); + vgdev->has_host_visible ? '+' : '-', + vgdev->has_fence_passing ? '+' : '-'); =20 DRM_INFO("features: %ccontext_init\n", vgdev->has_context_init ? '+' : '-'); diff --git a/drivers/gpu/drm/virtio/virtgpu_submit.c b/drivers/gpu/drm/virt= io/virtgpu_submit.c index 3c00135ead45..129d063029a6 100644 --- a/drivers/gpu/drm/virtio/virtgpu_submit.c +++ b/drivers/gpu/drm/virtio/virtgpu_submit.c @@ -25,6 +25,11 @@ struct virtio_gpu_submit_post_dep { u64 point; }; =20 +struct virtio_gpu_in_fence { + u64 id; + u32 context; +}; + struct virtio_gpu_submit { struct virtio_gpu_submit_post_dep *post_deps; unsigned int num_out_syncobjs; @@ -32,6 +37,9 @@ struct virtio_gpu_submit { struct drm_syncobj **in_syncobjs; unsigned int num_in_syncobjs; =20 + struct virtio_gpu_in_fence *in_fences; + unsigned int num_in_fences; + struct virtio_gpu_object_array *buflist; struct drm_virtgpu_execbuffer *exbuf; struct virtio_gpu_fence *out_fence; @@ -41,6 +49,8 @@ struct virtio_gpu_submit { struct drm_file *file; int out_fence_fd; u64 fence_ctx; + u32 data_size; + u32 cmd_size; u32 ring_idx; void *buf; }; @@ -48,11 +58,44 @@ struct virtio_gpu_submit { static int virtio_gpu_do_fence_wait(struct virtio_gpu_submit *submit, struct dma_fence *in_fence) { + struct virtio_gpu_fence *fence =3D to_virtio_gpu_fence(in_fence); u32 context =3D submit->fence_ctx + submit->ring_idx; + struct virtio_gpu_in_fence *vfence, *in_fences; + u32 i; =20 if (dma_fence_match_context(in_fence, context)) return 0; =20 + if (fence && fence->host_shareable && + submit->vfpriv->fence_passing_enabled) { + /* + * Merge sync_file + syncobj in-fences to avoid sending more + * than one fence per-context to host. Use latest fence from + * the same context. 
+ */ + for (i =3D 0; i < submit->num_in_fences; i++) { + vfence =3D &submit->in_fences[i]; + + if (dma_fence_match_context(in_fence, vfence->context)) { + vfence->id =3D max(vfence->id, fence->fence_id); + return 0; + } + } + + in_fences =3D krealloc_array(submit->in_fences, + submit->num_in_fences + 1, + sizeof(*in_fences), GFP_KERNEL); + if (!in_fences) + return -ENOMEM; + + in_fences[submit->num_in_fences].id =3D fence->fence_id; + in_fences[submit->num_in_fences].context =3D context; + submit->in_fences =3D in_fences; + submit->num_in_fences++; + + return 0; + } + return dma_fence_wait(in_fence, true); } =20 @@ -331,6 +374,7 @@ static void virtio_gpu_cleanup_submit(struct virtio_gpu= _submit *submit) virtio_gpu_reset_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs); virtio_gpu_free_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs); virtio_gpu_free_post_deps(submit->post_deps, submit->num_out_syncobjs); + kfree(submit->in_fences); =20 if (!IS_ERR(submit->buf)) kvfree(submit->buf); @@ -348,12 +392,51 @@ static void virtio_gpu_cleanup_submit(struct virtio_g= pu_submit *submit) fput(submit->sync_file->file); } =20 -static void virtio_gpu_submit(struct virtio_gpu_submit *submit) +static int virtio_gpu_attach_in_fences(struct virtio_gpu_submit *submit) { - virtio_gpu_cmd_submit(submit->vgdev, submit->buf, submit->exbuf->size, + size_t in_fences_size =3D sizeof(u64) * submit->num_in_fences; + size_t new_data_size =3D submit->data_size + in_fences_size; + void *buf =3D submit->buf; + u64 *in_fences; + unsigned int i; + + if (new_data_size < submit->data_size) + return -EINVAL; + + buf =3D kvrealloc(buf, submit->data_size, new_data_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memmove(buf + in_fences_size, buf, submit->data_size); + in_fences =3D buf; + + for (i =3D 0; i < submit->num_in_fences; i++) + in_fences[i] =3D cpu_to_le64(submit->in_fences[i].id); + + submit->data_size =3D new_data_size; + submit->buf =3D buf; + + return 0; +} + +static int 
virtio_gpu_submit(struct virtio_gpu_submit *submit) +{ + int err; + + if (submit->num_in_fences) { + err =3D virtio_gpu_attach_in_fences(submit); + if (err) + return err; + } + + virtio_gpu_cmd_submit(submit->vgdev, submit->buf, submit->data_size, submit->vfpriv->ctx_id, submit->buflist, - submit->out_fence); + submit->out_fence, submit->cmd_size, + submit->num_in_fences); + virtio_gpu_notify(submit->vgdev); + + return 0; } =20 static void virtio_gpu_complete_submit(struct virtio_gpu_submit *submit) @@ -401,6 +484,12 @@ static int virtio_gpu_init_submit(struct virtio_gpu_su= bmit *submit, } } =20 + if ((exbuf->flags & VIRTGPU_EXECBUF_SHARED_FENCE) && + vfpriv->fence_passing_enabled && out_fence) + out_fence->host_shareable =3D true; + + submit->data_size =3D exbuf->size; + submit->cmd_size =3D exbuf->size; submit->out_fence =3D out_fence; submit->fence_ctx =3D fence_ctx; submit->ring_idx =3D ring_idx; @@ -527,7 +616,9 @@ int virtio_gpu_execbuffer_ioctl(struct drm_device *dev,= void *data, if (ret) goto cleanup; =20 - virtio_gpu_submit(&submit); + ret =3D virtio_gpu_submit(&submit); + if (ret) + goto cleanup; =20 /* * Set up usr-out data after submitting the job to optimize diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/v= irtgpu_vq.c index b1a00c0c25a7..29d462b69bad 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -1079,7 +1079,9 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *= vgdev, void *data, uint32_t data_size, uint32_t ctx_id, struct virtio_gpu_object_array *objs, - struct virtio_gpu_fence *fence) + struct virtio_gpu_fence *fence, + uint32_t cmd_size, + unsigned int num_in_fences) { struct virtio_gpu_cmd_submit *cmd_p; struct virtio_gpu_vbuffer *vbuf; @@ -1093,7 +1095,8 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *= vgdev, =20 cmd_p->hdr.type =3D cpu_to_le32(VIRTIO_GPU_CMD_SUBMIT_3D); cmd_p->hdr.ctx_id =3D cpu_to_le32(ctx_id); - cmd_p->size =3D cpu_to_le32(data_size); 
+ cmd_p->size =3D cpu_to_le32(cmd_size); + cmd_p->num_in_fences =3D cpu_to_le32(num_in_fences); =20 virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence); } diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index b1d0e56565bc..fd486fdf0441 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -52,10 +52,12 @@ extern "C" { #define VIRTGPU_EXECBUF_FENCE_FD_IN 0x01 #define VIRTGPU_EXECBUF_FENCE_FD_OUT 0x02 #define VIRTGPU_EXECBUF_RING_IDX 0x04 +#define VIRTGPU_EXECBUF_SHARED_FENCE 0x08 #define VIRTGPU_EXECBUF_FLAGS (\ VIRTGPU_EXECBUF_FENCE_FD_IN |\ VIRTGPU_EXECBUF_FENCE_FD_OUT |\ VIRTGPU_EXECBUF_RING_IDX |\ + VIRTGPU_EXECBUF_SHARED_FENCE |\ 0) =20 struct drm_virtgpu_map { @@ -198,6 +200,7 @@ struct drm_virtgpu_resource_create_blob { #define VIRTGPU_CONTEXT_PARAM_CAPSET_ID 0x0001 #define VIRTGPU_CONTEXT_PARAM_NUM_RINGS 0x0002 #define VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK 0x0003 +#define VIRTGPU_CONTEXT_PARAM_FENCE_PASSING 0x0004 struct drm_virtgpu_context_set_param { __u64 param; __u64 value; diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gp= u.h index f556fde07b76..c3182c8255cf 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -65,6 +65,11 @@ */ #define VIRTIO_GPU_F_CONTEXT_INIT 4 =20 +/* + * VIRTIO_GPU_CMD_SUBMIT_3D + */ +#define VIRTIO_GPU_F_FENCE_PASSING 5 + enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED =3D 0, =20 @@ -133,6 +138,10 @@ enum virtio_gpu_shm_id { * of the command ring that needs to used when creating the fence */ #define VIRTIO_GPU_FLAG_INFO_RING_IDX (1 << 1) +/* + * The fence is shareable between host contexts if flag is set. 
+ */ +#define VIRTIO_GPU_FLAG_FENCE_SHAREABLE (1 << 2) =20 struct virtio_gpu_ctrl_hdr { __le32 type; @@ -304,7 +313,7 @@ struct virtio_gpu_ctx_resource { struct virtio_gpu_cmd_submit { struct virtio_gpu_ctrl_hdr hdr; __le32 size; - __le32 padding; + __le32 num_in_fences; }; =20 #define VIRTIO_GPU_CAPSET_VIRGL 1 --=20 2.41.0