Propagate the GPAs (in_xlat_addr/out_xlat_addr) of a VirtQueueElement to
vhost_svq_translate_addr() so that descriptors backed by guest memory are
translated to IOVAs via the GPA->IOVA tree.

For descriptors backed by guest memory, the translation is performed
using GPAs via the GPA->IOVA tree. Since GPAs are unique in the guest's
address space, this guarantees unambiguous IOVA translations.

For descriptors not backed by guest memory, the existing IOVA->HVA tree
is used.

This avoids the issue where different GPAs map to the same HVA: a
reverse HVA->IOVA lookup may return an IOVA associated with a GPA other
than the one intended.

Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com>
---
hw/virtio/vhost-shadow-virtqueue.c | 49 ++++++++++++++++++++++--------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 37aca8b431..be0db94ab7 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -78,24 +78,37 @@ uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
* @vaddr: Translated IOVA addresses
* @iovec: Source qemu's VA addresses
* @num: Length of iovec and minimum length of vaddr
+ * @gpas: Descriptors' GPAs, if backed by guest memory
*/
static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
hwaddr *addrs, const struct iovec *iovec,
- size_t num)
+ size_t num, const hwaddr *gpas)
{
if (num == 0) {
return true;
    }

    for (size_t i = 0; i < num; ++i) {
- DMAMap needle = {
- .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
- .size = iovec[i].iov_len,
- };
+ const DMAMap *map;
+ DMAMap needle;
Int128 needle_last, map_last;
        size_t off;

-        const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
+ if (gpas) {
+ /* Search the GPA->IOVA tree */
+ needle = (DMAMap) {
+ .translated_addr = gpas[i],
+ .size = iovec[i].iov_len,
+ };
+ map = vhost_iova_tree_find_gpa(svq->iova_tree, &needle);
+ } else {
+            /* Search the IOVA->HVA tree */
+ needle = (DMAMap) {
+ .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
+ .size = iovec[i].iov_len,
+ };
+ map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
+ }
/*
* Map cannot be NULL since iova map contains all guest space and
* qemu already has a physical address mapped
@@ -132,12 +145,14 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
* @num: iovec length
* @more_descs: True if more descriptors come in the chain
* @write: True if they are writeable descriptors
+ * @gpas: Descriptors' GPAs, if backed by guest memory
*
* Return true if success, false otherwise and print error.
*/
static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
const struct iovec *iovec, size_t num,
- bool more_descs, bool write)
+ bool more_descs, bool write,
+ const hwaddr *gpas)
{
uint16_t i = svq->free_head, last = svq->free_head;
unsigned n;
@@ -149,7 +164,7 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
return true;
    }

-    ok = vhost_svq_translate_addr(svq, sg, iovec, num);
+ ok = vhost_svq_translate_addr(svq, sg, iovec, num, gpas);
if (unlikely(!ok)) {
return false;
}
@@ -175,7 +190,8 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
const struct iovec *out_sg, size_t out_num,
const struct iovec *in_sg, size_t in_num,
- unsigned *head)
+ unsigned *head, const hwaddr *in_gpas,
+ const hwaddr *out_gpas)
{
unsigned avail_idx;
vring_avail_t *avail = svq->vring.avail;
@@ -192,12 +208,13 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
    }

    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
- false);
+ false, out_gpas);
if (unlikely(!ok)) {
return false;
    }

-    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
+ ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true,
+ in_gpas);
if (unlikely(!ok)) {
return false;
}
@@ -253,12 +270,20 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
unsigned qemu_head;
unsigned ndescs = in_num + out_num;
bool ok;
+ hwaddr *in_gpas = NULL;
+    hwaddr *out_gpas = NULL;

    if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
return -ENOSPC;
    }

-    ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
+ if (elem) {
+ in_gpas = elem->in_xlat_addr;
+ out_gpas = elem->out_xlat_addr;
+ }
+
+ ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head,
+ in_gpas, out_gpas);
if (unlikely(!ok)) {
return -EINVAL;
}
--
2.43.5
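To make the problem the commit message describes concrete, here is a toy,
self-contained sketch (not QEMU code; the Map struct and all addresses
below are invented for illustration) of how a reverse HVA->IOVA lookup can
return the wrong mapping when two GPA ranges alias one HVA, while a
GPA-keyed lookup cannot:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint64_t gpa;   /* guest physical address of the region */
    uint64_t hva;   /* host virtual address backing the region */
    uint64_t iova;  /* IOVA the device uses for it */
} Map;

/* Two distinct GPA ranges backed by the same host buffer (same HVA). */
static const Map maps[] = {
    { .gpa = 0x100000, .hva = 0x7f0000000000, .iova = 0x1000 },
    { .gpa = 0x200000, .hva = 0x7f0000000000, .iova = 0x2000 },
};

/* Reverse lookup by HVA: the first match wins, intended or not. */
static const Map *find_by_hva(uint64_t hva)
{
    for (size_t i = 0; i < 2; i++) {
        if (maps[i].hva == hva) {
            return &maps[i];
        }
    }
    return NULL;
}

/* Lookup by GPA: GPAs are unique in the guest, so at most one match. */
static const Map *find_by_gpa(uint64_t gpa)
{
    for (size_t i = 0; i < 2; i++) {
        if (maps[i].gpa == gpa) {
            return &maps[i];
        }
    }
    return NULL;
}

int main(void)
{
    /* A descriptor that really refers to GPA 0x200000... */
    printf("by HVA: iova=0x%" PRIx64 "\n", find_by_hva(0x7f0000000000)->iova);
    printf("by GPA: iova=0x%" PRIx64 "\n", find_by_gpa(0x200000)->iova);
    /* ...gets IOVA 0x1000 (the alias's mapping) by HVA, but the
     * intended 0x2000 by GPA. */
    return 0;
}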
On Fri, Jan 10, 2025 at 6:09 PM Jonah Palmer <jonah.palmer@oracle.com> wrote:
>
> [...]
>
If SVQ is the only one needing xlat_addrs, we can create a new
SVQElement, following the code of VirtIOBlockReq or VirtIOSCSIReq for
example.

But why do we need it? As long as svq->desc_state[qemu_head].elem !=
NULL, the GPA is elem->in_addr or elem->out_addr, and we can use that
to look into the GPA tree, can't we? If we don't have any elem, then
we need to go with the "old" HVA -> IOVA lookup.
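
A rough, untested sketch of that idea on top of this patch, assuming
elem->in_addr / elem->out_addr are the GPA arrays a VirtQueueElement
already carries (everything else here is illustrative only):

-    hwaddr *in_gpas = NULL;
-    hwaddr *out_gpas = NULL;
+    /*
+     * Take the GPAs straight from the element; a NULL elem (e.g. an
+     * SVQ-internal buffer) falls back to the "old" HVA -> IOVA lookup.
+     */
+    const hwaddr *in_gpas = elem ? elem->in_addr : NULL;
+    const hwaddr *out_gpas = elem ? elem->out_addr : NULL;

     if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
         return -ENOSPC;
     }

-    if (elem) {
-        in_gpas = elem->in_xlat_addr;
-        out_gpas = elem->out_xlat_addr;
-    }
-
     ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head,
                              in_gpas, out_gpas);

That would also drop the need for the extra in_xlat_addr/out_xlat_addr
members entirely.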
On 1/16/25 2:29 PM, Eugenio Perez Martin wrote:
> On Fri, Jan 10, 2025 at 6:09 PM Jonah Palmer <jonah.palmer@oracle.com> wrote:
>>
>> [...]
>>
>
> If SVQ is the only one needing xlat_addrs, we can create a new
> SVQElement, following the code of VirtIOBlockReq or VirtIOSCSIReq for
> example.
>
> But why do we need it? As long as svq->desc_state[qemu_head].elem !=
> NULL, the GPA is elem->in_addr or elem->out_addr, and we can use that
> to look into the GPA tree, can't we? If we don't have any elem, then
> we need to go with the "old" HVA -> IOVA lookup.
>
Oh okay, interesting. I hadn't realized that I was actually duplicating
information already available in elem->in_addr and elem->out_addr with
in_xlat_addr and out_xlat_addr.

If this is indeed the case, then yeah, there's no need for the
in_xlat_addr and out_xlat_addr members.

I'll look into this! Thanks for your comments, Eugenio =]