Propagate the GPAs (in_xlat_addr/out_xlat_addr) of a VirtQueueElement
to vhost_svq_translate_addr() so that descriptors backed by guest
memory are translated to IOVAs via the GPA->IOVA tree.

For descriptors backed by guest memory, the translation is performed
using GPAs via the GPA->IOVA tree. Since GPAs are unique in the guest's
address space, this guarantees an unambiguous IOVA translation.

For descriptors not backed by guest memory, the existing IOVA->HVA tree
is used instead.

This avoids the issue where different GPAs map to the same HVA, causing
the HVA->IOVA translation to potentially return an IOVA associated with
a GPA other than the one intended.
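
To illustrate the ambiguity with a standalone sketch (not QEMU code;
the struct and all addresses below are invented for the example): when
two GPA ranges are backed by the same HVA, a reverse HVA lookup has no
way to pick the right entry, while a GPA lookup does.

#include <inttypes.h>
#include <stdio.h>

typedef struct {
    uint64_t gpa;   /* guest physical address */
    uint64_t hva;   /* host virtual address backing the range */
    uint64_t iova;  /* IOVA the range was mapped with */
} Mapping;

int main(void)
{
    /* Two distinct GPA ranges backed by the same HVA. */
    const Mapping maps[] = {
        { .gpa = 0x000a0000, .hva = 0x7f0000000000ULL, .iova = 0x1000 },
        { .gpa = 0x000fc000, .hva = 0x7f0000000000ULL, .iova = 0x9000 },
    };

    /* An HVA -> IOVA search cannot tell the entries apart: the first
     * match wins, which may belong to the wrong GPA. */
    uint64_t hva = 0x7f0000000000ULL;
    for (size_t i = 0; i < 2; i++) {
        if (maps[i].hva == hva) {
            printf("HVA 0x%" PRIx64 " -> IOVA 0x%" PRIx64 "\n",
                   hva, maps[i].iova);
            break;
        }
    }

    /* A GPA -> IOVA search is unambiguous. */
    uint64_t gpa = 0x000fc000;
    for (size_t i = 0; i < 2; i++) {
        if (maps[i].gpa == gpa) {
            printf("GPA 0x%" PRIx64 " -> IOVA 0x%" PRIx64 "\n",
                   gpa, maps[i].iova);
        }
    }
    return 0;
}
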
Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com>
---
 hw/virtio/vhost-shadow-virtqueue.c | 49 ++++++++++++++++++++++--------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 37aca8b431..be0db94ab7 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -78,24 +78,37 @@ uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
  * @vaddr: Translated IOVA addresses
  * @iovec: Source qemu's VA addresses
  * @num: Length of iovec and minimum length of vaddr
+ * @gpas: Descriptors' GPAs, if backed by guest memory
  */
 static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
                                      hwaddr *addrs, const struct iovec *iovec,
-                                     size_t num)
+                                     size_t num, const hwaddr *gpas)
 {
     if (num == 0) {
         return true;
     }

     for (size_t i = 0; i < num; ++i) {
-        DMAMap needle = {
-            .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
-            .size = iovec[i].iov_len,
-        };
+        const DMAMap *map;
+        DMAMap needle;
         Int128 needle_last, map_last;
         size_t off;

-        const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
+        if (gpas) {
+            /* Search the GPA->IOVA tree */
+            needle = (DMAMap) {
+                .translated_addr = gpas[i],
+                .size = iovec[i].iov_len,
+            };
+            map = vhost_iova_tree_find_gpa(svq->iova_tree, &needle);
+        } else {
+            /* Search the IOVA->HVA tree */
+            needle = (DMAMap) {
+                .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
+                .size = iovec[i].iov_len,
+            };
+            map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
+        }
         /*
          * Map cannot be NULL since iova map contains all guest space and
          * qemu already has a physical address mapped
@@ -132,12 +145,14 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
  * @num: iovec length
  * @more_descs: True if more descriptors come in the chain
  * @write: True if they are writeable descriptors
+ * @gpas: Descriptors' GPAs, if backed by guest memory
  *
  * Return true if success, false otherwise and print error.
  */
 static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
                                         const struct iovec *iovec, size_t num,
-                                        bool more_descs, bool write)
+                                        bool more_descs, bool write,
+                                        const hwaddr *gpas)
 {
     uint16_t i = svq->free_head, last = svq->free_head;
     unsigned n;
@@ -149,7 +164,7 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
         return true;
     }

-    ok = vhost_svq_translate_addr(svq, sg, iovec, num);
+    ok = vhost_svq_translate_addr(svq, sg, iovec, num, gpas);
     if (unlikely(!ok)) {
         return false;
     }
@@ -175,7 +190,8 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
                                 const struct iovec *out_sg, size_t out_num,
                                 const struct iovec *in_sg, size_t in_num,
-                                unsigned *head)
+                                unsigned *head, const hwaddr *in_gpas,
+                                const hwaddr *out_gpas)
 {
     unsigned avail_idx;
     vring_avail_t *avail = svq->vring.avail;
@@ -192,12 +208,13 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
     }

     ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
-                                     false);
+                                     false, out_gpas);
     if (unlikely(!ok)) {
         return false;
     }

-    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
+    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true,
+                                     in_gpas);
     if (unlikely(!ok)) {
         return false;
     }
@@ -253,12 +270,20 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
     unsigned qemu_head;
     unsigned ndescs = in_num + out_num;
     bool ok;
+    hwaddr *in_gpas = NULL;
+    hwaddr *out_gpas = NULL;

     if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
         return -ENOSPC;
     }

-    ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
+    if (elem) {
+        in_gpas = elem->in_xlat_addr;
+        out_gpas = elem->out_xlat_addr;
+    }
+
+    ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head,
+                             in_gpas, out_gpas);
     if (unlikely(!ok)) {
         return -EINVAL;
     }
-- 
2.43.5
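
Not visible in the hunks above is what follows a successful lookup: per
the off/needle_last/map_last declarations kept in the first hunk, the
function checks that the needle fits entirely within the matched entry
and then derives the result by preserving the needle's offset within
that range. A minimal standalone sketch of the offset arithmetic, with
simplified invented names rather than the actual QEMU types:

#include <inttypes.h>
#include <stdio.h>

typedef uint64_t hwaddr;

/* Simplified stand-in for a single IOVA tree entry. */
typedef struct {
    hwaddr translated_addr; /* start of the range (GPA or HVA) */
    hwaddr iova;            /* IOVA the range starts at */
    hwaddr size;            /* size of the range */
} Map;

/* Translate an address inside @map by preserving its offset. */
static hwaddr map_translate(const Map *map, hwaddr addr)
{
    hwaddr off = addr - map->translated_addr;
    return map->iova + off;
}

int main(void)
{
    Map map = { .translated_addr = 0x40000, .iova = 0x2000, .size = 0x10000 };

    /* A descriptor 0x80 bytes into the range lands 0x80 bytes into
     * the IOVA window: 0x2000 + 0x80 = 0x2080. */
    printf("IOVA = 0x%" PRIx64 "\n", map_translate(&map, 0x40080));
    return 0;
}
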
On Fri, Jan 10, 2025 at 6:09 PM Jonah Palmer <jonah.palmer@oracle.com> wrote:
>
> Propagate the GPAs (in_xlat_addr/out_xlat_addr) of a VirtQueueElement
> to vhost_svq_translate_addr() so that descriptors backed by guest
> memory are translated to IOVAs via the GPA->IOVA tree.
>
> For descriptors backed by guest memory, the translation is performed
> using GPAs via the GPA->IOVA tree. Since GPAs are unique in the
> guest's address space, this guarantees an unambiguous IOVA
> translation.
>
> For descriptors not backed by guest memory, the existing IOVA->HVA
> tree is used instead.
>
> This avoids the issue where different GPAs map to the same HVA,
> causing the HVA->IOVA translation to potentially return an IOVA
> associated with a GPA other than the one intended.
>

If SVQ is the only one needing xlat_addrs we can create a new
SVQElement, following the code of VirtIOBlockReq or VirtIOSCSIReq for
example.

But why do we need it? As long as svq->desc_state[qemu_head].elem !=
NULL, the GPA is elem->in_addr or elem->out_addr, and we can use that
to look into the GPA tree, can't we? If we don't have any elem, then we
need to go with the "old" HVA -> IOVA lookup.
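
Something like this untested sketch, against this patch's
vhost_svq_add() (elem->in_addr / elem->out_addr being the GPA arrays a
VirtQueueElement already carries):

    /* Untested sketch: reuse the GPAs QEMU already records in the
     * VirtQueueElement instead of adding new xlat_addr arrays. */
    const hwaddr *in_gpas = NULL;
    const hwaddr *out_gpas = NULL;

    if (elem) {
        in_gpas = elem->in_addr;
        out_gpas = elem->out_addr;
    }

    ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head,
                             in_gpas, out_gpas);
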
On 1/16/25 2:29 PM, Eugenio Perez Martin wrote:
> On Fri, Jan 10, 2025 at 6:09 PM Jonah Palmer <jonah.palmer@oracle.com> wrote:
>>
>> [...]
>>
> If SVQ is the only one needing xlat_addrs we can create a new
> SVQElement, following the code of VirtIOBlockReq or VirtIOSCSIReq for
> example.
>
> But why do we need it? As long as svq->desc_state[qemu_head].elem !=
> NULL, the GPA is elem->in_addr or elem->out_addr, and we can use that
> to look into the GPA tree, can't we? If we don't have any elem, then
> we need to go with the "old" HVA -> IOVA lookup.
>

Oh okay, interesting. I hadn't realized that I was actually duplicating
information already available in elem->in_addr and elem->out_addr with
in_xlat_addr and out_xlat_addr.

If this is indeed the case, then yeah, there's no need for the
in_xlat_addr and out_xlat_addr members. I'll look into this!

Thanks for your comments Eugenio =]