[v4] virtio: introduce support for GSO over UDP tunnel

[RFC PATCH v4 07/14] virtio-pci: implement support for extended features

Posted by Paolo Abeni 3 months, 3 weeks ago

Extend the features configuration space to 128 bits. If the virtio
device supports any extended features, allow the common read/write
operation to access all of it, otherwise keep exposing only the
lower 64 bits.

On migration, save the 128 bit version of the features only if the
upper bits are non zero. Relay on reset to clear all the feature
space before load.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v3 -> v4:
  - use new virtio_features macro names
  - move the build bug before vmstate_virtio_pci_modern_state_sub

v2 -> v3:
  - drop the pre_load/post_load trickery and relay on reset zeroing
    the features instead.
  - avoid union usage, just increase guest_features size and use
    SUB_ARRAY.
  - drop unneeded '!!'
  - _array -> _ex

v1 -> v2:
  - use separate VMStateDescription and pre/post load to avoid breaking
    migration
  - clear proxy features on device reset
---
 hw/virtio/virtio-pci.c         | 69 +++++++++++++++++++++++++++++-----
 include/hw/virtio/virtio-pci.h |  2 +-
 2 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 767216d795..bcc4d48c2c 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -109,6 +109,29 @@ static const VMStateDescription vmstate_virtio_pci_modern_queue_state = {
     }
 };
 
+static bool virtio_pci_modern_state_features128_needed(void *opaque)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    uint32_t features = 0;
+    int i;
+
+    for (i = 2; i < ARRAY_SIZE(proxy->guest_features); ++i) {
+        features |= proxy->guest_features[i];
+    }
+    return features;
+}
+
+static const VMStateDescription vmstate_virtio_pci_modern_state_features128 = {
+    .name = "virtio_pci/modern_state/features128",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = &virtio_pci_modern_state_features128_needed,
+    .fields = (const VMStateField[]) {
+        VMSTATE_UINT32_SUB_ARRAY(guest_features, VirtIOPCIProxy, 2, 2),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static bool virtio_pci_modern_state_needed(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -116,6 +139,11 @@ static bool virtio_pci_modern_state_needed(void *opaque)
     return virtio_pci_modern(proxy);
 }
 
+/*
+ * Avoid silently breaking migration should the feature space increase
+ * even more in the (far away) future
+ */
+QEMU_BUILD_BUG_ON(VIRTIO_FEATURES_NU32S != 4);
 static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
     .name = "virtio_pci/modern_state",
     .version_id = 1,
@@ -124,11 +152,15 @@ static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
     .fields = (const VMStateField[]) {
         VMSTATE_UINT32(dfselect, VirtIOPCIProxy),
         VMSTATE_UINT32(gfselect, VirtIOPCIProxy),
-        VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2),
+        VMSTATE_UINT32_SUB_ARRAY(guest_features, VirtIOPCIProxy, 0, 2),
         VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0,
                              vmstate_virtio_pci_modern_queue_state,
                              VirtIOPCIQueue),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * const []) {
+        &vmstate_virtio_pci_modern_state_features128,
+        NULL
     }
 };
 
@@ -1477,6 +1509,13 @@ int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
     return virtio_pci_add_mem_cap(proxy, &cap.cap);
 }
 
+static int virtio_pci_select_max(const VirtIODevice *vdev)
+{
+    return virtio_features_use_ex(vdev->host_features_ex) ?
+           VIRTIO_FEATURES_NU32S :
+           2;
+}
+
 static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
                                        unsigned size)
 {
@@ -1494,18 +1533,21 @@ static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
         val = proxy->dfselect;
         break;
     case VIRTIO_PCI_COMMON_DF:
-        if (proxy->dfselect <= 1) {
+        if (proxy->dfselect < virtio_pci_select_max(vdev)) {
             VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
 
-            val = (vdev->host_features & ~vdc->legacy_features) >>
-                (32 * proxy->dfselect);
+            val = vdev->host_features_ex[proxy->dfselect >> 1] >>
+                  (32 * (proxy->dfselect & 1));
+            if (proxy->dfselect <= 1) {
+                val &= (~vdc->legacy_features) >> (32 * proxy->dfselect);
+            }
         }
         break;
     case VIRTIO_PCI_COMMON_GFSELECT:
         val = proxy->gfselect;
         break;
     case VIRTIO_PCI_COMMON_GF:
-        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
+        if (proxy->gfselect < virtio_pci_select_max(vdev)) {
             val = proxy->guest_features[proxy->gfselect];
         }
         break;
@@ -1588,11 +1630,18 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
         proxy->gfselect = val;
         break;
     case VIRTIO_PCI_COMMON_GF:
-        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
+        if (proxy->gfselect < virtio_pci_select_max(vdev)) {
+            uint64_t features[VIRTIO_FEATURES_NU64S];
+            int i;
+
             proxy->guest_features[proxy->gfselect] = val;
-            virtio_set_features(vdev,
-                                (((uint64_t)proxy->guest_features[1]) << 32) |
-                                proxy->guest_features[0]);
+            virtio_features_clear(features);
+            for (i = 0; i < ARRAY_SIZE(proxy->guest_features); ++i) {
+                uint64_t cur = proxy->guest_features[i];
+
+                features[i >> 1] |= cur << ((i & 1) * 32);
+            }
+            virtio_set_features_ex(vdev, features);
         }
         break;
     case VIRTIO_PCI_COMMON_MSIX:
@@ -2311,6 +2360,8 @@ static void virtio_pci_reset(DeviceState *qdev)
     virtio_bus_reset(bus);
     msix_unuse_all_vectors(&proxy->pci_dev);
 
+    memset(proxy->guest_features, 0, sizeof(proxy->guest_features));
+
     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
         proxy->vqs[i].enabled = 0;
         proxy->vqs[i].reset = 0;
diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
index eab5394898..639752977e 100644
--- a/include/hw/virtio/virtio-pci.h
+++ b/include/hw/virtio/virtio-pci.h
@@ -158,7 +158,7 @@ struct VirtIOPCIProxy {
     uint32_t nvectors;
     uint32_t dfselect;
     uint32_t gfselect;
-    uint32_t guest_features[2];
+    uint32_t guest_features[VIRTIO_FEATURES_NU32S];
     VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX];
 
     VirtIOIRQFD *vector_irqfd;
-- 
2.50.0

Re: [RFC PATCH v4 07/14] virtio-pci: implement support for extended features

Posted by Akihiko Odaki 3 months, 3 weeks ago

On 2025/07/24 4:31, Paolo Abeni wrote:
> Extend the features configuration space to 128 bits. If the virtio
> device supports any extended features, allow the common read/write
> operation to access all of it, otherwise keep exposing only the
> lower 64 bits.
> 
> On migration, save the 128 bit version of the features only if the
> upper bits are non zero. Relay on reset to clear all the feature
> space before load.
> 
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> v3 -> v4:
>    - use new virtio_features macro names
>    - move the build bug before vmstate_virtio_pci_modern_state_sub
> 
> v2 -> v3:
>    - drop the pre_load/post_load trickery and relay on reset zeroing
>      the features instead.
>    - avoid union usage, just increase guest_features size and use
>      SUB_ARRAY.
>    - drop unneeded '!!'
>    - _array -> _ex
> 
> v1 -> v2:
>    - use separate VMStateDescription and pre/post load to avoid breaking
>      migration
>    - clear proxy features on device reset
> ---
>   hw/virtio/virtio-pci.c         | 69 +++++++++++++++++++++++++++++-----
>   include/hw/virtio/virtio-pci.h |  2 +-
>   2 files changed, 61 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 767216d795..bcc4d48c2c 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -109,6 +109,29 @@ static const VMStateDescription vmstate_virtio_pci_modern_queue_state = {
>       }
>   };
>   
> +static bool virtio_pci_modern_state_features128_needed(void *opaque)
> +{
> +    VirtIOPCIProxy *proxy = opaque;
> +    uint32_t features = 0;
> +    int i;
> +
> +    for (i = 2; i < ARRAY_SIZE(proxy->guest_features); ++i) {
> +        features |= proxy->guest_features[i];
> +    }
> +    return features;
> +}
> +
> +static const VMStateDescription vmstate_virtio_pci_modern_state_features128 = {
> +    .name = "virtio_pci/modern_state/features128",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = &virtio_pci_modern_state_features128_needed,
> +    .fields = (const VMStateField[]) {
> +        VMSTATE_UINT32_SUB_ARRAY(guest_features, VirtIOPCIProxy, 2, 2),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>   static bool virtio_pci_modern_state_needed(void *opaque)
>   {
>       VirtIOPCIProxy *proxy = opaque;
> @@ -116,6 +139,11 @@ static bool virtio_pci_modern_state_needed(void *opaque)
>       return virtio_pci_modern(proxy);
>   }
>   
> +/*
> + * Avoid silently breaking migration should the feature space increase
> + * even more in the (far away) future
> + */
> +QEMU_BUILD_BUG_ON(VIRTIO_FEATURES_NU32S != 4);

Another nitpick: there is a blank line after QEMU_BUILD_BUG_ON() in 
"[RFC PATCH v4 05/14] virtio: serialize extended features state" but 
this patch doesn't have a corresponding one. Please keep them consistent.

>   static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
>       .name = "virtio_pci/modern_state",
>       .version_id = 1,
> @@ -124,11 +152,15 @@ static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
>       .fields = (const VMStateField[]) {
>           VMSTATE_UINT32(dfselect, VirtIOPCIProxy),
>           VMSTATE_UINT32(gfselect, VirtIOPCIProxy),
> -        VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2),
> +        VMSTATE_UINT32_SUB_ARRAY(guest_features, VirtIOPCIProxy, 0, 2),
>           VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0,
>                                vmstate_virtio_pci_modern_queue_state,
>                                VirtIOPCIQueue),
>           VMSTATE_END_OF_LIST()
> +    },
> +    .subsections = (const VMStateDescription * const []) {
> +        &vmstate_virtio_pci_modern_state_features128,
> +        NULL
>       }
>   };
>   
> @@ -1477,6 +1509,13 @@ int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
>       return virtio_pci_add_mem_cap(proxy, &cap.cap);
>   }
>   
> +static int virtio_pci_select_max(const VirtIODevice *vdev)
> +{
> +    return virtio_features_use_ex(vdev->host_features_ex) ?
> +           VIRTIO_FEATURES_NU32S :
> +           2;

This function could be simplified by replacing VIRTIO_FEATURES_NU32S 
without any functional difference:

1. For writes: virtio_set_features_ex() already ignores extended 
features when !virtio_features_use_ex(vdev->host_features_ex)
2. For reads: When !virtio_features_use_ex(vdev->host_features_ex), the 
upper bits of host_features_ex are zero, and guest_features upper bits 
remain zero (since they can't be set per point 1)

So the conditional logic is redundant here.

> +}
> +
>   static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
>                                          unsigned size)
>   {
> @@ -1494,18 +1533,21 @@ static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
>           val = proxy->dfselect;
>           break;
>       case VIRTIO_PCI_COMMON_DF:
> -        if (proxy->dfselect <= 1) {
> +        if (proxy->dfselect < virtio_pci_select_max(vdev)) {
>               VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
>   
> -            val = (vdev->host_features & ~vdc->legacy_features) >>
> -                (32 * proxy->dfselect);
> +            val = vdev->host_features_ex[proxy->dfselect >> 1] >>
> +                  (32 * (proxy->dfselect & 1));
> +            if (proxy->dfselect <= 1) {
> +                val &= (~vdc->legacy_features) >> (32 * proxy->dfselect);
> +            }
>           }
>           break;
>       case VIRTIO_PCI_COMMON_GFSELECT:
>           val = proxy->gfselect;
>           break;
>       case VIRTIO_PCI_COMMON_GF:
> -        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
> +        if (proxy->gfselect < virtio_pci_select_max(vdev)) {
>               val = proxy->guest_features[proxy->gfselect];
>           }
>           break;
> @@ -1588,11 +1630,18 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
>           proxy->gfselect = val;
>           break;
>       case VIRTIO_PCI_COMMON_GF:
> -        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
> +        if (proxy->gfselect < virtio_pci_select_max(vdev)) {
> +            uint64_t features[VIRTIO_FEATURES_NU64S];
> +            int i;
> +
>               proxy->guest_features[proxy->gfselect] = val;
> -            virtio_set_features(vdev,
> -                                (((uint64_t)proxy->guest_features[1]) << 32) |
> -                                proxy->guest_features[0]);
> +            virtio_features_clear(features);
> +            for (i = 0; i < ARRAY_SIZE(proxy->guest_features); ++i) {
> +                uint64_t cur = proxy->guest_features[i];
> +
> +                features[i >> 1] |= cur << ((i & 1) * 32);
> +            }
> +            virtio_set_features_ex(vdev, features);
>           }
>           break;
>       case VIRTIO_PCI_COMMON_MSIX:
> @@ -2311,6 +2360,8 @@ static void virtio_pci_reset(DeviceState *qdev)
>       virtio_bus_reset(bus);
>       msix_unuse_all_vectors(&proxy->pci_dev);
>   
> +    memset(proxy->guest_features, 0, sizeof(proxy->guest_features));
> +
>       for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
>           proxy->vqs[i].enabled = 0;
>           proxy->vqs[i].reset = 0;
> diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
> index eab5394898..639752977e 100644
> --- a/include/hw/virtio/virtio-pci.h
> +++ b/include/hw/virtio/virtio-pci.h
> @@ -158,7 +158,7 @@ struct VirtIOPCIProxy {
>       uint32_t nvectors;
>       uint32_t dfselect;
>       uint32_t gfselect;
> -    uint32_t guest_features[2];
> +    uint32_t guest_features[VIRTIO_FEATURES_NU32S];
>       VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX];
>   
>       VirtIOIRQFD *vector_irqfd;

Re: [RFC PATCH v4 07/14] virtio-pci: implement support for extended features

Posted by Paolo Abeni 3 months, 1 week ago

On 7/26/25 1:52 PM, Akihiko Odaki wrote:
> On 2025/07/24 4:31, Paolo Abeni wrote:
>> @@ -1477,6 +1509,13 @@ int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
>>       return virtio_pci_add_mem_cap(proxy, &cap.cap);
>>   }
>>   
>> +static int virtio_pci_select_max(const VirtIODevice *vdev)
>> +{
>> +    return virtio_features_use_ex(vdev->host_features_ex) ?
>> +           VIRTIO_FEATURES_NU32S :
>> +           2;
> 
> This function could be simplified by replacing VIRTIO_FEATURES_NU32S 
> without any functional difference:
> 
> 1. For writes: virtio_set_features_ex() already ignores extended 
> features when !virtio_features_use_ex(vdev->host_features_ex)
> 2. For reads: When !virtio_features_use_ex(vdev->host_features_ex), the 
> upper bits of host_features_ex are zero, and guest_features upper bits 
> remain zero (since they can't be set per point 1)
> 
> So the conditional logic is redundant here.

This is to satisfy a request from Jason:

https://lists.gnu.org/archive/html/qemu-devel/2025-07/msg05291.html
https://lists.gnu.org/archive/html/qemu-devel/2025-07/msg05423.html

I agree there will not be functional differences always accessing the
full space, but the guest could still be able to notice, i.e. the
extended space will be zeroed on read with that patched qemu and
untouched by the current code and this patch. To be on the safe side I
think it would be better to avoid such difference, as suggested by Jason.

Does the above make sense to you?

Thanks,

Paolo

Re: [RFC PATCH v4 07/14] virtio-pci: implement support for extended features

Posted by Akihiko Odaki 3 months, 1 week ago

On 2025/08/08 5:18, Paolo Abeni wrote:
> On 7/26/25 1:52 PM, Akihiko Odaki wrote:
>> On 2025/07/24 4:31, Paolo Abeni wrote:
>>> @@ -1477,6 +1509,13 @@ int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
>>>        return virtio_pci_add_mem_cap(proxy, &cap.cap);
>>>    }
>>>    
>>> +static int virtio_pci_select_max(const VirtIODevice *vdev)
>>> +{
>>> +    return virtio_features_use_ex(vdev->host_features_ex) ?
>>> +           VIRTIO_FEATURES_NU32S :
>>> +           2;
>>
>> This function could be simplified by replacing VIRTIO_FEATURES_NU32S
>> without any functional difference:
>>
>> 1. For writes: virtio_set_features_ex() already ignores extended
>> features when !virtio_features_use_ex(vdev->host_features_ex)
>> 2. For reads: When !virtio_features_use_ex(vdev->host_features_ex), the
>> upper bits of host_features_ex are zero, and guest_features upper bits
>> remain zero (since they can't be set per point 1)
>>
>> So the conditional logic is redundant here.
> 
> This is to satisfy a request from Jason:
> 
> https://lists.gnu.org/archive/html/qemu-devel/2025-07/msg05291.html
> https://lists.gnu.org/archive/html/qemu-devel/2025-07/msg05423.html
> 
> I agree there will not be functional differences always accessing the
> full space, but the guest could still be able to notice, i.e. the
> extended space will be zeroed on read with that patched qemu and
> untouched by the current code and this patch. To be on the safe side I
> think it would be better to avoid such difference, as suggested by Jason.
> 
> Does the above make sense to you?

By functional, I meant the functionality of QEMU, visible to the guest, 
rather than the whole system including the guest, visible to the end 
user. The guest cannot notice the difference because the extended space 
is zero on read even without the conditional, which is described as 
point 2 in the previous email.

 > 2. For reads: When !virtio_features_use_ex(vdev->host_features_ex),
 > the upper bits of host_features_ex are zero, and guest_features upper
 > bits remain zero (since they can't be set per point 1)

Regards,
Akihiko Odaki

Re: [RFC PATCH v4 07/14] virtio-pci: implement support for extended features

Posted by Jason Wang 3 months ago

On Fri, Aug 8, 2025 at 12:55 PM Akihiko Odaki
<odaki@rsg.ci.i.u-tokyo.ac.jp> wrote:
>
> On 2025/08/08 5:18, Paolo Abeni wrote:
> > On 7/26/25 1:52 PM, Akihiko Odaki wrote:
> >> On 2025/07/24 4:31, Paolo Abeni wrote:
> >>> @@ -1477,6 +1509,13 @@ int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
> >>>        return virtio_pci_add_mem_cap(proxy, &cap.cap);
> >>>    }
> >>>
> >>> +static int virtio_pci_select_max(const VirtIODevice *vdev)
> >>> +{
> >>> +    return virtio_features_use_ex(vdev->host_features_ex) ?
> >>> +           VIRTIO_FEATURES_NU32S :
> >>> +           2;
> >>
> >> This function could be simplified by replacing VIRTIO_FEATURES_NU32S
> >> without any functional difference:

Did you mean using VIRTIO_FEATURES_NU32S instead?

> >>
> >> 1. For writes: virtio_set_features_ex() already ignores extended
> >> features when !virtio_features_use_ex(vdev->host_features_ex)
> >> 2. For reads: When !virtio_features_use_ex(vdev->host_features_ex), the
> >> upper bits of host_features_ex are zero, and guest_features upper bits
> >> remain zero (since they can't be set per point 1)

I think it depends on the compatibility work which hasn't been done in
this series.

> >>
> >> So the conditional logic is redundant here.

See below

> >
> > This is to satisfy a request from Jason:
> >
> > https://lists.gnu.org/archive/html/qemu-devel/2025-07/msg05291.html
> > https://lists.gnu.org/archive/html/qemu-devel/2025-07/msg05423.html
> >
> > I agree there will not be functional differences always accessing the
> > full space, but the guest could still be able to notice, i.e. the
> > extended space will be zeroed on read with that patched qemu and
> > untouched by the current code and this patch. To be on the safe side I
> > think it would be better to avoid such difference, as suggested by Jason.
> >
> > Does the above make sense to you?
>
> By functional, I meant the functionality of QEMU, visible to the guest,
> rather than the whole system including the guest, visible to the end
> user. The guest cannot notice the difference because the extended space
> is zero on read even without the conditional, which is described as
> point 2 in the previous email.

I'm not sure I understand this correctly. But it doesn't harm here consider:

1) it's the entry point from the guest, checking and failing early is
better than depending on the low layer functions
2) we have checks in several layers (both virtio-pci and virtio core).

And it looks like a must for at least GF:

    case VIRTIO_PCI_COMMON_GF:
        if (proxy->gfselect < virtio_pci_select_max(vdev)) {
            uint64_t features[VIRTIO_FEATURES_NU64S];
            int i;

            proxy->guest_features[proxy->gfselect] = val;

Thanks

>
>  > 2. For reads: When !virtio_features_use_ex(vdev->host_features_ex),
>  > the upper bits of host_features_ex are zero, and guest_features upper
>  > bits remain zero (since they can't be set per point 1)
>
> Regards,
> Akihiko Odaki
>

Re: [RFC PATCH v4 07/14] virtio-pci: implement support for extended features

Posted by Akihiko Odaki 3 months ago

On 2025/08/12 13:01, Jason Wang wrote:
> On Fri, Aug 8, 2025 at 12:55 PM Akihiko Odaki
> <odaki@rsg.ci.i.u-tokyo.ac.jp> wrote:
>>
>> On 2025/08/08 5:18, Paolo Abeni wrote:
>>> On 7/26/25 1:52 PM, Akihiko Odaki wrote:
>>>> On 2025/07/24 4:31, Paolo Abeni wrote:
>>>>> @@ -1477,6 +1509,13 @@ int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
>>>>>         return virtio_pci_add_mem_cap(proxy, &cap.cap);
>>>>>     }
>>>>>
>>>>> +static int virtio_pci_select_max(const VirtIODevice *vdev)
>>>>> +{
>>>>> +    return virtio_features_use_ex(vdev->host_features_ex) ?
>>>>> +           VIRTIO_FEATURES_NU32S :
>>>>> +           2;
>>>>
>>>> This function could be simplified by replacing VIRTIO_FEATURES_NU32S
>>>> without any functional difference:
> 
> Did you mean using VIRTIO_FEATURES_NU32S instead?
> 
>>>>
>>>> 1. For writes: virtio_set_features_ex() already ignores extended
>>>> features when !virtio_features_use_ex(vdev->host_features_ex)
>>>> 2. For reads: When !virtio_features_use_ex(vdev->host_features_ex), the
>>>> upper bits of host_features_ex are zero, and guest_features upper bits
>>>> remain zero (since they can't be set per point 1)
> 
> I think it depends on the compatibility work which hasn't been done in
> this series.
> 
>>>>
>>>> So the conditional logic is redundant here.
> 
> See below
> 
>>>
>>> This is to satisfy a request from Jason:
>>>
>>> https://lists.gnu.org/archive/html/qemu-devel/2025-07/msg05291.html
>>> https://lists.gnu.org/archive/html/qemu-devel/2025-07/msg05423.html
>>>
>>> I agree there will not be functional differences always accessing the
>>> full space, but the guest could still be able to notice, i.e. the
>>> extended space will be zeroed on read with that patched qemu and
>>> untouched by the current code and this patch. To be on the safe side I
>>> think it would be better to avoid such difference, as suggested by Jason.
>>>
>>> Does the above make sense to you?
>>
>> By functional, I meant the functionality of QEMU, visible to the guest,
>> rather than the whole system including the guest, visible to the end
>> user. The guest cannot notice the difference because the extended space
>> is zero on read even without the conditional, which is described as
>> point 2 in the previous email.
> 
> I'm not sure I understand this correctly. But it doesn't harm here consider:
> 
> 1) it's the entry point from the guest, checking and failing early is
> better than depending on the low layer functions
> 2) we have checks in several layers (both virtio-pci and virtio core).
> 
> And it looks like a must for at least GF:
> 
>      case VIRTIO_PCI_COMMON_GF:
>          if (proxy->gfselect < virtio_pci_select_max(vdev)) {
>              uint64_t features[VIRTIO_FEATURES_NU64S];
>              int i;
> 
>              proxy->guest_features[proxy->gfselect] = val;

I missed proxy->guest_features. Indeed it makes a difference.

Now I have another concern with virtio_pci_select_max(). If the feature 
set grows again, virtio_pci_select_max() will return the grown size for 
the devices that have [127:64] bits, which will be a breaking change. In 
this sense, VIRTIO_FEATURES_NU32S, which will grow along with the 
feature set, is not appropriate here.

Hardcoding 4 ensures such a breaking change will not happen at least.

Perhaps QEMU_BUILD_BUG_ON() may be placed here to tell it needs to be 
updated when the feature set grows, but I don't require that since there 
is QEMU_BUILD_BUG_ON() before vmstate_virtio_pci_modern_state_sub. It 
may be a good idea to move this function somewhere close to 
QEMU_BUILD_BUG_ON().

Alternatively the function may compute the value to return by finding 
the last non-zero element in vdev->host_features_ex and multiplying by 
2. It will probably work even if the feature set grows again in the future.

Regards,
Akihiko Odaki

Re: [RFC PATCH v4 07/14] virtio-pci: implement support for extended features

Posted by Jason Wang 3 months ago

On Tue, Aug 12, 2025 at 4:03 PM Akihiko Odaki
<odaki@rsg.ci.i.u-tokyo.ac.jp> wrote:
>
> On 2025/08/12 13:01, Jason Wang wrote:
> > On Fri, Aug 8, 2025 at 12:55 PM Akihiko Odaki
> > <odaki@rsg.ci.i.u-tokyo.ac.jp> wrote:
> >>
> >> On 2025/08/08 5:18, Paolo Abeni wrote:
> >>> On 7/26/25 1:52 PM, Akihiko Odaki wrote:
> >>>> On 2025/07/24 4:31, Paolo Abeni wrote:
> >>>>> @@ -1477,6 +1509,13 @@ int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
> >>>>>         return virtio_pci_add_mem_cap(proxy, &cap.cap);
> >>>>>     }
> >>>>>
> >>>>> +static int virtio_pci_select_max(const VirtIODevice *vdev)
> >>>>> +{
> >>>>> +    return virtio_features_use_ex(vdev->host_features_ex) ?
> >>>>> +           VIRTIO_FEATURES_NU32S :
> >>>>> +           2;
> >>>>
> >>>> This function could be simplified by replacing VIRTIO_FEATURES_NU32S
> >>>> without any functional difference:
> >
> > Did you mean using VIRTIO_FEATURES_NU32S instead?
> >
> >>>>
> >>>> 1. For writes: virtio_set_features_ex() already ignores extended
> >>>> features when !virtio_features_use_ex(vdev->host_features_ex)
> >>>> 2. For reads: When !virtio_features_use_ex(vdev->host_features_ex), the
> >>>> upper bits of host_features_ex are zero, and guest_features upper bits
> >>>> remain zero (since they can't be set per point 1)
> >
> > I think it depends on the compatibility work which hasn't been done in
> > this series.
> >
> >>>>
> >>>> So the conditional logic is redundant here.
> >
> > See below
> >
> >>>
> >>> This is to satisfy a request from Jason:
> >>>
> >>> https://lists.gnu.org/archive/html/qemu-devel/2025-07/msg05291.html
> >>> https://lists.gnu.org/archive/html/qemu-devel/2025-07/msg05423.html
> >>>
> >>> I agree there will not be functional differences always accessing the
> >>> full space, but the guest could still be able to notice, i.e. the
> >>> extended space will be zeroed on read with that patched qemu and
> >>> untouched by the current code and this patch. To be on the safe side I
> >>> think it would be better to avoid such difference, as suggested by Jason.
> >>>
> >>> Does the above make sense to you?
> >>
> >> By functional, I meant the functionality of QEMU, visible to the guest,
> >> rather than the whole system including the guest, visible to the end
> >> user. The guest cannot notice the difference because the extended space
> >> is zero on read even without the conditional, which is described as
> >> point 2 in the previous email.
> >
> > I'm not sure I understand this correctly. But it doesn't harm here consider:
> >
> > 1) it's the entry point from the guest, checking and failing early is
> > better than depending on the low layer functions
> > 2) we have checks in several layers (both virtio-pci and virtio core).
> >
> > And it looks like a must for at least GF:
> >
> >      case VIRTIO_PCI_COMMON_GF:
> >          if (proxy->gfselect < virtio_pci_select_max(vdev)) {
> >              uint64_t features[VIRTIO_FEATURES_NU64S];
> >              int i;
> >
> >              proxy->guest_features[proxy->gfselect] = val;
>
> I missed proxy->guest_features. Indeed it makes a difference.
>
> Now I have another concern with virtio_pci_select_max(). If the feature
> set grows again, virtio_pci_select_max() will return the grown size for
> the devices that have [127:64] bits, which will be a breaking change. In
> this sense, VIRTIO_FEATURES_NU32S, which will grow along with the
> feature set, is not appropriate here.

Things will be fine if we do the compatibility work correctly, so host
"host_features_ex_ex" won't contain anything for the legacy machine
types.

But I'm not sure it's worth worrying about it now considering we take
years to reach 64.

>
> Hardcoding 4 ensures such a breaking change will not happen at least.
>
> Perhaps QEMU_BUILD_BUG_ON() may be placed here to tell it needs to be
> updated when the feature set grows, but I don't require that since there
> is QEMU_BUILD_BUG_ON() before vmstate_virtio_pci_modern_state_sub. It
> may be a good idea to move this function somewhere close to
> QEMU_BUILD_BUG_ON().
>
> Alternatively the function may compute the value to return by finding
> the last non-zero element in vdev->host_features_ex and multiplying by
> 2. It will probably work even if the feature set grows again in the future.
>
> Regards,
> Akihiko Odaki

Thanks

>

Re: [RFC PATCH v4 07/14] virtio-pci: implement support for extended features

Posted by Paolo Abeni 3 months ago

On 8/13/25 7:55 AM, Jason Wang wrote:
> On Tue, Aug 12, 2025 at 4:03 PM Akihiko Odaki
> <odaki@rsg.ci.i.u-tokyo.ac.jp> wrote:
>>
>> On 2025/08/12 13:01, Jason Wang wrote:
>>> On Fri, Aug 8, 2025 at 12:55 PM Akihiko Odaki
>>> <odaki@rsg.ci.i.u-tokyo.ac.jp> wrote:
>>>>
>>>> On 2025/08/08 5:18, Paolo Abeni wrote:
>>>>> On 7/26/25 1:52 PM, Akihiko Odaki wrote:
>>>>>> On 2025/07/24 4:31, Paolo Abeni wrote:
>>>>>>> @@ -1477,6 +1509,13 @@ int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
>>>>>>>         return virtio_pci_add_mem_cap(proxy, &cap.cap);
>>>>>>>     }
>>>>>>>
>>>>>>> +static int virtio_pci_select_max(const VirtIODevice *vdev)
>>>>>>> +{
>>>>>>> +    return virtio_features_use_ex(vdev->host_features_ex) ?
>>>>>>> +           VIRTIO_FEATURES_NU32S :
>>>>>>> +           2;
>>>>>>
>>>>>> This function could be simplified by replacing VIRTIO_FEATURES_NU32S
>>>>>> without any functional difference:
>>>
>>> Did you mean using VIRTIO_FEATURES_NU32S instead?
>>>
>>>>>>
>>>>>> 1. For writes: virtio_set_features_ex() already ignores extended
>>>>>> features when !virtio_features_use_ex(vdev->host_features_ex)
>>>>>> 2. For reads: When !virtio_features_use_ex(vdev->host_features_ex), the
>>>>>> upper bits of host_features_ex are zero, and guest_features upper bits
>>>>>> remain zero (since they can't be set per point 1)
>>>
>>> I think it depends on the compatibility work which hasn't been done in
>>> this series.
>>>
>>>>>>
>>>>>> So the conditional logic is redundant here.
>>>
>>> See below
>>>
>>>>>
>>>>> This is to satisfy a request from Jason:
>>>>>
>>>>> https://lists.gnu.org/archive/html/qemu-devel/2025-07/msg05291.html
>>>>> https://lists.gnu.org/archive/html/qemu-devel/2025-07/msg05423.html
>>>>>
>>>>> I agree there will not be functional differences always accessing the
>>>>> full space, but the guest could still be able to notice, i.e. the
>>>>> extended space will be zeroed on read with that patched qemu and
>>>>> untouched by the current code and this patch. To be on the safe side I
>>>>> think it would be better to avoid such difference, as suggested by Jason.
>>>>>
>>>>> Does the above make sense to you?
>>>>
>>>> By functional, I meant the functionality of QEMU, visible to the guest,
>>>> rather than the whole system including the guest, visible to the end
>>>> user. The guest cannot notice the difference because the extended space
>>>> is zero on read even without the conditional, which is described as
>>>> point 2 in the previous email.
>>>
>>> I'm not sure I understand this correctly. But it doesn't harm here consider:
>>>
>>> 1) it's the entry point from the guest, checking and failing early is
>>> better than depending on the low layer functions
>>> 2) we have checks in several layers (both virtio-pci and virtio core).
>>>
>>> And it looks like a must for at least GF:
>>>
>>>      case VIRTIO_PCI_COMMON_GF:
>>>          if (proxy->gfselect < virtio_pci_select_max(vdev)) {
>>>              uint64_t features[VIRTIO_FEATURES_NU64S];
>>>              int i;
>>>
>>>              proxy->guest_features[proxy->gfselect] = val;
>>
>> I missed proxy->guest_features. Indeed it makes a difference.
>>
>> Now I have another concern with virtio_pci_select_max(). If the feature
>> set grows again, virtio_pci_select_max() will return the grown size for
>> the devices that have [127:64] bits, which will be a breaking change. In
>> this sense, VIRTIO_FEATURES_NU32S, which will grow along with the
>> feature set, is not appropriate here.
> 
> Things will be fine if we do the compatibility work correctly, so host
> "host_features_ex_ex" won't contain anything for the legacy machine
> types.
> 
> But I'm not sure it's worth worrying about it now considering we take
> years to reach 64.

FTR, I gave a shot to the last option mentioned by Akihiko - make
virtio_pci_select_max() future proof - and the implementation complexity
is almost the same of the current one, so I guess I'll opt for such a thing.

Cheers,

Paolo