[RFC PATCH v11bis 00/26] Emulated XenStore and PV backend support

David Woodhouse posted 26 patches 1 year, 2 months ago
Failed in applying to current master (apply log)
accel/xen/xen-all.c                           |   69 +-
hw/9pfs/meson.build                           |    2 +-
hw/9pfs/xen-9p-backend.c                      |   32 +-
hw/block/dataplane/meson.build                |    2 +-
hw/block/dataplane/xen-block.c                |   12 +-
hw/block/meson.build                          |    2 +-
hw/block/xen-block.c                          |   12 +-
hw/char/meson.build                           |    2 +-
hw/char/xen_console.c                         |   57 +-
hw/display/meson.build                        |    2 +-
hw/display/xenfb.c                            |   32 +-
hw/i386/kvm/meson.build                       |    1 +
hw/i386/kvm/trace-events                      |   15 +
hw/i386/kvm/xen_evtchn.c                      |   15 +
hw/i386/kvm/xen_gnttab.c                      |  320 ++++-
hw/i386/kvm/xen_gnttab.h                      |    1 +
hw/i386/kvm/xen_xenstore.c                    | 1248 +++++++++++++++++-
hw/i386/kvm/xenstore_impl.c                   | 1754 +++++++++++++++++++++++++
hw/i386/kvm/xenstore_impl.h                   |   63 +
hw/i386/pc.c                                  |    7 +
hw/i386/pc_piix.c                             |    4 +-
hw/i386/xen/xen-hvm.c                         |    8 +-
hw/i386/xen/xen_platform.c                    |    7 +-
hw/net/xen_nic.c                              |   25 +-
hw/usb/meson.build                            |    2 +-
hw/usb/xen-usb.c                              |   29 +-
hw/xen/meson.build                            |    6 +-
hw/xen/trace-events                           |    2 +-
hw/xen/xen-bus-helper.c                       |   61 +-
hw/xen/xen-bus.c                              |  408 +-----
hw/xen/xen-hvm-common.c                       |   45 +-
hw/xen/xen-legacy-backend.c                   |  307 ++---
hw/xen/xen-mapcache.c                         |    2 +-
hw/xen/xen-operations.c                       |  487 +++++++
hw/xen/xen_devconfig.c                        |    4 +-
hw/xen/xen_pt.c                               |    2 +-
hw/xen/xen_pt.h                               |    2 +-
hw/xen/xen_pt_config_init.c                   |    2 +-
hw/xen/xen_pt_graphics.c                      |    1 -
hw/xen/xen_pt_msi.c                           |    4 +-
hw/xen/xen_pvdev.c                            |   63 +-
hw/xenpv/xen_machine_pv.c                     |    6 +-
include/hw/xen/xen-bus-helper.h               |   25 +-
include/hw/xen/xen-bus.h                      |   21 +-
include/hw/xen/xen-hvm-common.h               |    3 +-
include/hw/xen/xen-legacy-backend.h           |   27 +-
include/hw/xen/xen.h                          |   24 +-
include/hw/xen/xen_backend_ops.h              |  399 ++++++
include/hw/xen/{xen_common.h => xen_native.h} |   75 +-
include/hw/xen/xen_pvdev.h                    |    6 +-
softmmu/globals.c                             |    4 +
target/i386/kvm/xen-emu.c                     |    5 +
tests/unit/meson.build                        |    1 +
tests/unit/test-xs-node.c                     |  717 ++++++++++
54 files changed, 5454 insertions(+), 978 deletions(-)
[RFC PATCH v11bis 00/26] Emulated XenStore and PV backend support
Posted by David Woodhouse 1 year, 2 months ago
The non-RFC patch submission¹ is just the basic platform support for Xen
on KVM. This RFC series is phase 2, adding an internal XenStore and
hooking up the PV back end drivers to that and the emulated grant tables
and event channels.

With this, we can boot a Xen guest with PV disk, under KVM. Full support
for migration isn't there yet because it's actually missing in the PV
back end drivers in the first place (perhaps because upstream Xen doesn't
yet have guest transparent live migration support anyway). I'm assuming
that when the first round is merged and we drop the [RFC] from this set,
that won't be a showstopper for now?

I'd be particularly interested in opinions on the way I implemented
serialization for the XenStore, by creating a GByteArray and then dumping
it with VMSTATE_VARRAY_UINT32_ALLOC().

I may eventually attempt to make xs_node_walk() not actually recursive,
if I can do so without my brain exploding. Since the first time the
XenStore code was showed, I've stopped the XsNode from being an actual
Object; that just made it larger for almost no benefit. 

¹ https://lore.kernel.org/qemu-devel/20230216062444.2129371-1-dwmw2@infradead.org/

David Woodhouse (22):
      hw/xen: Add xenstore wire implementation and implementation stubs
      hw/xen: Add basic XenStore tree walk and write/read/directory support
      hw/xen: Implement XenStore watches
      hw/xen: Implement XenStore transactions
      hw/xen: Watches on XenStore transactions
      hw/xen: Implement core serialize/deserialize methods for xenstore_impl
      hw/xen: Add evtchn operations to allow redirection to internal emulation
      hw/xen: Add gnttab operations to allow redirection to internal emulation
      hw/xen: Pass grant ref to gnttab unmap operation
      hw/xen: Add foreignmem operations to allow redirection to internal emulation
      hw/xen: Move xenstore_store_pv_console_info to xen_console.c
      hw/xen: Use XEN_PAGE_SIZE in PV backend drivers
      hw/xen: Rename xen_common.h to xen_native.h
      hw/xen: Build PV backend drivers for CONFIG_XEN_BUS
      hw/xen: Only advertise ring-page-order for xen-block if gnttab supports it
      hw/xen: Hook up emulated implementation for event channel operations
      hw/xen: Add emulated implementation of grant table operations
      hw/xen: Add emulated implementation of XenStore operations
      hw/xen: Map guest XENSTORE_PFN grant in emulated Xenstore
      hw/xen: Implement soft reset for emulated gnttab
      hw/xen: Subsume xen_be_register_common() into xen_be_init()
      i386/xen: Initialize Xen backends from pc_basic_device_init() for emulation

Paul Durrant (4):
      xenstore perms WIP
      hw/xen: Create initial XenStore nodes
      hw/xen: Add xenstore operations to allow redirection to internal emulation
      hw/xen: Avoid crash when backend watch fires too early

 accel/xen/xen-all.c                           |   69 +-
 hw/9pfs/meson.build                           |    2 +-
 hw/9pfs/xen-9p-backend.c                      |   32 +-
 hw/block/dataplane/meson.build                |    2 +-
 hw/block/dataplane/xen-block.c                |   12 +-
 hw/block/meson.build                          |    2 +-
 hw/block/xen-block.c                          |   12 +-
 hw/char/meson.build                           |    2 +-
 hw/char/xen_console.c                         |   57 +-
 hw/display/meson.build                        |    2 +-
 hw/display/xenfb.c                            |   32 +-
 hw/i386/kvm/meson.build                       |    1 +
 hw/i386/kvm/trace-events                      |   15 +
 hw/i386/kvm/xen_evtchn.c                      |   15 +
 hw/i386/kvm/xen_gnttab.c                      |  320 ++++-
 hw/i386/kvm/xen_gnttab.h                      |    1 +
 hw/i386/kvm/xen_xenstore.c                    | 1248 +++++++++++++++++-
 hw/i386/kvm/xenstore_impl.c                   | 1754 +++++++++++++++++++++++++
 hw/i386/kvm/xenstore_impl.h                   |   63 +
 hw/i386/pc.c                                  |    7 +
 hw/i386/pc_piix.c                             |    4 +-
 hw/i386/xen/xen-hvm.c                         |    8 +-
 hw/i386/xen/xen_platform.c                    |    7 +-
 hw/net/xen_nic.c                              |   25 +-
 hw/usb/meson.build                            |    2 +-
 hw/usb/xen-usb.c                              |   29 +-
 hw/xen/meson.build                            |    6 +-
 hw/xen/trace-events                           |    2 +-
 hw/xen/xen-bus-helper.c                       |   61 +-
 hw/xen/xen-bus.c                              |  408 +-----
 hw/xen/xen-hvm-common.c                       |   45 +-
 hw/xen/xen-legacy-backend.c                   |  307 ++---
 hw/xen/xen-mapcache.c                         |    2 +-
 hw/xen/xen-operations.c                       |  487 +++++++
 hw/xen/xen_devconfig.c                        |    4 +-
 hw/xen/xen_pt.c                               |    2 +-
 hw/xen/xen_pt.h                               |    2 +-
 hw/xen/xen_pt_config_init.c                   |    2 +-
 hw/xen/xen_pt_graphics.c                      |    1 -
 hw/xen/xen_pt_msi.c                           |    4 +-
 hw/xen/xen_pvdev.c                            |   63 +-
 hw/xenpv/xen_machine_pv.c                     |    6 +-
 include/hw/xen/xen-bus-helper.h               |   25 +-
 include/hw/xen/xen-bus.h                      |   21 +-
 include/hw/xen/xen-hvm-common.h               |    3 +-
 include/hw/xen/xen-legacy-backend.h           |   27 +-
 include/hw/xen/xen.h                          |   24 +-
 include/hw/xen/xen_backend_ops.h              |  399 ++++++
 include/hw/xen/{xen_common.h => xen_native.h} |   75 +-
 include/hw/xen/xen_pvdev.h                    |    6 +-
 softmmu/globals.c                             |    4 +
 target/i386/kvm/xen-emu.c                     |    5 +
 tests/unit/meson.build                        |    1 +
 tests/unit/test-xs-node.c                     |  717 ++++++++++
 54 files changed, 5454 insertions(+), 978 deletions(-)



Re: [RFC PATCH v11bis 00/26] Emulated XenStore and PV backend support
Posted by Juan Quintela 1 year, 2 months ago
David Woodhouse <dwmw2@infradead.org> wrote:
> The non-RFC patch submisson¹ is just the basic platform support for Xen
> on KVM. This RFC series is phase 2, adding an internal XenStore and
> hooking up the PV back end drivers to that and the emulated grant tables
> and event channels.
>
> With this, we can boot a Xen guest with PV disk, under KVM. Full support
> for migration isn't there yet because it's actually missing in the PV
> back end drivers in the first place (perhaps because upstream Xen doesn't
> yet have guest transparent live migration support anyway). I'm assuming
> that when the first round is merged and we drop the [RFC] from this set,
> that won't be a showstopper for now?
>
> I'd be particularly interested in opinions on the way I implemented
> serialization for the XenStore, by creating a GByteArray and then dumping
> it with VMSTATE_VARRAY_UINT32_ALLOC().

And I was wondering why I was CC'd in the whole series O:-)

How big is the xenstore?
I mean typical size and maximum size.

If it is sufficiently small (i.e. in the single unit megabytes), you can
send it as a normal device at the end of migration.

If it is bigger, I think that you are going to have to enter Migration
iteration stage, and have some kind of dirty bitmap to know what entries
are on the target and what not.

As examples, we are going to discuss how to migrate Vhost-user-fs in the
near future, and as far as I know that is something similar to the
Xenstore (from 10000 feet view, both are a (key, value) store, no?).

We are also starting discussions about how to migrate vfio and
(not yet started) vhost/vdpa devices, so you can get some "inspiration"
from there if you are going the "opaque" route.

Later, Juan.
Re: [RFC PATCH v11bis 00/26] Emulated XenStore and PV backend support
Posted by David Woodhouse 1 year, 2 months ago
On Thu, 2023-02-16 at 11:49 +0100, Juan Quintela wrote:
> David Woodhouse <dwmw2@infradead.org> wrote:
> > The non-RFC patch submisson¹ is just the basic platform support for Xen
> > on KVM. This RFC series is phase 2, adding an internal XenStore and
> > hooking up the PV back end drivers to that and the emulated grant tables
> > and event channels.
> > 
> > With this, we can boot a Xen guest with PV disk, under KVM. Full support
> > for migration isn't there yet because it's actually missing in the PV
> > back end drivers in the first place (perhaps because upstream Xen doesn't
> > yet have guest transparent live migration support anyway). I'm assuming
> > that when the first round is merged and we drop the [RFC] from this set,
> > that won't be a showstopper for now?
> > 
> > I'd be particularly interested in opinions on the way I implemented
> > serialization for the XenStore, by creating a GByteArray and then dumping
> > it with VMSTATE_VARRAY_UINT32_ALLOC().
> 
> And I was wondering why I was CC'd in the whole series O:-)
> 

Indeed, Philippe M-D added you to Cc when discussing migrations on the
first RFC submission back in December, and you've been included ever
since.


> How big is the xenstore?
> I mean typical size and maximun size.
> 

Booting a simple instance with a single disk:

$ scripts/analyze-migration.py -f foo | grep impl_state_size
        "impl_state_size": "0x00000634",

Theoretical maximum is about 1000 nodes @2KiB, so around 2MiB.

> If it is suficientely small (i.e. in the single unit megabytes), you can
> send it as a normal device at the end of migration.
> 

Right now it's part of the xen_xenstore device. Most of that is fairly
simple and it's just the impl_state that's slightly different.


> If it is bigger, I think that you are going to have to enter Migration
> iteration stage, and have some kind of dirty bitmap to know what entries
> are on the target and what not.
> 

We have COW and transactions; that isn't an impossibility; I think we
can avoid that complexity though.

Re: [RFC PATCH v11bis 00/26] Emulated XenStore and PV backend support
Posted by Juan Quintela 1 year, 2 months ago
David Woodhouse <dwmw2@infradead.org> wrote:
> --=-jDk4SYxkcOAZoZa6DCVr
> Content-Type: text/plain; charset="UTF-8"
> Content-Transfer-Encoding: quoted-printable
>
> On Thu, 2023-02-16 at 11:49 +0100, Juan Quintela wrote:
>> David Woodhouse <dwmw2@infradead.org> wrote:
>> > The non-RFC patch submisson=C2=B9 is just the basic platform support fo=
> r Xen
>> > on KVM. This RFC series is phase 2, adding an internal XenStore and
>> > hooking up the PV back end drivers to that and the emulated grant table=
> s
>> > and event channels.
>> >=20
>> > With this, we can boot a Xen guest with PV disk, under KVM. Full suppor=
> t
>> > for migration isn't there yet because it's actually missing in the PV
>> > back end drivers in the first place (perhaps because upstream Xen doesn=
> 't
>> > yet have guest transparent live migration support anyway). I'm assuming
>> > that when the first round is merged and we drop the [RFC] from this set=
> ,
>> > that won't be a showstopper for now?
>> >=20
>> > I'd be particularly interested in opinions on the way I implemented
>> > serialization for the XenStore, by creating a GByteArray and then dumpi=
> ng
>> > it with VMSTATE_VARRAY_UINT32_ALLOC().
>>=20
>> And I was wondering why I was CC'd in the whole series O:-)
>>=20
>
> Indeed, Philippe M-D added you to Cc when discussing migrations on the
> first RFC submission back in December, and you've been included ever
> since.
>
>
>> How big is the xenstore?
>> I mean typical size and maximun size.
>>=20
>
> Booting a simple instance with a single disk:
>
> $ scripts/analyze-migration.py -f foo | grep impl_state_size
>         "impl_state_size": "0x00000634",
>
> Theoretical maximum is about 1000 nodes @2KiB, so around 2MiB.
>
>> If it is suficientely small (i.e. in the single unit megabytes), you can
>> send it as a normal device at the end of migration.
>>=20
>
> Right now it's part of the xen_xenstore device. Most of that is fairly
> simple and it's just the impl_state that's slightly different.
>
>
>> If it is bigger, I think that you are going to have to enter Migration
>> iteration stage, and have some kind of dirty bitmap to know what entries
>> are on the target and what not.
>>=20
>
> We have COW and transactions; that isn't an impossibility; I think we
> can avoid that complexity though.

It is relatively small.  I will go with migrating at the end of
migration for now.  Later we can measure if we need to improve
performance there.

Later, Juan.
Re: [RFC PATCH v11bis 00/26] Emulated XenStore and PV backend support
Posted by David Woodhouse 1 year, 2 months ago
On Thu, 2023-02-16 at 15:02 +0100, Juan Quintela wrote:
> David Woodhouse <dwmw2@infradead.org> wrote:
> > --=-jDk4SYxkcOAZoZa6DCVr
> > Content-Type: text/plain; charset="UTF-8"
> > Content-Transfer-Encoding: quoted-printable
> > 
> > On Thu, 2023-02-16 at 11:49 +0100, Juan Quintela wrote:
> > > David Woodhouse <dwmw2@infradead.org> wrote:
> > > > The non-RFC patch submisson=C2=B9 is just the basic platform support fo=
> > r Xen
> > > > on KVM. This RFC series is phase 2, adding an internal XenStore and
> > > > hooking up the PV back end drivers to that and the emulated grant table=
> > s
> > > > and event channels.
> > > > =20
> > > > With this, we can boot a Xen guest with PV disk, under KVM. Full suppor=
> > t
> > > > for migration isn't there yet because it's actually missing in the PV
> > > > back end drivers in the first place (perhaps because upstream Xen doesn=
> > 't
> > > > yet have guest transparent live migration support anyway). I'm assuming
> > > > that when the first round is merged and we drop the [RFC] from this set=
> > ,
> > > > that won't be a showstopper for now?
> > > > =20
> > > > I'd be particularly interested in opinions on the way I implemented
> > > > serialization for the XenStore, by creating a GByteArray and then dumpi=
> > ng
> > > > it with VMSTATE_VARRAY_UINT32_ALLOC().
> > > =20
> > > And I was wondering why I was CC'd in the whole series O:-)
> > > =20
> > 
> > Indeed, Philippe M-D added you to Cc when discussing migrations on the
> > first RFC submission back in December, and you've been included ever
> > since.
> > 
> > 
> > > How big is the xenstore?
> > > I mean typical size and maximun size.
> > > =20
> > 
> > Booting a simple instance with a single disk:
> > 
> > $ scripts/analyze-migration.py -f foo | grep impl_state_size
> >         "impl_state_size": "0x00000634",
> > 
> > Theoretical maximum is about 1000 nodes @2KiB, so around 2MiB.
> > 
> > > If it is suficientely small (i.e. in the single unit megabytes), you can
> > > send it as a normal device at the end of migration.
> > > =20
> > 
> > Right now it's part of the xen_xenstore device. Most of that is fairly
> > simple and it's just the impl_state that's slightly different.
> > 
> > 
> > > If it is bigger, I think that you are going to have to enter Migration
> > > iteration stage, and have some kind of dirty bitmap to know what entries
> > > are on the target and what not.
> > > =20
> > 
> > We have COW and transactions; that isn't an impossibility; I think we
> > can avoid that complexity though.
> 
> It is relatively small.  I will go with migrating at the end of
> migration for now.  Later we can measure if we need to improve
> performance there.

Yeah, that much I was relatively OK with. The bit I thought might
attract heckling is how I actually store the byte stream, in
https://git.infradead.org/users/dwmw2/qemu.git/commitdiff/45e7e645080#patch1

--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -66,6 +66,9 @@ struct XenXenstoreState {
     evtchn_port_t guest_port;
     evtchn_port_t be_port;
     struct xenevtchn_handle *eh;
+
+    uint8_t *impl_state;
+    uint32_t impl_state_size;
 };
 
 struct XenXenstoreState *xen_xenstore_singleton;
@@ -109,16 +112,26 @@ static bool xen_xenstore_is_needed(void *opaque)
 static int xen_xenstore_pre_save(void *opaque)
 {
     XenXenstoreState *s = opaque;
+    GByteArray *save;
 
     if (s->eh) {
         s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
     }
+
+    g_free(s->impl_state);
+    save = xs_impl_serialize(s->impl);
+    s->impl_state = save->data;
+    s->impl_state_size = save->len;
+    g_byte_array_free(save, false);
+
     return 0;
 }
 
 static int xen_xenstore_post_load(void *opaque, int ver)
 {
     XenXenstoreState *s = opaque;
+    GByteArray *save;
+    int ret;
 
     /*
      * As qemu/dom0, rebind to the guest's port. The Windows drivers may
@@ -134,7 +147,13 @@ static int xen_xenstore_post_load(void *opaque, int ver)
         }
         s->be_port = be_port;
     }
-    return 0;
+
+    save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
+    s->impl_state = NULL;
+    s->impl_state_size = 0;
+
+    ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
+    return ret;
 }
 
 static const VMStateDescription xen_xenstore_vmstate = {
@@ -152,6 +171,10 @@ static const VMStateDescription xen_xenstore_vmstate = {
         VMSTATE_BOOL(rsp_pending, XenXenstoreState),
         VMSTATE_UINT32(guest_port, XenXenstoreState),
         VMSTATE_BOOL(fatal_error, XenXenstoreState),
+        VMSTATE_UINT32(impl_state_size, XenXenstoreState),
+        VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
+                                    impl_state_size, 0,
+                                    vmstate_info_uint8, uint8_t),
         VMSTATE_END_OF_LIST()
     }
 };