From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
When a PCI device gets assigned/de-assigned some work on vPCI side needs
to be done for that device. Introduce a pair of hooks so vPCI can handle
that.
Please note, that in the current design the error path is handled by
the toolstack via XEN_DOMCTL_assign_device/XEN_DOMCTL_deassign_device,
so this is why it is acceptable not to de-assign devices if vPCI's
assign fails, e.g. the roll back will be handled on deassign_device when
it is called by the toolstack.
Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
---
Since v2:
- define CONFIG_HAS_VPCI_GUEST_SUPPORT so dead code is not compiled
for x86
Since v1:
- constify struct pci_dev where possible
- do not open code is_system_domain()
- extended the commit message
---
xen/drivers/Kconfig | 4 ++++
xen/drivers/passthrough/pci.c | 9 +++++++++
xen/drivers/vpci/vpci.c | 23 +++++++++++++++++++++++
xen/include/xen/vpci.h | 20 ++++++++++++++++++++
4 files changed, 56 insertions(+)
diff --git a/xen/drivers/Kconfig b/xen/drivers/Kconfig
index db94393f47a6..780490cf8e39 100644
--- a/xen/drivers/Kconfig
+++ b/xen/drivers/Kconfig
@@ -15,4 +15,8 @@ source "drivers/video/Kconfig"
config HAS_VPCI
bool
+config HAS_VPCI_GUEST_SUPPORT
+ bool
+ depends on HAS_VPCI
+
endmenu
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 9f804a50e780..805ab86ed555 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -870,6 +870,10 @@ static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
if ( ret )
goto out;
+ ret = vpci_deassign_device(d, pdev);
+ if ( ret )
+ goto out;
+
if ( pdev->domain == hardware_domain )
pdev->quarantine = false;
@@ -1429,6 +1433,11 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag);
}
+ if ( rc )
+ goto done;
+
+ rc = vpci_assign_device(d, pdev);
+
done:
if ( rc )
printk(XENLOG_G_WARNING "%pd: assign (%pp) failed (%d)\n",
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 1666402d55b8..0fe86cb30d23 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -86,6 +86,29 @@ int __hwdom_init vpci_add_handlers(struct pci_dev *pdev)
return rc;
}
+
+#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
+/* Notify vPCI that device is assigned to guest. */
+int vpci_assign_device(struct domain *d, const struct pci_dev *dev)
+{
+ /* It only makes sense to assign for hwdom or guest domain. */
+ if ( is_system_domain(d) || !has_vpci(d) )
+ return 0;
+
+ return 0;
+}
+
+/* Notify vPCI that device is de-assigned from guest. */
+int vpci_deassign_device(struct domain *d, const struct pci_dev *dev)
+{
+ /* It only makes sense to de-assign from hwdom or guest domain. */
+ if ( is_system_domain(d) || !has_vpci(d) )
+ return 0;
+
+ return 0;
+}
+#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */
+
#endif /* __XEN__ */
static int vpci_register_cmp(const struct vpci_register *r1,
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 2e910d0b1f90..ecc08f2c0f65 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -242,6 +242,26 @@ static inline bool vpci_process_pending(struct vcpu *v)
}
#endif
+#if defined(CONFIG_HAS_VPCI) && defined(CONFIG_HAS_VPCI_GUEST_SUPPORT)
+/* Notify vPCI that device is assigned/de-assigned to/from guest. */
+int __must_check vpci_assign_device(struct domain *d,
+ const struct pci_dev *dev);
+int __must_check vpci_deassign_device(struct domain *d,
+ const struct pci_dev *dev);
+#else
+static inline int vpci_assign_device(struct domain *d,
+ const struct pci_dev *dev)
+{
+ return 0;
+};
+
+static inline int vpci_deassign_device(struct domain *d,
+ const struct pci_dev *dev)
+{
+ return 0;
+};
+#endif
+
#endif
/*
--
2.25.1
On 30.09.2021 09:52, Oleksandr Andrushchenko wrote: > From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > > When a PCI device gets assigned/de-assigned some work on vPCI side needs > to be done for that device. Introduce a pair of hooks so vPCI can handle > that. > > Please note, that in the current design the error path is handled by > the toolstack via XEN_DOMCTL_assign_device/XEN_DOMCTL_deassign_device, > so this is why it is acceptable not to de-assign devices if vPCI's > assign fails, e.g. the roll back will be handled on deassign_device when > it is called by the toolstack. > > Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > --- > Since v2: > - define CONFIG_HAS_VPCI_GUEST_SUPPORT so dead code is not compiled > for x86 > Since v1: > - constify struct pci_dev where possible > - do not open code is_system_domain() > - extended the commit message > --- > xen/drivers/Kconfig | 4 ++++ > xen/drivers/passthrough/pci.c | 9 +++++++++ The Cc list does not match these files getting modified. Please make sure you Cc maintainers, so they have a chance of noticing that changes are being made which they may have an opinion on. > @@ -1429,6 +1433,11 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) > rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag); > } > > + if ( rc ) > + goto done; From all I can tell this is dead code. > + rc = vpci_assign_device(d, pdev); > + > done: > if ( rc ) > printk(XENLOG_G_WARNING "%pd: assign (%pp) failed (%d)\n", > --- a/xen/drivers/vpci/vpci.c > +++ b/xen/drivers/vpci/vpci.c > @@ -86,6 +86,29 @@ int __hwdom_init vpci_add_handlers(struct pci_dev *pdev) > > return rc; > } > + > +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT > +/* Notify vPCI that device is assigned to guest. */ > +int vpci_assign_device(struct domain *d, const struct pci_dev *dev) > +{ > + /* It only makes sense to assign for hwdom or guest domain. */ Could you clarify for me in how far this code path is indeed intended to be taken by hwdom? Because if it is, I'd like to further understand the interaction with setup_hwdom_pci_devices(). > + if ( is_system_domain(d) || !has_vpci(d) ) > + return 0; > + > + return 0; > +} > + > +/* Notify vPCI that device is de-assigned from guest. */ > +int vpci_deassign_device(struct domain *d, const struct pci_dev *dev) > +{ > + /* It only makes sense to de-assign from hwdom or guest domain. */ > + if ( is_system_domain(d) || !has_vpci(d) ) > + return 0; > + > + return 0; > +} > +#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ At this point of the series #ifdef is the less preferable variant of arranging for dead code to get compiled out. I expect later patches will change that? > --- a/xen/include/xen/vpci.h > +++ b/xen/include/xen/vpci.h > @@ -242,6 +242,26 @@ static inline bool vpci_process_pending(struct vcpu *v) > } > #endif > > +#if defined(CONFIG_HAS_VPCI) && defined(CONFIG_HAS_VPCI_GUEST_SUPPORT) > +/* Notify vPCI that device is assigned/de-assigned to/from guest. */ > +int __must_check vpci_assign_device(struct domain *d, > + const struct pci_dev *dev); > +int __must_check vpci_deassign_device(struct domain *d, > + const struct pci_dev *dev); > +#else > +static inline int vpci_assign_device(struct domain *d, > + const struct pci_dev *dev) > +{ > + return 0; > +}; > + > +static inline int vpci_deassign_device(struct domain *d, > + const struct pci_dev *dev) > +{ > + return 0; > +}; > +#endif Please put the __must_check also on the stubs, or else people only build-testing x86 may not notice that they might be introducing build failures on Arm. Then again I'm not sure whether in this case the attributes are necessary in the first place. Jan
On 30.09.21 11:21, Jan Beulich wrote: > On 30.09.2021 09:52, Oleksandr Andrushchenko wrote: >> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> >> >> When a PCI device gets assigned/de-assigned some work on vPCI side needs >> to be done for that device. Introduce a pair of hooks so vPCI can handle >> that. >> >> Please note, that in the current design the error path is handled by >> the toolstack via XEN_DOMCTL_assign_device/XEN_DOMCTL_deassign_device, >> so this is why it is acceptable not to de-assign devices if vPCI's >> assign fails, e.g. the roll back will be handled on deassign_device when >> it is called by the toolstack. >> >> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> >> --- >> Since v2: >> - define CONFIG_HAS_VPCI_GUEST_SUPPORT so dead code is not compiled >> for x86 >> Since v1: >> - constify struct pci_dev where possible >> - do not open code is_system_domain() >> - extended the commit message >> --- >> xen/drivers/Kconfig | 4 ++++ >> xen/drivers/passthrough/pci.c | 9 +++++++++ > The Cc list does not match these files getting modified. Please make > sure you Cc maintainers, so they have a chance of noticing that > changes are being made which they may have an opinion on. Sure, I will go over the patches and put required Cc: next time > >> @@ -1429,6 +1433,11 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) >> rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag); >> } >> >> + if ( rc ) >> + goto done; > From all I can tell this is dead code. Before the change rc was set in the loop. And then we fall through to the "done" label. I do agree that the way this code is done the value of that rc will only reflect the last assignment done in the loop, but with my change I didn't want to change the existing behavior, thus "if ( rc" > >> + rc = vpci_assign_device(d, pdev); >> + >> done: >> if ( rc ) >> printk(XENLOG_G_WARNING "%pd: assign (%pp) failed (%d)\n", >> --- a/xen/drivers/vpci/vpci.c >> +++ b/xen/drivers/vpci/vpci.c >> @@ -86,6 +86,29 @@ int __hwdom_init vpci_add_handlers(struct pci_dev *pdev) >> >> return rc; >> } >> + >> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT >> +/* Notify vPCI that device is assigned to guest. */ >> +int vpci_assign_device(struct domain *d, const struct pci_dev *dev) >> +{ >> + /* It only makes sense to assign for hwdom or guest domain. */ > Could you clarify for me in how far this code path is indeed intended > to be taken by hwdom? Because if it is, I'd like to further understand > the interaction with setup_hwdom_pci_devices(). setup_hwdom_pci_devices is not used on Arm as we do rely on Dom0 to perform PCI host initialization and PCI device enumeration. This is because of the fact that on Arm it is not a trivial task to initialize a PCI host bridge in Xen, e.g. you need to properly initialize power domains, clocks, quirks etc. for different SoCs. All these make the task too complex and it was decided that at the moment we do not want to bring PCI device drivers in Xen for that. It was also decided that we expect Dom0 to take care of initialization and enumeration. Some day, when firmware can do PCI initialization for us and then we can easily access ECAM, this will change. Then setup_hwdom_pci_devices will be used on Arm as well. Thus, we need to take care that Xen knows about the discovered PCI devices via assign_device etc. > >> + if ( is_system_domain(d) || !has_vpci(d) ) >> + return 0; >> + >> + return 0; >> +} >> + >> +/* Notify vPCI that device is de-assigned from guest. */ >> +int vpci_deassign_device(struct domain *d, const struct pci_dev *dev) >> +{ >> + /* It only makes sense to de-assign from hwdom or guest domain. */ >> + if ( is_system_domain(d) || !has_vpci(d) ) >> + return 0; >> + >> + return 0; >> +} >> +#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ > At this point of the series #ifdef is the less preferable variant of > arranging for dead code to get compiled out. What is that other preferable way then? > I expect later patches > will change that? No, it is going to be this way all the time > >> --- a/xen/include/xen/vpci.h >> +++ b/xen/include/xen/vpci.h >> @@ -242,6 +242,26 @@ static inline bool vpci_process_pending(struct vcpu *v) >> } >> #endif >> >> +#if defined(CONFIG_HAS_VPCI) && defined(CONFIG_HAS_VPCI_GUEST_SUPPORT) >> +/* Notify vPCI that device is assigned/de-assigned to/from guest. */ >> +int __must_check vpci_assign_device(struct domain *d, >> + const struct pci_dev *dev); >> +int __must_check vpci_deassign_device(struct domain *d, >> + const struct pci_dev *dev); >> +#else >> +static inline int vpci_assign_device(struct domain *d, >> + const struct pci_dev *dev) >> +{ >> + return 0; >> +}; >> + >> +static inline int vpci_deassign_device(struct domain *d, >> + const struct pci_dev *dev) >> +{ >> + return 0; >> +}; >> +#endif > Please put the __must_check also on the stubs, or else people only > build-testing x86 may not notice that they might be introducing > build failures on Arm. Then again I'm not sure whether in this case > the attributes are necessary in the first place. I will remove __must_check > > Jan >
On 30.09.2021 10:45, Oleksandr Andrushchenko wrote: > On 30.09.21 11:21, Jan Beulich wrote: >> On 30.09.2021 09:52, Oleksandr Andrushchenko wrote: >>> @@ -1429,6 +1433,11 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) >>> rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag); >>> } >>> >>> + if ( rc ) >>> + goto done; >> From all I can tell this is dead code. > Before the change rc was set in the loop. And then we fall through > to the "done" label. I do agree that the way this code is done the > value of that rc will only reflect the last assignment done in the loop, > but with my change I didn't want to change the existing behavior, > thus "if ( rc" rc is always 0 upon loop exit, afaict: for ( ; pdev->phantom_stride; rc = 0 ) Granted this is unusual and hence possibly unexpected. >>> --- a/xen/drivers/vpci/vpci.c >>> +++ b/xen/drivers/vpci/vpci.c >>> @@ -86,6 +86,29 @@ int __hwdom_init vpci_add_handlers(struct pci_dev *pdev) >>> >>> return rc; >>> } >>> + >>> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT >>> +/* Notify vPCI that device is assigned to guest. */ >>> +int vpci_assign_device(struct domain *d, const struct pci_dev *dev) >>> +{ >>> + /* It only makes sense to assign for hwdom or guest domain. */ >> Could you clarify for me in how far this code path is indeed intended >> to be taken by hwdom? Because if it is, I'd like to further understand >> the interaction with setup_hwdom_pci_devices(). > setup_hwdom_pci_devices is not used on Arm as we do rely on > Dom0 to perform PCI host initialization and PCI device enumeration. > > This is because of the fact that on Arm it is not a trivial task to > initialize a PCI host bridge in Xen, e.g. you need to properly initialize > power domains, clocks, quirks etc. for different SoCs. > All these make the task too complex and it was decided that at the > moment we do not want to bring PCI device drivers in Xen for that. > It was also decided that we expect Dom0 to take care of initialization > and enumeration. > Some day, when firmware can do PCI initialization for us and then we > can easily access ECAM, this will change. Then setup_hwdom_pci_devices > will be used on Arm as well. > > Thus, we need to take care that Xen knows about the discovered > PCI devices via assign_device etc. Fair enough, but since I've not spotted a patch expressing this (by adding suitable conditionals), may I ask that you do so in yet another patch (unless I've overlooked where this gets done)? >>> + if ( is_system_domain(d) || !has_vpci(d) ) >>> + return 0; >>> + >>> + return 0; >>> +} >>> + >>> +/* Notify vPCI that device is de-assigned from guest. */ >>> +int vpci_deassign_device(struct domain *d, const struct pci_dev *dev) >>> +{ >>> + /* It only makes sense to de-assign from hwdom or guest domain. */ >>> + if ( is_system_domain(d) || !has_vpci(d) ) >>> + return 0; >>> + >>> + return 0; >>> +} >>> +#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ >> At this point of the series #ifdef is the less preferable variant of >> arranging for dead code to get compiled out. > What is that other preferable way then? "if ( !IS_ENABLED() )" as I did already point out to you yesterday in reply to v2 of patch 10 of this very series. >> I expect later patches >> will change that? > No, it is going to be this way all the time The question wasn't whether you switch away from the #ifdef-s, but whether later patches leave that as the only choice (avoiding build breakage). Jan
On 30.09.21 12:06, Jan Beulich wrote: > On 30.09.2021 10:45, Oleksandr Andrushchenko wrote: >> On 30.09.21 11:21, Jan Beulich wrote: >>> On 30.09.2021 09:52, Oleksandr Andrushchenko wrote: >>>> @@ -1429,6 +1433,11 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) >>>> rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag); >>>> } >>>> >>>> + if ( rc ) >>>> + goto done; >>> From all I can tell this is dead code. >> Before the change rc was set in the loop. And then we fall through >> to the "done" label. I do agree that the way this code is done the >> value of that rc will only reflect the last assignment done in the loop, >> but with my change I didn't want to change the existing behavior, >> thus "if ( rc" > rc is always 0 upon loop exit, afaict: > > for ( ; pdev->phantom_stride; rc = 0 ) > > Granted this is unusual and hence possibly unexpected. I will remove that check then. Do we want a comment about rc == 0, so it is seen why there is no check for rc? > >>>> --- a/xen/drivers/vpci/vpci.c >>>> +++ b/xen/drivers/vpci/vpci.c >>>> @@ -86,6 +86,29 @@ int __hwdom_init vpci_add_handlers(struct pci_dev *pdev) >>>> >>>> return rc; >>>> } >>>> + >>>> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT >>>> +/* Notify vPCI that device is assigned to guest. */ >>>> +int vpci_assign_device(struct domain *d, const struct pci_dev *dev) >>>> +{ >>>> + /* It only makes sense to assign for hwdom or guest domain. */ >>> Could you clarify for me in how far this code path is indeed intended >>> to be taken by hwdom? Because if it is, I'd like to further understand >>> the interaction with setup_hwdom_pci_devices(). >> setup_hwdom_pci_devices is not used on Arm as we do rely on >> Dom0 to perform PCI host initialization and PCI device enumeration. >> >> This is because of the fact that on Arm it is not a trivial task to >> initialize a PCI host bridge in Xen, e.g. you need to properly initialize >> power domains, clocks, quirks etc. for different SoCs. >> All these make the task too complex and it was decided that at the >> moment we do not want to bring PCI device drivers in Xen for that. >> It was also decided that we expect Dom0 to take care of initialization >> and enumeration. >> Some day, when firmware can do PCI initialization for us and then we >> can easily access ECAM, this will change. Then setup_hwdom_pci_devices >> will be used on Arm as well. >> >> Thus, we need to take care that Xen knows about the discovered >> PCI devices via assign_device etc. > Fair enough, but since I've not spotted a patch expressing this Well, it is all in the RFC for PCI passthrough on Arm which is mentioned in series from Arm and EPAM (part 2). I didn't mention the RFC in the cover letter for this series though. > (by > adding suitable conditionals), may I ask that you do so in yet another > patch (unless I've overlooked where this gets done)? Could you please elaborate more on which conditionals you are talking about here? I'm afraid I didn't understand this part. > >>>> + if ( is_system_domain(d) || !has_vpci(d) ) >>>> + return 0; >>>> + >>>> + return 0; >>>> +} >>>> + >>>> +/* Notify vPCI that device is de-assigned from guest. */ >>>> +int vpci_deassign_device(struct domain *d, const struct pci_dev *dev) >>>> +{ >>>> + /* It only makes sense to de-assign from hwdom or guest domain. */ >>>> + if ( is_system_domain(d) || !has_vpci(d) ) >>>> + return 0; >>>> + >>>> + return 0; >>>> +} >>>> +#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ >>> At this point of the series #ifdef is the less preferable variant of >>> arranging for dead code to get compiled out. >> What is that other preferable way then? > "if ( !IS_ENABLED() )" as I did already point out to you yesterday in > reply to v2 of patch 10 of this very series. Please see below > >>> I expect later patches >>> will change that? >> No, it is going to be this way all the time > The question wasn't whether you switch away from the #ifdef-s, but > whether later patches leave that as the only choice (avoiding build > breakage). Yes, the code is going to always remain ifdef'ed, so we don't have dead code for x86 (at least). So, does the above mean that you are ok with "#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT" and there is no need for "if ( !IS_ENABLED() )"? > > Jan > > Thank you, Oleksandr
On 30.09.2021 11:21, Oleksandr Andrushchenko wrote: > On 30.09.21 12:06, Jan Beulich wrote: >> On 30.09.2021 10:45, Oleksandr Andrushchenko wrote: >>> On 30.09.21 11:21, Jan Beulich wrote: >>>> On 30.09.2021 09:52, Oleksandr Andrushchenko wrote: >>>>> @@ -1429,6 +1433,11 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) >>>>> rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag); >>>>> } >>>>> >>>>> + if ( rc ) >>>>> + goto done; >>>> From all I can tell this is dead code. >>> Before the change rc was set in the loop. And then we fall through >>> to the "done" label. I do agree that the way this code is done the >>> value of that rc will only reflect the last assignment done in the loop, >>> but with my change I didn't want to change the existing behavior, >>> thus "if ( rc" >> rc is always 0 upon loop exit, afaict: >> >> for ( ; pdev->phantom_stride; rc = 0 ) >> >> Granted this is unusual and hence possibly unexpected. > I will remove that check then. Do we want a comment about rc == 0, > so it is seen why there is no check for rc? So far we've been doing fine without such a comment, but I wouldn't object to a well worded one getting added. >>>>> --- a/xen/drivers/vpci/vpci.c >>>>> +++ b/xen/drivers/vpci/vpci.c >>>>> @@ -86,6 +86,29 @@ int __hwdom_init vpci_add_handlers(struct pci_dev *pdev) >>>>> >>>>> return rc; >>>>> } >>>>> + >>>>> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT >>>>> +/* Notify vPCI that device is assigned to guest. */ >>>>> +int vpci_assign_device(struct domain *d, const struct pci_dev *dev) >>>>> +{ >>>>> + /* It only makes sense to assign for hwdom or guest domain. */ >>>> Could you clarify for me in how far this code path is indeed intended >>>> to be taken by hwdom? Because if it is, I'd like to further understand >>>> the interaction with setup_hwdom_pci_devices(). >>> setup_hwdom_pci_devices is not used on Arm as we do rely on >>> Dom0 to perform PCI host initialization and PCI device enumeration. >>> >>> This is because of the fact that on Arm it is not a trivial task to >>> initialize a PCI host bridge in Xen, e.g. you need to properly initialize >>> power domains, clocks, quirks etc. for different SoCs. >>> All these make the task too complex and it was decided that at the >>> moment we do not want to bring PCI device drivers in Xen for that. >>> It was also decided that we expect Dom0 to take care of initialization >>> and enumeration. >>> Some day, when firmware can do PCI initialization for us and then we >>> can easily access ECAM, this will change. Then setup_hwdom_pci_devices >>> will be used on Arm as well. >>> >>> Thus, we need to take care that Xen knows about the discovered >>> PCI devices via assign_device etc. >> Fair enough, but since I've not spotted a patch expressing this > Well, it is all in the RFC for PCI passthrough on Arm which is mentioned > in series from Arm and EPAM (part 2). I didn't mention the RFC in the > cover letter for this series though. >> (by >> adding suitable conditionals), may I ask that you do so in yet another >> patch (unless I've overlooked where this gets done)? > Could you please elaborate more on which conditionals you are > talking about here? I'm afraid I didn't understand this part. By putting it inside #if or adding "if ( !IS_ENABLED() )", you'd make very obvious that the code in question isn't used, and hence no interaction issues with vPCI exist. >>>>> + if ( is_system_domain(d) || !has_vpci(d) ) >>>>> + return 0; >>>>> + >>>>> + return 0; >>>>> +} >>>>> + >>>>> +/* Notify vPCI that device is de-assigned from guest. */ >>>>> +int vpci_deassign_device(struct domain *d, const struct pci_dev *dev) >>>>> +{ >>>>> + /* It only makes sense to de-assign from hwdom or guest domain. */ >>>>> + if ( is_system_domain(d) || !has_vpci(d) ) >>>>> + return 0; >>>>> + >>>>> + return 0; >>>>> +} >>>>> +#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ >>>> At this point of the series #ifdef is the less preferable variant of >>>> arranging for dead code to get compiled out. >>> What is that other preferable way then? >> "if ( !IS_ENABLED() )" as I did already point out to you yesterday in >> reply to v2 of patch 10 of this very series. > Please see below >> >>>> I expect later patches >>>> will change that? >>> No, it is going to be this way all the time >> The question wasn't whether you switch away from the #ifdef-s, but >> whether later patches leave that as the only choice (avoiding build >> breakage). > Yes, the code is going to always remain ifdef'ed, so we don't have > dead code for x86 (at least). > So, does the above mean that you are ok with "#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT" > and there is no need for "if ( !IS_ENABLED() )"? I'm afraid you still didn't understand: "if ( !IS_ENABLED() )" is also a way to make sure there's (almost) no dead code. And this model has the advantage that the compiler would still check all that code even in x86 builds (throwing away most of it in one of its DCE passes), reducing the risk for someone not routinely doing Arm builds to introduce a build issue. But as soon a code references struct members which sit inside an #ifdef, that code can't use this preferred approach anymore. That's what I suspect might be happening in subsequent patches, which would then justify your choice of #ifdef. Jan
On 30.09.21 13:14, Jan Beulich wrote: > On 30.09.2021 11:21, Oleksandr Andrushchenko wrote: >> On 30.09.21 12:06, Jan Beulich wrote: >>> On 30.09.2021 10:45, Oleksandr Andrushchenko wrote: >>>> On 30.09.21 11:21, Jan Beulich wrote: >>>>> On 30.09.2021 09:52, Oleksandr Andrushchenko wrote: >>>>>> @@ -1429,6 +1433,11 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) >>>>>> rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag); >>>>>> } >>>>>> >>>>>> + if ( rc ) >>>>>> + goto done; >>>>> From all I can tell this is dead code. >>>> Before the change rc was set in the loop. And then we fall through >>>> to the "done" label. I do agree that the way this code is done the >>>> value of that rc will only reflect the last assignment done in the loop, >>>> but with my change I didn't want to change the existing behavior, >>>> thus "if ( rc" >>> rc is always 0 upon loop exit, afaict: >>> >>> for ( ; pdev->phantom_stride; rc = 0 ) >>> >>> Granted this is unusual and hence possibly unexpected. >> I will remove that check then. Do we want a comment about rc == 0, >> so it is seen why there is no check for rc? > So far we've been doing fine without such a comment, but I wouldn't > object to a well worded one getting added. > >>>>>> --- a/xen/drivers/vpci/vpci.c >>>>>> +++ b/xen/drivers/vpci/vpci.c >>>>>> @@ -86,6 +86,29 @@ int __hwdom_init vpci_add_handlers(struct pci_dev *pdev) >>>>>> >>>>>> return rc; >>>>>> } >>>>>> + >>>>>> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT >>>>>> +/* Notify vPCI that device is assigned to guest. */ >>>>>> +int vpci_assign_device(struct domain *d, const struct pci_dev *dev) >>>>>> +{ >>>>>> + /* It only makes sense to assign for hwdom or guest domain. */ >>>>> Could you clarify for me in how far this code path is indeed intended >>>>> to be taken by hwdom? Because if it is, I'd like to further understand >>>>> the interaction with setup_hwdom_pci_devices(). >>>> setup_hwdom_pci_devices is not used on Arm as we do rely on >>>> Dom0 to perform PCI host initialization and PCI device enumeration. >>>> >>>> This is because of the fact that on Arm it is not a trivial task to >>>> initialize a PCI host bridge in Xen, e.g. you need to properly initialize >>>> power domains, clocks, quirks etc. for different SoCs. >>>> All these make the task too complex and it was decided that at the >>>> moment we do not want to bring PCI device drivers in Xen for that. >>>> It was also decided that we expect Dom0 to take care of initialization >>>> and enumeration. >>>> Some day, when firmware can do PCI initialization for us and then we >>>> can easily access ECAM, this will change. Then setup_hwdom_pci_devices >>>> will be used on Arm as well. >>>> >>>> Thus, we need to take care that Xen knows about the discovered >>>> PCI devices via assign_device etc. >>> Fair enough, but since I've not spotted a patch expressing this >> Well, it is all in the RFC for PCI passthrough on Arm which is mentioned >> in series from Arm and EPAM (part 2). I didn't mention the RFC in the >> cover letter for this series though. >>> (by >>> adding suitable conditionals), may I ask that you do so in yet another >>> patch (unless I've overlooked where this gets done)? >> Could you please elaborate more on which conditionals you are >> talking about here? I'm afraid I didn't understand this part. > By putting it inside #if or adding "if ( !IS_ENABLED() )", you'd make > very obvious that the code in question isn't used, and hence no > interaction issues with vPCI exist. > >>>>>> + if ( is_system_domain(d) || !has_vpci(d) ) >>>>>> + return 0; >>>>>> + >>>>>> + return 0; >>>>>> +} >>>>>> + >>>>>> +/* Notify vPCI that device is de-assigned from guest. */ >>>>>> +int vpci_deassign_device(struct domain *d, const struct pci_dev *dev) >>>>>> +{ >>>>>> + /* It only makes sense to de-assign from hwdom or guest domain. */ >>>>>> + if ( is_system_domain(d) || !has_vpci(d) ) >>>>>> + return 0; >>>>>> + >>>>>> + return 0; >>>>>> +} >>>>>> +#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ >>>>> At this point of the series #ifdef is the less preferable variant of >>>>> arranging for dead code to get compiled out. >>>> What is that other preferable way then? >>> "if ( !IS_ENABLED() )" as I did already point out to you yesterday in >>> reply to v2 of patch 10 of this very series. >> Please see below >>>>> I expect later patches >>>>> will change that? >>>> No, it is going to be this way all the time >>> The question wasn't whether you switch away from the #ifdef-s, but >>> whether later patches leave that as the only choice (avoiding build >>> breakage). >> Yes, the code is going to always remain ifdef'ed, so we don't have >> dead code for x86 (at least). >> So, does the above mean that you are ok with "#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT" >> and there is no need for "if ( !IS_ENABLED() )"? > I'm afraid you still didn't understand: "if ( !IS_ENABLED() )" is > also a way to make sure there's (almost) no dead code. And this model > has the advantage that the compiler would still check all that code > even in x86 builds (throwing away most of it in one of its DCE passes), > reducing the risk for someone not routinely doing Arm builds to > introduce a build issue. > > But as soon a code references struct members which sit inside an > #ifdef, that code can't use this preferred approach anymore. That's > what I suspect might be happening in subsequent patches, which would > then justify your choice of #ifdef. This is the key to my not understanding: indeed, there are structure members which are ifdef'ed thus rendering the idea with IS_ENABLED not applicable: @@ -444,6 +444,14 @@ struct domain +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT + struct list_head vdev_list; + /* + * Current device number used by the virtual PCI bus topology + * to assign a unique SBDF to a passed through virtual PCI device. + */ + int vpci_dev_next; +#endif > > Jan > > Thank you, Oleksandr
On Thu, Sep 30, 2021 at 10:52:14AM +0300, Oleksandr Andrushchenko wrote: > From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > > When a PCI device gets assigned/de-assigned some work on vPCI side needs > to be done for that device. Introduce a pair of hooks so vPCI can handle > that. > > Please note, that in the current design the error path is handled by > the toolstack via XEN_DOMCTL_assign_device/XEN_DOMCTL_deassign_device, > so this is why it is acceptable not to de-assign devices if vPCI's > assign fails, e.g. the roll back will be handled on deassign_device when > it is called by the toolstack. It's kind of hard to see what would need to be rolled back, as the functions are just dummies right now that don't perform any actions. I don't think the toolstack should be the one to deal with the fallout, as it could leave Xen in a broken state. The current commit message doesn't provide any information about why it has been designed this way. > > Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > --- > Since v2: > - define CONFIG_HAS_VPCI_GUEST_SUPPORT so dead code is not compiled > for x86 > Since v1: > - constify struct pci_dev where possible > - do not open code is_system_domain() > - extended the commit message > --- > xen/drivers/Kconfig | 4 ++++ > xen/drivers/passthrough/pci.c | 9 +++++++++ > xen/drivers/vpci/vpci.c | 23 +++++++++++++++++++++++ > xen/include/xen/vpci.h | 20 ++++++++++++++++++++ > 4 files changed, 56 insertions(+) > > diff --git a/xen/drivers/Kconfig b/xen/drivers/Kconfig > index db94393f47a6..780490cf8e39 100644 > --- a/xen/drivers/Kconfig > +++ b/xen/drivers/Kconfig > @@ -15,4 +15,8 @@ source "drivers/video/Kconfig" > config HAS_VPCI > bool > > +config HAS_VPCI_GUEST_SUPPORT > + bool > + depends on HAS_VPCI I would assume this is to go away once the work is finished? I don't think it makes sense to split vPCI code between domU/dom0 on a build time basis. > + > endmenu > diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c > index 9f804a50e780..805ab86ed555 100644 > --- a/xen/drivers/passthrough/pci.c > +++ b/xen/drivers/passthrough/pci.c > @@ -870,6 +870,10 @@ static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus, > if ( ret ) > goto out; > > + ret = vpci_deassign_device(d, pdev); > + if ( ret ) > + goto out; > + > if ( pdev->domain == hardware_domain ) > pdev->quarantine = false; > > @@ -1429,6 +1433,11 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) > rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag); > } > > + if ( rc ) > + goto done; > + > + rc = vpci_assign_device(d, pdev); > + > done: > if ( rc ) > printk(XENLOG_G_WARNING "%pd: assign (%pp) failed (%d)\n", > diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c > index 1666402d55b8..0fe86cb30d23 100644 > --- a/xen/drivers/vpci/vpci.c > +++ b/xen/drivers/vpci/vpci.c > @@ -86,6 +86,29 @@ int __hwdom_init vpci_add_handlers(struct pci_dev *pdev) > > return rc; > } > + > +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT > +/* Notify vPCI that device is assigned to guest. */ > +int vpci_assign_device(struct domain *d, const struct pci_dev *dev) > +{ > + /* It only makes sense to assign for hwdom or guest domain. */ > + if ( is_system_domain(d) || !has_vpci(d) ) > + return 0; > + > + return 0; > +} > + > +/* Notify vPCI that device is de-assigned from guest. */ > +int vpci_deassign_device(struct domain *d, const struct pci_dev *dev) > +{ > + /* It only makes sense to de-assign from hwdom or guest domain. */ > + if ( is_system_domain(d) || !has_vpci(d) ) > + return 0; > + > + return 0; > +} > +#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ > + > #endif /* __XEN__ */ > > static int vpci_register_cmp(const struct vpci_register *r1, > diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h > index 2e910d0b1f90..ecc08f2c0f65 100644 > --- a/xen/include/xen/vpci.h > +++ b/xen/include/xen/vpci.h > @@ -242,6 +242,26 @@ static inline bool vpci_process_pending(struct vcpu *v) > } > #endif > > +#if defined(CONFIG_HAS_VPCI) && defined(CONFIG_HAS_VPCI_GUEST_SUPPORT) You don't need to check for CONFIG_HAS_VPCI, as CONFIG_HAS_VPCI_GUEST_SUPPORT already depends on CONFIG_HAS_VPCI being set. > +/* Notify vPCI that device is assigned/de-assigned to/from guest. */ > +int __must_check vpci_assign_device(struct domain *d, > + const struct pci_dev *dev); > +int __must_check vpci_deassign_device(struct domain *d, > + const struct pci_dev *dev); > +#else > +static inline int vpci_assign_device(struct domain *d, > + const struct pci_dev *dev) > +{ > + return 0; > +}; > + > +static inline int vpci_deassign_device(struct domain *d, > + const struct pci_dev *dev) > +{ > + return 0; > +}; You need the __must_check attributes here also to match the prototypes above. Thanks, Roger.
On 13.10.2021 13:29, Roger Pau Monné wrote: > On Thu, Sep 30, 2021 at 10:52:14AM +0300, Oleksandr Andrushchenko wrote: >> --- a/xen/drivers/Kconfig >> +++ b/xen/drivers/Kconfig >> @@ -15,4 +15,8 @@ source "drivers/video/Kconfig" >> config HAS_VPCI >> bool >> >> +config HAS_VPCI_GUEST_SUPPORT >> + bool >> + depends on HAS_VPCI > > I would assume this is to go away once the work is finished? I don't > think it makes sense to split vPCI code between domU/dom0 on a build > time basis. If by that you mean x86 side work, then maybe. I did ask for this so that x86 wouldn't carry quite a bit of presently dead code. Jan
Hi, Roger! On 13.10.21 14:29, Roger Pau Monné wrote: > On Thu, Sep 30, 2021 at 10:52:14AM +0300, Oleksandr Andrushchenko wrote: >> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> >> >> When a PCI device gets assigned/de-assigned some work on vPCI side needs >> to be done for that device. Introduce a pair of hooks so vPCI can handle >> that. >> >> Please note, that in the current design the error path is handled by >> the toolstack via XEN_DOMCTL_assign_device/XEN_DOMCTL_deassign_device, >> so this is why it is acceptable not to de-assign devices if vPCI's >> assign fails, e.g. the roll back will be handled on deassign_device when >> it is called by the toolstack. > It's kind of hard to see what would need to be rolled back, as the > functions are just dummies right now that don't perform any actions. > > I don't think the toolstack should be the one to deal with the > fallout, as it could leave Xen in a broken state. The current commit > message doesn't provide any information about why it has been designed > this way. Yes, we discussed in other patches that we need not rely on the toolstack and perform cleanup ourselves, so this the code from the future to illustrate the roll-back: int vpci_assign_device(struct domain *d, const struct pci_dev *pdev) { int rc; /* It only makes sense to assign for hwdom or guest domain. */ if ( is_system_domain(d) || !has_vpci(d) ) return 0; rc = vpci_bar_add_handlers(d, pdev); if ( rc ) goto fail; rc = vpci_add_virtual_device(d, pdev); if ( rc ) { gdprintk(XENLOG_ERR, "%pp: failed to add virtual device for %pd: %d\n", &pdev->sbdf, d, rc); goto fail; } return 0; fail: /* * We are trying to clean up as much as we can, so ignore the return * value of vpci_deassign_device below, so we can return the * error which caused the failure. */ vpci_deassign_device(d, pdev); return rc; } So, I will drop the part about the toolstack and cleanup from the commit message > >> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> >> --- >> Since v2: >> - define CONFIG_HAS_VPCI_GUEST_SUPPORT so dead code is not compiled >> for x86 >> Since v1: >> - constify struct pci_dev where possible >> - do not open code is_system_domain() >> - extended the commit message >> --- >> xen/drivers/Kconfig | 4 ++++ >> xen/drivers/passthrough/pci.c | 9 +++++++++ >> xen/drivers/vpci/vpci.c | 23 +++++++++++++++++++++++ >> xen/include/xen/vpci.h | 20 ++++++++++++++++++++ >> 4 files changed, 56 insertions(+) >> >> diff --git a/xen/drivers/Kconfig b/xen/drivers/Kconfig >> index db94393f47a6..780490cf8e39 100644 >> --- a/xen/drivers/Kconfig >> +++ b/xen/drivers/Kconfig >> @@ -15,4 +15,8 @@ source "drivers/video/Kconfig" >> config HAS_VPCI >> bool >> >> +config HAS_VPCI_GUEST_SUPPORT >> + bool >> + depends on HAS_VPCI > I would assume this is to go away once the work is finished? I don't > think it makes sense to split vPCI code between domU/dom0 on a build > time basis. > >> + >> endmenu >> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c >> index 9f804a50e780..805ab86ed555 100644 >> --- a/xen/drivers/passthrough/pci.c >> +++ b/xen/drivers/passthrough/pci.c >> @@ -870,6 +870,10 @@ static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus, >> if ( ret ) >> goto out; >> >> + ret = vpci_deassign_device(d, pdev); >> + if ( ret ) >> + goto out; >> + >> if ( pdev->domain == hardware_domain ) >> pdev->quarantine = false; >> >> @@ -1429,6 +1433,11 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) >> rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag); >> } >> >> + if ( rc ) >> + goto done; >> + >> + rc = vpci_assign_device(d, pdev); >> + >> done: >> if ( rc ) >> printk(XENLOG_G_WARNING "%pd: assign (%pp) failed (%d)\n", >> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c >> index 1666402d55b8..0fe86cb30d23 100644 >> --- a/xen/drivers/vpci/vpci.c >> +++ b/xen/drivers/vpci/vpci.c >> @@ -86,6 +86,29 @@ int __hwdom_init vpci_add_handlers(struct pci_dev *pdev) >> >> return rc; >> } >> + >> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT >> +/* Notify vPCI that device is assigned to guest. */ >> +int vpci_assign_device(struct domain *d, const struct pci_dev *dev) >> +{ >> + /* It only makes sense to assign for hwdom or guest domain. */ >> + if ( is_system_domain(d) || !has_vpci(d) ) >> + return 0; >> + >> + return 0; >> +} >> + >> +/* Notify vPCI that device is de-assigned from guest. */ >> +int vpci_deassign_device(struct domain *d, const struct pci_dev *dev) >> +{ >> + /* It only makes sense to de-assign from hwdom or guest domain. */ >> + if ( is_system_domain(d) || !has_vpci(d) ) >> + return 0; >> + >> + return 0; >> +} >> +#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ >> + >> #endif /* __XEN__ */ >> >> static int vpci_register_cmp(const struct vpci_register *r1, >> diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h >> index 2e910d0b1f90..ecc08f2c0f65 100644 >> --- a/xen/include/xen/vpci.h >> +++ b/xen/include/xen/vpci.h >> @@ -242,6 +242,26 @@ static inline bool vpci_process_pending(struct vcpu *v) >> } >> #endif >> >> +#if defined(CONFIG_HAS_VPCI) && defined(CONFIG_HAS_VPCI_GUEST_SUPPORT) > You don't need to check for CONFIG_HAS_VPCI, as > CONFIG_HAS_VPCI_GUEST_SUPPORT already depends on CONFIG_HAS_VPCI being > set. > Will fix >> +/* Notify vPCI that device is assigned/de-assigned to/from guest. */ >> +int __must_check vpci_assign_device(struct domain *d, >> + const struct pci_dev *dev); >> +int __must_check vpci_deassign_device(struct domain *d, >> + const struct pci_dev *dev); >> +#else >> +static inline int vpci_assign_device(struct domain *d, >> + const struct pci_dev *dev) >> +{ >> + return 0; >> +}; >> + >> +static inline int vpci_deassign_device(struct domain *d, >> + const struct pci_dev *dev) >> +{ >> + return 0; >> +}; > You need the __must_check attributes here also to match the prototypes > above. Yes, it was already discussed and I will remove __must_check. > Thanks, Roger. Thank you, Oleksandr
© 2016 - 2024 Red Hat, Inc.