1: correct is_pv_domain() when !CONFIG_PV 2: use is_pv_64bit_domain() to avoid double evaluate_nospec() Jan
On x86, idle and other system domains are implicitly PV. While I couldn't spot any cases where this is actively a problem, some cases required quite close inspection to be certain there couldn't e.g. be some ASSERT_UNREACHABLE() that would trigger in this case. Let's be on the safe side and make sure these always have is_pv_domain() returning true. For the build to still work, this requires a few adjustments elsewhere. In particular is_pv_64bit_domain() now gains a CONFIG_PV dependency, which means that is_pv_32bit_domain() || is_pv_64bit_domain() is no longer guaranteed to be the same as is_pv_domain(). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/dom0_build.c +++ b/xen/arch/x86/dom0_build.c @@ -568,7 +568,7 @@ int __init construct_dom0(struct domain if ( is_hvm_domain(d) ) rc = dom0_construct_pvh(d, image, image_headroom, initrd, cmdline); - else if ( is_pv_domain(d) ) + else if ( is_pv_64bit_domain(d) || is_pv_32bit_domain(d) ) rc = dom0_construct_pv(d, image, image_headroom, initrd, cmdline); else panic("Cannot construct Dom0. 
No guest interface available\n"); --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1544,6 +1544,7 @@ arch_do_vcpu_op( */ static void load_segments(struct vcpu *n) { +#ifdef CONFIG_PV struct cpu_user_regs *uregs = &n->arch.user_regs; unsigned long gsb = 0, gss = 0; bool compat = is_pv_32bit_vcpu(n); @@ -1709,6 +1710,7 @@ static void load_segments(struct vcpu *n regs->cs = FLAT_KERNEL_CS; regs->rip = pv->failsafe_callback_eip; } +#endif } /* @@ -1723,6 +1725,7 @@ static void load_segments(struct vcpu *n */ static void save_segments(struct vcpu *v) { +#ifdef CONFIG_PV struct cpu_user_regs *regs = &v->arch.user_regs; read_sregs(regs); @@ -1748,6 +1751,7 @@ static void save_segments(struct vcpu *v else v->arch.pv.gs_base_user = gs_base; } +#endif } void paravirt_ctxt_switch_from(struct vcpu *v) --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -408,13 +408,13 @@ long arch_do_domctl( case XEN_DOMCTL_set_address_size: if ( is_hvm_domain(d) ) ret = -EOPNOTSUPP; + else if ( is_pv_64bit_domain(d) && domctl->u.address_size.size == 32 ) + ret = switch_compat(d); else if ( is_pv_domain(d) ) { if ( ((domctl->u.address_size.size == 64) && !d->arch.pv.is_32bit) || ((domctl->u.address_size.size == 32) && d->arch.pv.is_32bit) ) ret = 0; - else if ( domctl->u.address_size.size == 32 ) - ret = switch_compat(d); else ret = -EINVAL; } --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -985,7 +985,7 @@ static always_inline bool is_control_dom static always_inline bool is_pv_domain(const struct domain *d) { - return IS_ENABLED(CONFIG_PV) && + return IS_ENABLED(CONFIG_X86) && evaluate_nospec(!(d->options & XEN_DOMCTL_CDF_hvm)); } @@ -1011,7 +1011,7 @@ static always_inline bool is_pv_32bit_vc static always_inline bool is_pv_64bit_domain(const struct domain *d) { - if ( !is_pv_domain(d) ) + if ( !IS_ENABLED(CONFIG_PV) || !is_pv_domain(d) ) return false; #ifdef CONFIG_PV32
On Fri, Nov 27, 2020 at 05:54:57PM +0100, Jan Beulich wrote: > On x86, idle and other system domains are implicitly PV. While I > couldn't spot any cases where this is actively a problem, some cases > required quite close inspection to be certain there couldn't e.g. be > some ASSERT_UNREACHABLE() that would trigger in this case. Let's be on > the safe side and make sure these always have is_pv_domain() returning > true. > > For the build to still work, this requires a few adjustments elsewhere. > In particular is_pv_64bit_domain() now gains a CONFIG_PV dependency, > which means that is_pv_32bit_domain() || is_pv_64bit_domain() is no > longer guaranteed to be the same as is_pv_domain(). > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/xen/arch/x86/dom0_build.c > +++ b/xen/arch/x86/dom0_build.c > @@ -568,7 +568,7 @@ int __init construct_dom0(struct domain > > if ( is_hvm_domain(d) ) > rc = dom0_construct_pvh(d, image, image_headroom, initrd, cmdline); > - else if ( is_pv_domain(d) ) > + else if ( is_pv_64bit_domain(d) || is_pv_32bit_domain(d) ) Urg, that's very confusing IMO, as I'm sure I would ask someone to just use is_pv_domain without realizing. It needs at least a comment, but even then I'm not sure I like it. So that I understand it, the point to use those expressions instead of is_pv_domain is to avoid calling dom0_construct_pv when CONFIG_PV is not enabled? Maybe it would be better to instead use: if ( IS_ENABLED(CONFIG_PV) && is_pv_domain(d) ) In any case I wonder if we should maybe aim to introduce a new type for system domains, that's neither PV nor HVM, in order to avoid having system domains qualified as PV even when PV is compiled out. > rc = dom0_construct_pv(d, image, image_headroom, initrd, cmdline); > else > panic("Cannot construct Dom0. 
No guest interface available\n"); > --- a/xen/arch/x86/domain.c > +++ b/xen/arch/x86/domain.c > @@ -1544,6 +1544,7 @@ arch_do_vcpu_op( > */ > static void load_segments(struct vcpu *n) > { > +#ifdef CONFIG_PV > struct cpu_user_regs *uregs = &n->arch.user_regs; > unsigned long gsb = 0, gss = 0; > bool compat = is_pv_32bit_vcpu(n); > @@ -1709,6 +1710,7 @@ static void load_segments(struct vcpu *n > regs->cs = FLAT_KERNEL_CS; > regs->rip = pv->failsafe_callback_eip; > } > +#endif > } > > /* > @@ -1723,6 +1725,7 @@ static void load_segments(struct vcpu *n > */ > static void save_segments(struct vcpu *v) > { > +#ifdef CONFIG_PV > struct cpu_user_regs *regs = &v->arch.user_regs; > > read_sregs(regs); > @@ -1748,6 +1751,7 @@ static void save_segments(struct vcpu *v > else > v->arch.pv.gs_base_user = gs_base; > } > +#endif > } Could you move {load,save}_segments to pv/domain.c and rename to pv_{load,save}_segments and provide a dummy handler for !CONFIG_PV in pv/domain.h? Sorry it's slightly more work, but I think it's cleaner overall. 
> > void paravirt_ctxt_switch_from(struct vcpu *v) > --- a/xen/arch/x86/domctl.c > +++ b/xen/arch/x86/domctl.c > @@ -408,13 +408,13 @@ long arch_do_domctl( > case XEN_DOMCTL_set_address_size: > if ( is_hvm_domain(d) ) > ret = -EOPNOTSUPP; > + else if ( is_pv_64bit_domain(d) && domctl->u.address_size.size == 32 ) > + ret = switch_compat(d); > else if ( is_pv_domain(d) ) > { > if ( ((domctl->u.address_size.size == 64) && !d->arch.pv.is_32bit) || > ((domctl->u.address_size.size == 32) && d->arch.pv.is_32bit) ) > ret = 0; > - else if ( domctl->u.address_size.size == 32 ) > - ret = switch_compat(d); > else > ret = -EINVAL; > } > --- a/xen/include/xen/sched.h > +++ b/xen/include/xen/sched.h > @@ -985,7 +985,7 @@ static always_inline bool is_control_dom > > static always_inline bool is_pv_domain(const struct domain *d) > { > - return IS_ENABLED(CONFIG_PV) && > + return IS_ENABLED(CONFIG_X86) && > evaluate_nospec(!(d->options & XEN_DOMCTL_CDF_hvm)); > } > > @@ -1011,7 +1011,7 @@ static always_inline bool is_pv_32bit_vc > > static always_inline bool is_pv_64bit_domain(const struct domain *d) > { > - if ( !is_pv_domain(d) ) > + if ( !IS_ENABLED(CONFIG_PV) || !is_pv_domain(d) ) > return false; I think overall it is confusing to have a domain that returns true for is_pv_domain but false for both is_pv_{64,32}bit_domain checks. I know those are only the system domains, but it feels confusing and could cause mistakes in the future IMO, as then we would have to carefully think where to use ( is_pv_64bit_domain(d) || is_pv_32bit_domain(d) ) vs just using is_pv_domain(d), or IS_ENABLED(CONFIG_PV) && is_pv_domain(d) Thanks, Roger.
On 12.04.2021 11:34, Roger Pau Monné wrote: > On Fri, Nov 27, 2020 at 05:54:57PM +0100, Jan Beulich wrote: >> --- a/xen/arch/x86/dom0_build.c >> +++ b/xen/arch/x86/dom0_build.c >> @@ -568,7 +568,7 @@ int __init construct_dom0(struct domain >> >> if ( is_hvm_domain(d) ) >> rc = dom0_construct_pvh(d, image, image_headroom, initrd, cmdline); >> - else if ( is_pv_domain(d) ) >> + else if ( is_pv_64bit_domain(d) || is_pv_32bit_domain(d) ) > > Urg, that's very confusing IMO, as I'm sure I would ask someone to > just use is_pv_domain without realizing. It needs at least a comment, > but even then I'm not sure I like it. I can add a comment, sure, but I think this is as confusing (or not) as ... > So that I understand it, the point to use those expressions instead of > is_pv_domain is to avoid calling dom0_construct_pv when CONFIG_PV is > not enabled? > > Maybe it wold be better to instead use: > > if ( IS_ENABLED(CONFIG_PV) && is_pv_domain(d) ) ... this. > In any case I wonder if we should maybe aim to introduce a new type > for system domains, that's neither PV or HVM, in order to avoid having > system domains qualified as PV even when PV is compiled out. This was my first thought, too, but would come with a much higher price tag: We'd need to audit all uses for whether they're meant to include the special domains. And this includes auditing of cases where !is_hvm_*() may be inferred to mean is_pv_*(). 
>> --- a/xen/arch/x86/domain.c >> +++ b/xen/arch/x86/domain.c >> @@ -1544,6 +1544,7 @@ arch_do_vcpu_op( >> */ >> static void load_segments(struct vcpu *n) >> { >> +#ifdef CONFIG_PV >> struct cpu_user_regs *uregs = &n->arch.user_regs; >> unsigned long gsb = 0, gss = 0; >> bool compat = is_pv_32bit_vcpu(n); >> @@ -1709,6 +1710,7 @@ static void load_segments(struct vcpu *n >> regs->cs = FLAT_KERNEL_CS; >> regs->rip = pv->failsafe_callback_eip; >> } >> +#endif >> } >> >> /* >> @@ -1723,6 +1725,7 @@ static void load_segments(struct vcpu *n >> */ >> static void save_segments(struct vcpu *v) >> { >> +#ifdef CONFIG_PV >> struct cpu_user_regs *regs = &v->arch.user_regs; >> >> read_sregs(regs); >> @@ -1748,6 +1751,7 @@ static void save_segments(struct vcpu *v >> else >> v->arch.pv.gs_base_user = gs_base; >> } >> +#endif >> } > > Could you move {load,save}_segments to pv/domain.c and rename to > pv_{load,save}_segments and provide a dummy handler for !CONFIG_PV in > pv/domain.h? > > Sorry it's slightly more work, but I think it's cleaner overall. Doing so was my first thought too, but we'd lose the present inlining of the functions. For save_segments() this could be dealt with by moving paravirt_ctxt_switch_from() as well, but load_segments() would remain. As an aside, I've long been wondering why we use paravirt_ctxt_switch_{from,to}() also for the idle domain. This presently prevents their movement to pv/domain.c. From my not overly detailed looking at it, I don't think anything the functions do actually applies to idle vcpus. 
>> --- a/xen/include/xen/sched.h >> +++ b/xen/include/xen/sched.h >> @@ -985,7 +985,7 @@ static always_inline bool is_control_dom >> >> static always_inline bool is_pv_domain(const struct domain *d) >> { >> - return IS_ENABLED(CONFIG_PV) && >> + return IS_ENABLED(CONFIG_X86) && >> evaluate_nospec(!(d->options & XEN_DOMCTL_CDF_hvm)); >> } >> >> @@ -1011,7 +1011,7 @@ static always_inline bool is_pv_32bit_vc >> >> static always_inline bool is_pv_64bit_domain(const struct domain *d) >> { >> - if ( !is_pv_domain(d) ) >> + if ( !IS_ENABLED(CONFIG_PV) || !is_pv_domain(d) ) >> return false; > > I think overall is confusing to have a domain that returns true for > is_pv_domain but false for both is_pv_{64,32}bit_domain checks. > > I know those are only the system domains, but it feels confusing and > could cause mistakes in the future IMO, as then we would have to > carefully think where to use ( is_pv_64bit_domain(d) > || is_pv_32bit_domain(d) ) vs just using is_pv_domain(d), or > IS_ENABLED(CONFIG_PV) && is_pv_domain(d) Imo it's not "then we would have to carefully think where to use ..." but instead this patch is an indication that we should have been for quite some time. For this reason (coming back to your first comment at the top) I'm not sure adding a comment _there_ is actually useful. Every use of is_pv_*() needs carefully considering which domains are really meant. Jan
On Mon, Apr 12, 2021 at 12:07:12PM +0200, Jan Beulich wrote: > On 12.04.2021 11:34, Roger Pau Monné wrote: > > On Fri, Nov 27, 2020 at 05:54:57PM +0100, Jan Beulich wrote: > >> --- a/xen/arch/x86/dom0_build.c > >> +++ b/xen/arch/x86/dom0_build.c > >> @@ -568,7 +568,7 @@ int __init construct_dom0(struct domain > >> > >> if ( is_hvm_domain(d) ) > >> rc = dom0_construct_pvh(d, image, image_headroom, initrd, cmdline); > >> - else if ( is_pv_domain(d) ) > >> + else if ( is_pv_64bit_domain(d) || is_pv_32bit_domain(d) ) > > > > Urg, that's very confusing IMO, as I'm sure I would ask someone to > > just use is_pv_domain without realizing. It needs at least a comment, > > but even then I'm not sure I like it. > > I can add a comment, sure, but I think this is as confusing (or not) > as ... > > > So that I understand it, the point to use those expressions instead of > > is_pv_domain is to avoid calling dom0_construct_pv when CONFIG_PV is > > not enabled? > > > > Maybe it wold be better to instead use: > > > > if ( IS_ENABLED(CONFIG_PV) && is_pv_domain(d) ) > > ... this. > > > In any case I wonder if we should maybe aim to introduce a new type > > for system domains, that's neither PV or HVM, in order to avoid having > > system domains qualified as PV even when PV is compiled out. > > This was my first thought, too, but would come with a much higher > price tag: We'd need to audit all uses for whether they're meant > to include the special domains. And this includes auditing of cases > where !is_hvm_*() may be inferred to mean is_pv_*(). What about we provide a dummy dom0_construct_pv that returns -EOPNOTSUPP when !CONFIG_PV and take rc into account for the panic call in construct_dom0 ie: if ( is_hvm_domain(d) ) rc = dom0_construct_pvh(d, image, image_headroom, initrd, cmdline); else rc = dom0_construct_pv(d, image, image_headroom, initrd, cmdline); if ( rc == -EOPNOTSUPP ) panic("Cannot construct Dom0. 
No guest interface available\n"); if ( rc ) return rc; I think that's likely less confusing than the alternatives. > >> --- a/xen/arch/x86/domain.c > >> +++ b/xen/arch/x86/domain.c > >> @@ -1544,6 +1544,7 @@ arch_do_vcpu_op( > >> */ > >> static void load_segments(struct vcpu *n) > >> { > >> +#ifdef CONFIG_PV > >> struct cpu_user_regs *uregs = &n->arch.user_regs; > >> unsigned long gsb = 0, gss = 0; > >> bool compat = is_pv_32bit_vcpu(n); > >> @@ -1709,6 +1710,7 @@ static void load_segments(struct vcpu *n > >> regs->cs = FLAT_KERNEL_CS; > >> regs->rip = pv->failsafe_callback_eip; > >> } > >> +#endif > >> } > >> > >> /* > >> @@ -1723,6 +1725,7 @@ static void load_segments(struct vcpu *n > >> */ > >> static void save_segments(struct vcpu *v) > >> { > >> +#ifdef CONFIG_PV > >> struct cpu_user_regs *regs = &v->arch.user_regs; > >> > >> read_sregs(regs); > >> @@ -1748,6 +1751,7 @@ static void save_segments(struct vcpu *v > >> else > >> v->arch.pv.gs_base_user = gs_base; > >> } > >> +#endif > >> } > > > > Could you move {load,save}_segments to pv/domain.c and rename to > > pv_{load,save}_segments and provide a dummy handler for !CONFIG_PV in > > pv/domain.h? > > > > Sorry it's slightly more work, but I think it's cleaner overall. > > Doing so was my first thought too, but we'd lose the present inlining > of the functions. For save_segments() this could be dealt with by > moving paravirt_ctxt_switch_from() as well, but load_segments() would > remain. I see, maybe worth marking as inline then or adding a note about why they are not moved to pv/domain.c? As an aside, why do we need to call load_segments with interrupts enabled? Could we move it to paravirt_ctxt_switch_to? > As an aside, I've long been wondering why we use > paravirt_ctxt_switch_{from,to}() also for the idle domain. This > presently prevents their movement to pv/domain.c. From my not overly > detailed looking at it, I don't think anything the functions do > actually applies to idle vcpus. 
> > >> --- a/xen/include/xen/sched.h > >> +++ b/xen/include/xen/sched.h > >> @@ -985,7 +985,7 @@ static always_inline bool is_control_dom > >> > >> static always_inline bool is_pv_domain(const struct domain *d) > >> { > >> - return IS_ENABLED(CONFIG_PV) && > >> + return IS_ENABLED(CONFIG_X86) && > >> evaluate_nospec(!(d->options & XEN_DOMCTL_CDF_hvm)); > >> } > >> > >> @@ -1011,7 +1011,7 @@ static always_inline bool is_pv_32bit_vc > >> > >> static always_inline bool is_pv_64bit_domain(const struct domain *d) > >> { > >> - if ( !is_pv_domain(d) ) > >> + if ( !IS_ENABLED(CONFIG_PV) || !is_pv_domain(d) ) > >> return false; > > > > I think overall is confusing to have a domain that returns true for > > is_pv_domain but false for both is_pv_{64,32}bit_domain checks. > > > > I know those are only the system domains, but it feels confusing and > > could cause mistakes in the future IMO, as then we would have to > > carefully think where to use ( is_pv_64bit_domain(d) > > || is_pv_32bit_domain(d) ) vs just using is_pv_domain(d), or > > IS_ENABLED(CONFIG_PV) && is_pv_domain(d) > > Imo it's not "then we would have to carefully think where to use ..." > but instead this patch is an indication that we should have been for > quite some time. For this reason (coming back to your first comment > at the top) I'm not sure adding a comment _there_ is actually useful. > Every use of is_pv_*() needs carefully considering which domains are > really meant. Maybe we shouldn't have used is_pv_domain as a way to hide code from the compiler and instead always provide dummy functions, as even with PV support compiled out we still need some of it for system domains. I'm not sure I have a good proposal to make, but it seems wrong to me that is_pv_domain(d) could be different than is_pv_64bit_domain(d) || is_pv_32bit_domain(d). Thanks, Roger.
On 12.04.2021 16:49, Roger Pau Monné wrote: > On Mon, Apr 12, 2021 at 12:07:12PM +0200, Jan Beulich wrote: >> On 12.04.2021 11:34, Roger Pau Monné wrote: >>> On Fri, Nov 27, 2020 at 05:54:57PM +0100, Jan Beulich wrote: >>>> --- a/xen/arch/x86/dom0_build.c >>>> +++ b/xen/arch/x86/dom0_build.c >>>> @@ -568,7 +568,7 @@ int __init construct_dom0(struct domain >>>> >>>> if ( is_hvm_domain(d) ) >>>> rc = dom0_construct_pvh(d, image, image_headroom, initrd, cmdline); >>>> - else if ( is_pv_domain(d) ) >>>> + else if ( is_pv_64bit_domain(d) || is_pv_32bit_domain(d) ) >>> >>> Urg, that's very confusing IMO, as I'm sure I would ask someone to >>> just use is_pv_domain without realizing. It needs at least a comment, >>> but even then I'm not sure I like it. >> >> I can add a comment, sure, but I think this is as confusing (or not) >> as ... >> >>> So that I understand it, the point to use those expressions instead of >>> is_pv_domain is to avoid calling dom0_construct_pv when CONFIG_PV is >>> not enabled? >>> >>> Maybe it wold be better to instead use: >>> >>> if ( IS_ENABLED(CONFIG_PV) && is_pv_domain(d) ) >> >> ... this. >> >>> In any case I wonder if we should maybe aim to introduce a new type >>> for system domains, that's neither PV or HVM, in order to avoid having >>> system domains qualified as PV even when PV is compiled out. >> >> This was my first thought, too, but would come with a much higher >> price tag: We'd need to audit all uses for whether they're meant >> to include the special domains. And this includes auditing of cases >> where !is_hvm_*() may be inferred to mean is_pv_*(). 
> > What about we provide a dummy dom0_construct_pv that returns > -EOPNOTSUPP when !CONFIG_PV and take rc into account for the panic > call in construct_dom0 ie: > > if ( is_hvm_domain(d) ) > rc = dom0_construct_pvh(d, image, image_headroom, initrd, cmdline); > else > rc = dom0_construct_pv(d, image, image_headroom, initrd, cmdline); > > if ( rc == -EOPNOTSUPP ) > panic("Cannot construct Dom0. No guest interface available\n"); > if ( rc ) > return rc; > > I think that's likely less confusing that the alternatives. This could certainly be made work, but see below (i.e. it would help the situation right here, but not the general issue - the case in arch_do_domctl() may look less confusing, but really suffers the same problem). >>>> --- a/xen/arch/x86/domain.c >>>> +++ b/xen/arch/x86/domain.c >>>> @@ -1544,6 +1544,7 @@ arch_do_vcpu_op( >>>> */ >>>> static void load_segments(struct vcpu *n) >>>> { >>>> +#ifdef CONFIG_PV >>>> struct cpu_user_regs *uregs = &n->arch.user_regs; >>>> unsigned long gsb = 0, gss = 0; >>>> bool compat = is_pv_32bit_vcpu(n); >>>> @@ -1709,6 +1710,7 @@ static void load_segments(struct vcpu *n >>>> regs->cs = FLAT_KERNEL_CS; >>>> regs->rip = pv->failsafe_callback_eip; >>>> } >>>> +#endif >>>> } >>>> >>>> /* >>>> @@ -1723,6 +1725,7 @@ static void load_segments(struct vcpu *n >>>> */ >>>> static void save_segments(struct vcpu *v) >>>> { >>>> +#ifdef CONFIG_PV >>>> struct cpu_user_regs *regs = &v->arch.user_regs; >>>> >>>> read_sregs(regs); >>>> @@ -1748,6 +1751,7 @@ static void save_segments(struct vcpu *v >>>> else >>>> v->arch.pv.gs_base_user = gs_base; >>>> } >>>> +#endif >>>> } >>> >>> Could you move {load,save}_segments to pv/domain.c and rename to >>> pv_{load,save}_segments and provide a dummy handler for !CONFIG_PV in >>> pv/domain.h? >>> >>> Sorry it's slightly more work, but I think it's cleaner overall. >> >> Doing so was my first thought too, but we'd lose the present inlining >> of the functions. 
For save_segments() this could be dealt with by >> moving paravirt_ctxt_switch_from() as well, but load_segments() would >> remain. > > I see, maybe worth marking as inline then or adding a note about why > they are not moved to pv/domain.c? We try to avoid marking functions inline outside of headers. Adding a note is an option, but I'm not sure something to be done here. > As an aside, why do we need to call load_segments with interrupts > enabled? Could we move it to paravirt_ctxt_switch_to? load_segments() can raise faults, and faults with interrupts disabled are, with (intentionally) very few exceptions, fatal. >>>> --- a/xen/include/xen/sched.h >>>> +++ b/xen/include/xen/sched.h >>>> @@ -985,7 +985,7 @@ static always_inline bool is_control_dom >>>> >>>> static always_inline bool is_pv_domain(const struct domain *d) >>>> { >>>> - return IS_ENABLED(CONFIG_PV) && >>>> + return IS_ENABLED(CONFIG_X86) && >>>> evaluate_nospec(!(d->options & XEN_DOMCTL_CDF_hvm)); >>>> } >>>> >>>> @@ -1011,7 +1011,7 @@ static always_inline bool is_pv_32bit_vc >>>> >>>> static always_inline bool is_pv_64bit_domain(const struct domain *d) >>>> { >>>> - if ( !is_pv_domain(d) ) >>>> + if ( !IS_ENABLED(CONFIG_PV) || !is_pv_domain(d) ) >>>> return false; >>> >>> I think overall is confusing to have a domain that returns true for >>> is_pv_domain but false for both is_pv_{64,32}bit_domain checks. >>> >>> I know those are only the system domains, but it feels confusing and >>> could cause mistakes in the future IMO, as then we would have to >>> carefully think where to use ( is_pv_64bit_domain(d) >>> || is_pv_32bit_domain(d) ) vs just using is_pv_domain(d), or >>> IS_ENABLED(CONFIG_PV) && is_pv_domain(d) >> >> Imo it's not "then we would have to carefully think where to use ..." >> but instead this patch is an indication that we should have been for >> quite some time. For this reason (coming back to your first comment >> at the top) I'm not sure adding a comment _there_ is actually useful. 
>> Every use of is_pv_*() needs carefully considering which domains are >> really meant. > > Maybe we shouldn't have used is_pv_domain as a way to hide code from > the compiler and instead always provide dummy functions, as even with > PV support compiled out we still need some of it for system domains. > > I'm not sure I have a good proposal to make, but it seems wrong to me > that is_pv_domain(d) could be different than is_pv_64bit_domain(d) || > is_pv_32bit_domain(d). Hmm, so we're of opposite opinions - not sure what to do. Short of having / introducing is_system_domain() or some such (with all the needed auditing) I can't see how assuming the two would mean the same could ever have been true. With what we have is_pv_domain() is legitimately true for them, and both is_pv_{32,64}bit_domain() ought to be false (as there's no specific bitness associated with them) imo _at least_ when !PV. Jan
On Mon, Apr 12, 2021 at 05:24:41PM +0200, Jan Beulich wrote: > On 12.04.2021 16:49, Roger Pau Monné wrote: > > On Mon, Apr 12, 2021 at 12:07:12PM +0200, Jan Beulich wrote: > >> On 12.04.2021 11:34, Roger Pau Monné wrote: > >>> On Fri, Nov 27, 2020 at 05:54:57PM +0100, Jan Beulich wrote: > >>>> --- a/xen/include/xen/sched.h > >>>> +++ b/xen/include/xen/sched.h > >>>> @@ -985,7 +985,7 @@ static always_inline bool is_control_dom > >>>> > >>>> static always_inline bool is_pv_domain(const struct domain *d) > >>>> { > >>>> - return IS_ENABLED(CONFIG_PV) && > >>>> + return IS_ENABLED(CONFIG_X86) && > >>>> evaluate_nospec(!(d->options & XEN_DOMCTL_CDF_hvm)); > >>>> } > >>>> > >>>> @@ -1011,7 +1011,7 @@ static always_inline bool is_pv_32bit_vc > >>>> > >>>> static always_inline bool is_pv_64bit_domain(const struct domain *d) > >>>> { > >>>> - if ( !is_pv_domain(d) ) > >>>> + if ( !IS_ENABLED(CONFIG_PV) || !is_pv_domain(d) ) > >>>> return false; > >>> > >>> I think overall is confusing to have a domain that returns true for > >>> is_pv_domain but false for both is_pv_{64,32}bit_domain checks. > >>> > >>> I know those are only the system domains, but it feels confusing and > >>> could cause mistakes in the future IMO, as then we would have to > >>> carefully think where to use ( is_pv_64bit_domain(d) > >>> || is_pv_32bit_domain(d) ) vs just using is_pv_domain(d), or > >>> IS_ENABLED(CONFIG_PV) && is_pv_domain(d) > >> > >> Imo it's not "then we would have to carefully think where to use ..." > >> but instead this patch is an indication that we should have been for > >> quite some time. For this reason (coming back to your first comment > >> at the top) I'm not sure adding a comment _there_ is actually useful. > >> Every use of is_pv_*() needs carefully considering which domains are > >> really meant. 
> > > > Maybe we shouldn't have used is_pv_domain as a way to hide code from > > the compiler and instead always provide dummy functions, as even with > > PV support compiled out we still need some of it for system domains. > > > > I'm not sure I have a good proposal to make, but it seems wrong to me > > that is_pv_domain(d) could be different than is_pv_64bit_domain(d) || > > is_pv_32bit_domain(d). > > Hmm, so we're of opposite opinions - not sure what to do. Short of > having / introducing is_system_domain() or some such (with all the > needed auditing) I can't see how assuming the two would mean the > same could ever have been true. With what we have is_pv_domain() is > legitimately true for them, and both is_pv_{32,64}bit_domain() ought > to be false (as there's no specific bitness associated with them) > imo _at least_ when !PV. It's all quite ugly, but I wasn't really getting your reasoning that system domains can be considered PV domains without a bitness. I think we both agree that long term having is_system_domain would be the cleanest solution, but it needs a lot of auditing. I think I would be fine if you could add a comment somewhere noting that system domains can be identified as PV domains without a bitness, so that it's likely less confusing in the future. Thanks, Roger.
On 12.04.2021 17:40, Roger Pau Monné wrote: > On Mon, Apr 12, 2021 at 05:24:41PM +0200, Jan Beulich wrote: >> On 12.04.2021 16:49, Roger Pau Monné wrote: >>> On Mon, Apr 12, 2021 at 12:07:12PM +0200, Jan Beulich wrote: >>>> On 12.04.2021 11:34, Roger Pau Monné wrote: >>>>> On Fri, Nov 27, 2020 at 05:54:57PM +0100, Jan Beulich wrote: >>>>>> --- a/xen/include/xen/sched.h >>>>>> +++ b/xen/include/xen/sched.h >>>>>> @@ -985,7 +985,7 @@ static always_inline bool is_control_dom >>>>>> >>>>>> static always_inline bool is_pv_domain(const struct domain *d) >>>>>> { >>>>>> - return IS_ENABLED(CONFIG_PV) && >>>>>> + return IS_ENABLED(CONFIG_X86) && >>>>>> evaluate_nospec(!(d->options & XEN_DOMCTL_CDF_hvm)); >>>>>> } >>>>>> >>>>>> @@ -1011,7 +1011,7 @@ static always_inline bool is_pv_32bit_vc >>>>>> >>>>>> static always_inline bool is_pv_64bit_domain(const struct domain *d) >>>>>> { >>>>>> - if ( !is_pv_domain(d) ) >>>>>> + if ( !IS_ENABLED(CONFIG_PV) || !is_pv_domain(d) ) >>>>>> return false; >>>>> >>>>> I think overall is confusing to have a domain that returns true for >>>>> is_pv_domain but false for both is_pv_{64,32}bit_domain checks. >>>>> >>>>> I know those are only the system domains, but it feels confusing and >>>>> could cause mistakes in the future IMO, as then we would have to >>>>> carefully think where to use ( is_pv_64bit_domain(d) >>>>> || is_pv_32bit_domain(d) ) vs just using is_pv_domain(d), or >>>>> IS_ENABLED(CONFIG_PV) && is_pv_domain(d) >>>> >>>> Imo it's not "then we would have to carefully think where to use ..." >>>> but instead this patch is an indication that we should have been for >>>> quite some time. For this reason (coming back to your first comment >>>> at the top) I'm not sure adding a comment _there_ is actually useful. >>>> Every use of is_pv_*() needs carefully considering which domains are >>>> really meant. 
>>> >>> Maybe we shouldn't have used is_pv_domain as a way to hide code from >>> the compiler and instead always provide dummy functions, as even with >>> PV support compiled out we still need some of it for system domains. >>> >>> I'm not sure I have a good proposal to make, but it seems wrong to me >>> that is_pv_domain(d) could be different than is_pv_64bit_domain(d) || >>> is_pv_32bit_domain(d). >> >> Hmm, so we're of opposite opinions - not sure what to do. Short of >> having / introducing is_system_domain() or some such (with all the >> needed auditing) I can't see how assuming the two would mean the >> same could ever have been true. With what we have is_pv_domain() is >> legitimately true for them, and both is_pv_{32,64}bit_domain() ought >> to be false (as there's no specific bitness associated with them) >> imo _at least_ when !PV. > > It's all quite ugly, but I wasn't really getting your reasoning that > system domains can be considered PV domains without a bitness. > > I think we both agree that long term having is_system_domain would be > the cleanest solution, but it needs a lot of auditing. Yes. > I think I would > be fine if you could add a comment somewhere noting that system > domains can be identified as PV domains without a bitness, so that > it's likely less confusing in the future. I've added /* * Note that is_pv_domain() can return true (for system domains) even when * both is_pv_64bit_domain() and is_pv_32bit_domain() return false. IOW * system domains can be considered PV without specific bitness. */ immediately ahead of is_pv_domain(). Does this sound okay? Jan
On Mon, Apr 12, 2021 at 05:51:17PM +0200, Jan Beulich wrote: > On 12.04.2021 17:40, Roger Pau Monné wrote: > > On Mon, Apr 12, 2021 at 05:24:41PM +0200, Jan Beulich wrote: > >> On 12.04.2021 16:49, Roger Pau Monné wrote: > >>> On Mon, Apr 12, 2021 at 12:07:12PM +0200, Jan Beulich wrote: > >>>> On 12.04.2021 11:34, Roger Pau Monné wrote: > >>>>> On Fri, Nov 27, 2020 at 05:54:57PM +0100, Jan Beulich wrote: > >>>>>> --- a/xen/include/xen/sched.h > >>>>>> +++ b/xen/include/xen/sched.h > >>>>>> @@ -985,7 +985,7 @@ static always_inline bool is_control_dom > >>>>>> > >>>>>> static always_inline bool is_pv_domain(const struct domain *d) > >>>>>> { > >>>>>> - return IS_ENABLED(CONFIG_PV) && > >>>>>> + return IS_ENABLED(CONFIG_X86) && > >>>>>> evaluate_nospec(!(d->options & XEN_DOMCTL_CDF_hvm)); > >>>>>> } > >>>>>> > >>>>>> @@ -1011,7 +1011,7 @@ static always_inline bool is_pv_32bit_vc > >>>>>> > >>>>>> static always_inline bool is_pv_64bit_domain(const struct domain *d) > >>>>>> { > >>>>>> - if ( !is_pv_domain(d) ) > >>>>>> + if ( !IS_ENABLED(CONFIG_PV) || !is_pv_domain(d) ) > >>>>>> return false; > >>>>> > >>>>> I think overall is confusing to have a domain that returns true for > >>>>> is_pv_domain but false for both is_pv_{64,32}bit_domain checks. > >>>>> > >>>>> I know those are only the system domains, but it feels confusing and > >>>>> could cause mistakes in the future IMO, as then we would have to > >>>>> carefully think where to use ( is_pv_64bit_domain(d) > >>>>> || is_pv_32bit_domain(d) ) vs just using is_pv_domain(d), or > >>>>> IS_ENABLED(CONFIG_PV) && is_pv_domain(d) > >>>> > >>>> Imo it's not "then we would have to carefully think where to use ..." > >>>> but instead this patch is an indication that we should have been for > >>>> quite some time. For this reason (coming back to your first comment > >>>> at the top) I'm not sure adding a comment _there_ is actually useful. 
> >>>> Every use of is_pv_*() needs carefully considering which domains are > >>>> really meant. > >>> > >>> Maybe we shouldn't have used is_pv_domain as a way to hide code from > >>> the compiler and instead always provide dummy functions, as even with > >>> PV support compiled out we still need some of it for system domains. > >>> > >>> I'm not sure I have a good proposal to make, but it seems wrong to me > >>> that is_pv_domain(d) could be different than is_pv_64bit_domain(d) || > >>> is_pv_32bit_domain(d). > >> > >> Hmm, so we're of opposite opinions - not sure what to do. Short of > >> having / introducing is_system_domain() or some such (with all the > >> needed auditing) I can't see how assuming the two would mean the > >> same could ever have been true. With what we have is_pv_domain() is > >> legitimately true for them, and both is_pv_{32,64}bit_domain() ought > >> to be false (as there's no specific bitness associated with them) > >> imo _at least_ when !PV. > > > > It's all quite ugly, but I wasn't really getting your reasoning that > > system domains can be considered PV domains without a bitness. > > > > I think we both agree that long term having is_system_domain would be > > the cleanest solution, but it needs a lot of auditing. > > Yes. > > > I think I would > > be fine if you could add a comment somewhere noting that system > > domains can be identified as PV domains without a bitness, so that > > it's likely less confusing in the future. > > I've added > > /* > * Note that is_pv_domain() can return true (for system domains) even when > * both is_pv_64bit_domain() and is_pv_32bit_domain() return false. IOW > * system domains can be considered PV without specific bitness. > */ > > immediately ahead of is_pv_domain(). Does this sound okay? 
Yes, I think the text is fine; however, I'm confused by the resulting code in is_pv_64bit_domain: static always_inline bool is_pv_64bit_domain(const struct domain *d) { if ( !IS_ENABLED(CONFIG_PV) || !is_pv_domain(d) ) return false; #ifdef CONFIG_PV32 return !d->arch.pv.is_32bit; #else return true; #endif } Won't this return true for system domains if CONFIG_PV is enabled, so that the distinction that system domains are PV domains without a bitness no longer holds? Sorry if I'm missing something; I find this all quite confusing. Thanks, Roger.
On 13.04.2021 09:56, Roger Pau Monné wrote: > On Mon, Apr 12, 2021 at 05:51:17PM +0200, Jan Beulich wrote: >> On 12.04.2021 17:40, Roger Pau Monné wrote: >>> On Mon, Apr 12, 2021 at 05:24:41PM +0200, Jan Beulich wrote: >>>> On 12.04.2021 16:49, Roger Pau Monné wrote: >>>>> On Mon, Apr 12, 2021 at 12:07:12PM +0200, Jan Beulich wrote: >>>>>> On 12.04.2021 11:34, Roger Pau Monné wrote: >>>>>>> On Fri, Nov 27, 2020 at 05:54:57PM +0100, Jan Beulich wrote: >>>>>>>> --- a/xen/include/xen/sched.h >>>>>>>> +++ b/xen/include/xen/sched.h >>>>>>>> @@ -985,7 +985,7 @@ static always_inline bool is_control_dom >>>>>>>> >>>>>>>> static always_inline bool is_pv_domain(const struct domain *d) >>>>>>>> { >>>>>>>> - return IS_ENABLED(CONFIG_PV) && >>>>>>>> + return IS_ENABLED(CONFIG_X86) && >>>>>>>> evaluate_nospec(!(d->options & XEN_DOMCTL_CDF_hvm)); >>>>>>>> } >>>>>>>> >>>>>>>> @@ -1011,7 +1011,7 @@ static always_inline bool is_pv_32bit_vc >>>>>>>> >>>>>>>> static always_inline bool is_pv_64bit_domain(const struct domain *d) >>>>>>>> { >>>>>>>> - if ( !is_pv_domain(d) ) >>>>>>>> + if ( !IS_ENABLED(CONFIG_PV) || !is_pv_domain(d) ) >>>>>>>> return false; >>>>>>> >>>>>>> I think overall is confusing to have a domain that returns true for >>>>>>> is_pv_domain but false for both is_pv_{64,32}bit_domain checks. >>>>>>> >>>>>>> I know those are only the system domains, but it feels confusing and >>>>>>> could cause mistakes in the future IMO, as then we would have to >>>>>>> carefully think where to use ( is_pv_64bit_domain(d) >>>>>>> || is_pv_32bit_domain(d) ) vs just using is_pv_domain(d), or >>>>>>> IS_ENABLED(CONFIG_PV) && is_pv_domain(d) >>>>>> >>>>>> Imo it's not "then we would have to carefully think where to use ..." >>>>>> but instead this patch is an indication that we should have been for >>>>>> quite some time. For this reason (coming back to your first comment >>>>>> at the top) I'm not sure adding a comment _there_ is actually useful. 
>>>>>> Every use of is_pv_*() needs carefully considering which domains are >>>>>> really meant. >>>>> >>>>> Maybe we shouldn't have used is_pv_domain as a way to hide code from >>>>> the compiler and instead always provide dummy functions, as even with >>>>> PV support compiled out we still need some of it for system domains. >>>>> >>>>> I'm not sure I have a good proposal to make, but it seems wrong to me >>>>> that is_pv_domain(d) could be different than is_pv_64bit_domain(d) || >>>>> is_pv_32bit_domain(d). >>>> >>>> Hmm, so we're of opposite opinions - not sure what to do. Short of >>>> having / introducing is_system_domain() or some such (with all the >>>> needed auditing) I can't see how assuming the two would mean the >>>> same could ever have been true. With what we have is_pv_domain() is >>>> legitimately true for them, and both is_pv_{32,64}bit_domain() ought >>>> to be false (as there's no specific bitness associated with them) >>>> imo _at least_ when !PV. >>> >>> It's all quite ugly, but I wasn't really getting your reasoning that >>> system domains can be considered PV domains without a bitness. >>> >>> I think we both agree that long term having is_system_domain would be >>> the cleanest solution, but it needs a lot of auditing. >> >> Yes. >> >>> I think I would >>> be fine if you could add a comment somewhere noting that system >>> domains can be identified as PV domains without a bitness, so that >>> it's likely less confusing in the future. >> >> I've added >> >> /* >> * Note that is_pv_domain() can return true (for system domains) even when >> * both is_pv_64bit_domain() and is_pv_32bit_domain() return false. IOW >> * system domains can be considered PV without specific bitness. >> */ >> >> immediately ahead of is_pv_domain(). Does this sound okay? 
> > Yes, I think the text is fine, I'm however confused by the resulting > code in is_pv_64bit_domain: > > static always_inline bool is_pv_64bit_domain(const struct domain *d) > { > if ( !IS_ENABLED(CONFIG_PV) || !is_pv_domain(d) ) > return false; > > #ifdef CONFIG_PV32 > return !d->arch.pv.is_32bit; > #else > return true; > #endif > } > > Won't this return true for system domains if CONFIG_PV is enabled, and > hence the distinction that system domains are PV domain without a > bitness won't be true anymore? > > Sorry if I'm missing something, I find this all quite confusing. Earlier I said "With what we have is_pv_domain() is legitimately true for them, and both is_pv_{32,64}bit_domain() ought to be false (as there's no specific bitness associated with them) imo _at least_ when !PV." Note the emphasis on "at least". For the "normal" case (PV enabled) I'm again uncertain we can easily change present behavior. Hence the new comment also is worded such that this not fully consistent behavior is still covered. Jan
Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1085,7 +1085,7 @@ int arch_set_info_guest( * update_cr3(), sh_update_cr3(), sh_walk_guest_tables(), and * shadow_one_bit_disable() for why that is. */ - !is_hvm_domain(d) && !is_pv_32bit_domain(d) ) + is_pv_64bit_domain(d) ) v->arch.flags &= ~TF_kernel_mode; vcpu_setup_fpu(v, v->arch.xsave_area, @@ -1231,7 +1231,7 @@ int arch_set_info_guest( * correct initial RO_MPT_VIRT_{START,END} L4 entry). */ if ( d != current->domain && !VM_ASSIST(d, m2p_strict) && - is_pv_domain(d) && !is_pv_32bit_domain(d) && + is_pv_64bit_domain(d) && test_bit(VMASST_TYPE_m2p_strict, &c.nat->vm_assist) && atomic_read(&d->arch.pv.nr_l4_pages) ) { @@ -1960,8 +1960,7 @@ static void __context_switch(void) #if defined(CONFIG_PV) && defined(CONFIG_HVM) /* Prefetch the VMCB if we expect to use it later in the context switch */ - if ( cpu_has_svm && is_pv_domain(nd) && !is_pv_32bit_domain(nd) && - !is_idle_domain(nd) ) + if ( cpu_has_svm && is_pv_64bit_domain(nd) && !is_idle_domain(nd) ) svm_load_segs_prefetch(); #endif
© 2016 - 2024 Red Hat, Inc.