update_paging_mode() in the AMD IOMMU code expects to be invoked with the PCI devices lock held. The check occurring only when the mode actually needs updating, the violation of this rule by the majority of callers did go unnoticed until per-domain IOMMU setup was changed to do away with on-demand creation of IOMMU page tables. Unfortunately the only half way reasonable fix to this that I could come up with requires more re-work than would seem desirable at this time of the release process, but addressing the issue seems unavoidable to me as its manifestation is a regression from the IOMMU page table setup re-work. The change also isn't without risk of further regressions - if in patch 2 I've missed a code path that would also need to invoke the new hook, then this might mean non- working guests (with passed-through devices on AMD hardware). 1: introduce GFN notification for translated domains 2: AMD/IOMMU: use notify_dfn() hook to update paging mode Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
In order for individual IOMMU drivers (and from an abstract pov also architectures) to be able to adjust, ahead of actual mapping requests, their data structures when they might cover only a sub-range of all possible GFNs, introduce a notification call used by various code paths potentially installing a fresh mapping of a never used GFN (for a particular domain). Note that before this patch, in gnttab_transfer(), once past assign_pages(), further errors modifying the physmap are ignored (presumably because it would be too complicated to try to roll back at that point). This patch follows suit by ignoring failed notify_gfn()s or races due to the need to intermediately drop locks, simply printing out a warning that the gfn may not be accessible due to the failure. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v2: Introduce arch_notify_gfn(), to invoke gfn_valid() on x86 (this unfortunately means it and notify_gfn() now need to be macros, or else include file dependencies get in the way, as gfn_valid() lives in paging.h, which we shouldn't include from xen/sched.h). Improve description. TBD: Does Arm actually have anything to check against in its arch_notify_gfn()? --- a/xen/arch/x86/hvm/dom0_build.c +++ b/xen/arch/x86/hvm/dom0_build.c @@ -173,7 +173,8 @@ static int __init pvh_populate_memory_ra continue; } - rc = guest_physmap_add_page(d, _gfn(start), page_to_mfn(page), + rc = notify_gfn(d, _gfn(start + (1UL << order) - 1)) ?: + guest_physmap_add_page(d, _gfn(start), page_to_mfn(page), order); if ( rc != 0 ) { --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -4304,9 +4304,17 @@ static int hvmop_set_param( if ( a.value > SHUTDOWN_MAX ) rc = -EINVAL; break; + case HVM_PARAM_IOREQ_SERVER_PFN: - d->arch.hvm.ioreq_gfn.base = a.value; + if ( d->arch.hvm.params[HVM_PARAM_NR_IOREQ_SERVER_PAGES] ) + rc = notify_gfn( + d, + _gfn(a.value + d->arch.hvm.params + [HVM_PARAM_NR_IOREQ_SERVER_PAGES] - 1)); + if ( !rc ) + d->arch.hvm.ioreq_gfn.base = a.value; break; + case HVM_PARAM_NR_IOREQ_SERVER_PAGES: { unsigned int i; @@ -4317,6 +4325,9 @@ static int hvmop_set_param( rc = -EINVAL; break; } + rc = notify_gfn(d, _gfn(d->arch.hvm.ioreq_gfn.base + a.value - 1)); + if ( rc ) + break; for ( i = 0; i < a.value; i++ ) set_bit(i, &d->arch.hvm.ioreq_gfn.mask); @@ -4330,7 +4341,11 @@ static int hvmop_set_param( BUILD_BUG_ON(HVM_PARAM_BUFIOREQ_PFN > sizeof(d->arch.hvm.ioreq_gfn.legacy_mask) * 8); if ( a.value ) - set_bit(a.index, &d->arch.hvm.ioreq_gfn.legacy_mask); + { + rc = notify_gfn(d, _gfn(a.value)); + if ( !rc ) + set_bit(a.index, &d->arch.hvm.ioreq_gfn.legacy_mask); + } break; case HVM_PARAM_X87_FIP_WIDTH: --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -946,6 +946,16 @@ map_grant_ref( return; } + if ( paging_mode_translate(ld) /* && (op->flags & GNTMAP_host_map) */ && + (rc = notify_gfn(ld, gaddr_to_gfn(op->host_addr))) ) + { + gdprintk(XENLOG_INFO, "notify(%"PRI_gfn") -> %d\n", + gfn_x(gaddr_to_gfn(op->host_addr)), rc); + op->status = GNTST_general_error; + return; + BUILD_BUG_ON(GNTST_okay); + } + if ( unlikely((rd = rcu_lock_domain_by_id(op->dom)) == NULL) ) { gdprintk(XENLOG_INFO, "Could not find domain %d\n", op->dom); @@ -2123,6 +2133,7 @@ gnttab_transfer( { bool_t okay; int rc; + gfn_t gfn; if ( i && hypercall_preempt_check() ) return i; @@ -2300,21 +2311,52 @@ gnttab_transfer( act = active_entry_acquire(e->grant_table, gop.ref); if ( evaluate_nospec(e->grant_table->gt_version == 1) ) + gfn = _gfn(shared_entry_v1(e->grant_table, gop.ref).frame); + else + gfn = _gfn(shared_entry_v2(e->grant_table, gop.ref).full_page.frame); + + if ( paging_mode_translate(e) ) { - grant_entry_v1_t *sha = &shared_entry_v1(e->grant_table, gop.ref); + gfn_t gfn2; + + active_entry_release(act); + grant_read_unlock(e->grant_table); + + rc = notify_gfn(e, gfn); + if ( rc ) + printk(XENLOG_G_WARNING + "%pd: gref %u: xfer GFN %"PRI_gfn" may be inaccessible (%d)\n", + e, gop.ref, gfn_x(gfn), rc); + + grant_read_lock(e->grant_table); + act = active_entry_acquire(e->grant_table, gop.ref); - guest_physmap_add_page(e, _gfn(sha->frame), mfn, 0); - if ( !paging_mode_translate(e) ) - sha->frame = mfn_x(mfn); + if ( evaluate_nospec(e->grant_table->gt_version == 1) ) + gfn2 = _gfn(shared_entry_v1(e->grant_table, gop.ref).frame); + else + gfn2 = _gfn(shared_entry_v2(e->grant_table, gop.ref). + full_page.frame); + + if ( !gfn_eq(gfn, gfn2) ) + { + printk(XENLOG_G_WARNING + "%pd: gref %u: xfer GFN went %"PRI_gfn" -> %"PRI_gfn"\n", + e, gop.ref, gfn_x(gfn), gfn_x(gfn2)); + gfn = gfn2; + } } - else - { - grant_entry_v2_t *sha = &shared_entry_v2(e->grant_table, gop.ref); - guest_physmap_add_page(e, _gfn(sha->full_page.frame), mfn, 0); - if ( !paging_mode_translate(e) ) - sha->full_page.frame = mfn_x(mfn); + guest_physmap_add_page(e, gfn, mfn, 0); + + if ( !paging_mode_translate(e) ) + { + if ( evaluate_nospec(e->grant_table->gt_version == 1) ) + shared_entry_v1(e->grant_table, gop.ref).frame = mfn_x(mfn); + else + shared_entry_v2(e->grant_table, gop.ref).full_page.frame = + mfn_x(mfn); } + smp_wmb(); shared_entry_header(e->grant_table, gop.ref)->flags |= GTF_transfer_completed; --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -203,6 +203,10 @@ static void populate_physmap(struct memo if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) ) goto out; + if ( paging_mode_translate(d) && + notify_gfn(d, _gfn(gpfn + (1U << a->extent_order) - 1)) ) + goto out; + if ( a->memflags & MEMF_populate_on_demand ) { /* Disallow populating PoD pages on oneself. */ @@ -745,6 +749,10 @@ static long memory_exchange(XEN_GUEST_HA continue; } + if ( paging_mode_translate(d) ) + rc = notify_gfn(d, + _gfn(gpfn + (1U << exch.out.extent_order) - 1)); + mfn = page_to_mfn(page); guest_physmap_add_page(d, _gfn(gpfn), mfn, exch.out.extent_order); @@ -813,12 +821,20 @@ int xenmem_add_to_physmap(struct domain extra.foreign_domid = DOMID_INVALID; if ( xatp->space != XENMAPSPACE_gmfn_range ) - return xenmem_add_to_physmap_one(d, xatp->space, extra, + return notify_gfn(d, _gfn(xatp->gpfn)) ?: + xenmem_add_to_physmap_one(d, xatp->space, extra, xatp->idx, _gfn(xatp->gpfn)); if ( xatp->size < start ) return -EILSEQ; + if ( !start && xatp->size ) + { + rc = notify_gfn(d, _gfn(xatp->gpfn + xatp->size - 1)); + if ( rc ) + return rc; + } + xatp->idx += start; xatp->gpfn += start; xatp->size -= start; @@ -891,7 +907,8 @@ static int xenmem_add_to_physmap_batch(s extent, 1)) ) return -EFAULT; - rc = xenmem_add_to_physmap_one(d, xatpb->space, + rc = notify_gfn(d, _gfn(gpfn)) ?: + xenmem_add_to_physmap_one(d, xatpb->space, xatpb->u, idx, _gfn(gpfn)); --- a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -530,6 +530,14 @@ void iommu_share_p2m_table(struct domain iommu_get_ops()->share_p2m(d); } +int iommu_notify_gfn(struct domain *d, gfn_t gfn) +{ + const struct iommu_ops *ops = dom_iommu(d)->platform_ops; + + return need_iommu_pt_sync(d) && ops->notify_dfn + ? iommu_call(ops, notify_dfn, d, _dfn(gfn_x(gfn))) : 0; +} + void iommu_crash_shutdown(void) { if ( !iommu_crash_disable ) --- a/xen/include/asm-arm/domain.h +++ b/xen/include/asm-arm/domain.h @@ -272,6 +272,8 @@ static inline void free_vcpu_guest_conte static inline void arch_vcpu_block(struct vcpu *v) {} +#define arch_notify_gfn(d, gfn) ((void)(d), (void)(gfn), 0) + #endif /* __ASM_DOMAIN_H__ */ /* --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -647,6 +647,8 @@ static inline void free_vcpu_guest_conte void arch_vcpu_regs_init(struct vcpu *v); +#define arch_notify_gfn(d, gfn) (gfn_valid(d, gfn) ? 0 : -EADDRNOTAVAIL) + struct vcpu_hvm_context; int arch_set_info_hvm_guest(struct vcpu *v, const struct vcpu_hvm_context *ctx); --- a/xen/include/xen/iommu.h +++ b/xen/include/xen/iommu.h @@ -237,6 +237,8 @@ struct iommu_ops { int __must_check (*lookup_page)(struct domain *d, dfn_t dfn, mfn_t *mfn, unsigned int *flags); + int __must_check (*notify_dfn)(struct domain *d, dfn_t dfn); + void (*free_page_table)(struct page_info *); #ifdef CONFIG_X86 @@ -331,6 +333,7 @@ void iommu_crash_shutdown(void); int iommu_get_reserved_device_memory(iommu_grdm_t *, void *); void iommu_share_p2m_table(struct domain *d); +int __must_check iommu_notify_gfn(struct domain *d, gfn_t gfn); #ifdef CONFIG_HAS_PCI int iommu_do_pci_domctl(struct xen_domctl *, struct domain *d, --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -1039,6 +1039,8 @@ static always_inline bool is_iommu_enabl return evaluate_nospec(d->options & XEN_DOMCTL_CDF_iommu); } +#define notify_gfn(d, gfn) (arch_notify_gfn(d, gfn) ?: iommu_notify_gfn(d, gfn)) + extern bool sched_smt_power_savings; extern bool sched_disable_smt_switching; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Hi Jan, On 14/11/2019 16:43, Jan Beulich wrote: > In order for individual IOMMU drivers (and from an abstract pov also > architectures) to be able to adjust, ahead of actual mapping requests, > their data structures when they might cover only a sub-range of all > possible GFNs, introduce a notification call used by various code paths > potentially installing a fresh mapping of a never used GFN (for a > particular domain). If I understand this correctly, this is mostly targeting IOMMNU driver where page-table are not shared with the processor. Right? > > Note that before this patch, in gnttab_transfer(), once past > assign_pages(), further errors modifying the physmap are ignored > (presumably because it would be too complicated to try to roll back at > that point). This patch follows suit by ignoring failed notify_gfn()s or > races due to the need to intermediately drop locks, simply printing out > a warning that the gfn may not be accessible due to the failure. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > --- > v2: Introduce arch_notify_gfn(), to invoke gfn_valid() on x86 (this > unfortunately means it and notify_gfn() now need to be macros, or > else include file dependencies get in the way, as gfn_valid() lives > in paging.h, which we shouldn't include from xen/sched.h). Improve > description. > > TBD: Does Arm actually have anything to check against in its > arch_notify_gfn()? I understand that we want to keep the code mostly generic, but I am a bit concerned of the extra cost to use notify_gfn() (and indirectly iommu_notify_gfn()) for doing nothing. I can't see any direct use of this for the foreseable future on Arm. So could we gate this under a config option? > > --- a/xen/arch/x86/hvm/dom0_build.c > +++ b/xen/arch/x86/hvm/dom0_build.c > @@ -173,7 +173,8 @@ static int __init pvh_populate_memory_ra > continue; > } > > - rc = guest_physmap_add_page(d, _gfn(start), page_to_mfn(page), > + rc = notify_gfn(d, _gfn(start + (1UL << order) - 1)) ?: > + guest_physmap_add_page(d, _gfn(start), page_to_mfn(page), > order); > if ( rc != 0 ) > { > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -4304,9 +4304,17 @@ static int hvmop_set_param( > if ( a.value > SHUTDOWN_MAX ) > rc = -EINVAL; > break; > + > case HVM_PARAM_IOREQ_SERVER_PFN: > - d->arch.hvm.ioreq_gfn.base = a.value; > + if ( d->arch.hvm.params[HVM_PARAM_NR_IOREQ_SERVER_PAGES] ) > + rc = notify_gfn( > + d, > + _gfn(a.value + d->arch.hvm.params > + [HVM_PARAM_NR_IOREQ_SERVER_PAGES] - 1)); > + if ( !rc ) > + d->arch.hvm.ioreq_gfn.base = a.value; > break; > + > case HVM_PARAM_NR_IOREQ_SERVER_PAGES: > { > unsigned int i; > @@ -4317,6 +4325,9 @@ static int hvmop_set_param( > rc = -EINVAL; > break; > } > + rc = notify_gfn(d, _gfn(d->arch.hvm.ioreq_gfn.base + a.value - 1)); > + if ( rc ) > + break; > for ( i = 0; i < a.value; i++ ) > set_bit(i, &d->arch.hvm.ioreq_gfn.mask); > > @@ -4330,7 +4341,11 @@ static int hvmop_set_param( > BUILD_BUG_ON(HVM_PARAM_BUFIOREQ_PFN > > sizeof(d->arch.hvm.ioreq_gfn.legacy_mask) * 8); > if ( a.value ) > - set_bit(a.index, &d->arch.hvm.ioreq_gfn.legacy_mask); > + { > + rc = notify_gfn(d, _gfn(a.value)); > + if ( !rc ) > + set_bit(a.index, &d->arch.hvm.ioreq_gfn.legacy_mask); > + } > break; > > case HVM_PARAM_X87_FIP_WIDTH: > --- a/xen/common/grant_table.c > +++ b/xen/common/grant_table.c > @@ -946,6 +946,16 @@ map_grant_ref( > return; > } > > + if ( paging_mode_translate(ld) /* && (op->flags & GNTMAP_host_map) */ && I think this wants an explanation in the code why the check is commented. Cheers, -- Julien Grall _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 20.11.2019 21:22, Julien Grall wrote: > On 14/11/2019 16:43, Jan Beulich wrote: >> In order for individual IOMMU drivers (and from an abstract pov also >> architectures) to be able to adjust, ahead of actual mapping requests, >> their data structures when they might cover only a sub-range of all >> possible GFNs, introduce a notification call used by various code paths >> potentially installing a fresh mapping of a never used GFN (for a >> particular domain). > > If I understand this correctly, this is mostly targeting IOMMNU driver > where page-table are not shared with the processor. Right? Yes - with shared page tables there's no separate handling of IOMMU (un)mappings in the first place. >> TBD: Does Arm actually have anything to check against in its >> arch_notify_gfn()? > > I understand that we want to keep the code mostly generic, but I am a > bit concerned of the extra cost to use notify_gfn() (and indirectly > iommu_notify_gfn()) for doing nothing. > > I can't see any direct use of this for the foreseable future on Arm. So > could we gate this under a config option? This is an option, sure. Alternatively I could see about making this an inline function, but iirc there were header dependency issues. Then again - is a call to a function doing almost nothing really so much extra overhead on Arm? >> --- a/xen/common/grant_table.c >> +++ b/xen/common/grant_table.c >> @@ -946,6 +946,16 @@ map_grant_ref( >> return; >> } >> >> + if ( paging_mode_translate(ld) /* && (op->flags & GNTMAP_host_map) */ && > > I think this wants an explanation in the code why the check is commented. Hmm, in such a case I'd rather omit the commented condition. It being commented has the purpose of documenting itself. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 21/11/2019 09:04, Jan Beulich wrote: > On 20.11.2019 21:22, Julien Grall wrote: >> On 14/11/2019 16:43, Jan Beulich wrote: >>> In order for individual IOMMU drivers (and from an abstract pov also >>> architectures) to be able to adjust, ahead of actual mapping requests, >>> their data structures when they might cover only a sub-range of all >>> possible GFNs, introduce a notification call used by various code paths >>> potentially installing a fresh mapping of a never used GFN (for a >>> particular domain). >> >> If I understand this correctly, this is mostly targeting IOMMNU driver >> where page-table are not shared with the processor. Right? > > Yes - with shared page tables there's no separate handling of > IOMMU (un)mappings in the first place. > >>> TBD: Does Arm actually have anything to check against in its >>> arch_notify_gfn()? >> >> I understand that we want to keep the code mostly generic, but I am a >> bit concerned of the extra cost to use notify_gfn() (and indirectly >> iommu_notify_gfn()) for doing nothing. >> >> I can't see any direct use of this for the foreseable future on Arm. So >> could we gate this under a config option? > > This is an option, sure. Alternatively I could see about making this > an inline function, but iirc there were header dependency issues. > Then again - is a call to a function doing almost nothing really so > much extra overhead on Arm. AFAICT, this is a workaround for AMD driver. So any impact (no matter the size) feels not right for Arm. In this particular case, the only thing I request is to protect the notify_gfn & callback with !CONFIG_IOMMU_FORCE_SHARE. > >>> --- a/xen/common/grant_table.c >>> +++ b/xen/common/grant_table.c >>> @@ -946,6 +946,16 @@ map_grant_ref( >>> return; >>> } >>> >>> + if ( paging_mode_translate(ld) /* && (op->flags & GNTMAP_host_map) */ && >> >> I think this wants an explanation in the code why the check is commented. > > Hmm, in such a case I'd rather omit the commented condition. It > being commented has the purpose of documenting itself. I fail to understand why GNTMAP_host_map would always be true in the case. AFAIU the code, this is only correct for paging_mode_external(ld) == 1. Does it mean that paging_mode_translate(ld) and paging_mode_external(ld) are always equal? If so, what's the point of having two macro (and two flags)? Cheers, -- Julien Grall _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 21.11.2019 11:07, Julien Grall wrote: > > > On 21/11/2019 09:04, Jan Beulich wrote: >> On 20.11.2019 21:22, Julien Grall wrote: >>> On 14/11/2019 16:43, Jan Beulich wrote: >>>> TBD: Does Arm actually have anything to check against in its >>>> arch_notify_gfn()? >>> >>> I understand that we want to keep the code mostly generic, but I am a >>> bit concerned of the extra cost to use notify_gfn() (and indirectly >>> iommu_notify_gfn()) for doing nothing. >>> >>> I can't see any direct use of this for the foreseable future on Arm. So >>> could we gate this under a config option? >> >> This is an option, sure. Alternatively I could see about making this >> an inline function, but iirc there were header dependency issues. >> Then again - is a call to a function doing almost nothing really so >> much extra overhead on Arm. > > AFAICT, this is a workaround for AMD driver. So any impact (no matter > the size) feels not right for Arm. > > In this particular case, the only thing I request is to protect the > notify_gfn & callback with !CONFIG_IOMMU_FORCE_SHARE. Oh, there already is a suitable config option. Will do (and cover share_p2m_table() at the same time). >>>> --- a/xen/common/grant_table.c >>>> +++ b/xen/common/grant_table.c >>>> @@ -946,6 +946,16 @@ map_grant_ref( >>>> return; >>>> } >>>> >>>> + if ( paging_mode_translate(ld) /* && (op->flags & GNTMAP_host_map) */ && >>> >>> I think this wants an explanation in the code why the check is commented. >> >> Hmm, in such a case I'd rather omit the commented condition. It >> being commented has the purpose of documenting itself. > > I fail to understand why GNTMAP_host_map would always be true in the case. > > AFAIU the code, this is only correct for paging_mode_external(ld) == 1. > Does it mean that paging_mode_translate(ld) and paging_mode_external(ld) > are always equal? If so, what's the point of having two macro (and two > flags)? Historical reasons. Nowadays translate == external == refcounts on x86. But since this is common code, perhaps I better un-comment that part of the conditional. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Hi Jan, On 21/11/2019 10:41, Jan Beulich wrote: > On 21.11.2019 11:07, Julien Grall wrote: >> >> >> On 21/11/2019 09:04, Jan Beulich wrote: >>> On 20.11.2019 21:22, Julien Grall wrote: >>>> On 14/11/2019 16:43, Jan Beulich wrote: >>>>> TBD: Does Arm actually have anything to check against in its >>>>> arch_notify_gfn()? >>>> >>>> I understand that we want to keep the code mostly generic, but I am a >>>> bit concerned of the extra cost to use notify_gfn() (and indirectly >>>> iommu_notify_gfn()) for doing nothing. >>>> >>>> I can't see any direct use of this for the foreseable future on Arm. So >>>> could we gate this under a config option? >>> >>> This is an option, sure. Alternatively I could see about making this >>> an inline function, but iirc there were header dependency issues. >>> Then again - is a call to a function doing almost nothing really so >>> much extra overhead on Arm. >> >> AFAICT, this is a workaround for AMD driver. So any impact (no matter >> the size) feels not right for Arm. >> >> In this particular case, the only thing I request is to protect the >> notify_gfn & callback with !CONFIG_IOMMU_FORCE_SHARE. > > Oh, there already is a suitable config option. Will do (and > cover share_p2m_table() at the same time). > >>>>> --- a/xen/common/grant_table.c >>>>> +++ b/xen/common/grant_table.c >>>>> @@ -946,6 +946,16 @@ map_grant_ref( >>>>> return; >>>>> } >>>>> >>>>> + if ( paging_mode_translate(ld) /* && (op->flags & GNTMAP_host_map) */ && >>>> >>>> I think this wants an explanation in the code why the check is commented. >>> >>> Hmm, in such a case I'd rather omit the commented condition. It >>> being commented has the purpose of documenting itself. >> >> I fail to understand why GNTMAP_host_map would always be true in the case. >> >> AFAIU the code, this is only correct for paging_mode_external(ld) == 1. >> Does it mean that paging_mode_translate(ld) and paging_mode_external(ld) >> are always equal? If so, what's the point of having two macro (and two >> flags)? > > Historical reasons. Nowadays translate == external == refcounts on > x86. But since this is common code, perhaps I better un-comment that > part of the conditional. For this patch, this would be the ideal solution. We might want to consider to reduce to one macro (maybe paging_mode_translate()) if we don't expect new architecture to return a different value for those 3 macros. Cheers, -- Julien Grall _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
update_paging_mode() expects to be invoked with the PCI devices lock held. The check occurring only when the mode actually needs updating, the violation of this rule by the majority of callers did go unnoticed until per-domain IOMMU setup was changed to do away with on-demand creation of IOMMU page tables. Acquiring the necessary lock in amd_iommu_map_page() or intermediate layers in generic IOMMU code is not possible - we'd risk all sorts of lock order violations. Hence the call to update_paging_mode() gets pulled out of the function, to be invoked instead from the new notify_dfn() hook, where no potentially conflicting locks are being held by the callers. Similarly the call to amd_iommu_alloc_root() gets pulled out - now that we receive notification of all DFN range increases, there's no need anymore to do this check when actually mapping a page. Note that this ought to result in a small performance improvement as well: The hook often gets invoked just once for larger blocks of pages, so rather than going through amd_iommu_alloc_root() and update_paging_mode() once per page, we may now invoke it just once per batch. Reported-by: Sander Eikelenboom <linux@eikelenboom.it> Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/drivers/passthrough/amd/iommu_map.c +++ b/xen/drivers/passthrough/amd/iommu_map.c @@ -383,35 +383,16 @@ int amd_iommu_map_page(struct domain *d, unsigned int flags, unsigned int *flush_flags) { struct domain_iommu *hd = dom_iommu(d); - int rc; unsigned long pt_mfn[7]; memset(pt_mfn, 0, sizeof(pt_mfn)); spin_lock(&hd->arch.mapping_lock); - rc = amd_iommu_alloc_root(hd); - if ( rc ) + if ( !hd->arch.root_table ) { spin_unlock(&hd->arch.mapping_lock); - AMD_IOMMU_DEBUG("Root table alloc failed, dfn = %"PRI_dfn"\n", - dfn_x(dfn)); - domain_crash(d); - return rc; - } - - /* Since HVM domain is initialized with 2 level IO page table, - * we might need a deeper page table for wider dfn now */ - if ( is_hvm_domain(d) ) - { - if ( update_paging_mode(d, dfn_x(dfn)) ) - { - spin_unlock(&hd->arch.mapping_lock); - AMD_IOMMU_DEBUG("Update page mode failed dfn = %"PRI_dfn"\n", - dfn_x(dfn)); - domain_crash(d); - return -EFAULT; - } + return -ENODATA; } if ( iommu_pde_from_dfn(d, dfn_x(dfn), pt_mfn, true) || (pt_mfn[1] == 0) ) @@ -468,6 +449,48 @@ int amd_iommu_unmap_page(struct domain * return 0; } + +int amd_iommu_notify_dfn(struct domain *d, dfn_t dfn) +{ + struct domain_iommu *hd = dom_iommu(d); + int rc; + + ASSERT(is_hvm_domain(d)); + + /* + * Since HVM domain is initialized with 2 level IO page table, + * we might need a deeper page table for wider dfn now. + */ + pcidevs_lock(); + spin_lock(&hd->arch.mapping_lock); + + rc = amd_iommu_alloc_root(hd); + if ( rc ) + { + spin_unlock(&hd->arch.mapping_lock); + pcidevs_unlock(); + AMD_IOMMU_DEBUG("Root table alloc failed, dfn = %"PRI_dfn" (rc %d)\n", + dfn_x(dfn), rc); + domain_crash(d); + return rc; + } + + rc = update_paging_mode(d, dfn_x(dfn)); + if ( rc ) + { + spin_unlock(&hd->arch.mapping_lock); + pcidevs_unlock(); + AMD_IOMMU_DEBUG("Update paging mode failed dfn %"PRI_dfn" (rc %d)\n", + dfn_x(dfn), rc); + domain_crash(d); + return rc; + } + + spin_unlock(&hd->arch.mapping_lock); + pcidevs_unlock(); + + return 0; +} static unsigned long flush_count(unsigned long dfn, unsigned int page_count, unsigned int order) --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -628,6 +628,7 @@ static const struct iommu_ops __initcons .teardown = amd_iommu_domain_destroy, .map_page = amd_iommu_map_page, .unmap_page = amd_iommu_unmap_page, + .notify_dfn = amd_iommu_notify_dfn, .iotlb_flush = amd_iommu_flush_iotlb_pages, .iotlb_flush_all = amd_iommu_flush_iotlb_all, .free_page_table = deallocate_page_table, --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h @@ -61,6 +61,7 @@ int __must_check amd_iommu_map_page(stru int __must_check amd_iommu_unmap_page(struct domain *d, dfn_t dfn, unsigned int *flush_flags); int __must_check amd_iommu_alloc_root(struct domain_iommu *hd); +int __must_check amd_iommu_notify_dfn(struct domain *d, dfn_t dfn); int amd_iommu_reserve_domain_unity_map(struct domain *domain, paddr_t phys_addr, unsigned long size, int iw, int ir); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
© 2016 - 2024 Red Hat, Inc.