This change just introduces the boilerplate code in order to use a rangeset
when setting up the hardware domain IOMMU mappings. The rangeset is never
populated in this patch, so it's a non-functional change as far as the mappings
established for the domain are concerned.
Note there will be a change for HVM domains (ie: PVH dom0) when the code
introduced here gets used: the p2m mappings will be established using
map_mmio_regions() instead of p2m_add_identity_entry(), so that ranges can be
mapped with a single function call if possible. Note that the interface of
map_mmio_regions() doesn't allow creating read-only mappings, but so far there
are no such mappings created for PVH dom0 in arch_iommu_hwdom_init().
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v2:
- Better deal with read-only regions.
- Destroy rangeset earlier.
- Switch to using map_data.flush_flags.
- Reword commit message to clarify the change in behaviour for HVM will only
take effect after later changes.
Changes since v1:
- Split from bigger patch.
---
xen/drivers/passthrough/x86/iommu.c | 102 +++++++++++++++++++++++++++-
1 file changed, 101 insertions(+), 1 deletion(-)
diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c
index 857dccb6a465..ca5e0bca810f 100644
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -370,10 +370,88 @@ static unsigned int __hwdom_init hwdom_iommu_map(const struct domain *d,
return perms;
}
+struct map_data {
+ struct domain *d;
+ unsigned int flush_flags;
+ bool mmio_ro;
+};
+
+static int __hwdom_init cf_check identity_map(unsigned long s, unsigned long e,
+ void *data)
+{
+ struct map_data *info = data;
+ struct domain *d = info->d;
+ long rc;
+
+ if ( iommu_verbose )
+ printk(XENLOG_INFO " [%010lx, %010lx] R%c\n",
+ s, e, info->mmio_ro ? 'O' : 'W');
+
+ if ( paging_mode_translate(d) )
+ {
+ if ( info->mmio_ro )
+ {
+ ASSERT_UNREACHABLE();
+ return 0;
+ }
+ while ( (rc = map_mmio_regions(d, _gfn(s), e - s + 1, _mfn(s))) > 0 )
+ {
+ s += rc;
+ process_pending_softirqs();
+ }
+ }
+ else
+ {
+ const unsigned int perms = IOMMUF_readable | IOMMUF_preempt |
+ (info->mmio_ro ? 0 : IOMMUF_writable);
+
+ /*
+ * Read-only ranges are only created based on the contents of mmio
+ * read-only rangeset, and hence need the additional iomem permissions
+ * check.
+ */
+ while( info->mmio_ro && s <= e && !iomem_access_permitted(d, s, e) )
+ {
+ /*
+ * Consume a frame per iteration until the reminder is accessible
+ * or there's nothing left to map.
+ */
+ if ( iomem_access_permitted(d, s, s) )
+ {
+ rc = iommu_map(d, _dfn(s), _mfn(s), 1, perms,
+ &info->flush_flags);
+ if ( rc < 0 )
+ break;
+ /* Must map a frame at least, which is what we request for. */
+ ASSERT(rc == 1);
+ process_pending_softirqs();
+ }
+ s++;
+ }
+ while ( (rc = iommu_map(d, _dfn(s), _mfn(s), e - s + 1,
+ perms, &info->flush_flags)) > 0 )
+ {
+ s += rc;
+ process_pending_softirqs();
+ }
+ }
+ ASSERT(rc <= 0);
+ if ( rc )
+ printk(XENLOG_WARNING
+ "IOMMU identity mapping of [%lx, %lx] failed: %ld\n",
+ s, e, rc);
+
+ /* Ignore errors and attempt to map the remaining regions. */
+ return 0;
+}
+
void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
{
unsigned long i, top, max_pfn, start, count;
unsigned int flush_flags = 0, start_perms = 0;
+ struct rangeset *map;
+ struct map_data map_data = { .d = d };
+ int rc;
BUG_ON(!is_hardware_domain(d));
@@ -397,6 +475,10 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
if ( iommu_hwdom_passthrough )
return;
+ map = rangeset_new(NULL, NULL, 0);
+ if ( !map )
+ panic("IOMMU init: unable to allocate rangeset\n");
+
max_pfn = (GB(4) >> PAGE_SHIFT) - 1;
top = max(max_pdx, pfn_to_pdx(max_pfn) + 1);
@@ -451,8 +533,26 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
goto commit;
}
+ if ( iommu_verbose )
+ printk(XENLOG_INFO "%pd: identity mappings for IOMMU:\n", d);
+
+ rc = rangeset_report_ranges(map, 0, ~0UL, identity_map, &map_data);
+ if ( rc )
+ panic("IOMMU unable to create mappings: %d\n", rc);
+ rangeset_destroy(map);
+
+ if ( is_pv_domain(d) )
+ {
+ map_data.mmio_ro = true;
+ rc = rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, identity_map,
+ &map_data);
+ if ( rc )
+ panic("IOMMU unable to create read-only mappings: %d\n", rc);
+ }
+
+ map_data.flush_flags |= flush_flags;
/* Use if to avoid compiler warning */
- if ( iommu_iotlb_flush_all(d, flush_flags) )
+ if ( iommu_iotlb_flush_all(d, map_data.flush_flags) )
return;
}
--
2.43.0
On 15.12.2023 15:18, Roger Pau Monne wrote:
> --- a/xen/drivers/passthrough/x86/iommu.c
> +++ b/xen/drivers/passthrough/x86/iommu.c
> @@ -370,10 +370,88 @@ static unsigned int __hwdom_init hwdom_iommu_map(const struct domain *d,
> return perms;
> }
>
> +struct map_data {
> + struct domain *d;
> + unsigned int flush_flags;
> + bool mmio_ro;
> +};
> +
> +static int __hwdom_init cf_check identity_map(unsigned long s, unsigned long e,
> + void *data)
> +{
> + struct map_data *info = data;
> + struct domain *d = info->d;
> + long rc;
> +
> + if ( iommu_verbose )
> + printk(XENLOG_INFO " [%010lx, %010lx] R%c\n",
> + s, e, info->mmio_ro ? 'O' : 'W');
> +
> + if ( paging_mode_translate(d) )
> + {
> + if ( info->mmio_ro )
> + {
> + ASSERT_UNREACHABLE();
> + return 0;
Is this meant to be in line with the main return statement's comment?
I'm inclined to ask for an actual error code (-EOPNOTSUPP?) here.
> + }
> + while ( (rc = map_mmio_regions(d, _gfn(s), e - s + 1, _mfn(s))) > 0 )
> + {
> + s += rc;
> + process_pending_softirqs();
> + }
> + }
> + else
> + {
> + const unsigned int perms = IOMMUF_readable | IOMMUF_preempt |
> + (info->mmio_ro ? 0 : IOMMUF_writable);
> +
> + /*
> + * Read-only ranges are only created based on the contents of mmio
> + * read-only rangeset, and hence need the additional iomem permissions
> + * check.
> + */
> + while( info->mmio_ro && s <= e && !iomem_access_permitted(d, s, e) )
Nit: Missing blank after "while".
> + {
> + /*
> + * Consume a frame per iteration until the reminder is accessible
Nit: remainder?
> + * or there's nothing left to map.
> + */
> + if ( iomem_access_permitted(d, s, s) )
> + {
> + rc = iommu_map(d, _dfn(s), _mfn(s), 1, perms,
> + &info->flush_flags);
> + if ( rc < 0 )
> + break;
> + /* Must map a frame at least, which is what we request for. */
> + ASSERT(rc == 1);
> + process_pending_softirqs();
> + }
> + s++;
> + }
> + while ( (rc = iommu_map(d, _dfn(s), _mfn(s), e - s + 1,
> + perms, &info->flush_flags)) > 0 )
> + {
> + s += rc;
> + process_pending_softirqs();
> + }
> + }
> + ASSERT(rc <= 0);
> + if ( rc )
> + printk(XENLOG_WARNING
> + "IOMMU identity mapping of [%lx, %lx] failed: %ld\n",
> + s, e, rc);
> +
> + /* Ignore errors and attempt to map the remaining regions. */
> + return 0;
> +}
> +
> void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
> {
> unsigned long i, top, max_pfn, start, count;
> unsigned int flush_flags = 0, start_perms = 0;
> + struct rangeset *map;
> + struct map_data map_data = { .d = d };
> + int rc;
>
> BUG_ON(!is_hardware_domain(d));
>
> @@ -397,6 +475,10 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
> if ( iommu_hwdom_passthrough )
> return;
>
> + map = rangeset_new(NULL, NULL, 0);
> + if ( !map )
> + panic("IOMMU init: unable to allocate rangeset\n");
> +
> max_pfn = (GB(4) >> PAGE_SHIFT) - 1;
> top = max(max_pdx, pfn_to_pdx(max_pfn) + 1);
>
> @@ -451,8 +533,26 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
> goto commit;
> }
>
> + if ( iommu_verbose )
> + printk(XENLOG_INFO "%pd: identity mappings for IOMMU:\n", d);
> +
> + rc = rangeset_report_ranges(map, 0, ~0UL, identity_map, &map_data);
> + if ( rc )
> + panic("IOMMU unable to create mappings: %d\n", rc);
> + rangeset_destroy(map);
> +
> + if ( is_pv_domain(d) )
> + {
> + map_data.mmio_ro = true;
> + rc = rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, identity_map,
> + &map_data);
> + if ( rc )
> + panic("IOMMU unable to create read-only mappings: %d\n", rc);
> + }
As it stands identity_map() deliberately returns no error. Yet here
you panic() in case of receiving an error, despite that being impossible?
Also if we want/need to panic() here, can we avoid having two instances
of almost the same string literal in .rodata? Along the lines of
rc = rangeset_report_ranges(map, 0, ~0UL, identity_map, &map_data);
rangeset_destroy(map);
if ( !rc && is_pv_domain(d) )
{
map_data.mmio_ro = true;
rc = rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, identity_map,
&map_data);
}
if ( rc )
panic("IOMMU unable to create %smappings: %d\n",
map_data.mmio_ro ? "read-only " : "", rc);
?
> + map_data.flush_flags |= flush_flags;
So you decided to still keep the standalone "flush_flags" around. Is
there a particular reason?
Jan
On Tue, Dec 19, 2023 at 05:06:32PM +0100, Jan Beulich wrote:
> On 15.12.2023 15:18, Roger Pau Monne wrote:
> > --- a/xen/drivers/passthrough/x86/iommu.c
> > +++ b/xen/drivers/passthrough/x86/iommu.c
> > @@ -370,10 +370,88 @@ static unsigned int __hwdom_init hwdom_iommu_map(const struct domain *d,
> > return perms;
> > }
> >
> > +struct map_data {
> > + struct domain *d;
> > + unsigned int flush_flags;
> > + bool mmio_ro;
> > +};
> > +
> > +static int __hwdom_init cf_check identity_map(unsigned long s, unsigned long e,
> > + void *data)
> > +{
> > + struct map_data *info = data;
> > + struct domain *d = info->d;
> > + long rc;
> > +
> > + if ( iommu_verbose )
> > + printk(XENLOG_INFO " [%010lx, %010lx] R%c\n",
> > + s, e, info->mmio_ro ? 'O' : 'W');
> > +
> > + if ( paging_mode_translate(d) )
> > + {
> > + if ( info->mmio_ro )
> > + {
> > + ASSERT_UNREACHABLE();
> > + return 0;
>
> Is this meant to be in line with the main return statement's comment?
> I'm inclined to ask for an actual error code (-EOPNOTSUPP?) here.
Hm, yes, for that one it might make sense to return -EOPNOTSUPP, as
all attempts to map will end up failing anyway.
> > + }
> > + while ( (rc = map_mmio_regions(d, _gfn(s), e - s + 1, _mfn(s))) > 0 )
> > + {
> > + s += rc;
> > + process_pending_softirqs();
> > + }
> > + }
> > + else
> > + {
> > + const unsigned int perms = IOMMUF_readable | IOMMUF_preempt |
> > + (info->mmio_ro ? 0 : IOMMUF_writable);
> > +
> > + /*
> > + * Read-only ranges are only created based on the contents of mmio
> > + * read-only rangeset, and hence need the additional iomem permissions
> > + * check.
> > + */
> > + while( info->mmio_ro && s <= e && !iomem_access_permitted(d, s, e) )
>
> Nit: Missing blank after "while".
>
> > + {
> > + /*
> > + * Consume a frame per iteration until the reminder is accessible
>
> Nit: remainder?
>
> > + * or there's nothing left to map.
> > + */
> > + if ( iomem_access_permitted(d, s, s) )
> > + {
> > + rc = iommu_map(d, _dfn(s), _mfn(s), 1, perms,
> > + &info->flush_flags);
> > + if ( rc < 0 )
> > + break;
> > + /* Must map a frame at least, which is what we request for. */
> > + ASSERT(rc == 1);
> > + process_pending_softirqs();
> > + }
> > + s++;
> > + }
> > + while ( (rc = iommu_map(d, _dfn(s), _mfn(s), e - s + 1,
> > + perms, &info->flush_flags)) > 0 )
> > + {
> > + s += rc;
> > + process_pending_softirqs();
> > + }
> > + }
> > + ASSERT(rc <= 0);
> > + if ( rc )
> > + printk(XENLOG_WARNING
> > + "IOMMU identity mapping of [%lx, %lx] failed: %ld\n",
> > + s, e, rc);
> > +
> > + /* Ignore errors and attempt to map the remaining regions. */
> > + return 0;
> > +}
> > +
> > void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
> > {
> > unsigned long i, top, max_pfn, start, count;
> > unsigned int flush_flags = 0, start_perms = 0;
> > + struct rangeset *map;
> > + struct map_data map_data = { .d = d };
> > + int rc;
> >
> > BUG_ON(!is_hardware_domain(d));
> >
> > @@ -397,6 +475,10 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
> > if ( iommu_hwdom_passthrough )
> > return;
> >
> > + map = rangeset_new(NULL, NULL, 0);
> > + if ( !map )
> > + panic("IOMMU init: unable to allocate rangeset\n");
> > +
> > max_pfn = (GB(4) >> PAGE_SHIFT) - 1;
> > top = max(max_pdx, pfn_to_pdx(max_pfn) + 1);
> >
> > @@ -451,8 +533,26 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
> > goto commit;
> > }
> >
> > + if ( iommu_verbose )
> > + printk(XENLOG_INFO "%pd: identity mappings for IOMMU:\n", d);
> > +
> > + rc = rangeset_report_ranges(map, 0, ~0UL, identity_map, &map_data);
> > + if ( rc )
> > + panic("IOMMU unable to create mappings: %d\n", rc);
> > + rangeset_destroy(map);
> > +
> > + if ( is_pv_domain(d) )
> > + {
> > + map_data.mmio_ro = true;
> > + rc = rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, identity_map,
> > + &map_data);
> > + if ( rc )
> > + panic("IOMMU unable to create read-only mappings: %d\n", rc);
> > + }
>
> As it stands identity_map() deliberately returns no error. Yet here
> you panic() in case of receiving an error, despite that being impossible?
I wasn't sure whether rangeset_report_ranges() itself could return an
error. Thinking twice, we might want to print a message and attempt
to continue, as in the worst case dom0 might be missing maps.
> Also if we want/need to panic() here, can we avoid having two instances
> of almost the same string literal in .rodata? Along the lines of
>
> rc = rangeset_report_ranges(map, 0, ~0UL, identity_map, &map_data);
> rangeset_destroy(map);
> if ( !rc && is_pv_domain(d) )
> {
> map_data.mmio_ro = true;
> rc = rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, identity_map,
> &map_data);
> }
> if ( rc )
> panic("IOMMU unable to create %smappings: %d\n",
> map_data.mmio_ro ? "read-only " : "", rc);
>
> ?
>
> > + map_data.flush_flags |= flush_flags;
>
> So you decided to still keep the standalone "flush_flags" around. Is
> there a particular reason?
Oh, I got distracted with the other changes and forgot about this one.
Thanks, Roger.
© 2016 - 2026 Red Hat, Inc.