[PATCH 1/2] iommu/io-pgtable-arm: Fix stage-2 map/umap for concatenated tables

Mostafa Saleh posted 2 patches 1 year, 3 months ago
[PATCH 1/2] iommu/io-pgtable-arm: Fix stage-2 map/umap for concatenated tables
Posted by Mostafa Saleh 1 year, 3 months ago
When calculating the max number of entries in a table, Where
RM_LPAE_LVL_IDX() understands the concatenated pgds and can return
an index spanning more than one concatenated table (for ex for 4K
page size > 512).
But then, max_entries is calculated as follows:
max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;

This leads to a negative index in the page table, where for:
- map: do nothing (no OOB) as fortunately all comparisons are signed,
  but it would return a negative mapped value.

- unmap: it would leak any child tables as it skips the loop over
  “__arm_lpae_free_pgtable”

This bug only happens when map/unmap is requested with a page size
equals to the first level of the concatenated table (for 40 bits input
and 4K granule would be 1GB) and can be triggered from userspace with
VFIO, by choosing a VM IPA in a concatenated table > 0 and using
huge pages to mmap with the first level size.

For example, I was able to reproduce it with the following command
with mainline linux and mainline kvmtool:

./lkvm run --irqchip gicv3 -k {$KERNEL} -p "earlycon" -d {$ROOTFS}  --force-pci -c \
`nproc` --debug -m 4096@525312 --vfio-pci 0000:00:03.0  --hugetlbfs  /hugepages

Where huge pages with 1GB would be used and using IPA in the second table
(512GB for 40 bits SMMU and 4K granule).

Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
 drivers/iommu/io-pgtable-arm.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 0e67f1721a3d..3ecbc024e440 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -199,6 +199,17 @@ static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
 	return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
 }
 
+/*
+ * Using an index returned from ARM_LPAE_PGD_IDX(), which can point to
+ * concatenated PGD concatenated, get the max entries of a that table.
+ */
+static inline int arm_lpae_max_entries(int i, struct arm_lpae_io_pgtable *data)
+{
+	int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
+
+	return ptes_per_table - (i & (ptes_per_table - 1));
+}
+
 static bool selftest_running = false;
 
 static dma_addr_t __arm_lpae_dma_addr(void *pages)
@@ -390,7 +401,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 
 	/* If we can install a leaf entry at this level, then do so */
 	if (size == block_size) {
-		max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
+		max_entries = arm_lpae_max_entries(map_idx_start, data);
 		num_entries = min_t(int, pgcount, max_entries);
 		ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
 		if (!ret)
@@ -592,7 +603,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 
 	if (size == split_sz) {
 		unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
-		max_entries = ptes_per_table - unmap_idx_start;
+		max_entries = arm_lpae_max_entries(unmap_idx_start, data);
 		num_entries = min_t(int, pgcount, max_entries);
 	}
 
@@ -650,7 +661,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 
 	/* If the size matches this level, we're in the right place */
 	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
-		max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
+		max_entries = arm_lpae_max_entries(unmap_idx_start, data);
 		num_entries = min_t(int, pgcount, max_entries);
 
 		/* Find and handle non-leaf entries */
-- 
2.47.0.105.g07ac214952-goog
Re: [PATCH 1/2] iommu/io-pgtable-arm: Fix stage-2 map/umap for concatenated tables
Posted by Daniel Mentz 1 year, 2 months ago
On Thu, Oct 24, 2024 at 9:26 AM Mostafa Saleh <smostafa@google.com> wrote:
>
> When calculating the max number of entries in a table, Where
> RM_LPAE_LVL_IDX() understands the concatenated pgds and can return
> an index spanning more than one concatenated table (for ex for 4K
> page size > 512).
> But then, max_entries is calculated as follows:
> max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
>
> This leads to a negative index in the page table, where for:
> - map: do nothing (no OOB) as fortunately all comparisons are signed,
>   but it would return a negative mapped value.
>
> - unmap: it would leak any child tables as it skips the loop over
>   “__arm_lpae_free_pgtable”
>
> This bug only happens when map/unmap is requested with a page size
> equals to the first level of the concatenated table (for 40 bits input
> and 4K granule would be 1GB) and can be triggered from userspace with
> VFIO, by choosing a VM IPA in a concatenated table > 0 and using
> huge pages to mmap with the first level size.
>
> For example, I was able to reproduce it with the following command
> with mainline linux and mainline kvmtool:
>
> ./lkvm run --irqchip gicv3 -k {$KERNEL} -p "earlycon" -d {$ROOTFS}  --force-pci -c \
> `nproc` --debug -m 4096@525312 --vfio-pci 0000:00:03.0  --hugetlbfs  /hugepages
>
> Where huge pages with 1GB would be used and using IPA in the second table
> (512GB for 40 bits SMMU and 4K granule).
>
> Signed-off-by: Mostafa Saleh <smostafa@google.com>
> ---
>  drivers/iommu/io-pgtable-arm.c | 17 ++++++++++++++---
>  1 file changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 0e67f1721a3d..3ecbc024e440 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -199,6 +199,17 @@ static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
>         return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
>  }
>
> +/*
> + * Using an index returned from ARM_LPAE_PGD_IDX(), which can point to
> + * concatenated PGD concatenated, get the max entries of a that table.

I believe the macro that returns an index is called ARM_LPAE_LVL_IDX
not ARM_LPAE_PGD_IDX.

> + */
> +static inline int arm_lpae_max_entries(int i, struct arm_lpae_io_pgtable *data)
> +{
> +       int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
> +
> +       return ptes_per_table - (i & (ptes_per_table - 1));
> +}
> +
>  static bool selftest_running = false;
>
>  static dma_addr_t __arm_lpae_dma_addr(void *pages)
> @@ -390,7 +401,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
>
>         /* If we can install a leaf entry at this level, then do so */
>         if (size == block_size) {
> -               max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
> +               max_entries = arm_lpae_max_entries(map_idx_start, data);
>                 num_entries = min_t(int, pgcount, max_entries);
>                 ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
>                 if (!ret)
> @@ -592,7 +603,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
>
>         if (size == split_sz) {
>                 unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
> -               max_entries = ptes_per_table - unmap_idx_start;
> +               max_entries = arm_lpae_max_entries(unmap_idx_start, data);
>                 num_entries = min_t(int, pgcount, max_entries);
>         }
>
> @@ -650,7 +661,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
>
>         /* If the size matches this level, we're in the right place */
>         if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
> -               max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
> +               max_entries = arm_lpae_max_entries(unmap_idx_start, data);
>                 num_entries = min_t(int, pgcount, max_entries);
>
>                 /* Find and handle non-leaf entries */
> --
> 2.47.0.105.g07ac214952-goog
>
>
Re: [PATCH 1/2] iommu/io-pgtable-arm: Fix stage-2 map/umap for concatenated tables
Posted by Mostafa Saleh 1 year, 2 months ago
Hi Daniel,

On Sat, Nov 30, 2024 at 08:20:10PM -0800, Daniel Mentz wrote:
> On Thu, Oct 24, 2024 at 9:26 AM Mostafa Saleh <smostafa@google.com> wrote:
> >
> > When calculating the max number of entries in a table, Where
> > RM_LPAE_LVL_IDX() understands the concatenated pgds and can return
> > an index spanning more than one concatenated table (for ex for 4K
> > page size > 512).
> > But then, max_entries is calculated as follows:
> > max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
> >
> > This leads to a negative index in the page table, where for:
> > - map: do nothing (no OOB) as fortunately all comparisons are signed,
> >   but it would return a negative mapped value.
> >
> > - unmap: it would leak any child tables as it skips the loop over
> >   “__arm_lpae_free_pgtable”
> >
> > This bug only happens when map/unmap is requested with a page size
> > equals to the first level of the concatenated table (for 40 bits input
> > and 4K granule would be 1GB) and can be triggered from userspace with
> > VFIO, by choosing a VM IPA in a concatenated table > 0 and using
> > huge pages to mmap with the first level size.
> >
> > For example, I was able to reproduce it with the following command
> > with mainline linux and mainline kvmtool:
> >
> > ./lkvm run --irqchip gicv3 -k {$KERNEL} -p "earlycon" -d {$ROOTFS}  --force-pci -c \
> > `nproc` --debug -m 4096@525312 --vfio-pci 0000:00:03.0  --hugetlbfs  /hugepages
> >
> > Where huge pages with 1GB would be used and using IPA in the second table
> > (512GB for 40 bits SMMU and 4K granule).
> >
> > Signed-off-by: Mostafa Saleh <smostafa@google.com>
> > ---
> >  drivers/iommu/io-pgtable-arm.c | 17 ++++++++++++++---
> >  1 file changed, 14 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> > index 0e67f1721a3d..3ecbc024e440 100644
> > --- a/drivers/iommu/io-pgtable-arm.c
> > +++ b/drivers/iommu/io-pgtable-arm.c
> > @@ -199,6 +199,17 @@ static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
> >         return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
> >  }
> >
> > +/*
> > + * Using an index returned from ARM_LPAE_PGD_IDX(), which can point to
> > + * concatenated PGD concatenated, get the max entries of a that table.
> 
> I believe the macro that returns an index is called ARM_LPAE_LVL_IDX
> not ARM_LPAE_PGD_IDX.
> 

Yes, the comment is not quite accurate, although ARM_LPAE_PGD_IDX()
calls ARM_LPAE_PGD_IDX() which is the problem.

Thanks,
Mostafa

> > + */
> > +static inline int arm_lpae_max_entries(int i, struct arm_lpae_io_pgtable *data)
> > +{
> > +       int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
> > +
> > +       return ptes_per_table - (i & (ptes_per_table - 1));
> > +}
> > +
> >  static bool selftest_running = false;
> >
> >  static dma_addr_t __arm_lpae_dma_addr(void *pages)
> > @@ -390,7 +401,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
> >
> >         /* If we can install a leaf entry at this level, then do so */
> >         if (size == block_size) {
> > -               max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
> > +               max_entries = arm_lpae_max_entries(map_idx_start, data);
> >                 num_entries = min_t(int, pgcount, max_entries);
> >                 ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
> >                 if (!ret)
> > @@ -592,7 +603,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
> >
> >         if (size == split_sz) {
> >                 unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
> > -               max_entries = ptes_per_table - unmap_idx_start;
> > +               max_entries = arm_lpae_max_entries(unmap_idx_start, data);
> >                 num_entries = min_t(int, pgcount, max_entries);
> >         }
> >
> > @@ -650,7 +661,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
> >
> >         /* If the size matches this level, we're in the right place */
> >         if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
> > -               max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
> > +               max_entries = arm_lpae_max_entries(unmap_idx_start, data);
> >                 num_entries = min_t(int, pgcount, max_entries);
> >
> >                 /* Find and handle non-leaf entries */
> > --
> > 2.47.0.105.g07ac214952-goog
> >
> >
Re: [PATCH 1/2] iommu/io-pgtable-arm: Fix stage-2 map/umap for concatenated tables
Posted by Daniel Mentz 1 year, 2 months ago
Hi Mostafa,

On Mon, Dec 2, 2024 at 4:12 AM Mostafa Saleh <smostafa@google.com> wrote:
>
> Hi Daniel,
>
> On Sat, Nov 30, 2024 at 08:20:10PM -0800, Daniel Mentz wrote:
> > On Thu, Oct 24, 2024 at 9:26 AM Mostafa Saleh <smostafa@google.com> wrote:
> > >
> > > When calculating the max number of entries in a table, Where
> > > RM_LPAE_LVL_IDX() understands the concatenated pgds and can return
> > > an index spanning more than one concatenated table (for ex for 4K
> > > page size > 512).
> > > But then, max_entries is calculated as follows:
> > > max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
> > >
> > > This leads to a negative index in the page table, where for:
> > > - map: do nothing (no OOB) as fortunately all comparisons are signed,
> > >   but it would return a negative mapped value.
> > >
> > > - unmap: it would leak any child tables as it skips the loop over
> > >   “__arm_lpae_free_pgtable”
> > >
> > > This bug only happens when map/unmap is requested with a page size
> > > equals to the first level of the concatenated table (for 40 bits input
> > > and 4K granule would be 1GB) and can be triggered from userspace with
> > > VFIO, by choosing a VM IPA in a concatenated table > 0 and using
> > > huge pages to mmap with the first level size.
> > >
> > > For example, I was able to reproduce it with the following command
> > > with mainline linux and mainline kvmtool:
> > >
> > > ./lkvm run --irqchip gicv3 -k {$KERNEL} -p "earlycon" -d {$ROOTFS}  --force-pci -c \
> > > `nproc` --debug -m 4096@525312 --vfio-pci 0000:00:03.0  --hugetlbfs  /hugepages
> > >
> > > Where huge pages with 1GB would be used and using IPA in the second table
> > > (512GB for 40 bits SMMU and 4K granule).
> > >
> > > Signed-off-by: Mostafa Saleh <smostafa@google.com>
> > > ---
> > >  drivers/iommu/io-pgtable-arm.c | 17 ++++++++++++++---
> > >  1 file changed, 14 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> > > index 0e67f1721a3d..3ecbc024e440 100644
> > > --- a/drivers/iommu/io-pgtable-arm.c
> > > +++ b/drivers/iommu/io-pgtable-arm.c
> > > @@ -199,6 +199,17 @@ static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
> > >         return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
> > >  }
> > >
> > > +/*
> > > + * Using an index returned from ARM_LPAE_PGD_IDX(), which can point to
> > > + * concatenated PGD concatenated, get the max entries of a that table.
> >
> > I believe the macro that returns an index is called ARM_LPAE_LVL_IDX
> > not ARM_LPAE_PGD_IDX.
> >
>
> Yes, the comment is not quite accurate, although ARM_LPAE_PGD_IDX()
> calls ARM_LPAE_PGD_IDX() which is the problem.

I assume you mean the problem is that ARM_LPAE_LVL_IDX() calls into
ARM_LPAE_PGD_IDX().

I find that ARM_LPAE_PGD_IDX is a misnomer as it returns a number of
bits as opposed to an index. Something like ARM_LPAE_PGD_EXTRA_BITS
would be a more appropriate name.

>
> Thanks,
> Mostafa
>
> > > + */
> > > +static inline int arm_lpae_max_entries(int i, struct arm_lpae_io_pgtable *data)
> > > +{
> > > +       int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
> > > +
> > > +       return ptes_per_table - (i & (ptes_per_table - 1));
> > > +}
> > > +
> > >  static bool selftest_running = false;
> > >
> > >  static dma_addr_t __arm_lpae_dma_addr(void *pages)
> > > @@ -390,7 +401,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
> > >
> > >         /* If we can install a leaf entry at this level, then do so */
> > >         if (size == block_size) {
> > > -               max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
> > > +               max_entries = arm_lpae_max_entries(map_idx_start, data);
> > >                 num_entries = min_t(int, pgcount, max_entries);
> > >                 ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
> > >                 if (!ret)
> > > @@ -592,7 +603,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
> > >
> > >         if (size == split_sz) {
> > >                 unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
> > > -               max_entries = ptes_per_table - unmap_idx_start;
> > > +               max_entries = arm_lpae_max_entries(unmap_idx_start, data);
> > >                 num_entries = min_t(int, pgcount, max_entries);
> > >         }
> > >
> > > @@ -650,7 +661,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
> > >
> > >         /* If the size matches this level, we're in the right place */
> > >         if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
> > > -               max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
> > > +               max_entries = arm_lpae_max_entries(unmap_idx_start, data);
> > >                 num_entries = min_t(int, pgcount, max_entries);
> > >
> > >                 /* Find and handle non-leaf entries */
> > > --
> > > 2.47.0.105.g07ac214952-goog
> > >
> > >