[PATCH v6 4/9] iommu/amd: Introduce per-device DTE cache to store persistent bits

Suravee Suthikulpanit posted 9 patches 1 month, 1 week ago
There is a newer version of this series
[PATCH v6 4/9] iommu/amd: Introduce per-device DTE cache to store persistent bits
Posted by Suravee Suthikulpanit 1 month, 1 week ago
Currently, IOMMU driver initializes each Device Table Entry (DTE) starting
from when it parses the ACPI IVRS table during one-time initialization,
and the value is directly programmed into the table. The value is stored
in the table until next system reset. This makes the DTE programming
difficult since it needs to ensure that all persistent DTE bits are not
overwritten during runtime.

Introduce per-device DTE cache to store persistent DTE bits.

Please note also that the amd_iommu_apply_erratum_63() is not updated since
it will be removed in subsequent patch.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 drivers/iommu/amd/amd_iommu.h       |  2 ++
 drivers/iommu/amd/amd_iommu_types.h | 21 +++++++++++----------
 drivers/iommu/amd/init.c            | 26 +++++++++++++++++++-------
 drivers/iommu/amd/iommu.c           |  2 +-
 4 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 6386fa4556d9..96c3bfc234f8 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -177,3 +177,5 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
 
 #endif
+
+struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid);
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index f537b264f118..3f53d3bc79cb 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -830,6 +830,16 @@ struct devid_map {
 /* Device may request super-user privileges */
 #define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP   0x10
 
+/*
+ * Structure defining one entry in the device table
+ */
+struct dev_table_entry {
+	union {
+		u64 data[4];
+		u128 data128[2];
+	};
+};
+
 /*
  * This struct contains device specific data for the IOMMU
  */
@@ -858,6 +868,7 @@ struct iommu_dev_data {
 	bool defer_attach;
 
 	struct ratelimit_state rs;        /* Ratelimit IOPF messages */
+	struct dev_table_entry dte_cache;
 };
 
 /* Map HPET and IOAPIC ids to the devid used by the IOMMU */
@@ -883,16 +894,6 @@ extern struct list_head amd_iommu_list;
  */
 extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
 
-/*
- * Structure defining one entry in the device table
- */
-struct dev_table_entry {
-	union {
-		u64 data[4];
-		u128 data128[2];
-	};
-};
-
 /*
  * One entry for unity mappings parsed out of the ACPI table.
  */
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index a1a0bd398c14..552a13f7668c 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -992,6 +992,18 @@ static void iommu_enable_gt(struct amd_iommu *iommu)
 	iommu_feature_enable(iommu, CONTROL_GT_EN);
 }
 
+static void set_dte_cache_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
+{
+	int i = (bit >> 6) & 0x03;
+	int _bit = bit & 0x3f;
+	struct iommu_dev_data *dev_data = find_dev_data(iommu, devid);
+
+	if (!dev_data)
+		return;
+
+	dev_data->dte_cache.data[i] |= (1UL << _bit);
+}
+
 /* sets a specific bit in the device table entry. */
 static void __set_dev_entry_bit(struct dev_table_entry *dev_table,
 				u16 devid, u8 bit)
@@ -1159,19 +1171,19 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
 					   u16 devid, u32 flags, u32 ext_flags)
 {
 	if (flags & ACPI_DEVFLAG_INITPASS)
-		set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
+		set_dte_cache_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
 	if (flags & ACPI_DEVFLAG_EXTINT)
-		set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
+		set_dte_cache_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
 	if (flags & ACPI_DEVFLAG_NMI)
-		set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
+		set_dte_cache_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
 	if (flags & ACPI_DEVFLAG_SYSMGT1)
-		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
+		set_dte_cache_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
 	if (flags & ACPI_DEVFLAG_SYSMGT2)
-		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
+		set_dte_cache_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
 	if (flags & ACPI_DEVFLAG_LINT0)
-		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
+		set_dte_cache_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
 	if (flags & ACPI_DEVFLAG_LINT1)
-		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
+		set_dte_cache_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
 
 	amd_iommu_apply_erratum_63(iommu, devid);
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index ab0d3f46871e..28516d89168a 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -393,7 +393,7 @@ static void setup_aliases(struct amd_iommu *iommu, struct device *dev)
 	clone_aliases(iommu, dev);
 }
 
-static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid)
+struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid)
 {
 	struct iommu_dev_data *dev_data;
 
-- 
2.34.1
Re: [PATCH v6 4/9] iommu/amd: Introduce per-device DTE cache to store persistent bits
Posted by Jason Gunthorpe 1 month, 1 week ago
On Wed, Oct 16, 2024 at 05:17:51AM +0000, Suravee Suthikulpanit wrote:
> diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
> index f537b264f118..3f53d3bc79cb 100644
> --- a/drivers/iommu/amd/amd_iommu_types.h
> +++ b/drivers/iommu/amd/amd_iommu_types.h
> @@ -830,6 +830,16 @@ struct devid_map {
>  /* Device may request super-user privileges */
>  #define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP   0x10
>  
> +/*
> + * Structure defining one entry in the device table
> + */
> +struct dev_table_entry {
> +	union {
> +		u64 data[4];
> +		u128 data128[2];
> +	};
> +};

It would be appropriate to put this hunk into the prior patch so your
series does not move code it just added..

>  /*
>   * This struct contains device specific data for the IOMMU
>   */
> @@ -858,6 +868,7 @@ struct iommu_dev_data {
>  	bool defer_attach;
>  
>  	struct ratelimit_state rs;        /* Ratelimit IOPF messages */
> +	struct dev_table_entry dte_cache;
>  };

I would call this initial_dte or something, it isn't a cache, it is
the recoding of the ACPI data into a DTE format.

 /* Stores INIT/EINT,NMIm SYSMGTx,LINTx values from ACPI */
 struct dev_tabl_entry initial_dte;

> @@ -1159,19 +1171,19 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
>  					   u16 devid, u32 flags, u32 ext_flags)
>  {
>  	if (flags & ACPI_DEVFLAG_INITPASS)
> -		set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
> +		set_dte_cache_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
>  	if (flags & ACPI_DEVFLAG_EXTINT)
> -		set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
> +		set_dte_cache_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
>  	if (flags & ACPI_DEVFLAG_NMI)
> -		set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
> +		set_dte_cache_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
>  	if (flags & ACPI_DEVFLAG_SYSMGT1)
> -		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
> +		set_dte_cache_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
>  	if (flags & ACPI_DEVFLAG_SYSMGT2)
> -		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
> +		set_dte_cache_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
>  	if (flags & ACPI_DEVFLAG_LINT0)
> -		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
> +		set_dte_cache_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
>  	if (flags & ACPI_DEVFLAG_LINT1)
> -		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
> +		set_dte_cache_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);

Doesn't this break the driver at this patch? Nothing reads from the
dte_cache at this point in the series so the real DTE never got these
bits?

Maybe just add a temporary line here to copy the entire dte_cache to
the real dte? The next patch fixes it I think?

But I like the idea, I think this is the a much more understandable
direction to go.

Jason