[PATCH v9 6/7] ACPI: APEI: EINJ: Enable EINJv2 error injections

Zaid Alali posted 7 patches 3 months, 4 weeks ago
There is a newer version of this series
[PATCH v9 6/7] ACPI: APEI: EINJ: Enable EINJv2 error injections
Posted by Zaid Alali 3 months, 4 weeks ago
Enable injection using EINJv2 mode of operation.

[Tony: Mostly Zaid's original code. I just changed how the error ID
and syndrome bits are implemented. Also swapped out some camelcase
variable names]

Co-developed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Zaid Alali <zaidal@os.amperecomputing.com>
---
 drivers/acpi/apei/einj-core.c | 56 ++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 11 deletions(-)

diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c
index 8d60e5f1785c..7741c2082f33 100644
--- a/drivers/acpi/apei/einj-core.c
+++ b/drivers/acpi/apei/einj-core.c
@@ -87,6 +87,7 @@ enum {
 	SETWA_FLAGS_APICID = 1,
 	SETWA_FLAGS_MEM = 2,
 	SETWA_FLAGS_PCIE_SBDF = 4,
+	SETWA_FLAGS_EINJV2 = 8,
 };
 
 /*
@@ -181,6 +182,7 @@ bool einj_initialized __ro_after_init;
 
 static void __iomem *einj_param;
 static u32 v5param_size;
+static bool is_v2;
 
 static void einj_exec_ctx_init(struct apei_exec_context *ctx)
 {
@@ -507,12 +509,20 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type,
 	return rc;
 }
 
+static bool is_end_of_list(u8 *val)
+{
+	for (int i = 0; i < COMPONENT_LEN; ++i) {
+		if (val[i] != 0xFF)
+			return false;
+	}
+	return true;
+}
 static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
 			       u64 param3, u64 param4)
 {
 	struct apei_exec_context ctx;
 	u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
-	int rc;
+	int i, rc;
 
 	einj_exec_ctx_init(&ctx);
 
@@ -521,10 +531,10 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
 		return rc;
 	apei_exec_ctx_set_input(&ctx, type);
 	if (acpi5) {
-		struct set_error_type_with_address *v5param, v5_struct;
+		struct set_error_type_with_address *v5param;
 
-		v5param = &v5_struct;
-		memcpy_fromio(v5param, einj_param, sizeof(*v5param));
+		v5param = kmalloc(v5param_size, GFP_KERNEL);
+		memcpy_fromio(v5param, einj_param, v5param_size);
 		v5param->type = type;
 		if (type & ACPI5_VENDOR_BIT) {
 			switch (vendor_flags) {
@@ -544,8 +554,21 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
 			v5param->flags = flags;
 			v5param->memory_address = param1;
 			v5param->memory_address_range = param2;
-			v5param->apicid = param3;
-			v5param->pcie_sbdf = param4;
+
+			if (is_v2) {
+				for (i = 0; i < max_nr_components; i++) {
+					if (is_end_of_list(syndrome_data[i].comp_id.acpi_id))
+						break;
+					v5param->einjv2_struct.component_arr[i].comp_id =
+						syndrome_data[i].comp_id;
+					v5param->einjv2_struct.component_arr[i].comp_synd =
+						syndrome_data[i].comp_synd;
+				}
+				v5param->einjv2_struct.component_arr_count = i;
+			} else {
+				v5param->apicid = param3;
+				v5param->pcie_sbdf = param4;
+			}
 		} else {
 			switch (type) {
 			case ACPI_EINJ_PROCESSOR_CORRECTABLE:
@@ -569,7 +592,8 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
 				break;
 			}
 		}
-		memcpy_toio(einj_param, v5param, sizeof(*v5param));
+		memcpy_toio(einj_param, v5param, v5param_size);
+		kfree(v5param);
 	} else {
 		rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
 		if (rc)
@@ -631,10 +655,15 @@ int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
 	u64 base_addr, size;
 
 	/* If user manually set "flags", make sure it is legal */
-	if (flags && (flags &
-		~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
+	if (flags && (flags & ~(SETWA_FLAGS_APICID | SETWA_FLAGS_MEM |
+		      SETWA_FLAGS_PCIE_SBDF | SETWA_FLAGS_EINJV2)))
 		return -EINVAL;
 
+	/* check if type is a valid EINJv2 error type */
+	if (is_v2) {
+		if (!(type & available_error_type_v2))
+			return -EINVAL;
+	}
 	/*
 	 * We need extra sanity checks for memory errors.
 	 * Other types leap directly to injection.
@@ -743,7 +772,7 @@ static int available_error_type_show(struct seq_file *m, void *v)
 		if (available_error_type & einj_error_type_string[pos].mask)
 			seq_printf(m, "0x%08x\t%s\n", einj_error_type_string[pos].mask,
 				   einj_error_type_string[pos].str);
-	if (available_error_type & ACPI65_EINJV2_SUPP) {
+	if ((available_error_type & ACPI65_EINJV2_SUPP) && einj_v2_enabled) {
 		for (int pos = 0; pos < ARRAY_SIZE(einjv2_error_type_string); pos++) {
 			if (available_error_type_v2 & einjv2_error_type_string[pos].mask)
 				seq_printf(m, "V2_0x%08x\t%s\n", einjv2_error_type_string[pos].mask,
@@ -785,7 +814,7 @@ int einj_validate_error_type(u64 type)
 	if (tval & (tval - 1))
 		return -EINVAL;
 	if (!vendor)
-		if (!(type & available_error_type))
+		if (!(type & (available_error_type | available_error_type_v2)))
 			return -EINVAL;
 
 	return 0;
@@ -804,9 +833,11 @@ static ssize_t error_type_set(struct file *file, const char __user *buf,
 	if (strncmp(einj_buf, "V2_", 3) == 0) {
 		if (!sscanf(einj_buf, "V2_%llx", &val))
 			return -EINVAL;
+		is_v2 = true;
 	} else {
 		if (!sscanf(einj_buf, "%llx", &val))
 			return -EINVAL;
+		is_v2 = false;
 	}
 
 	rc = einj_validate_error_type(val);
@@ -828,6 +859,9 @@ static int error_inject_set(void *data, u64 val)
 	if (!error_type)
 		return -EINVAL;
 
+	if (is_v2)
+		error_flags |= SETWA_FLAGS_EINJV2;
+
 	return einj_error_inject(error_type, error_flags, error_param1, error_param2,
 		error_param3, error_param4);
 }
-- 
2.43.0
Re: [PATCH v9 6/7] ACPI: APEI: EINJ: Enable EINJv2 error injections
Posted by Ira Weiny 3 months, 4 weeks ago
Zaid Alali wrote:
> Enable injection using EINJv2 mode of operation.
> 
> [Tony: Mostly Zaid's original code. I just changed how the error ID
> and syndrome bits are implemented. Also swapped out some camelcase
> variable names]
> 
> Co-developed-by: Tony Luck <tony.luck@intel.com>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> Signed-off-by: Zaid Alali <zaidal@os.amperecomputing.com>
> ---
>  drivers/acpi/apei/einj-core.c | 56 ++++++++++++++++++++++++++++-------
>  1 file changed, 45 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c
> index 8d60e5f1785c..7741c2082f33 100644
> --- a/drivers/acpi/apei/einj-core.c
> +++ b/drivers/acpi/apei/einj-core.c
> @@ -87,6 +87,7 @@ enum {
>  	SETWA_FLAGS_APICID = 1,
>  	SETWA_FLAGS_MEM = 2,
>  	SETWA_FLAGS_PCIE_SBDF = 4,
> +	SETWA_FLAGS_EINJV2 = 8,
>  };
>  
>  /*
> @@ -181,6 +182,7 @@ bool einj_initialized __ro_after_init;
>  
>  static void __iomem *einj_param;
>  static u32 v5param_size;
> +static bool is_v2;
>  
>  static void einj_exec_ctx_init(struct apei_exec_context *ctx)
>  {
> @@ -507,12 +509,20 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type,
>  	return rc;
>  }
>  
> +static bool is_end_of_list(u8 *val)
> +{
> +	for (int i = 0; i < COMPONENT_LEN; ++i) {

Back in patch 3/7 these are defined using a hard coded value.

I think it might be better to at least use COMPONENT_LEN for those
definitions.

I'm also wondering if it would be better to have some type safety here...
but probably fine.

> +		if (val[i] != 0xFF)
> +			return false;
> +	}
> +	return true;

I'm unclear of the way this list is terminated.  The cover letter does not
mention it.  I read the documentation patch and it looks like you echo ''
to the id to terminate.  How does that work here?

From the documentation patch.

  # echo '' > component_id2			# Mark id2 invalid to terminate list


> +}
>  static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
>  			       u64 param3, u64 param4)
>  {
>  	struct apei_exec_context ctx;
>  	u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
> -	int rc;
> +	int i, rc;
>  
>  	einj_exec_ctx_init(&ctx);
>  
> @@ -521,10 +531,10 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
>  		return rc;
>  	apei_exec_ctx_set_input(&ctx, type);
>  	if (acpi5) {
> -		struct set_error_type_with_address *v5param, v5_struct;
> +		struct set_error_type_with_address *v5param;
>  
> -		v5param = &v5_struct;
> -		memcpy_fromio(v5param, einj_param, sizeof(*v5param));
> +		v5param = kmalloc(v5param_size, GFP_KERNEL);
> +		memcpy_fromio(v5param, einj_param, v5param_size);
>  		v5param->type = type;
>  		if (type & ACPI5_VENDOR_BIT) {
>  			switch (vendor_flags) {
> @@ -544,8 +554,21 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
>  			v5param->flags = flags;
>  			v5param->memory_address = param1;
>  			v5param->memory_address_range = param2;
> -			v5param->apicid = param3;
> -			v5param->pcie_sbdf = param4;
> +
> +			if (is_v2) {
> +				for (i = 0; i < max_nr_components; i++) {
> +					if (is_end_of_list(syndrome_data[i].comp_id.acpi_id))
> +						break;
> +					v5param->einjv2_struct.component_arr[i].comp_id =
> +						syndrome_data[i].comp_id;
> +					v5param->einjv2_struct.component_arr[i].comp_synd =
> +						syndrome_data[i].comp_synd;
> +				}
> +				v5param->einjv2_struct.component_arr_count = i;
> +			} else {
> +				v5param->apicid = param3;
> +				v5param->pcie_sbdf = param4;
> +			}
>  		} else {
>  			switch (type) {
>  			case ACPI_EINJ_PROCESSOR_CORRECTABLE:
> @@ -569,7 +592,8 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
>  				break;
>  			}
>  		}
> -		memcpy_toio(einj_param, v5param, sizeof(*v5param));
> +		memcpy_toio(einj_param, v5param, v5param_size);
> +		kfree(v5param);
>  	} else {
>  		rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
>  		if (rc)
> @@ -631,10 +655,15 @@ int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
>  	u64 base_addr, size;
>  
>  	/* If user manually set "flags", make sure it is legal */
> -	if (flags && (flags &
> -		~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
> +	if (flags && (flags & ~(SETWA_FLAGS_APICID | SETWA_FLAGS_MEM |
> +		      SETWA_FLAGS_PCIE_SBDF | SETWA_FLAGS_EINJV2)))
>  		return -EINVAL;
>  
> +	/* check if type is a valid EINJv2 error type */
> +	if (is_v2) {
> +		if (!(type & available_error_type_v2))
> +			return -EINVAL;
> +	}
>  	/*
>  	 * We need extra sanity checks for memory errors.
>  	 * Other types leap directly to injection.
> @@ -743,7 +772,7 @@ static int available_error_type_show(struct seq_file *m, void *v)
>  		if (available_error_type & einj_error_type_string[pos].mask)
>  			seq_printf(m, "0x%08x\t%s\n", einj_error_type_string[pos].mask,
>  				   einj_error_type_string[pos].str);
> -	if (available_error_type & ACPI65_EINJV2_SUPP) {
> +	if ((available_error_type & ACPI65_EINJV2_SUPP) && einj_v2_enabled) {
>  		for (int pos = 0; pos < ARRAY_SIZE(einjv2_error_type_string); pos++) {
>  			if (available_error_type_v2 & einjv2_error_type_string[pos].mask)
>  				seq_printf(m, "V2_0x%08x\t%s\n", einjv2_error_type_string[pos].mask,
> @@ -785,7 +814,7 @@ int einj_validate_error_type(u64 type)
>  	if (tval & (tval - 1))
>  		return -EINVAL;
>  	if (!vendor)
> -		if (!(type & available_error_type))
> +		if (!(type & (available_error_type | available_error_type_v2)))
>  			return -EINVAL;
>  
>  	return 0;
> @@ -804,9 +833,11 @@ static ssize_t error_type_set(struct file *file, const char __user *buf,
>  	if (strncmp(einj_buf, "V2_", 3) == 0) {
>  		if (!sscanf(einj_buf, "V2_%llx", &val))
>  			return -EINVAL;
> +		is_v2 = true;
>  	} else {
>  		if (!sscanf(einj_buf, "%llx", &val))
>  			return -EINVAL;
> +		is_v2 = false;
>  	}
>  
>  	rc = einj_validate_error_type(val);
> @@ -828,6 +859,9 @@ static int error_inject_set(void *data, u64 val)
>  	if (!error_type)
>  		return -EINVAL;
>  
> +	if (is_v2)
> +		error_flags |= SETWA_FLAGS_EINJV2;
> +
> +

Does this flag need to be cleared if a v1 error is being used?

Ira

>  	return einj_error_inject(error_type, error_flags, error_param1, error_param2,
>  		error_param3, error_param4);
>  }
> -- 
> 2.43.0
>
Re: [PATCH v9 6/7] ACPI: APEI: EINJ: Enable EINJv2 error injections
Posted by Zaid Alali 3 months, 3 weeks ago
On Fri, Jun 13, 2025 at 12:37:54PM -0500, Ira Weiny wrote:
> Zaid Alali wrote:
> > Enable injection using EINJv2 mode of operation.
> > 
> > [Tony: Mostly Zaid's original code. I just changed how the error ID
> > and syndrome bits are implemented. Also swapped out some camelcase
> > variable names]
> > 
> > Co-developed-by: Tony Luck <tony.luck@intel.com>
> > Signed-off-by: Tony Luck <tony.luck@intel.com>
> > Signed-off-by: Zaid Alali <zaidal@os.amperecomputing.com>
> > ---
> >  drivers/acpi/apei/einj-core.c | 56 ++++++++++++++++++++++++++++-------
> >  1 file changed, 45 insertions(+), 11 deletions(-)
> > 
> > diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c
> > index 8d60e5f1785c..7741c2082f33 100644
> > --- a/drivers/acpi/apei/einj-core.c
> > +++ b/drivers/acpi/apei/einj-core.c
> > @@ -87,6 +87,7 @@ enum {
> >  	SETWA_FLAGS_APICID = 1,
> >  	SETWA_FLAGS_MEM = 2,
> >  	SETWA_FLAGS_PCIE_SBDF = 4,
> > +	SETWA_FLAGS_EINJV2 = 8,
> >  };
> >  
> >  /*
> > @@ -181,6 +182,7 @@ bool einj_initialized __ro_after_init;
> >  
> >  static void __iomem *einj_param;
> >  static u32 v5param_size;
> > +static bool is_v2;
> >  
> >  static void einj_exec_ctx_init(struct apei_exec_context *ctx)
> >  {
> > @@ -507,12 +509,20 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type,
> >  	return rc;
> >  }
> >  
> > +static bool is_end_of_list(u8 *val)
> > +{
> > +	for (int i = 0; i < COMPONENT_LEN; ++i) {
> 
> Back in patch 3/7 these are defined using a hard coded value.
> 
> I think it might be better to at least use COMPONENT_LEN for those
> definitions.

Thats a good idea! I will update this in the next revision.

> 
> I'm also wondering if it would be better to have some type safety here...
> but probably fine.
> 
> > +		if (val[i] != 0xFF)
> > +			return false;
> > +	}
> > +	return true;
> 
> I'm unclear of the way this list is terminated.  The cover letter does not
> mention it.  I read the documentation patch and it looks like you echo ''
> to the id to terminate.  How does that work here?
> 
> From the documentation patch.
> 
>   # echo '' > component_id2			# Mark id2 invalid to terminate list
> 
> 

As shown in ptach 7/7 "Writing just a newline to any of these files  sets
an invalid (all-ones) value."

> > +}
> >  static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
> >  			       u64 param3, u64 param4)
> >  {
> >  	struct apei_exec_context ctx;
> >  	u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
> > -	int rc;
> > +	int i, rc;
> >  
> >  	einj_exec_ctx_init(&ctx);
> >  
> > @@ -521,10 +531,10 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
> >  		return rc;
> >  	apei_exec_ctx_set_input(&ctx, type);
> >  	if (acpi5) {
> > -		struct set_error_type_with_address *v5param, v5_struct;
> > +		struct set_error_type_with_address *v5param;
> >  
> > -		v5param = &v5_struct;
> > -		memcpy_fromio(v5param, einj_param, sizeof(*v5param));
> > +		v5param = kmalloc(v5param_size, GFP_KERNEL);
> > +		memcpy_fromio(v5param, einj_param, v5param_size);
> >  		v5param->type = type;
> >  		if (type & ACPI5_VENDOR_BIT) {
> >  			switch (vendor_flags) {
> > @@ -544,8 +554,21 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
> >  			v5param->flags = flags;
> >  			v5param->memory_address = param1;
> >  			v5param->memory_address_range = param2;
> > -			v5param->apicid = param3;
> > -			v5param->pcie_sbdf = param4;
> > +
> > +			if (is_v2) {
> > +				for (i = 0; i < max_nr_components; i++) {
> > +					if (is_end_of_list(syndrome_data[i].comp_id.acpi_id))
> > +						break;
> > +					v5param->einjv2_struct.component_arr[i].comp_id =
> > +						syndrome_data[i].comp_id;
> > +					v5param->einjv2_struct.component_arr[i].comp_synd =
> > +						syndrome_data[i].comp_synd;
> > +				}
> > +				v5param->einjv2_struct.component_arr_count = i;
> > +			} else {
> > +				v5param->apicid = param3;
> > +				v5param->pcie_sbdf = param4;
> > +			}
> >  		} else {
> >  			switch (type) {
> >  			case ACPI_EINJ_PROCESSOR_CORRECTABLE:
> > @@ -569,7 +592,8 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
> >  				break;
> >  			}
> >  		}
> > -		memcpy_toio(einj_param, v5param, sizeof(*v5param));
> > +		memcpy_toio(einj_param, v5param, v5param_size);
> > +		kfree(v5param);
> >  	} else {
> >  		rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
> >  		if (rc)
> > @@ -631,10 +655,15 @@ int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
> >  	u64 base_addr, size;
> >  
> >  	/* If user manually set "flags", make sure it is legal */
> > -	if (flags && (flags &
> > -		~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
> > +	if (flags && (flags & ~(SETWA_FLAGS_APICID | SETWA_FLAGS_MEM |
> > +		      SETWA_FLAGS_PCIE_SBDF | SETWA_FLAGS_EINJV2)))
> >  		return -EINVAL;
> >  
> > +	/* check if type is a valid EINJv2 error type */
> > +	if (is_v2) {
> > +		if (!(type & available_error_type_v2))
> > +			return -EINVAL;
> > +	}
> >  	/*
> >  	 * We need extra sanity checks for memory errors.
> >  	 * Other types leap directly to injection.
> > @@ -743,7 +772,7 @@ static int available_error_type_show(struct seq_file *m, void *v)
> >  		if (available_error_type & einj_error_type_string[pos].mask)
> >  			seq_printf(m, "0x%08x\t%s\n", einj_error_type_string[pos].mask,
> >  				   einj_error_type_string[pos].str);
> > -	if (available_error_type & ACPI65_EINJV2_SUPP) {
> > +	if ((available_error_type & ACPI65_EINJV2_SUPP) && einj_v2_enabled) {
> >  		for (int pos = 0; pos < ARRAY_SIZE(einjv2_error_type_string); pos++) {
> >  			if (available_error_type_v2 & einjv2_error_type_string[pos].mask)
> >  				seq_printf(m, "V2_0x%08x\t%s\n", einjv2_error_type_string[pos].mask,
> > @@ -785,7 +814,7 @@ int einj_validate_error_type(u64 type)
> >  	if (tval & (tval - 1))
> >  		return -EINVAL;
> >  	if (!vendor)
> > -		if (!(type & available_error_type))
> > +		if (!(type & (available_error_type | available_error_type_v2)))
> >  			return -EINVAL;
> >  
> >  	return 0;
> > @@ -804,9 +833,11 @@ static ssize_t error_type_set(struct file *file, const char __user *buf,
> >  	if (strncmp(einj_buf, "V2_", 3) == 0) {
> >  		if (!sscanf(einj_buf, "V2_%llx", &val))
> >  			return -EINVAL;
> > +		is_v2 = true;
> >  	} else {
> >  		if (!sscanf(einj_buf, "%llx", &val))
> >  			return -EINVAL;
> > +		is_v2 = false;
> >  	}
> >  
> >  	rc = einj_validate_error_type(val);
> > @@ -828,6 +859,9 @@ static int error_inject_set(void *data, u64 val)
> >  	if (!error_type)
> >  		return -EINVAL;
> >  
> > +	if (is_v2)
> > +		error_flags |= SETWA_FLAGS_EINJV2;
> > +
> > +
> 
> Does this flag need to be cleared if a v1 error is being used?
> 
> Ira

The driver depends on "is_v2" to determine if the injection is v1 or v2, and the user 
is supposed to set the flags for v1 and v2, but I think clearing the flag here is a good idea so
the user does not have to worry about that. I can see why someone would assume not needing
to set the flags when they see "V2_" prefix for v2 error types.
I'll also update this.

Zaid

> 
> >  	return einj_error_inject(error_type, error_flags, error_param1, error_param2,
> >  		error_param3, error_param4);
> >  }
> > -- 
> > 2.43.0
> >