[PATCH 7/8] ACPI: APEI: EINJ: Enable EINJv2 error injections

Zaid Alali posted 8 patches 1 month ago
There is a newer version of this series
[PATCH 7/8] ACPI: APEI: EINJ: Enable EINJv2 error injections
Posted by Zaid Alali 1 month ago
Enable the driver to inject EINJv2 type errors. The component
array values are parsed from user_input and expected to contain
hex values for component id and syndrome separated by space,
and multiple components are separated by new line as follows:

component_id1 component_syndrome1
component_id2 component_syndrome2
 :
component_id(n) component_syndrome(n)

for example:

$comp_arr="0x1 0x2
>0x1 0x4
>0x2 0x4"
$cd /sys/kernel/debug/apei/einj/
$echo "$comp_arr" > einjv2_component_array

Signed-off-by: Zaid Alali <zaidal@os.amperecomputing.com>
---
 drivers/acpi/apei/einj-core.c | 76 +++++++++++++++++++++++++++++++++--
 1 file changed, 72 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c
index bd46a611eef7..bc833f42dfc7 100644
--- a/drivers/acpi/apei/einj-core.c
+++ b/drivers/acpi/apei/einj-core.c
@@ -87,6 +87,13 @@ enum {
 	SETWA_FLAGS_APICID = 1,
 	SETWA_FLAGS_MEM = 2,
 	SETWA_FLAGS_PCIE_SBDF = 4,
+	SETWA_FLAGS_EINJV2 = 8,
+};
+
+enum {
+	EINJV2_PROCESSOR_ERROR = 0x1,
+	EINJV2_MEMORY_ERROR = 0x2,
+	EINJV2_PCIE_ERROR = 0x4,
 };
 
 /*
@@ -111,6 +118,7 @@ static char vendor_dev[64];
 static struct debugfs_blob_wrapper einjv2_component_arr;
 static u64 component_count;
 static void *user_input;
+static int nr_components;
 static u32 available_error_type;
 static u32 available_error_type_v2;
 
@@ -287,8 +295,18 @@ static void *einj_get_parameter_address(void)
 
 		v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param));
 		if (v5param) {
+			int offset, len;
+
 			acpi5 = 1;
 			check_vendor_extension(pa_v5, v5param);
+			if (available_error_type & ACPI65_EINJV2_SUPP) {
+				len = v5param->einjv2_struct.length;
+				offset = offsetof(struct einjv2_extension_struct, component_arr);
+				nr_components = (len - offset) / 32;
+				acpi_os_unmap_iomem(v5param, sizeof(*v5param));
+				v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param) + (
+					(nr_components) * sizeof(struct syndrome_array)));
+			}
 			return v5param;
 		}
 	}
@@ -496,8 +514,49 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
 			v5param->flags = flags;
 			v5param->memory_address = param1;
 			v5param->memory_address_range = param2;
-			v5param->apicid = param3;
-			v5param->pcie_sbdf = param4;
+
+			if (flags & SETWA_FLAGS_EINJV2) {
+				int count = 0, bytes_read, pos = 0;
+				unsigned int comp, synd;
+				struct syndrome_array *component_arr;
+
+				if (component_count > nr_components)
+					goto err_out;
+
+				v5param->einjv2_struct.component_arr_count = component_count;
+				component_arr = v5param->einjv2_struct.component_arr;
+
+				while (sscanf(user_input+pos, "%x %x\n%n", &comp, &synd,
+							&bytes_read) == 2) {
+					pos += bytes_read;
+					if (count > component_count)
+						goto err_out;
+
+					switch (type) {
+					case EINJV2_PROCESSOR_ERROR:
+						component_arr[count].comp_id.acpi_id = comp;
+						component_arr[count].comp_synd.proc_synd = synd;
+						break;
+					case EINJV2_MEMORY_ERROR:
+						component_arr[count].comp_id.device_id = comp;
+						component_arr[count].comp_synd.mem_synd = synd;
+						break;
+					case EINJV2_PCIE_ERROR:
+						component_arr[count].comp_id.pcie_sbdf = comp;
+						component_arr[count].comp_synd.pcie_synd = synd;
+						break;
+					}
+					count++;
+				}
+				if (count != component_count - 1)
+					goto err_out;
+
+				/* clear buffer after user input for next injection */
+				memset(user_input, 0, COMP_ARR_SIZE);
+			} else {
+				v5param->apicid = param3;
+				v5param->pcie_sbdf = param4;
+			}
 		} else {
 			switch (type) {
 			case ACPI_EINJ_PROCESSOR_CORRECTABLE:
@@ -570,6 +629,9 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
 	rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
 
 	return rc;
+err_out:
+	memset(user_input, 0, COMP_ARR_SIZE);
+	return -EINVAL;
 }
 
 /* Inject the specified hardware error */
@@ -581,9 +643,14 @@ int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
 
 	/* If user manually set "flags", make sure it is legal */
 	if (flags && (flags &
-		~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
+		~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF|SETWA_FLAGS_EINJV2)))
 		return -EINVAL;
 
+	/*check if type is a valid EINJv2 error type*/
+	if (flags & SETWA_FLAGS_EINJV2) {
+		if (!(type & available_error_type_v2))
+			return -EINVAL;
+	}
 	/*
 	 * We need extra sanity checks for memory errors.
 	 * Other types leap directly to injection.
@@ -913,7 +980,8 @@ static void __exit einj_remove(struct platform_device *pdev)
 			sizeof(struct set_error_type_with_address) :
 			sizeof(struct einj_parameter);
 
-		acpi_os_unmap_iomem(einj_param, size);
+		acpi_os_unmap_iomem(einj_param,
+				size + (nr_components * sizeof(struct syndrome_array)));
 		if (vendor_errors.size)
 			acpi_os_unmap_memory(vendor_errors.data, vendor_errors.size);
 	}
-- 
2.34.1
Re: [PATCH 7/8] ACPI: APEI: EINJ: Enable EINJv2 error injections
Posted by John Allen 1 week ago
On Tue, Oct 22, 2024 at 02:34:28PM -0700, Zaid Alali wrote:
> Enable the driver to inject EINJv2 type errors. The component
> array values are parsed from user_input and expected to contain
> hex values for component id and syndrome separated by space,
> and multiple components are separated by new line as follows:
> 
> component_id1 component_syndrome1
> component_id2 component_syndrome2
>  :
> component_id(n) component_syndrome(n)
> 
> for example:
> 
> $comp_arr="0x1 0x2
> >0x1 0x4
> >0x2 0x4"
> $cd /sys/kernel/debug/apei/einj/
> $echo "$comp_arr" > einjv2_component_array
> 
> Signed-off-by: Zaid Alali <zaidal@os.amperecomputing.com>
> ---
>  drivers/acpi/apei/einj-core.c | 76 +++++++++++++++++++++++++++++++++--
>  1 file changed, 72 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c
> index bd46a611eef7..bc833f42dfc7 100644
> --- a/drivers/acpi/apei/einj-core.c
> +++ b/drivers/acpi/apei/einj-core.c
> @@ -87,6 +87,13 @@ enum {
>  	SETWA_FLAGS_APICID = 1,
>  	SETWA_FLAGS_MEM = 2,
>  	SETWA_FLAGS_PCIE_SBDF = 4,
> +	SETWA_FLAGS_EINJV2 = 8,
> +};
> +
> +enum {
> +	EINJV2_PROCESSOR_ERROR = 0x1,
> +	EINJV2_MEMORY_ERROR = 0x2,
> +	EINJV2_PCIE_ERROR = 0x4,
>  };
>  
>  /*
> @@ -111,6 +118,7 @@ static char vendor_dev[64];
>  static struct debugfs_blob_wrapper einjv2_component_arr;
>  static u64 component_count;
>  static void *user_input;
> +static int nr_components;
>  static u32 available_error_type;
>  static u32 available_error_type_v2;
>  
> @@ -287,8 +295,18 @@ static void *einj_get_parameter_address(void)
>  
>  		v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param));
>  		if (v5param) {
> +			int offset, len;
> +
>  			acpi5 = 1;
>  			check_vendor_extension(pa_v5, v5param);
> +			if (available_error_type & ACPI65_EINJV2_SUPP) {
> +				len = v5param->einjv2_struct.length;
> +				offset = offsetof(struct einjv2_extension_struct, component_arr);
> +				nr_components = (len - offset) / 32;
> +				acpi_os_unmap_iomem(v5param, sizeof(*v5param));
> +				v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param) + (
> +					(nr_components) * sizeof(struct syndrome_array)));

The way this line is broken up doesn't look quite right. That paren on
the top line should get pulled on to the next line and aligned with the
beginning of acpi_os_unmap_iomem. See below example.

Also, it's a little awkward here to map the v5param above just to unmap it
here in the case of EINJv2. Is there a reason it needs to be done like
this or can we do something like this instead?

		if (available_error_type & ACPI65_EINJV2_SUPP) {
			len = v5param->einjv2_struct.length;
			offset = offsetof(struct einjv2_extension_struct, component_arr);
			nr_components = (len - offset) / 32;
			v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param) +
				  ((nr_components) * sizeof(struct syndrome_array)));
		else {
			v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param));
		}

		if (v5param) {
			acpi5 = 1;
			check_vendor_extension(pa_v5, v5param);
			return v5param;
		}

> +			}
>  			return v5param;
>  		}
>  	}
> @@ -496,8 +514,49 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
>  			v5param->flags = flags;
>  			v5param->memory_address = param1;
>  			v5param->memory_address_range = param2;
> -			v5param->apicid = param3;
> -			v5param->pcie_sbdf = param4;
> +
> +			if (flags & SETWA_FLAGS_EINJV2) {

IMO, moving this chunk inside the conditional here to a helper function
would improve readability. As written, there are a couple too many
levels of indentation.

> +				int count = 0, bytes_read, pos = 0;
> +				unsigned int comp, synd;
> +				struct syndrome_array *component_arr;
> +
> +				if (component_count > nr_components)
> +					goto err_out;
> +
> +				v5param->einjv2_struct.component_arr_count = component_count;
> +				component_arr = v5param->einjv2_struct.component_arr;
> +
> +				while (sscanf(user_input+pos, "%x %x\n%n", &comp, &synd,

Arithmetic operators should have a single space on each side:
user_input + pos

> +							&bytes_read) == 2) {

Another alignment issue here. This would be nice to have aligned after
the sscanf paren. Like:
				while (sscanf(user_input+pos, "%x %x\n%n", &comp, &synd,
					      &bytes_read) == 2) {

Thanks,
John

> +					pos += bytes_read;
> +					if (count > component_count)
> +						goto err_out;
> +
> +					switch (type) {
> +					case EINJV2_PROCESSOR_ERROR:
> +						component_arr[count].comp_id.acpi_id = comp;
> +						component_arr[count].comp_synd.proc_synd = synd;
> +						break;
> +					case EINJV2_MEMORY_ERROR:
> +						component_arr[count].comp_id.device_id = comp;
> +						component_arr[count].comp_synd.mem_synd = synd;
> +						break;
> +					case EINJV2_PCIE_ERROR:
> +						component_arr[count].comp_id.pcie_sbdf = comp;
> +						component_arr[count].comp_synd.pcie_synd = synd;
> +						break;
> +					}
> +					count++;
> +				}
> +				if (count != component_count - 1)
> +					goto err_out;
> +
> +				/* clear buffer after user input for next injection */
> +				memset(user_input, 0, COMP_ARR_SIZE);
> +			} else {
> +				v5param->apicid = param3;
> +				v5param->pcie_sbdf = param4;
> +			}
>  		} else {
>  			switch (type) {
>  			case ACPI_EINJ_PROCESSOR_CORRECTABLE:
> @@ -570,6 +629,9 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
>  	rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
>  
>  	return rc;
> +err_out:
> +	memset(user_input, 0, COMP_ARR_SIZE);
> +	return -EINVAL;
>  }
>  
>  /* Inject the specified hardware error */
> @@ -581,9 +643,14 @@ int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
>  
>  	/* If user manually set "flags", make sure it is legal */
>  	if (flags && (flags &
> -		~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
> +		~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF|SETWA_FLAGS_EINJV2)))
>  		return -EINVAL;
>  
> +	/*check if type is a valid EINJv2 error type*/
> +	if (flags & SETWA_FLAGS_EINJV2) {
> +		if (!(type & available_error_type_v2))
> +			return -EINVAL;
> +	}
>  	/*
>  	 * We need extra sanity checks for memory errors.
>  	 * Other types leap directly to injection.
> @@ -913,7 +980,8 @@ static void __exit einj_remove(struct platform_device *pdev)
>  			sizeof(struct set_error_type_with_address) :
>  			sizeof(struct einj_parameter);
>  
> -		acpi_os_unmap_iomem(einj_param, size);
> +		acpi_os_unmap_iomem(einj_param,
> +				size + (nr_components * sizeof(struct syndrome_array)));
>  		if (vendor_errors.size)
>  			acpi_os_unmap_memory(vendor_errors.data, vendor_errors.size);
>  	}
> -- 
> 2.34.1
>
Re: [PATCH 7/8] ACPI: APEI: EINJ: Enable EINJv2 error injections
Posted by kernel test robot 1 month ago
Hi Zaid,

kernel test robot noticed the following build warnings:

[auto build test WARNING on rafael-pm/linux-next]
[also build test WARNING on rafael-pm/bleeding-edge linus/master v6.12-rc4 next-20241024]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zaid-Alali/ACPICA-Update-values-to-hex-to-follow-ACPI-specs/20241023-054034
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link:    https://lore.kernel.org/r/20241022213429.1561784-8-zaidal%40os.amperecomputing.com
patch subject: [PATCH 7/8] ACPI: APEI: EINJ: Enable EINJv2 error injections
config: x86_64-randconfig-121-20241024 (https://download.01.org/0day-ci/archive/20241024/202410241620.oApALow5-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241024/202410241620.oApALow5-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410241620.oApALow5-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
   drivers/acpi/apei/einj-core.c:261:11: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct vendor_error_type_extension *v @@     got void [noderef] __iomem * @@
   drivers/acpi/apei/einj-core.c:261:11: sparse:     expected struct vendor_error_type_extension *v
   drivers/acpi/apei/einj-core.c:261:11: sparse:     got void [noderef] __iomem *
   drivers/acpi/apei/einj-core.c:270:29: sparse: sparse: incorrect type in argument 1 (different address spaces) @@     expected void [noderef] __iomem *virt @@     got struct vendor_error_type_extension *v @@
   drivers/acpi/apei/einj-core.c:270:29: sparse:     expected void [noderef] __iomem *virt
   drivers/acpi/apei/einj-core.c:270:29: sparse:     got struct vendor_error_type_extension *v
   drivers/acpi/apei/einj-core.c:296:25: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct set_error_type_with_address *v5param @@     got void [noderef] __iomem * @@
   drivers/acpi/apei/einj-core.c:296:25: sparse:     expected struct set_error_type_with_address *v5param
   drivers/acpi/apei/einj-core.c:296:25: sparse:     got void [noderef] __iomem *
>> drivers/acpi/apei/einj-core.c:306:53: sparse: sparse: incorrect type in argument 1 (different address spaces) @@     expected void [noderef] __iomem *virt @@     got struct set_error_type_with_address *v5param @@
   drivers/acpi/apei/einj-core.c:306:53: sparse:     expected void [noderef] __iomem *virt
   drivers/acpi/apei/einj-core.c:306:53: sparse:     got struct set_error_type_with_address *v5param
   drivers/acpi/apei/einj-core.c:307:41: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct set_error_type_with_address *v5param @@     got void [noderef] __iomem * @@
   drivers/acpi/apei/einj-core.c:307:41: sparse:     expected struct set_error_type_with_address *v5param
   drivers/acpi/apei/einj-core.c:307:41: sparse:     got void [noderef] __iomem *
   drivers/acpi/apei/einj-core.c:316:25: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct einj_parameter *v4param @@     got void [noderef] __iomem * @@
   drivers/acpi/apei/einj-core.c:316:25: sparse:     expected struct einj_parameter *v4param
   drivers/acpi/apei/einj-core.c:316:25: sparse:     got void [noderef] __iomem *
   drivers/acpi/apei/einj-core.c:320:45: sparse: sparse: incorrect type in argument 1 (different address spaces) @@     expected void [noderef] __iomem *virt @@     got struct einj_parameter *v4param @@
   drivers/acpi/apei/einj-core.c:320:45: sparse:     expected void [noderef] __iomem *virt
   drivers/acpi/apei/einj-core.c:320:45: sparse:     got struct einj_parameter *v4param
   drivers/acpi/apei/einj-core.c:387:21: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct acpi_einj_trigger *trigger_tab @@     got void [noderef] __iomem * @@
   drivers/acpi/apei/einj-core.c:387:21: sparse:     expected struct acpi_einj_trigger *trigger_tab
   drivers/acpi/apei/einj-core.c:387:21: sparse:     got void [noderef] __iomem *
   drivers/acpi/apei/einj-core.c:413:17: sparse: sparse: incorrect type in argument 1 (different address spaces) @@     expected void volatile [noderef] __iomem *addr @@     got struct acpi_einj_trigger *trigger_tab @@
   drivers/acpi/apei/einj-core.c:413:17: sparse:     expected void volatile [noderef] __iomem *addr
   drivers/acpi/apei/einj-core.c:413:17: sparse:     got struct acpi_einj_trigger *trigger_tab
   drivers/acpi/apei/einj-core.c:414:21: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct acpi_einj_trigger *trigger_tab @@     got void [noderef] __iomem * @@
   drivers/acpi/apei/einj-core.c:414:21: sparse:     expected struct acpi_einj_trigger *trigger_tab
   drivers/acpi/apei/einj-core.c:414:21: sparse:     got void [noderef] __iomem *
   drivers/acpi/apei/einj-core.c:477:25: sparse: sparse: incorrect type in argument 1 (different address spaces) @@     expected void volatile [noderef] __iomem *addr @@     got struct acpi_einj_trigger *trigger_tab @@
   drivers/acpi/apei/einj-core.c:477:25: sparse:     expected void volatile [noderef] __iomem *addr
   drivers/acpi/apei/einj-core.c:477:25: sparse:     got struct acpi_einj_trigger *trigger_tab
   drivers/acpi/apei/einj-core.c:983:37: sparse: sparse: incorrect type in argument 1 (different address spaces) @@     expected void [noderef] __iomem *virt @@     got void *static [assigned] [toplevel] einj_param @@
   drivers/acpi/apei/einj-core.c:983:37: sparse:     expected void [noderef] __iomem *virt
   drivers/acpi/apei/einj-core.c:983:37: sparse:     got void *static [assigned] [toplevel] einj_param

vim +306 drivers/acpi/apei/einj-core.c

   272	
   273	static void *einj_get_parameter_address(void)
   274	{
   275		int i;
   276		u64 pa_v4 = 0, pa_v5 = 0;
   277		struct acpi_whea_header *entry;
   278	
   279		entry = EINJ_TAB_ENTRY(einj_tab);
   280		for (i = 0; i < einj_tab->entries; i++) {
   281			if (entry->action == ACPI_EINJ_SET_ERROR_TYPE &&
   282			    entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
   283			    entry->register_region.space_id ==
   284			    ACPI_ADR_SPACE_SYSTEM_MEMORY)
   285				pa_v4 = get_unaligned(&entry->register_region.address);
   286			if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS &&
   287			    entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
   288			    entry->register_region.space_id ==
   289			    ACPI_ADR_SPACE_SYSTEM_MEMORY)
   290				pa_v5 = get_unaligned(&entry->register_region.address);
   291			entry++;
   292		}
   293		if (pa_v5) {
   294			struct set_error_type_with_address *v5param;
   295	
   296			v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param));
   297			if (v5param) {
   298				int offset, len;
   299	
   300				acpi5 = 1;
   301				check_vendor_extension(pa_v5, v5param);
   302				if (available_error_type & ACPI65_EINJV2_SUPP) {
   303					len = v5param->einjv2_struct.length;
   304					offset = offsetof(struct einjv2_extension_struct, component_arr);
   305					nr_components = (len - offset) / 32;
 > 306					acpi_os_unmap_iomem(v5param, sizeof(*v5param));
   307					v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param) + (
   308						(nr_components) * sizeof(struct syndrome_array)));
   309				}
   310				return v5param;
   311			}
   312		}
   313		if (param_extension && pa_v4) {
   314			struct einj_parameter *v4param;
   315	
   316			v4param = acpi_os_map_iomem(pa_v4, sizeof(*v4param));
   317			if (!v4param)
   318				return NULL;
   319			if (v4param->reserved1 || v4param->reserved2) {
   320				acpi_os_unmap_iomem(v4param, sizeof(*v4param));
   321				return NULL;
   322			}
   323			return v4param;
   324		}
   325	
   326		return NULL;
   327	}
   328	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki