[PATCH v6 11/16] PCI/AER: Check log level once and remember it

Bjorn Helgaas posted 16 patches 7 months ago
There is a newer version of this series
[PATCH v6 11/16] PCI/AER: Check log level once and remember it
Posted by Bjorn Helgaas 7 months ago
From: Karolina Stolarek <karolina.stolarek@oracle.com>

When reporting an AER error, we check its severity multiple times to
determine the log level for each message. Do this check only once, in the
top-level functions (aer_isr_one_error(), pci_print_aer()) and in the DPC
path (dpc_get_aer_uncorrect_severity()), and save the level in struct
aer_err_info.

[bhelgaas: save log level in struct aer_err_info instead of passing it
as a parameter]
Link: https://lore.kernel.org/r/20250321015806.954866-2-pandoh@google.com
Signed-off-by: Karolina Stolarek <karolina.stolarek@oracle.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h      |  1 +
 drivers/pci/pcie/aer.c | 21 ++++++++++-----------
 drivers/pci/pcie/dpc.c |  1 +
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index b81e99cd4b62..705f9ef58acc 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -588,6 +588,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev)
 struct aer_err_info {
 	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
 	int error_dev_num;
+	const char *level;		/* printk level */
 
 	unsigned int id:16;
 
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 4683a99c7568..73b03a195b14 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -672,21 +672,18 @@ static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
 	}
 }
 
-static void __aer_print_error(struct pci_dev *dev,
-			      struct aer_err_info *info)
+static void __aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 {
 	const char **strings;
 	unsigned long status = info->status & ~info->mask;
-	const char *level, *errmsg;
+	const char *level = info->level;
+	const char *errmsg;
 	int i;
 
-	if (info->severity == AER_CORRECTABLE) {
+	if (info->severity == AER_CORRECTABLE)
 		strings = aer_correctable_error_string;
-		level = KERN_WARNING;
-	} else {
+	else
 		strings = aer_uncorrectable_error_string;
-		level = KERN_ERR;
-	}
 
 	for_each_set_bit(i, &status, 32) {
 		errmsg = strings[i];
@@ -714,7 +711,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 {
 	int layer, agent;
 	int id = pci_dev_id(dev);
-	const char *level;
+	const char *level = info->level;
 
 	pci_dev_aer_stats_incr(dev, info);
 
@@ -727,8 +724,6 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
 	agent = AER_GET_AGENT(info->severity, info->status);
 
-	level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
-
 	aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
 		   aer_error_severity_string[info->severity],
 		   aer_error_layer[layer], aer_agent_string[agent]);
@@ -774,9 +769,11 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
 	if (aer_severity == AER_CORRECTABLE) {
 		status = aer->cor_status;
 		mask = aer->cor_mask;
+		info.level = KERN_WARNING;
 	} else {
 		status = aer->uncor_status;
 		mask = aer->uncor_mask;
+		info.level = KERN_ERR;
 		tlp_header_valid = status & AER_LOG_TLP_MASKS;
 	}
 
@@ -1297,6 +1294,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc,
 		struct aer_err_info e_info = {
 			.id = ERR_COR_ID(e_src->id),
 			.severity = AER_CORRECTABLE,
+			.level = KERN_WARNING,
 			.multi_error_valid = multi ? 1 : 0,
 		};
 
@@ -1312,6 +1310,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc,
 		struct aer_err_info e_info = {
 			.id = ERR_UNCOR_ID(e_src->id),
 			.severity = fatal ? AER_FATAL : AER_NONFATAL,
+			.level = KERN_ERR,
 			.multi_error_valid = multi ? 1 : 0,
 		};
 
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index 315bf2bfd570..34af0ea45c0d 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -252,6 +252,7 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
 	else
 		info->severity = AER_NONFATAL;
 
+	info->level = KERN_WARNING;
 	return 1;
 }
 
-- 
2.43.0
Re: [PATCH v6 11/16] PCI/AER: Check log level once and remember it
Posted by Ilpo Järvinen 7 months ago
On Mon, 19 May 2025, Bjorn Helgaas wrote:

> From: Karolina Stolarek <karolina.stolarek@oracle.com>
> 
> When reporting an AER error, we check its type multiple times to determine
> the log level for each message. Do this check only in the top-level
> functions (aer_isr_one_error(), pci_print_aer()) and save the level in
> struct aer_err_info.
> 
> [bhelgaas: save log level in struct aer_err_info instead of passing it
> as a parameter]
> Link: https://lore.kernel.org/r/20250321015806.954866-2-pandoh@google.com
> Signed-off-by: Karolina Stolarek <karolina.stolarek@oracle.com>
> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
> ---
>  drivers/pci/pci.h      |  1 +
>  drivers/pci/pcie/aer.c | 21 ++++++++++-----------
>  drivers/pci/pcie/dpc.c |  1 +
>  3 files changed, 12 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index b81e99cd4b62..705f9ef58acc 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -588,6 +588,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev)
>  struct aer_err_info {
>  	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
>  	int error_dev_num;
> +	const char *level;		/* printk level */

As a general direction, wouldn't it be better to start adding these
comments in a kerneldoc-compatible format (even if kerneldoc is not yet
enabled with /**)?

Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>

-- 
 i.

>  
>  	unsigned int id:16;
>  
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index 4683a99c7568..73b03a195b14 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -672,21 +672,18 @@ static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
>  	}
>  }
>  
> -static void __aer_print_error(struct pci_dev *dev,
> -			      struct aer_err_info *info)
> +static void __aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>  {
>  	const char **strings;
>  	unsigned long status = info->status & ~info->mask;
> -	const char *level, *errmsg;
> +	const char *level = info->level;
> +	const char *errmsg;
>  	int i;
>  
> -	if (info->severity == AER_CORRECTABLE) {
> +	if (info->severity == AER_CORRECTABLE)
>  		strings = aer_correctable_error_string;
> -		level = KERN_WARNING;
> -	} else {
> +	else
>  		strings = aer_uncorrectable_error_string;
> -		level = KERN_ERR;
> -	}
>  
>  	for_each_set_bit(i, &status, 32) {
>  		errmsg = strings[i];
> @@ -714,7 +711,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>  {
>  	int layer, agent;
>  	int id = pci_dev_id(dev);
> -	const char *level;
> +	const char *level = info->level;
>  
>  	pci_dev_aer_stats_incr(dev, info);
>  
> @@ -727,8 +724,6 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>  	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
>  	agent = AER_GET_AGENT(info->severity, info->status);
>  
> -	level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
> -
>  	aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
>  		   aer_error_severity_string[info->severity],
>  		   aer_error_layer[layer], aer_agent_string[agent]);
> @@ -774,9 +769,11 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>  	if (aer_severity == AER_CORRECTABLE) {
>  		status = aer->cor_status;
>  		mask = aer->cor_mask;
> +		info.level = KERN_WARNING;
>  	} else {
>  		status = aer->uncor_status;
>  		mask = aer->uncor_mask;
> +		info.level = KERN_ERR;
>  		tlp_header_valid = status & AER_LOG_TLP_MASKS;
>  	}
>  
> @@ -1297,6 +1294,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc,
>  		struct aer_err_info e_info = {
>  			.id = ERR_COR_ID(e_src->id),
>  			.severity = AER_CORRECTABLE,
> +			.level = KERN_WARNING,
>  			.multi_error_valid = multi ? 1 : 0,
>  		};
>  
> @@ -1312,6 +1310,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc,
>  		struct aer_err_info e_info = {
>  			.id = ERR_UNCOR_ID(e_src->id),
>  			.severity = fatal ? AER_FATAL : AER_NONFATAL,
> +			.level = KERN_ERR,
>  			.multi_error_valid = multi ? 1 : 0,
>  		};
>  
> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> index 315bf2bfd570..34af0ea45c0d 100644
> --- a/drivers/pci/pcie/dpc.c
> +++ b/drivers/pci/pcie/dpc.c
> @@ -252,6 +252,7 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
>  	else
>  		info->severity = AER_NONFATAL;
>  
> +	info->level = KERN_WARNING;
>  	return 1;
>  }
>  
> 
Re: [PATCH v6 11/16] PCI/AER: Check log level once and remember it
Posted by Sathyanarayanan Kuppuswamy 7 months ago
On 5/19/25 2:35 PM, Bjorn Helgaas wrote:
> From: Karolina Stolarek <karolina.stolarek@oracle.com>
>
> When reporting an AER error, we check its type multiple times to determine
> the log level for each message. Do this check only in the top-level
> functions (aer_isr_one_error(), pci_print_aer()) and save the level in
> struct aer_err_info.
>
> [bhelgaas: save log level in struct aer_err_info instead of passing it
> as a parameter]
> Link: https://lore.kernel.org/r/20250321015806.954866-2-pandoh@google.com
> Signed-off-by: Karolina Stolarek <karolina.stolarek@oracle.com>
> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
> ---

Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>

>   drivers/pci/pci.h      |  1 +
>   drivers/pci/pcie/aer.c | 21 ++++++++++-----------
>   drivers/pci/pcie/dpc.c |  1 +
>   3 files changed, 12 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index b81e99cd4b62..705f9ef58acc 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -588,6 +588,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev)
>   struct aer_err_info {
>   	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
>   	int error_dev_num;
> +	const char *level;		/* printk level */
>   
>   	unsigned int id:16;
>   
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index 4683a99c7568..73b03a195b14 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -672,21 +672,18 @@ static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
>   	}
>   }
>   
> -static void __aer_print_error(struct pci_dev *dev,
> -			      struct aer_err_info *info)
> +static void __aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>   {
>   	const char **strings;
>   	unsigned long status = info->status & ~info->mask;
> -	const char *level, *errmsg;
> +	const char *level = info->level;
> +	const char *errmsg;
>   	int i;
>   
> -	if (info->severity == AER_CORRECTABLE) {
> +	if (info->severity == AER_CORRECTABLE)
>   		strings = aer_correctable_error_string;
> -		level = KERN_WARNING;
> -	} else {
> +	else
>   		strings = aer_uncorrectable_error_string;
> -		level = KERN_ERR;
> -	}
>   
>   	for_each_set_bit(i, &status, 32) {
>   		errmsg = strings[i];
> @@ -714,7 +711,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>   {
>   	int layer, agent;
>   	int id = pci_dev_id(dev);
> -	const char *level;
> +	const char *level = info->level;
>   
>   	pci_dev_aer_stats_incr(dev, info);
>   
> @@ -727,8 +724,6 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>   	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
>   	agent = AER_GET_AGENT(info->severity, info->status);
>   
> -	level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
> -
>   	aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
>   		   aer_error_severity_string[info->severity],
>   		   aer_error_layer[layer], aer_agent_string[agent]);
> @@ -774,9 +769,11 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>   	if (aer_severity == AER_CORRECTABLE) {
>   		status = aer->cor_status;
>   		mask = aer->cor_mask;
> +		info.level = KERN_WARNING;
>   	} else {
>   		status = aer->uncor_status;
>   		mask = aer->uncor_mask;
> +		info.level = KERN_ERR;
>   		tlp_header_valid = status & AER_LOG_TLP_MASKS;
>   	}
>   
> @@ -1297,6 +1294,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc,
>   		struct aer_err_info e_info = {
>   			.id = ERR_COR_ID(e_src->id),
>   			.severity = AER_CORRECTABLE,
> +			.level = KERN_WARNING,
>   			.multi_error_valid = multi ? 1 : 0,
>   		};
>   
> @@ -1312,6 +1310,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc,
>   		struct aer_err_info e_info = {
>   			.id = ERR_UNCOR_ID(e_src->id),
>   			.severity = fatal ? AER_FATAL : AER_NONFATAL,
> +			.level = KERN_ERR,
>   			.multi_error_valid = multi ? 1 : 0,
>   		};
>   
> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> index 315bf2bfd570..34af0ea45c0d 100644
> --- a/drivers/pci/pcie/dpc.c
> +++ b/drivers/pci/pcie/dpc.c
> @@ -252,6 +252,7 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
>   	else
>   		info->severity = AER_NONFATAL;
>   
> +	info->level = KERN_WARNING;

As Weinan pointed out, it should be KERN_ERR.

>   	return 1;
>   }
>   

-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer
[PATCH v6 11/16] PCI/AER: Check log level once and remember it
Posted by Weinan Liu 7 months ago
> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> index 315bf2bfd570..34af0ea45c0d 100644
> --- a/drivers/pci/pcie/dpc.c
> +++ b/drivers/pci/pcie/dpc.c
> @@ -252,6 +252,7 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
>   else
>   info->severity = AER_NONFATAL;
>
> + info->level = KERN_WARNING;
>  return 1;
> }

I think the print level should be KERN_ERR for uncorrectable errors.
Re: [PATCH v6 11/16] PCI/AER: Check log level once and remember it
Posted by Bjorn Helgaas 7 months ago
On Mon, May 19, 2025 at 11:17:28PM +0000, Weinan Liu wrote:
> > diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> > index 315bf2bfd570..34af0ea45c0d 100644
> > --- a/drivers/pci/pcie/dpc.c
> > +++ b/drivers/pci/pcie/dpc.c
> > @@ -252,6 +252,7 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
> >   else
> >   info->severity = AER_NONFATAL;
> >
> > + info->level = KERN_WARNING;
> >  return 1;
> > }
> 
> I think the print level should be KERN_ERR for uncorrectable errors.

Yes, thank you, fixed!  dpc_get_aer_uncorrect_severity() always sets
info->severity to AER_FATAL or AER_NONFATAL, and aer_print_error()
only uses KERN_WARNING for AER_CORRECTABLE.