The current implementation of pcie_do_recovery() assumes that the
recovery process is executed for the device that detected the error.
However, the DPC driver currently passes the error port that experienced
the DPC event to pcie_do_recovery().
Use the Source ID register to correctly identify the device that
detected the error. When passed the error device, pcie_do_recovery()
finds the upstream bridge and walks the bridges potentially affected by
AER errors. Subsequent commits will then be able to accurately access
the AER status of the error device.
No functional changes should be observed.
Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
---
drivers/pci/pci.h | 2 +-
drivers/pci/pcie/dpc.c | 25 +++++++++++++++++++++----
drivers/pci/pcie/edr.c | 7 ++++---
3 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 34f65d69662e..de2f07cefa72 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -654,7 +654,7 @@ struct rcec_ea {
void pci_save_dpc_state(struct pci_dev *dev);
void pci_restore_dpc_state(struct pci_dev *dev);
void pci_dpc_init(struct pci_dev *pdev);
-void dpc_process_error(struct pci_dev *pdev);
+struct pci_dev *dpc_process_error(struct pci_dev *pdev);
pci_ers_result_t dpc_reset_link(struct pci_dev *pdev);
bool pci_dpc_recovered(struct pci_dev *pdev);
unsigned int dpc_tlp_log_len(struct pci_dev *dev);
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index bff29726c6a5..f6069f621683 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -260,10 +260,20 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
return 1;
}
-void dpc_process_error(struct pci_dev *pdev)
+/**
+ * dpc_process_error - handle the DPC error status
+ * @pdev: the port that experienced the containment event
+ *
+ * Return: the device that detected the error.
+ *
+ * NOTE: The device reference count is increased, the caller must decrement
+ * the reference count by calling pci_dev_put().
+ */
+struct pci_dev *dpc_process_error(struct pci_dev *pdev)
{
u16 cap = pdev->dpc_cap, status, source, reason, ext_reason;
struct aer_err_info info = {};
+ struct pci_dev *err_dev;
pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
@@ -279,6 +289,7 @@ void dpc_process_error(struct pci_dev *pdev)
pci_aer_clear_nonfatal_status(pdev);
pci_aer_clear_fatal_status(pdev);
}
+ err_dev = pci_dev_get(pdev);
break;
case PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE:
case PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE:
@@ -290,6 +301,8 @@ void dpc_process_error(struct pci_dev *pdev)
"ERR_FATAL" : "ERR_NONFATAL",
pci_domain_nr(pdev->bus), PCI_BUS_NUM(source),
PCI_SLOT(source), PCI_FUNC(source));
+ err_dev = pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus),
+ PCI_BUS_NUM(source), source & 0xff);
break;
case PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT:
ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT;
@@ -304,8 +317,11 @@ void dpc_process_error(struct pci_dev *pdev)
if (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO &&
pdev->dpc_rp_extensions)
dpc_process_rp_pio_error(pdev);
+ err_dev = pci_dev_get(pdev);
break;
}
+
+ return err_dev;
}
static void pci_clear_surpdn_errors(struct pci_dev *pdev)
@@ -361,7 +377,7 @@ static bool dpc_is_surprise_removal(struct pci_dev *pdev)
static irqreturn_t dpc_handler(int irq, void *context)
{
- struct pci_dev *err_port = context;
+ struct pci_dev *err_port = context, *err_dev;
/*
* According to PCIe r6.0 sec 6.7.6, errors are an expected side effect
@@ -372,10 +388,11 @@ static irqreturn_t dpc_handler(int irq, void *context)
return IRQ_HANDLED;
}
- dpc_process_error(err_port);
+ err_dev = dpc_process_error(err_port);
/* We configure DPC so it only triggers on ERR_FATAL */
- pcie_do_recovery(err_port, pci_channel_io_frozen, dpc_reset_link);
+ pcie_do_recovery(err_dev, pci_channel_io_frozen, dpc_reset_link);
+ pci_dev_put(err_dev);
return IRQ_HANDLED;
}
diff --git a/drivers/pci/pcie/edr.c b/drivers/pci/pcie/edr.c
index 521fca2f40cb..3f971bb04433 100644
--- a/drivers/pci/pcie/edr.c
+++ b/drivers/pci/pcie/edr.c
@@ -150,7 +150,7 @@ static int acpi_send_edr_status(struct pci_dev *pdev, struct pci_dev *edev,
static void edr_handle_event(acpi_handle handle, u32 event, void *data)
{
- struct pci_dev *pdev = data, *err_port;
+ struct pci_dev *pdev = data, *err_port, *err_dev;
pci_ers_result_t estate = PCI_ERS_RESULT_DISCONNECT;
u16 status;
@@ -190,7 +190,7 @@ static void edr_handle_event(acpi_handle handle, u32 event, void *data)
goto send_ost;
}
- dpc_process_error(err_port);
+ err_dev = dpc_process_error(err_port);
pci_aer_raw_clear_status(err_port);
/*
@@ -198,7 +198,7 @@ static void edr_handle_event(acpi_handle handle, u32 event, void *data)
* or ERR_NONFATAL, since the link is already down, use the FATAL
* error recovery path for both cases.
*/
- estate = pcie_do_recovery(err_port, pci_channel_io_frozen, dpc_reset_link);
+ estate = pcie_do_recovery(err_dev, pci_channel_io_frozen, dpc_reset_link);
send_ost:
@@ -215,6 +215,7 @@ static void edr_handle_event(acpi_handle handle, u32 event, void *data)
acpi_send_edr_status(pdev, err_port, EDR_OST_FAILED);
}
+ pci_dev_put(err_dev);
pci_dev_put(err_port);
}
--
2.39.3
Hi Shuai, kernel test robot noticed the following build warnings: [auto build test WARNING on pci/for-linus] [also build test WARNING on linus/master v6.17-rc6 next-20250917] [cannot apply to pci/next] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Shuai-Xue/PCI-DPC-Clarify-naming-for-error-port-in-DPC-Handling/20250917-143459 base: https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git for-linus patch link: https://lore.kernel.org/r/20250917063352.19429-3-xueshuai%40linux.alibaba.com patch subject: [PATCH v5 2/3] PCI/DPC: Run recovery on device that detected the error config: loongarch-allyesconfig (https://download.01.org/0day-ci/archive/20250918/202509180501.eB8FJ5Vt-lkp@intel.com/config) compiler: clang version 22.0.0git (https://github.com/llvm/llvm-project 7c861bcedf61607b6c087380ac711eb7ff918ca6) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250918/202509180501.eB8FJ5Vt-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202509180501.eB8FJ5Vt-lkp@intel.com/ All warnings (new ones prefixed by >>): In file included from <built-in>:3: In file included from include/linux/compiler_types.h:171: include/linux/compiler-clang.h:28:9: warning: '__SANITIZE_ADDRESS__' macro redefined [-Wmacro-redefined] 28 | #define __SANITIZE_ADDRESS__ | ^ <built-in>:371:9: note: previous definition is here 371 | #define __SANITIZE_ADDRESS__ 1 | ^ >> drivers/pci/pcie/edr.c:188:6: warning: variable 'err_dev' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] 188 | if (!(status & PCI_EXP_DPC_STATUS_TRIGGER)) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/pci/pcie/edr.c:218:14: note: uninitialized use occurs here 218 | pci_dev_put(err_dev); | ^~~~~~~ drivers/pci/pcie/edr.c:188:2: note: remove the 'if' if its condition is always false 188 | if (!(status & PCI_EXP_DPC_STATUS_TRIGGER)) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 189 | pci_err(err_port, "Invalid DPC trigger %#010x\n", status); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 190 | goto send_ost; | ~~~~~~~~~~~~~~ 191 | } | ~ drivers/pci/pcie/edr.c:181:6: warning: variable 'err_dev' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] 181 | if (!err_port->dpc_cap) { | ^~~~~~~~~~~~~~~~~~ drivers/pci/pcie/edr.c:218:14: note: uninitialized use occurs here 218 | pci_dev_put(err_dev); | ^~~~~~~ drivers/pci/pcie/edr.c:181:2: note: remove the 'if' if its condition is always false 181 | if (!err_port->dpc_cap) { | ^~~~~~~~~~~~~~~~~~~~~~~~~ 182 | pci_err(err_port, FW_BUG "This device doesn't support DPC\n"); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 183 | goto send_ost; | ~~~~~~~~~~~~~~ 184 | } | ~ drivers/pci/pcie/edr.c:153:50: note: initialize the variable 'err_dev' to silence this 
warning 153 | struct pci_dev *pdev = data, *err_port, *err_dev; | ^ | = NULL 3 warnings generated. vim +188 drivers/pci/pcie/edr.c ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 150 ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 151 static void edr_handle_event(acpi_handle handle, u32 event, void *data) ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 152 { 267102466d7b592 Shuai Xue 2025-09-17 153 struct pci_dev *pdev = data, *err_port, *err_dev; ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 154 pci_ers_result_t estate = PCI_ERS_RESULT_DISCONNECT; ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 155 u16 status; ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 156 ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 157 if (event != ACPI_NOTIFY_DISCONNECT_RECOVER) ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 158 return; ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 159 774820b362b07b9 Bjorn Helgaas 2023-04-07 160 /* 774820b362b07b9 Bjorn Helgaas 2023-04-07 161 * pdev is a Root Port or Downstream Port that is still present and 774820b362b07b9 Bjorn Helgaas 2023-04-07 162 * has triggered a containment event, e.g., DPC, so its child 774820b362b07b9 Bjorn Helgaas 2023-04-07 163 * devices have been disconnected (ACPI r6.5, sec 5.6.6). 774820b362b07b9 Bjorn Helgaas 2023-04-07 164 */ af03958da0678c3 Kuppuswamy Sathyanarayanan 2020-04-15 165 pci_info(pdev, "EDR event received\n"); af03958da0678c3 Kuppuswamy Sathyanarayanan 2020-04-15 166 774820b362b07b9 Bjorn Helgaas 2023-04-07 167 /* 774820b362b07b9 Bjorn Helgaas 2023-04-07 168 * Locate the port that experienced the containment event. pdev 774820b362b07b9 Bjorn Helgaas 2023-04-07 169 * may be that port or a parent of it (PCI Firmware r3.3, sec 774820b362b07b9 Bjorn Helgaas 2023-04-07 170 * 4.6.13). 
774820b362b07b9 Bjorn Helgaas 2023-04-07 171 */ a56b1e47845b946 Shuai Xue 2025-09-17 172 err_port = acpi_dpc_port_get(pdev); a56b1e47845b946 Shuai Xue 2025-09-17 173 if (!err_port) { ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 174 pci_err(pdev, "Firmware failed to locate DPC port\n"); ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 175 return; ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 176 } ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 177 a56b1e47845b946 Shuai Xue 2025-09-17 178 pci_dbg(pdev, "Reported EDR dev: %s\n", pci_name(err_port)); ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 179 ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 180 /* If port does not support DPC, just send the OST */ a56b1e47845b946 Shuai Xue 2025-09-17 181 if (!err_port->dpc_cap) { a56b1e47845b946 Shuai Xue 2025-09-17 182 pci_err(err_port, FW_BUG "This device doesn't support DPC\n"); ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 183 goto send_ost; ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 184 } ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 185 ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 186 /* Check if there is a valid DPC trigger */ a56b1e47845b946 Shuai Xue 2025-09-17 187 pci_read_config_word(err_port, err_port->dpc_cap + PCI_EXP_DPC_STATUS, &status); ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 @188 if (!(status & PCI_EXP_DPC_STATUS_TRIGGER)) { a56b1e47845b946 Shuai Xue 2025-09-17 189 pci_err(err_port, "Invalid DPC trigger %#010x\n", status); ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 190 goto send_ost; ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 191 } ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 192 267102466d7b592 Shuai Xue 2025-09-17 193 err_dev = dpc_process_error(err_port); a56b1e47845b946 Shuai Xue 2025-09-17 194 pci_aer_raw_clear_status(err_port); ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 195 ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 
2020-03-23 196 /* ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 197 * Irrespective of whether the DPC event is triggered by ERR_FATAL ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 198 * or ERR_NONFATAL, since the link is already down, use the FATAL ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 199 * error recovery path for both cases. ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 200 */ 267102466d7b592 Shuai Xue 2025-09-17 201 estate = pcie_do_recovery(err_dev, pci_channel_io_frozen, dpc_reset_link); ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 202 ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 203 send_ost: ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 204 ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 205 /* ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 206 * If recovery is successful, send _OST(0xF, BDF << 16 | 0x80) ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 207 * to firmware. If not successful, send _OST(0xF, BDF << 16 | 0x81). 
ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 208 */ ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 209 if (estate == PCI_ERS_RESULT_RECOVERED) { a56b1e47845b946 Shuai Xue 2025-09-17 210 pci_dbg(err_port, "DPC port successfully recovered\n"); a56b1e47845b946 Shuai Xue 2025-09-17 211 pcie_clear_device_status(err_port); a56b1e47845b946 Shuai Xue 2025-09-17 212 acpi_send_edr_status(pdev, err_port, EDR_OST_SUCCESS); ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 213 } else { a56b1e47845b946 Shuai Xue 2025-09-17 214 pci_dbg(err_port, "DPC port recovery failed\n"); a56b1e47845b946 Shuai Xue 2025-09-17 215 acpi_send_edr_status(pdev, err_port, EDR_OST_FAILED); ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 216 } ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 217 267102466d7b592 Shuai Xue 2025-09-17 218 pci_dev_put(err_dev); a56b1e47845b946 Shuai Xue 2025-09-17 219 pci_dev_put(err_port); ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 220 } ac1c8e35a3262d0 Kuppuswamy Sathyanarayanan 2020-03-23 221 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.