CXL currently has separate trace routines for CXL Port errors and CXL
Endpoint errors. This is inconvenient for the user because they must enable
2 sets of trace routines. Make updates to the trace logging such that a
single trace routine logs both CXL Endpoint and CXL Port protocol errors.
Keep the trace log fields 'memdev' and 'host'. While these are not accurate
for non-Endpoints the fields will remain as-is to prevent breaking userspace
RAS trace consumers.
Add serial number parameter to the trace logging. This is used for EPs
and 0 is provided for CXL port devices without a serial number.
Below is output of correctable and uncorrectable protocol error logging.
CXL Root Port and CXL Endpoint examples are included below.
Root Port:
cxl_aer_correctable_error: memdev=0000:0c:00.0 host=pci0000:0c serial: 0 status='CRC Threshold Hit'
cxl_aer_uncorrectable_error: memdev=0000:0c:00.0 host=pci0000:0c serial: 0 status: 'Cache Byte Enable Parity Error' first_error: 'Cache Byte Enable Parity Error'
Endpoint:
cxl_aer_correctable_error: memdev=mem3 host=0000:0f:00.0 serial=0 status='CRC Threshold Hit'
cxl_aer_uncorrectable_error: memdev=mem3 host=0000:0f:00.0 serial: 0 status: 'Cache Byte Enable Parity Error' first_error: 'Cache Byte Enable Parity Error'
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
drivers/cxl/core/pci.c | 19 ++++-----
drivers/cxl/core/ras.c | 14 ++++---
drivers/cxl/core/trace.h | 84 +++++++++-------------------------------
3 files changed, 37 insertions(+), 80 deletions(-)
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index c9a4b528e0b8..156ce094a8b9 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -664,8 +664,8 @@ void read_cdat_data(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
-static void cxl_handle_cor_ras(struct device *dev,
- void __iomem *ras_base)
+static void cxl_handle_cor_ras(struct device *dev, u64 serial,
+ void __iomem *ras_base)
{
void __iomem *addr;
u32 status;
@@ -679,7 +679,7 @@ static void cxl_handle_cor_ras(struct device *dev,
status = readl(addr);
if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
- trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status);
+ trace_cxl_aer_correctable_error(dev, serial, status);
}
}
@@ -704,7 +704,8 @@ static void header_log_copy(void __iomem *ras_base, u32 *log)
* Log the state of the RAS status registers and prepare them to log the
* next error status. Return 1 if reset needed.
*/
-static bool cxl_handle_ras(struct device *dev, void __iomem *ras_base)
+static bool cxl_handle_ras(struct device *dev, u64 serial,
+ void __iomem *ras_base)
{
u32 hl[CXL_HEADERLOG_SIZE_U32];
void __iomem *addr;
@@ -733,7 +734,7 @@ static bool cxl_handle_ras(struct device *dev, void __iomem *ras_base)
}
header_log_copy(ras_base, hl);
- trace_cxl_aer_uncorrectable_error(to_cxl_memdev(dev), status, fe, hl);
+ trace_cxl_aer_uncorrectable_error(dev, serial, status, fe, hl);
writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
return true;
@@ -744,13 +745,13 @@ static bool cxl_handle_ras(struct device *dev, void __iomem *ras_base)
static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
struct cxl_dport *dport)
{
- cxl_handle_cor_ras(&cxlds->cxlmd->dev, dport->regs.ras);
+ cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->serial, dport->regs.ras);
}
static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
struct cxl_dport *dport)
{
- return cxl_handle_ras(&cxlds->cxlmd->dev, dport->regs.ras);
+ return cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->serial, dport->regs.ras);
}
/*
@@ -847,7 +848,7 @@ void cxl_cor_error_detected(struct pci_dev *pdev)
if (cxlds->rcd)
cxl_handle_rdport_errors(cxlds);
- cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->regs.ras);
+ cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras);
}
}
EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL");
@@ -876,7 +877,7 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
* chance the situation is recoverable dump the status of the RAS
* capability registers and bounce the active state of the memdev.
*/
- ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->regs.ras);
+ ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras);
}
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index 962dc94fed8c..9588b39faabd 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -13,7 +13,7 @@ static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev,
{
u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
- trace_cxl_port_aer_correctable_error(&pdev->dev, status);
+ trace_cxl_aer_correctable_error(&pdev->dev, 0, status);
}
static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
@@ -28,8 +28,8 @@ static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
else
fe = status;
- trace_cxl_port_aer_uncorrectable_error(&pdev->dev, status, fe,
- ras_cap.header_log);
+ trace_cxl_aer_uncorrectable_error(&pdev->dev, 0, status, fe,
+ ras_cap.header_log);
}
static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev,
@@ -42,7 +42,8 @@ static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev,
if (!cxlds)
return;
- trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
+ trace_cxl_aer_correctable_error(&cxlds->cxlmd->dev, cxlds->serial,
+ status);
}
static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev,
@@ -62,8 +63,9 @@ static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev,
else
fe = status;
- trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe,
- ras_cap.header_log);
+ trace_cxl_aer_uncorrectable_error(&cxlds->cxlmd->dev,
+ cxlds->serial, status,
+ fe, ras_cap.header_log);
}
static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index 25ebfbc1616c..494d6db461a7 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -48,49 +48,22 @@
{ CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \
)
-TRACE_EVENT(cxl_port_aer_uncorrectable_error,
- TP_PROTO(struct device *dev, u32 status, u32 fe, u32 *hl),
- TP_ARGS(dev, status, fe, hl),
- TP_STRUCT__entry(
- __string(device, dev_name(dev))
- __string(host, dev_name(dev->parent))
- __field(u32, status)
- __field(u32, first_error)
- __array(u32, header_log, CXL_HEADERLOG_SIZE_U32)
- ),
- TP_fast_assign(
- __assign_str(device);
- __assign_str(host);
- __entry->status = status;
- __entry->first_error = fe;
- /*
- * Embed the 512B headerlog data for user app retrieval and
- * parsing, but no need to print this in the trace buffer.
- */
- memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE);
- ),
- TP_printk("device=%s host=%s status: '%s' first_error: '%s'",
- __get_str(device), __get_str(host),
- show_uc_errs(__entry->status),
- show_uc_errs(__entry->first_error)
- )
-);
-
TRACE_EVENT(cxl_aer_uncorrectable_error,
- TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl),
- TP_ARGS(cxlmd, status, fe, hl),
+ TP_PROTO(struct device *dev, u64 serial, u32 status, u32 fe,
+ u32 *hl),
+ TP_ARGS(dev, serial, status, fe, hl),
TP_STRUCT__entry(
- __string(memdev, dev_name(&cxlmd->dev))
- __string(host, dev_name(cxlmd->dev.parent))
+ __string(name, dev_name(dev))
+ __string(parent, dev_name(dev->parent))
__field(u64, serial)
__field(u32, status)
__field(u32, first_error)
__array(u32, header_log, CXL_HEADERLOG_SIZE_U32)
),
TP_fast_assign(
- __assign_str(memdev);
- __assign_str(host);
- __entry->serial = cxlmd->cxlds->serial;
+ __assign_str(name);
+ __assign_str(parent);
+ __entry->serial = serial;
__entry->status = status;
__entry->first_error = fe;
/*
@@ -99,8 +72,8 @@ TRACE_EVENT(cxl_aer_uncorrectable_error,
*/
memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE);
),
- TP_printk("memdev=%s host=%s serial=%lld: status: '%s' first_error: '%s'",
- __get_str(memdev), __get_str(host), __entry->serial,
+ TP_printk("memdev=%s host=%s serial=%lld status='%s' first_error='%s'",
+ __get_str(name), __get_str(parent), __entry->serial,
show_uc_errs(__entry->status),
show_uc_errs(__entry->first_error)
)
@@ -124,42 +97,23 @@ TRACE_EVENT(cxl_aer_uncorrectable_error,
{ CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \
)
-TRACE_EVENT(cxl_port_aer_correctable_error,
- TP_PROTO(struct device *dev, u32 status),
- TP_ARGS(dev, status),
- TP_STRUCT__entry(
- __string(device, dev_name(dev))
- __string(host, dev_name(dev->parent))
- __field(u32, status)
- ),
- TP_fast_assign(
- __assign_str(device);
- __assign_str(host);
- __entry->status = status;
- ),
- TP_printk("device=%s host=%s status='%s'",
- __get_str(device), __get_str(host),
- show_ce_errs(__entry->status)
- )
-);
-
TRACE_EVENT(cxl_aer_correctable_error,
- TP_PROTO(const struct cxl_memdev *cxlmd, u32 status),
- TP_ARGS(cxlmd, status),
+ TP_PROTO(struct device *dev, u64 serial, u32 status),
+ TP_ARGS(dev, serial, status),
TP_STRUCT__entry(
- __string(memdev, dev_name(&cxlmd->dev))
- __string(host, dev_name(cxlmd->dev.parent))
+ __string(name, dev_name(dev))
+ __string(parent, dev_name(dev->parent))
__field(u64, serial)
__field(u32, status)
),
TP_fast_assign(
- __assign_str(memdev);
- __assign_str(host);
- __entry->serial = cxlmd->cxlds->serial;
+ __assign_str(name);
+ __assign_str(parent);
+ __entry->serial = serial;
__entry->status = status;
),
- TP_printk("memdev=%s host=%s serial=%lld: status: '%s'",
- __get_str(memdev), __get_str(host), __entry->serial,
+ TP_printk("memdev=%s host=%s serial=%lld status='%s'",
+ __get_str(name), __get_str(parent), __entry->serial,
show_ce_errs(__entry->status)
)
);
--
2.34.1
>-----Original Message----- >From: Terry Bowman <terry.bowman@amd.com> >Sent: 26 June 2025 23:43 >To: dave@stgolabs.net; Jonathan Cameron <jonathan.cameron@huawei.com>; >dave.jiang@intel.com; alison.schofield@intel.com; dan.j.williams@intel.com; >bhelgaas@google.com; Shiju Jose <shiju.jose@huawei.com>; >ming.li@zohomail.com; Smita.KoralahalliChannabasappa@amd.com; >rrichter@amd.com; dan.carpenter@linaro.org; >PradeepVineshReddy.Kodamati@amd.com; lukas@wunner.de; >Benjamin.Cheatham@amd.com; >sathyanarayanan.kuppuswamy@linux.intel.com; terry.bowman@amd.com; >linux-cxl@vger.kernel.org >Cc: linux-kernel@vger.kernel.org; linux-pci@vger.kernel.org >Subject: [PATCH v10 12/17] cxl/pci: Unify CXL trace logging for CXL Endpoints >and CXL Ports > >CXL currently has separate trace routines for CXL Port errors and CXL Endpoint >errors. This is inconvenient for the user because they must enable >2 sets of trace routines. Make updates to the trace logging such that a single >trace routine logs both CXL Endpoint and CXL Port protocol errors. > >Keep the trace log fields 'memdev' and 'host'. While these are not accurate for >non-Endpoints the fields will remain as-is to prevent breaking userspace RAS >trace consumers. > >Add serial number parameter to the trace logging. This is used for EPs and 0 is >provided for CXL port devices without a serial number. > >Below is output of correctable and uncorrectable protocol error logging. >CXL Root Port and CXL Endpoint examples are included below. > >Root Port: >cxl_aer_correctable_error: memdev=0000:0c:00.0 host=pci0000:0c serial: 0 >status='CRC Threshold Hit' >cxl_aer_uncorrectable_error: memdev=0000:0c:00.0 host=pci0000:0c serial: 0 >status: 'Cache Byte Enable Parity Error' first_error: 'Cache Byte Enable Parity >Error' > >Endpoint: >cxl_aer_correctable_error: memdev=mem3 host=0000:0f:00.0 serial=0 >status='CRC Threshold Hit' >cxl_aer_uncorrectable_error: memdev=mem3 host=0000:0f:00.0 serial: 0 status: >'Cache Byte Enable Parity Error' first_error: 'Cache Byte Enable Parity Error' > >Signed-off-by: Terry Bowman <terry.bowman@amd.com> >--- > drivers/cxl/core/pci.c | 19 ++++----- > drivers/cxl/core/ras.c | 14 ++++--- > drivers/cxl/core/trace.h | 84 +++++++++------------------------------- > 3 files changed, 37 insertions(+), 80 deletions(-) > [...] > > static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data >*data) diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index >25ebfbc1616c..494d6db461a7 100644 >--- a/drivers/cxl/core/trace.h >+++ b/drivers/cxl/core/trace.h >@@ -48,49 +48,22 @@ > { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ > ) > >-TRACE_EVENT(cxl_port_aer_uncorrectable_error, >- TP_PROTO(struct device *dev, u32 status, u32 fe, u32 *hl), >- TP_ARGS(dev, status, fe, hl), >- TP_STRUCT__entry( >- __string(device, dev_name(dev)) >- __string(host, dev_name(dev->parent)) >- __field(u32, status) >- __field(u32, first_error) >- __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) >- ), >- TP_fast_assign( >- __assign_str(device); >- __assign_str(host); >- __entry->status = status; >- __entry->first_error = fe; >- /* >- * Embed the 512B headerlog data for user app retrieval and >- * parsing, but no need to print this in the trace buffer. >- */ >- memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); >- ), >- TP_printk("device=%s host=%s status: '%s' first_error: '%s'", >- __get_str(device), __get_str(host), >- show_uc_errs(__entry->status), >- show_uc_errs(__entry->first_error) >- ) >-); >- > TRACE_EVENT(cxl_aer_uncorrectable_error, >- TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 >*hl), >- TP_ARGS(cxlmd, status, fe, hl), >+ TP_PROTO(struct device *dev, u64 serial, u32 status, u32 fe, >+ u32 *hl), >+ TP_ARGS(dev, serial, status, fe, hl), > TP_STRUCT__entry( >- __string(memdev, dev_name(&cxlmd->dev)) >- __string(host, dev_name(cxlmd->dev.parent)) >+ __string(name, dev_name(dev)) >+ __string(parent, dev_name(dev->parent)) Hi Terry, Thanks for considering the feedback given in v9 regarding the compatibility issue with the rasdaemon. https://lore.kernel.org/all/959acc682e6e4b52ac0283b37ee21026@huawei.com/ Probably some confusion w.r.t the feedback. Unfortunately TP_printk(...) is not an ABI that we need to keep stable, it's this structure, TP_STRUCT__entry(..) , that matters to the rasdaemon. > __field(u64, serial) > __field(u32, status) > __field(u32, first_error) > __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) > ), > TP_fast_assign( >- __assign_str(memdev); >- __assign_str(host); >- __entry->serial = cxlmd->cxlds->serial; >+ __assign_str(name); >+ __assign_str(parent); >+ __entry->serial = serial; > __entry->status = status; > __entry->first_error = fe; > /* >@@ -99,8 +72,8 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, > */ > memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); > ), >- TP_printk("memdev=%s host=%s serial=%lld: status: '%s' first_error: >'%s'", >- __get_str(memdev), __get_str(host), __entry->serial, >+ TP_printk("memdev=%s host=%s serial=%lld status='%s' >first_error='%s'", >+ __get_str(name), __get_str(parent), __entry->serial, > show_uc_errs(__entry->status), > show_uc_errs(__entry->first_error) > ) >@@ -124,42 +97,23 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, > { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" >} \ > ) > >-TRACE_EVENT(cxl_port_aer_correctable_error, >- TP_PROTO(struct device *dev, u32 status), >- TP_ARGS(dev, status), >- TP_STRUCT__entry( >- __string(device, dev_name(dev)) >- __string(host, dev_name(dev->parent)) >- __field(u32, status) >- ), >- TP_fast_assign( >- __assign_str(device); >- __assign_str(host); >- __entry->status = status; >- ), >- TP_printk("device=%s host=%s status='%s'", >- __get_str(device), __get_str(host), >- show_ce_errs(__entry->status) >- ) >-); >- > TRACE_EVENT(cxl_aer_correctable_error, >- TP_PROTO(const struct cxl_memdev *cxlmd, u32 status), >- TP_ARGS(cxlmd, status), >+ TP_PROTO(struct device *dev, u64 serial, u32 status), >+ TP_ARGS(dev, serial, status), > TP_STRUCT__entry( >- __string(memdev, dev_name(&cxlmd->dev)) >- __string(host, dev_name(cxlmd->dev.parent)) >+ __string(name, dev_name(dev)) >+ __string(parent, dev_name(dev->parent)) Same as above. > __field(u64, serial) > __field(u32, status) > ), > TP_fast_assign( >- __assign_str(memdev); >- __assign_str(host); >- __entry->serial = cxlmd->cxlds->serial; >+ __assign_str(name); >+ __assign_str(parent); >+ __entry->serial = serial; > __entry->status = status; > ), >- TP_printk("memdev=%s host=%s serial=%lld: status: '%s'", >- __get_str(memdev), __get_str(host), __entry->serial, >+ TP_printk("memdev=%s host=%s serial=%lld status='%s'", >+ __get_str(name), __get_str(parent), __entry->serial, > show_ce_errs(__entry->status) > ) > ); >-- >2.34.1 Thanks, Shiju
On 6/27/2025 7:22 AM, Shiju Jose wrote: >> -----Original Message----- >> From: Terry Bowman <terry.bowman@amd.com> >> Sent: 26 June 2025 23:43 >> To: dave@stgolabs.net; Jonathan Cameron <jonathan.cameron@huawei.com>; >> dave.jiang@intel.com; alison.schofield@intel.com; dan.j.williams@intel.com; >> bhelgaas@google.com; Shiju Jose <shiju.jose@huawei.com>; >> ming.li@zohomail.com; Smita.KoralahalliChannabasappa@amd.com; >> rrichter@amd.com; dan.carpenter@linaro.org; >> PradeepVineshReddy.Kodamati@amd.com; lukas@wunner.de; >> Benjamin.Cheatham@amd.com; >> sathyanarayanan.kuppuswamy@linux.intel.com; terry.bowman@amd.com; >> linux-cxl@vger.kernel.org >> Cc: linux-kernel@vger.kernel.org; linux-pci@vger.kernel.org >> Subject: [PATCH v10 12/17] cxl/pci: Unify CXL trace logging for CXL Endpoints >> and CXL Ports >> >> CXL currently has separate trace routines for CXL Port errors and CXL Endpoint >> errors. This is inconvenient for the user because they must enable >> 2 sets of trace routines. Make updates to the trace logging such that a single >> trace routine logs both CXL Endpoint and CXL Port protocol errors. >> >> Keep the trace log fields 'memdev' and 'host'. While these are not accurate for >> non-Endpoints the fields will remain as-is to prevent breaking userspace RAS >> trace consumers. >> >> Add serial number parameter to the trace logging. This is used for EPs and 0 is >> provided for CXL port devices without a serial number. >> >> Below is output of correctable and uncorrectable protocol error logging. >> CXL Root Port and CXL Endpoint examples are included below. >> >> Root Port: >> cxl_aer_correctable_error: memdev=0000:0c:00.0 host=pci0000:0c serial: 0 >> status='CRC Threshold Hit' >> cxl_aer_uncorrectable_error: memdev=0000:0c:00.0 host=pci0000:0c serial: 0 >> status: 'Cache Byte Enable Parity Error' first_error: 'Cache Byte Enable Parity >> Error' >> >> Endpoint: >> cxl_aer_correctable_error: memdev=mem3 host=0000:0f:00.0 serial=0 >> status='CRC Threshold Hit' >> cxl_aer_uncorrectable_error: memdev=mem3 host=0000:0f:00.0 serial: 0 status: >> 'Cache Byte Enable Parity Error' first_error: 'Cache Byte Enable Parity Error' >> >> Signed-off-by: Terry Bowman <terry.bowman@amd.com> >> --- >> drivers/cxl/core/pci.c | 19 ++++----- >> drivers/cxl/core/ras.c | 14 ++++--- >> drivers/cxl/core/trace.h | 84 +++++++++------------------------------- >> 3 files changed, 37 insertions(+), 80 deletions(-) >> > [...] >> static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data >> *data) diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index >> 25ebfbc1616c..494d6db461a7 100644 >> --- a/drivers/cxl/core/trace.h >> +++ b/drivers/cxl/core/trace.h >> @@ -48,49 +48,22 @@ >> { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ >> ) >> >> -TRACE_EVENT(cxl_port_aer_uncorrectable_error, >> - TP_PROTO(struct device *dev, u32 status, u32 fe, u32 *hl), >> - TP_ARGS(dev, status, fe, hl), >> - TP_STRUCT__entry( >> - __string(device, dev_name(dev)) >> - __string(host, dev_name(dev->parent)) >> - __field(u32, status) >> - __field(u32, first_error) >> - __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) >> - ), >> - TP_fast_assign( >> - __assign_str(device); >> - __assign_str(host); >> - __entry->status = status; >> - __entry->first_error = fe; >> - /* >> - * Embed the 512B headerlog data for user app retrieval and >> - * parsing, but no need to print this in the trace buffer. >> - */ >> - memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); >> - ), >> - TP_printk("device=%s host=%s status: '%s' first_error: '%s'", >> - __get_str(device), __get_str(host), >> - show_uc_errs(__entry->status), >> - show_uc_errs(__entry->first_error) >> - ) >> -); >> - >> TRACE_EVENT(cxl_aer_uncorrectable_error, >> - TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 >> *hl), >> - TP_ARGS(cxlmd, status, fe, hl), >> + TP_PROTO(struct device *dev, u64 serial, u32 status, u32 fe, >> + u32 *hl), >> + TP_ARGS(dev, serial, status, fe, hl), >> TP_STRUCT__entry( >> - __string(memdev, dev_name(&cxlmd->dev)) >> - __string(host, dev_name(cxlmd->dev.parent)) >> + __string(name, dev_name(dev)) >> + __string(parent, dev_name(dev->parent)) > Hi Terry, > > Thanks for considering the feedback given in v9 regarding the compatibility issue > with the rasdaemon. > https://lore.kernel.org/all/959acc682e6e4b52ac0283b37ee21026@huawei.com/ > > Probably some confusion w.r.t the feedback. > Unfortunately TP_printk(...) is not an ABI that we need to keep stable, > it's this structure, TP_STRUCT__entry(..) , that matters to the rasdaemon. > >> Oh. Apologies, I didn't realize TP_STRUCT was an ABI requirement. I will change back the TP_STRUCT as well. -Terry >> __field(u64, serial) >> __field(u32, status) >> __field(u32, first_error) >> __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) >> ), >> TP_fast_assign( >> - __assign_str(memdev); >> - __assign_str(host); >> - __entry->serial = cxlmd->cxlds->serial; >> + __assign_str(name); >> + __assign_str(parent); >> + __entry->serial = serial; >> __entry->status = status; >> __entry->first_error = fe; >> /* >> @@ -99,8 +72,8 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, >> */ >> memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); >> ), >> - TP_printk("memdev=%s host=%s serial=%lld: status: '%s' first_error: >> '%s'", >> - __get_str(memdev), __get_str(host), __entry->serial, >> + TP_printk("memdev=%s host=%s serial=%lld status='%s' >> first_error='%s'", >> + __get_str(name), __get_str(parent), __entry->serial, >> show_uc_errs(__entry->status), >> show_uc_errs(__entry->first_error) >> ) >> @@ -124,42 +97,23 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, >> { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" >> } \ >> ) >> >> -TRACE_EVENT(cxl_port_aer_correctable_error, >> - TP_PROTO(struct device *dev, u32 status), >> - TP_ARGS(dev, status), >> - TP_STRUCT__entry( >> - __string(device, dev_name(dev)) >> - __string(host, dev_name(dev->parent)) >> - __field(u32, status) >> - ), >> - TP_fast_assign( >> - __assign_str(device); >> - __assign_str(host); >> - __entry->status = status; >> - ), >> - TP_printk("device=%s host=%s status='%s'", >> - __get_str(device), __get_str(host), >> - show_ce_errs(__entry->status) >> - ) >> -); >> - >> TRACE_EVENT(cxl_aer_correctable_error, >> - TP_PROTO(const struct cxl_memdev *cxlmd, u32 status), >> - TP_ARGS(cxlmd, status), >> + TP_PROTO(struct device *dev, u64 serial, u32 status), >> + TP_ARGS(dev, serial, status), >> TP_STRUCT__entry( >> - __string(memdev, dev_name(&cxlmd->dev)) >> - __string(host, dev_name(cxlmd->dev.parent)) >> + __string(name, dev_name(dev)) >> + __string(parent, dev_name(dev->parent)) > Same as above. >> __field(u64, serial) >> __field(u32, status) >> ), >> TP_fast_assign( >> - __assign_str(memdev); >> - __assign_str(host); >> - __entry->serial = cxlmd->cxlds->serial; >> + __assign_str(name); >> + __assign_str(parent); >> + __entry->serial = serial; >> __entry->status = status; >> ), >> - TP_printk("memdev=%s host=%s serial=%lld: status: '%s'", >> - __get_str(memdev), __get_str(host), __entry->serial, >> + TP_printk("memdev=%s host=%s serial=%lld status='%s'", >> + __get_str(name), __get_str(parent), __entry->serial, >> show_ce_errs(__entry->status) >> ) >> ); >> -- >> 2.34.1 > Thanks, > Shiju
On Fri, Jun 27, 2025 at 12:22:39PM +0000, Shiju Jose wrote: > >-----Original Message----- > >From: Terry Bowman <terry.bowman@amd.com> > >Sent: 26 June 2025 23:43 > >To: dave@stgolabs.net; Jonathan Cameron <jonathan.cameron@huawei.com>; > >dave.jiang@intel.com; alison.schofield@intel.com; dan.j.williams@intel.com; > >bhelgaas@google.com; Shiju Jose <shiju.jose@huawei.com>; > >ming.li@zohomail.com; Smita.KoralahalliChannabasappa@amd.com; > >rrichter@amd.com; dan.carpenter@linaro.org; > >PradeepVineshReddy.Kodamati@amd.com; lukas@wunner.de; > >Benjamin.Cheatham@amd.com; > >sathyanarayanan.kuppuswamy@linux.intel.com; terry.bowman@amd.com; > >linux-cxl@vger.kernel.org > >Cc: linux-kernel@vger.kernel.org; linux-pci@vger.kernel.org > >Subject: [PATCH v10 12/17] cxl/pci: Unify CXL trace logging for CXL Endpoints > >and CXL Ports > > big snip - > >-); > >- > > TRACE_EVENT(cxl_aer_uncorrectable_error, > >- TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 > >*hl), > >- TP_ARGS(cxlmd, status, fe, hl), > >+ TP_PROTO(struct device *dev, u64 serial, u32 status, u32 fe, > >+ u32 *hl), > >+ TP_ARGS(dev, serial, status, fe, hl), > > TP_STRUCT__entry( > >- __string(memdev, dev_name(&cxlmd->dev)) > >- __string(host, dev_name(cxlmd->dev.parent)) > >+ __string(name, dev_name(dev)) > >+ __string(parent, dev_name(dev->parent)) > > Hi Terry, > > Thanks for considering the feedback given in v9 regarding the compatibility issue > with the rasdaemon. > https://lore.kernel.org/all/959acc682e6e4b52ac0283b37ee21026@huawei.com/ > > Probably some confusion w.r.t the feedback. > Unfortunately TP_printk(...) is not an ABI that we need to keep stable, > it's this structure, TP_STRUCT__entry(..) , that matters to the rasdaemon. > I'm not so sure you should be letting him off the hook for TP_printk ;) It seems TP_printk should be kept aligned w TP_STRUCT_entry(). As a user who often looks at TP_printk output, I'd say keep them all in sync, and consider them ABI - ie. add to but don't modify. > > __field(u64, serial) > > __field(u32, status) > > __field(u32, first_error) > > __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) > > ), > > TP_fast_assign( > >- __assign_str(memdev); > >- __assign_str(host); > >- __entry->serial = cxlmd->cxlds->serial; > >+ __assign_str(name); > >+ __assign_str(parent); > >+ __entry->serial = serial; > > __entry->status = status; > > __entry->first_error = fe; > > /* > >@@ -99,8 +72,8 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, > > */ > > memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); > > ), > >- TP_printk("memdev=%s host=%s serial=%lld: status: '%s' first_error: > >'%s'", > >- __get_str(memdev), __get_str(host), __entry->serial, > >+ TP_printk("memdev=%s host=%s serial=%lld status='%s' > >first_error='%s'", > >+ __get_str(name), __get_str(parent), __entry->serial, > > show_uc_errs(__entry->status), > > show_uc_errs(__entry->first_error) > > ) snip
On 7/1/2025 8:18 PM, Alison Schofield wrote: > On Fri, Jun 27, 2025 at 12:22:39PM +0000, Shiju Jose wrote: >>> -----Original Message----- >>> From: Terry Bowman <terry.bowman@amd.com> >>> Sent: 26 June 2025 23:43 >>> To: dave@stgolabs.net; Jonathan Cameron <jonathan.cameron@huawei.com>; >>> dave.jiang@intel.com; alison.schofield@intel.com; dan.j.williams@intel.com; >>> bhelgaas@google.com; Shiju Jose <shiju.jose@huawei.com>; >>> ming.li@zohomail.com; Smita.KoralahalliChannabasappa@amd.com; >>> rrichter@amd.com; dan.carpenter@linaro.org; >>> PradeepVineshReddy.Kodamati@amd.com; lukas@wunner.de; >>> Benjamin.Cheatham@amd.com; >>> sathyanarayanan.kuppuswamy@linux.intel.com; terry.bowman@amd.com; >>> linux-cxl@vger.kernel.org >>> Cc: linux-kernel@vger.kernel.org; linux-pci@vger.kernel.org >>> Subject: [PATCH v10 12/17] cxl/pci: Unify CXL trace logging for CXL Endpoints >>> and CXL Ports >>> > big snip - > >>> -); >>> - >>> TRACE_EVENT(cxl_aer_uncorrectable_error, >>> - TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 >>> *hl), >>> - TP_ARGS(cxlmd, status, fe, hl), >>> + TP_PROTO(struct device *dev, u64 serial, u32 status, u32 fe, >>> + u32 *hl), >>> + TP_ARGS(dev, serial, status, fe, hl), >>> TP_STRUCT__entry( >>> - __string(memdev, dev_name(&cxlmd->dev)) >>> - __string(host, dev_name(cxlmd->dev.parent)) >>> + __string(name, dev_name(dev)) >>> + __string(parent, dev_name(dev->parent)) >> Hi Terry, >> >> Thanks for considering the feedback given in v9 regarding the compatibility issue >> with the rasdaemon. >> https://lore.kernel.org/all/959acc682e6e4b52ac0283b37ee21026@huawei.com/ >> >> Probably some confusion w.r.t the feedback. >> Unfortunately TP_printk(...) is not an ABI that we need to keep stable, >> it's this structure, TP_STRUCT__entry(..) , that matters to the rasdaemon. >> > I'm not so sure you should be letting him off the hook for TP_printk ;) > It seems TP_printk should be kept aligned w TP_STRUCT_entry(). As a > user who often looks at TP_printk output, I'd say keep them all in > sync, and consider them ABI - ie. add to but don't modify. > > I agree. I will keep TP_printk and TP_STRUCT aligned by using 'memdev' and 'host'. The only change here will be TP_PROTO and will be: TP_PROTO(struct device *dev, u64 serial, u32 status), Let me know if that's not ok. -Terry >>> __field(u64, serial) >>> __field(u32, status) >>> __field(u32, first_error) >>> __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) >>> ), >>> TP_fast_assign( >>> - __assign_str(memdev); >>> - __assign_str(host); >>> - __entry->serial = cxlmd->cxlds->serial; >>> + __assign_str(name); >>> + __assign_str(parent); >>> + __entry->serial = serial; >>> __entry->status = status; >>> __entry->first_error = fe; >>> /* >>> @@ -99,8 +72,8 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, >>> */ >>> memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); >>> ), >>> - TP_printk("memdev=%s host=%s serial=%lld: status: '%s' first_error: >>> '%s'", >>> - __get_str(memdev), __get_str(host), __entry->serial, >>> + TP_printk("memdev=%s host=%s serial=%lld status='%s' >>> first_error='%s'", >>> + __get_str(name), __get_str(parent), __entry->serial, >>> show_uc_errs(__entry->status), >>> show_uc_errs(__entry->first_error) >>> ) > snip >
© 2016 - 2025 Red Hat, Inc.