tools/testing/cxl/Kbuild | 2 ++ tools/testing/cxl/mock_cdat.c | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 tools/testing/cxl/mock_cdat.c
The cxl_region_shared_upstream_bandwidth_update() in clx_core works on
PCI/PCIe CXL device only while cxl_test was implemeneted by platform
device.
Mock a cxl_region_shared_upstream_bandwidth_update() which does nothing
for cxl_core so that the cxl_test goes well.
Abuse cxl_region_shared_upstream_bandwidth_update() on platform device
will cause a kernel panic with calltrace:
platform cxl_host_bridge.3: host supports CXL (restricted)
Oops: general protection fault, probably for non-canonical address 0x3ef17856fcae4fbd: 0000 [#1] PREEMPT SMP PTI
CPU: 1 UID: 0 PID: 9167 Comm: cxl Kdump: loaded Tainted: G OE 6.12.0-rc3-master+ #66
Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
Hardware name: LENOVO 90CXCTO1WW/, BIOS FCKT70AUS 04/23/2015
RIP: 0010:pci_bus_read_config_word+0x1c/0x60
Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 53 b8 87 00 00 00 48 83 ec 08 c7 44 24 04 00 00 00 00 f6 c2 01 75 29 <48> 8b 87 c0 00 00 00 48 89 cb 4c 8d 44 24 04 b9 02 00 00 00 48 8b
RSP: 0018:ffffa115034dfbb8 EFLAGS: 00010246
RAX: 0000000000000087 RBX: 0000000000000012 RCX: ffffa115034dfbfe
RDX: 0000000000000016 RSI: 000000006f4e2f4e RDI: 3ef17856fcae4efd
RBP: ffff8cc229121b48 R08: 0000000000000010 R09: 0000000000000000
R10: 0000000000000001 R11: ffff8cc225434360 R12: ffffa115034dfbfe
R13: 0000000000000000 R14: ffff8cc2f119a080 R15: ffffa115034dfc50
FS: 00007f31d93537c0(0000) GS:ffff8cc510a80000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f31d95f3370 CR3: 00000001163ea001 CR4: 00000000001726f0
Call Trace:
<TASK>
? __die_body.cold+0x19/0x27
? die_addr+0x38/0x60
? exc_general_protection+0x1f5/0x4b0
? asm_exc_general_protection+0x22/0x30
? pci_bus_read_config_word+0x1c/0x60
pcie_capability_read_word+0x93/0xb0
pcie_link_speed_mbps+0x18/0x50
cxl_pci_get_bandwidth+0x18/0x60 [cxl_core]
cxl_endpoint_gather_bandwidth.constprop.0+0xf4/0x230 [cxl_core]
? xas_store+0x54/0x660
? preempt_count_add+0x69/0xa0
? _raw_spin_lock+0x13/0x40
? __kmalloc_cache_noprof+0xe7/0x270
cxl_region_shared_upstream_bandwidth_update+0x9c/0x790 [cxl_core]
cxl_region_attach+0x520/0x7e0 [cxl_core]
store_targetN+0xf2/0x120 [cxl_core]
kernfs_fop_write_iter+0x13a/0x1f0
vfs_write+0x23b/0x410
ksys_write+0x53/0xd0
do_syscall_64+0x62/0x180
entry_SYSCALL_64_after_hwframe+0x76/0x7e
And Ying also reported a KASAN error with similar calltrace.
Reported-by: "Huang, Ying" <ying.huang@intel.com>
Closes: https://lore.kernel.org/linux-cxl/87y12w9vp5.fsf@yhuang6-desk2.ccr.corp.intel.com/
Fixes: a5ab0de0ebaa ("cxl: Calculate region bandwidth of targets with shared upstream link")
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
---
tools/testing/cxl/Kbuild | 2 ++
tools/testing/cxl/mock_cdat.c | 8 ++++++++
2 files changed, 10 insertions(+)
create mode 100644 tools/testing/cxl/mock_cdat.c
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index b1256fee3567..ed9f50dee3f5 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -15,6 +15,7 @@ ldflags-y += --wrap=devm_cxl_add_rch_dport
ldflags-y += --wrap=cxl_rcd_component_reg_phys
ldflags-y += --wrap=cxl_endpoint_parse_cdat
ldflags-y += --wrap=cxl_dport_init_ras_reporting
+ldflags-y += --wrap=cxl_region_shared_upstream_bandwidth_update
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl
@@ -61,6 +62,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o
cxl_core-y += $(CXL_CORE_SRC)/hdm.o
cxl_core-y += $(CXL_CORE_SRC)/pmu.o
cxl_core-y += $(CXL_CORE_SRC)/cdat.o
+cxl_core-y += mock_cdat.o
cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
cxl_core-y += config_check.o
diff --git a/tools/testing/cxl/mock_cdat.c b/tools/testing/cxl/mock_cdat.c
new file mode 100644
index 000000000000..99974153b3f6
--- /dev/null
+++ b/tools/testing/cxl/mock_cdat.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024 FUJITSU LIMITED. All rights reserved. */
+
+#include <cxl.h>
+
+void __wrap_cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr)
+{
+}
--
2.44.0
On Wed, Oct 16, 2024 at 09:52:13AM +0800, Li Zhijian wrote: > The cxl_region_shared_upstream_bandwidth_update() in clx_core works on > PCI/PCIe CXL device only while cxl_test was implemeneted by platform > device. > > Mock a cxl_region_shared_upstream_bandwidth_update() which does nothing > for cxl_core so that the cxl_test goes well. > > Abuse cxl_region_shared_upstream_bandwidth_update() on platform device > will cause a kernel panic with calltrace: snip > --- > tools/testing/cxl/Kbuild | 2 ++ > tools/testing/cxl/mock_cdat.c | 8 ++++++++ > 2 files changed, 10 insertions(+) > create mode 100644 tools/testing/cxl/mock_cdat.c > > diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild > index b1256fee3567..ed9f50dee3f5 100644 > --- a/tools/testing/cxl/Kbuild > +++ b/tools/testing/cxl/Kbuild > @@ -15,6 +15,7 @@ ldflags-y += --wrap=devm_cxl_add_rch_dport > ldflags-y += --wrap=cxl_rcd_component_reg_phys > ldflags-y += --wrap=cxl_endpoint_parse_cdat > ldflags-y += --wrap=cxl_dport_init_ras_reporting > +ldflags-y += --wrap=cxl_region_shared_upstream_bandwidth_update > > DRIVERS := ../../../drivers > CXL_SRC := $(DRIVERS)/cxl > @@ -61,6 +62,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o > cxl_core-y += $(CXL_CORE_SRC)/hdm.o > cxl_core-y += $(CXL_CORE_SRC)/pmu.o > cxl_core-y += $(CXL_CORE_SRC)/cdat.o > +cxl_core-y += mock_cdat.o > cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o > cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o > cxl_core-y += config_check.o > diff --git a/tools/testing/cxl/mock_cdat.c b/tools/testing/cxl/mock_cdat.c > new file mode 100644 > index 000000000000..99974153b3f6 > --- /dev/null > +++ b/tools/testing/cxl/mock_cdat.c > @@ -0,0 +1,8 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* Copyright (c) 2024 FUJITSU LIMITED. All rights reserved. */ > + > +#include <cxl.h> > + > +void __wrap_cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr) > +{ > +} The addition of file mock_cdat.c made me wonder why this wrapper couldn't join all the other __wrap's defined in test/mock.c. I tried, as you probably did, and see the circular dependency. I mention it here in case anyone else has a way to simplify this. Otherwise LGTM: Reviewed-by: Alison Schofield <alison.schofield@intel.com> > -- > 2.44.0 >
Alison Schofield wrote: > On Wed, Oct 16, 2024 at 09:52:13AM +0800, Li Zhijian wrote: > > The cxl_region_shared_upstream_bandwidth_update() in clx_core works on > > PCI/PCIe CXL device only while cxl_test was implemeneted by platform > > device. > > > > Mock a cxl_region_shared_upstream_bandwidth_update() which does nothing > > for cxl_core so that the cxl_test goes well. > > > > Abuse cxl_region_shared_upstream_bandwidth_update() on platform device > > will cause a kernel panic with calltrace: > > snip > > > --- > > tools/testing/cxl/Kbuild | 2 ++ > > tools/testing/cxl/mock_cdat.c | 8 ++++++++ > > 2 files changed, 10 insertions(+) > > create mode 100644 tools/testing/cxl/mock_cdat.c > > > > diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild > > index b1256fee3567..ed9f50dee3f5 100644 > > --- a/tools/testing/cxl/Kbuild > > +++ b/tools/testing/cxl/Kbuild > > @@ -15,6 +15,7 @@ ldflags-y += --wrap=devm_cxl_add_rch_dport > > ldflags-y += --wrap=cxl_rcd_component_reg_phys > > ldflags-y += --wrap=cxl_endpoint_parse_cdat > > ldflags-y += --wrap=cxl_dport_init_ras_reporting > > +ldflags-y += --wrap=cxl_region_shared_upstream_bandwidth_update > > > > DRIVERS := ../../../drivers > > CXL_SRC := $(DRIVERS)/cxl > > @@ -61,6 +62,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o > > cxl_core-y += $(CXL_CORE_SRC)/hdm.o > > cxl_core-y += $(CXL_CORE_SRC)/pmu.o > > cxl_core-y += $(CXL_CORE_SRC)/cdat.o > > +cxl_core-y += mock_cdat.o > > cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o > > cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o > > cxl_core-y += config_check.o > > diff --git a/tools/testing/cxl/mock_cdat.c b/tools/testing/cxl/mock_cdat.c > > new file mode 100644 > > index 000000000000..99974153b3f6 > > --- /dev/null > > +++ b/tools/testing/cxl/mock_cdat.c > > @@ -0,0 +1,8 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +/* Copyright (c) 2024 FUJITSU LIMITED. All rights reserved. */ > > + > > +#include <cxl.h> > > + > > +void __wrap_cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr) > > +{ > > +} > > The addition of file mock_cdat.c made me wonder why this wrapper couldn't join > all the other __wrap's defined in test/mock.c. I tried, as you probably did, > and see the circular dependency. I mention it here in case anyone else has a > way to simplify this. Yeah, unfortunately symbols can only be mocked across EXPORT_SYMBOL() boundaries, but since the caller of this is internal to the CXL core it is not amenable to the wrap approach. So, unfortunately what this patch does is break the expectation that cxl_test can live alongside and not regress any production flows. I.e. what this patch does is replace cxl_region_shared_upstream_bandwidth_update() for all use cases, not just platform devices. Compare that to tools/testing/cxl/test/mock.c which arranges for all the mocked use cases to call back into the real routines in the real device case. Given that I think this puts the device type check back in play. However, instead of checking "dev_is_platform()" check "dev_is_pci()" to be consistent with all the other CXL core internal functions that exit early when passed cxl_test devices.
Hey Dan, Thanks for your review... I want to confirm with you On 18/10/2024 09:20, Dan Williams wrote: > Alison Schofield wrote: >> On Wed, Oct 16, 2024 at 09:52:13AM +0800, Li Zhijian wrote: >>> The cxl_region_shared_upstream_bandwidth_update() in clx_core works on >>> PCI/PCIe CXL device only while cxl_test was implemeneted by platform >>> device. >>> >>> Mock a cxl_region_shared_upstream_bandwidth_update() which does nothing >>> for cxl_core so that the cxl_test goes well. >>> >>> Abuse cxl_region_shared_upstream_bandwidth_update() on platform device >>> will cause a kernel panic with calltrace: >> >> snip >> >>> --- >>> tools/testing/cxl/Kbuild | 2 ++ >>> tools/testing/cxl/mock_cdat.c | 8 ++++++++ >>> 2 files changed, 10 insertions(+) >>> create mode 100644 tools/testing/cxl/mock_cdat.c >>> >>> diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild >>> index b1256fee3567..ed9f50dee3f5 100644 >>> --- a/tools/testing/cxl/Kbuild >>> +++ b/tools/testing/cxl/Kbuild >>> @@ -15,6 +15,7 @@ ldflags-y += --wrap=devm_cxl_add_rch_dport >>> ldflags-y += --wrap=cxl_rcd_component_reg_phys >>> ldflags-y += --wrap=cxl_endpoint_parse_cdat >>> ldflags-y += --wrap=cxl_dport_init_ras_reporting >>> +ldflags-y += --wrap=cxl_region_shared_upstream_bandwidth_update >>> >>> DRIVERS := ../../../drivers >>> CXL_SRC := $(DRIVERS)/cxl >>> @@ -61,6 +62,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o >>> cxl_core-y += $(CXL_CORE_SRC)/hdm.o >>> cxl_core-y += $(CXL_CORE_SRC)/pmu.o >>> cxl_core-y += $(CXL_CORE_SRC)/cdat.o >>> +cxl_core-y += mock_cdat.o >>> cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o >>> cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o >>> cxl_core-y += config_check.o >>> diff --git a/tools/testing/cxl/mock_cdat.c b/tools/testing/cxl/mock_cdat.c >>> new file mode 100644 >>> index 000000000000..99974153b3f6 >>> --- /dev/null >>> +++ b/tools/testing/cxl/mock_cdat.c >>> @@ -0,0 +1,8 @@ >>> +// SPDX-License-Identifier: GPL-2.0-only >>> +/* Copyright (c) 2024 FUJITSU LIMITED. All rights reserved. */ >>> + >>> +#include <cxl.h> >>> + >>> +void __wrap_cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr) >>> +{ >>> +} >> >> The addition of file mock_cdat.c made me wonder why this wrapper couldn't join >> all the other __wrap's defined in test/mock.c. I tried, as you probably did, >> and see the circular dependency. I mention it here in case anyone else has a >> way to simplify this. > > Yeah, unfortunately symbols can only be mocked across EXPORT_SYMBOL() > boundaries, but since the caller of this is internal to the CXL core it > is not amenable to the wrap approach. > > So, unfortunately what this patch does is break the expectation that > cxl_test can live alongside and not regress any production flows. I.e. > what this patch does is replace > cxl_region_shared_upstream_bandwidth_update() for all use cases, not > just platform devices. Yes, that's true. > > Compare that to tools/testing/cxl/test/mock.c which arranges for all the > mocked use cases to call back into the real routines in the real device > case. > > Given that I think this puts the device type check back in play. Just to confirm, do you mean add device type check to drivers/cxl/core/cdat.c --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -5,6 +5,7 @@ #include <linux/fw_table.h> #include <linux/node.h> #include <linux/overflow.h> +#include <linux/platform_device.h> #include "cxlpci.h" #include "cxlmem.h" #include "core.h" @@ -641,9 +642,13 @@ static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr, void *ptr; int rc; + if (dev_is_pci(cxlds->dev)) + return -ENODEV; + if (cxlds->rcd) return -ENODEV; or tools/testing/cxl/mock_cdat.c #include <cxl.h> void __wrap_cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr) { for (int i = 0; i < cxlr->params.nr_targets; i++) { struct cxl_endpoint_decoder *cxled = cxlr->params.targets[i]; struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; if (!dev_is_pci(cxlds->dev)) return; } cxl_region_shared_upstream_bandwidth_update(cxlr); } Thanks Zhijian > > However, instead of checking "dev_is_platform()" check "dev_is_pci()" to > be consistent with all the other CXL core internal functions that exit > early when passed cxl_test devices.
Zhijian Li (Fujitsu) wrote: > Hey Dan, > > Thanks for your review... > > I want to confirm with you [..] > > Just to confirm, do you mean add device type check to drivers/cxl/core/cdat.c > > --- a/drivers/cxl/core/cdat.c > +++ b/drivers/cxl/core/cdat.c > @@ -5,6 +5,7 @@ > #include <linux/fw_table.h> > #include <linux/node.h> > #include <linux/overflow.h> > +#include <linux/platform_device.h> > #include "cxlpci.h" > #include "cxlmem.h" > #include "core.h" > @@ -641,9 +642,13 @@ static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr, > void *ptr; > int rc; > > + if (dev_is_pci(cxlds->dev)) > + return -ENODEV; > + > if (cxlds->rcd) > return -ENODEV; > Yes, this one looks good to me. Place the determination as far out into a leaf function as possible.
Zhijian Li (Fujitsu) wrote: > Hey Dan, > > Thanks for your review... > > I want to confirm with you > > > On 18/10/2024 09:20, Dan Williams wrote: > > Alison Schofield wrote: > >> On Wed, Oct 16, 2024 at 09:52:13AM +0800, Li Zhijian wrote: > >>> The cxl_region_shared_upstream_bandwidth_update() in clx_core works on > >>> PCI/PCIe CXL device only while cxl_test was implemeneted by platform > >>> device. > >>> > >>> Mock a cxl_region_shared_upstream_bandwidth_update() which does nothing > >>> for cxl_core so that the cxl_test goes well. > >>> > >>> Abuse cxl_region_shared_upstream_bandwidth_update() on platform device > >>> will cause a kernel panic with calltrace: > >> > >> snip > >> > >>> --- > >>> tools/testing/cxl/Kbuild | 2 ++ > >>> tools/testing/cxl/mock_cdat.c | 8 ++++++++ > >>> 2 files changed, 10 insertions(+) > >>> create mode 100644 tools/testing/cxl/mock_cdat.c > >>> > >>> diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild > >>> index b1256fee3567..ed9f50dee3f5 100644 > >>> --- a/tools/testing/cxl/Kbuild > >>> +++ b/tools/testing/cxl/Kbuild > >>> @@ -15,6 +15,7 @@ ldflags-y += --wrap=devm_cxl_add_rch_dport > >>> ldflags-y += --wrap=cxl_rcd_component_reg_phys > >>> ldflags-y += --wrap=cxl_endpoint_parse_cdat > >>> ldflags-y += --wrap=cxl_dport_init_ras_reporting > >>> +ldflags-y += --wrap=cxl_region_shared_upstream_bandwidth_update > >>> > >>> DRIVERS := ../../../drivers > >>> CXL_SRC := $(DRIVERS)/cxl > >>> @@ -61,6 +62,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o > >>> cxl_core-y += $(CXL_CORE_SRC)/hdm.o > >>> cxl_core-y += $(CXL_CORE_SRC)/pmu.o > >>> cxl_core-y += $(CXL_CORE_SRC)/cdat.o > >>> +cxl_core-y += mock_cdat.o > >>> cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o > >>> cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o > >>> cxl_core-y += config_check.o > >>> diff --git a/tools/testing/cxl/mock_cdat.c b/tools/testing/cxl/mock_cdat.c > >>> new file mode 100644 > >>> index 000000000000..99974153b3f6 > >>> --- /dev/null > >>> +++ b/tools/testing/cxl/mock_cdat.c > >>> @@ -0,0 +1,8 @@ > >>> +// SPDX-License-Identifier: GPL-2.0-only > >>> +/* Copyright (c) 2024 FUJITSU LIMITED. All rights reserved. */ > >>> + > >>> +#include <cxl.h> > >>> + > >>> +void __wrap_cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr) > >>> +{ > >>> +} > >> > >> The addition of file mock_cdat.c made me wonder why this wrapper couldn't join > >> all the other __wrap's defined in test/mock.c. I tried, as you probably did, > >> and see the circular dependency. I mention it here in case anyone else has a > >> way to simplify this. > > > > Yeah, unfortunately symbols can only be mocked across EXPORT_SYMBOL() > > boundaries, but since the caller of this is internal to the CXL core it > > is not amenable to the wrap approach. > > > > So, unfortunately what this patch does is break the expectation that > > cxl_test can live alongside and not regress any production flows. I.e. > > what this patch does is replace > > cxl_region_shared_upstream_bandwidth_update() for all use cases, not > > just platform devices. > > Yes, that's true. > > > > > > Compare that to tools/testing/cxl/test/mock.c which arranges for all the > > mocked use cases to call back into the real routines in the real device > > case. > > > > Given that I think this puts the device type check back in play. > > Just to confirm, do you mean add device type check to drivers/cxl/core/cdat.c > > --- a/drivers/cxl/core/cdat.c > +++ b/drivers/cxl/core/cdat.c > @@ -5,6 +5,7 @@ > #include <linux/fw_table.h> > #include <linux/node.h> > #include <linux/overflow.h> > +#include <linux/platform_device.h> > #include "cxlpci.h" > #include "cxlmem.h" > #include "core.h" > @@ -641,9 +642,13 @@ static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr, > void *ptr; > int rc; > > + if (dev_is_pci(cxlds->dev)) Did you mean... if (!dev_is_pci(cxlds->dev)) ??? > + return -ENODEV; > + > if (cxlds->rcd) > return -ENODEV; > > > or > > tools/testing/cxl/mock_cdat.c > > #include <cxl.h> > > void __wrap_cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr) > { > for (int i = 0; i < cxlr->params.nr_targets; i++) { > struct cxl_endpoint_decoder *cxled = cxlr->params.targets[i]; > struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); > struct cxl_dev_state *cxlds = cxlmd->cxlds; > > if (!dev_is_pci(cxlds->dev)) > return; > } > > cxl_region_shared_upstream_bandwidth_update(cxlr); > } Couldn't this be done on the endpoint call? (Making it non-static?) int __wrap_cxl_endpoint_gather_bandwidth(... struct cxl_endpoint_decoder *cxled, ...) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; if (!dev_is_pci(cxlds->dev)) return -ENODEV; return cxl_endpoint_gather_bandwidth(...); } Ira > > > Thanks > Zhijian > > > > > However, instead of checking "dev_is_platform()" check "dev_is_pci()" to > > be consistent with all the other CXL core internal functions that exit > > early when passed cxl_test devices.
Ira Weiny wrote: [..] > > --- a/drivers/cxl/core/cdat.c > > +++ b/drivers/cxl/core/cdat.c > > @@ -5,6 +5,7 @@ > > #include <linux/fw_table.h> > > #include <linux/node.h> > > #include <linux/overflow.h> > > +#include <linux/platform_device.h> > > #include "cxlpci.h" > > #include "cxlmem.h" > > #include "core.h" > > @@ -641,9 +642,13 @@ static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr, > > void *ptr; > > int rc; > > > > + if (dev_is_pci(cxlds->dev)) > > Did you mean... > > if (!dev_is_pci(cxlds->dev)) > ??? Yes, skip non-PCI. [..] > Couldn't this be done on the endpoint call? (Making it non-static?) > > int __wrap_cxl_endpoint_gather_bandwidth(... > struct cxl_endpoint_decoder *cxled, > ...) > { > struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); > struct cxl_dev_state *cxlds = cxlmd->cxlds; > > if (!dev_is_pci(cxlds->dev)) > return -ENODEV; > > return cxl_endpoint_gather_bandwidth(...); > } No, cxl_endpoint_gather_bandwidth() is called internal to cxl_core, there is no opportunity to optionally wrap it, only replace it. In order for this path to be mocked the cxl_region driver would need to be moved to a module and then cxl_region_shared_upstream_bandwidth_update() could be unit tested.
Dan Williams wrote: > Alison Schofield wrote: > > On Wed, Oct 16, 2024 at 09:52:13AM +0800, Li Zhijian wrote: > > > The cxl_region_shared_upstream_bandwidth_update() in clx_core works on > > > PCI/PCIe CXL device only while cxl_test was implemeneted by platform > > > device. > > > > > > Mock a cxl_region_shared_upstream_bandwidth_update() which does nothing > > > for cxl_core so that the cxl_test goes well. > > > > > > Abuse cxl_region_shared_upstream_bandwidth_update() on platform device > > > will cause a kernel panic with calltrace: > > > > snip > > > > > --- > > > tools/testing/cxl/Kbuild | 2 ++ > > > tools/testing/cxl/mock_cdat.c | 8 ++++++++ > > > 2 files changed, 10 insertions(+) > > > create mode 100644 tools/testing/cxl/mock_cdat.c > > > > > > diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild > > > index b1256fee3567..ed9f50dee3f5 100644 > > > --- a/tools/testing/cxl/Kbuild > > > +++ b/tools/testing/cxl/Kbuild > > > @@ -15,6 +15,7 @@ ldflags-y += --wrap=devm_cxl_add_rch_dport > > > ldflags-y += --wrap=cxl_rcd_component_reg_phys > > > ldflags-y += --wrap=cxl_endpoint_parse_cdat > > > ldflags-y += --wrap=cxl_dport_init_ras_reporting > > > +ldflags-y += --wrap=cxl_region_shared_upstream_bandwidth_update > > > > > > DRIVERS := ../../../drivers > > > CXL_SRC := $(DRIVERS)/cxl > > > @@ -61,6 +62,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o > > > cxl_core-y += $(CXL_CORE_SRC)/hdm.o > > > cxl_core-y += $(CXL_CORE_SRC)/pmu.o > > > cxl_core-y += $(CXL_CORE_SRC)/cdat.o > > > +cxl_core-y += mock_cdat.o > > > cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o > > > cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o > > > cxl_core-y += config_check.o > > > diff --git a/tools/testing/cxl/mock_cdat.c b/tools/testing/cxl/mock_cdat.c > > > new file mode 100644 > > > index 000000000000..99974153b3f6 > > > --- /dev/null > > > +++ b/tools/testing/cxl/mock_cdat.c > > > @@ -0,0 +1,8 @@ > > > +// SPDX-License-Identifier: GPL-2.0-only > > > +/* Copyright (c) 2024 FUJITSU LIMITED. All rights reserved. */ > > > + > > > +#include <cxl.h> > > > + > > > +void __wrap_cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr) > > > +{ > > > +} > > > > The addition of file mock_cdat.c made me wonder why this wrapper couldn't join > > all the other __wrap's defined in test/mock.c. I tried, as you probably did, > > and see the circular dependency. I mention it here in case anyone else has a > > way to simplify this. > > Yeah, unfortunately symbols can only be mocked across EXPORT_SYMBOL() > boundaries, but since the caller of this is internal to the CXL core it > is not amenable to the wrap approach. > > So, unfortunately what this patch does is break the expectation that > cxl_test can live alongside and not regress any production flows. I.e. > what this patch does is replace > cxl_region_shared_upstream_bandwidth_update() for all use cases, not > just platform devices. > > Compare that to tools/testing/cxl/test/mock.c which arranges for all the > mocked use cases to call back into the real routines in the real device > case. > > Given that I think this puts the device type check back in play. > > However, instead of checking "dev_is_platform()" check "dev_is_pci()" to > be consistent with all the other CXL core internal functions that exit > early when passed cxl_test devices. Zhijian, Looks like we will need a spin of this fix. Do you have time to do that? Ira
Hi, Zhijian, Li Zhijian <lizhijian@fujitsu.com> writes: > The cxl_region_shared_upstream_bandwidth_update() in clx_core works on > PCI/PCIe CXL device only while cxl_test was implemeneted by platform > device. > > Mock a cxl_region_shared_upstream_bandwidth_update() which does nothing > for cxl_core so that the cxl_test goes well. > > Abuse cxl_region_shared_upstream_bandwidth_update() on platform device > will cause a kernel panic with calltrace: > platform cxl_host_bridge.3: host supports CXL (restricted) > Oops: general protection fault, probably for non-canonical address 0x3ef17856fcae4fbd: 0000 [#1] PREEMPT SMP PTI > CPU: 1 UID: 0 PID: 9167 Comm: cxl Kdump: loaded Tainted: G OE 6.12.0-rc3-master+ #66 > Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE > Hardware name: LENOVO 90CXCTO1WW/, BIOS FCKT70AUS 04/23/2015 > RIP: 0010:pci_bus_read_config_word+0x1c/0x60 > Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 53 b8 87 00 00 00 48 83 ec 08 c7 44 24 04 00 00 00 00 f6 c2 01 75 29 <48> 8b 87 c0 00 00 00 48 89 cb 4c 8d 44 24 04 b9 02 00 00 00 48 8b > RSP: 0018:ffffa115034dfbb8 EFLAGS: 00010246 > RAX: 0000000000000087 RBX: 0000000000000012 RCX: ffffa115034dfbfe > RDX: 0000000000000016 RSI: 000000006f4e2f4e RDI: 3ef17856fcae4efd > RBP: ffff8cc229121b48 R08: 0000000000000010 R09: 0000000000000000 > R10: 0000000000000001 R11: ffff8cc225434360 R12: ffffa115034dfbfe > R13: 0000000000000000 R14: ffff8cc2f119a080 R15: ffffa115034dfc50 > FS: 00007f31d93537c0(0000) GS:ffff8cc510a80000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 00007f31d95f3370 CR3: 00000001163ea001 CR4: 00000000001726f0 > Call Trace: > <TASK> > ? __die_body.cold+0x19/0x27 > ? die_addr+0x38/0x60 > ? exc_general_protection+0x1f5/0x4b0 > ? asm_exc_general_protection+0x22/0x30 > ? pci_bus_read_config_word+0x1c/0x60 > pcie_capability_read_word+0x93/0xb0 > pcie_link_speed_mbps+0x18/0x50 > cxl_pci_get_bandwidth+0x18/0x60 [cxl_core] > cxl_endpoint_gather_bandwidth.constprop.0+0xf4/0x230 [cxl_core] > ? xas_store+0x54/0x660 > ? preempt_count_add+0x69/0xa0 > ? _raw_spin_lock+0x13/0x40 > ? __kmalloc_cache_noprof+0xe7/0x270 > cxl_region_shared_upstream_bandwidth_update+0x9c/0x790 [cxl_core] > cxl_region_attach+0x520/0x7e0 [cxl_core] > store_targetN+0xf2/0x120 [cxl_core] > kernfs_fop_write_iter+0x13a/0x1f0 > vfs_write+0x23b/0x410 > ksys_write+0x53/0xd0 > do_syscall_64+0x62/0x180 > entry_SYSCALL_64_after_hwframe+0x76/0x7e > > And Ying also reported a KASAN error with similar calltrace. > > Reported-by: "Huang, Ying" <ying.huang@intel.com> > Closes: https://lore.kernel.org/linux-cxl/87y12w9vp5.fsf@yhuang6-desk2.ccr.corp.intel.com/ > Fixes: a5ab0de0ebaa ("cxl: Calculate region bandwidth of targets with shared upstream link") > Signed-off-by: Li Zhijian <lizhijian@fujitsu.com> This fixes the KASAN error report in my test too. Thanks! Feel free to add Tested-by: "Huang, Ying" <ying.huang@intel.com> -- Best Regards, Huang, Ying
© 2016 - 2024 Red Hat, Inc.