[PATCH] cxl/memdev: Avoid mailbox functionality on device memory CXL devices

Ira Weiny posted 1 patch 2 years, 6 months ago
drivers/cxl/core/mbox.c   |  9 +++++++++
drivers/cxl/core/memdev.c | 26 ++++++++++++++++++++++++++
drivers/cxl/mem.c         | 18 ++++++++++--------
drivers/cxl/pci.c         |  5 ++++-
drivers/cxl/pmem.c        |  3 +++
5 files changed, 52 insertions(+), 9 deletions(-)
[PATCH] cxl/memdev: Avoid mailbox functionality on device memory CXL devices
Posted by Ira Weiny 2 years, 6 months ago
Using the proposed type-2 cxl-test device[1] the following
splat was observed:

  BUG: kernel NULL pointer dereference, address: 0000000000000278
  [...]
  RIP: 0010:devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]
  [...]
  Call Trace:
   <TASK>
   ? __die+0x1f/0x70
   ? page_fault_oops+0x149/0x420
   ? fixup_exception+0x22/0x310
   ? kernelmode_fixup_or_oops+0x84/0x110
   ? exc_page_fault+0x6d/0x150
   ? asm_exc_page_fault+0x22/0x30
   ? devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]
   cxl_mock_mem_probe+0x632/0x870 [cxl_mock_mem]
   platform_probe+0x40/0x90
   really_probe+0x19e/0x3e0
   ? __pfx___driver_attach+0x10/0x10
   __driver_probe_device+0x78/0x160
   driver_probe_device+0x1f/0x90
   __driver_attach+0xce/0x1c0
   bus_for_each_dev+0x63/0xa0
   bus_add_driver+0x112/0x210
   driver_register+0x55/0x100
   ? __pfx_cxl_mock_mem_driver_init+0x10/0x10 [cxl_mock_mem]
   [...]

Commit f6b8ab32e3ec made the mailbox functionality optional.  However,
some mailbox functionality was merged after that patch.  Therefore some
mailbox functionality can be accessed on a device which did not set up
the mailbox.

While no devices currently exist, commit f6b8ab32e3ec is incomplete.
Complete the checks for memdev state to bring the code to a consistent
state for when type-2 devices are introduced.

[1] https://lore.kernel.org/all/168592160379.1948938.12863272903570476312.stgit@dwillia2-xfh.jf.intel.com/

Fixes: f6b8ab32e3ec ("cxl/memdev: Make mailbox functionality optional")
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/cxl/core/mbox.c   |  9 +++++++++
 drivers/cxl/core/memdev.c | 26 ++++++++++++++++++++++++++
 drivers/cxl/mem.c         | 18 ++++++++++--------
 drivers/cxl/pci.c         |  5 ++++-
 drivers/cxl/pmem.c        |  3 +++
 5 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index d6d067fbee97..eb1758fb8cdf 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -482,6 +482,9 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
 
 	dev_dbg(dev, "Query IOCTL\n");
 
+	if (!mds)
+		return -EIO;
+
 	if (get_user(n_commands, &q->n_commands))
 		return -EFAULT;
 
@@ -586,6 +589,9 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
 
 	dev_dbg(dev, "Send IOCTL\n");
 
+	if (!mds)
+		return -EIO;
+
 	if (copy_from_user(&send, s, sizeof(send)))
 		return -EFAULT;
 
@@ -1245,6 +1251,9 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
 	int nr_records = 0;
 	int rc;
 
+	if (!mds)
+		return -EIO;
+
 	rc = mutex_lock_interruptible(&mds->poison.lock);
 	if (rc)
 		return rc;
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index f99e7ec3cc40..629e479f751b 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -201,6 +201,19 @@ static ssize_t security_erase_store(struct device *dev,
 static struct device_attribute dev_attr_security_erase =
 	__ATTR(erase, 0200, NULL, security_erase_store);
 
+static umode_t cxl_memdev_security_visible(struct kobject *kobj,
+					   struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+
+	if (!mds)
+		return 0;
+
+	return a->mode;
+}
+
 static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
 {
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
@@ -332,6 +345,9 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
 	struct cxl_region *cxlr;
 	int rc;
 
+	if (!mds)
+		return -EIO;
+
 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
 		return 0;
 
@@ -380,6 +396,9 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
 	struct cxl_region *cxlr;
 	int rc;
 
+	if (!mds)
+		return -EIO;
+
 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
 		return 0;
 
@@ -480,6 +499,7 @@ static struct attribute_group cxl_memdev_pmem_attribute_group = {
 static struct attribute_group cxl_memdev_security_attribute_group = {
 	.name = "security",
 	.attrs = cxl_memdev_security_attributes,
+	.is_visible = cxl_memdev_security_visible,
 };
 
 static const struct attribute_group *cxl_memdev_attribute_groups[] = {
@@ -542,6 +562,9 @@ static void cxl_memdev_security_shutdown(struct device *dev)
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 
+	if (!mds)
+		return;
+
 	if (mds->security.poll)
 		cancel_delayed_work_sync(&mds->security.poll_dwork);
 }
@@ -997,6 +1020,9 @@ static int cxl_memdev_security_init(struct cxl_memdev *cxlmd)
 	struct device *dev = &cxlmd->dev;
 	struct kernfs_node *sec;
 
+	if (!mds)
+		return 0;
+
 	sec = sysfs_get_dirent(dev->kobj.sd, "security");
 	if (!sec) {
 		dev_err(dev, "sysfs_get_dirent 'security' failed\n");
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 317c7548e4e9..4755a890018d 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -132,12 +132,14 @@ static int cxl_mem_probe(struct device *dev)
 	dentry = cxl_debugfs_create_dir(dev_name(dev));
 	debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show);
 
-	if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds))
-		debugfs_create_file("inject_poison", 0200, dentry, cxlmd,
-				    &cxl_poison_inject_fops);
-	if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds))
-		debugfs_create_file("clear_poison", 0200, dentry, cxlmd,
-				    &cxl_poison_clear_fops);
+	if (mds) {
+		if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds))
+			debugfs_create_file("inject_poison", 0200, dentry, cxlmd,
+					    &cxl_poison_inject_fops);
+		if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds))
+			debugfs_create_file("clear_poison", 0200, dentry, cxlmd,
+					    &cxl_poison_clear_fops);
+	}
 
 	rc = devm_add_action_or_reset(dev, remove_debugfs, dentry);
 	if (rc)
@@ -222,8 +224,8 @@ static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
 		struct cxl_memdev_state *mds =
 			to_cxl_memdev_state(cxlmd->cxlds);
 
-		if (!test_bit(CXL_POISON_ENABLED_LIST,
-			      mds->poison.enabled_cmds))
+		if (!mds || !test_bit(CXL_POISON_ENABLED_LIST,
+				      mds->poison.enabled_cmds))
 			return 0;
 	}
 	return a->mode;
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 1cb1494c28fe..93f6140432cd 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -122,7 +122,7 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
 	struct cxl_dev_state *cxlds = dev_id->cxlds;
 	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 
-	if (!cxl_mbox_background_complete(cxlds))
+	if (!mds || !cxl_mbox_background_complete(cxlds))
 		return IRQ_NONE;
 
 	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
@@ -624,6 +624,9 @@ static irqreturn_t cxl_event_thread(int irq, void *id)
 	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 	u32 status;
 
+	if (!mds)
+		return IRQ_HANDLED;
+
 	do {
 		/*
 		 * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 7cb8994f8809..f1adfdd1a2b3 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -70,6 +70,9 @@ static int cxl_nvdimm_probe(struct device *dev)
 	struct nvdimm *nvdimm;
 	int rc;
 
+	if (WARN_ON_ONCE(!mds))
+		return -EIO;
+
 	set_exclusive_cxl_commands(mds, exclusive_cmds);
 	rc = devm_add_action_or_reset(dev, clear_exclusive, mds);
 	if (rc)

---
base-commit: 20ea1e7d13c1b544fe67c4a8dc3943bb1ab33e6f
change-id: 20230728-cxl-fix-devmemdev-5003ce927f68

Best regards,
-- 
Ira Weiny <ira.weiny@intel.com>
RE: [PATCH] cxl/memdev: Avoid mailbox functionality on device memory CXL devices
Posted by Dan Williams 2 years, 6 months ago
Ira Weiny wrote:
> Using the proposed type-2 cxl-test device[1] the following
> splat was observed:
> 
>   BUG: kernel NULL pointer dereference, address: 0000000000000278
>   [...]
>   RIP: 0010:devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]

It would be useful to decode this to a line number, the rest of this
call trace is not adding much.

>   [...]
>   Call Trace:
>    <TASK>
>    ? __die+0x1f/0x70
>    ? page_fault_oops+0x149/0x420
>    ? fixup_exception+0x22/0x310
>    ? kernelmode_fixup_or_oops+0x84/0x110
>    ? exc_page_fault+0x6d/0x150
>    ? asm_exc_page_fault+0x22/0x30
>    ? devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]
>    cxl_mock_mem_probe+0x632/0x870 [cxl_mock_mem]
>    platform_probe+0x40/0x90
>    really_probe+0x19e/0x3e0
>    ? __pfx___driver_attach+0x10/0x10
>    __driver_probe_device+0x78/0x160
>    driver_probe_device+0x1f/0x90
>    __driver_attach+0xce/0x1c0
>    bus_for_each_dev+0x63/0xa0
>    bus_add_driver+0x112/0x210
>    driver_register+0x55/0x100
>    ? __pfx_cxl_mock_mem_driver_init+0x10/0x10 [cxl_mock_mem]
>    [...]
> 
> Commit f6b8ab32e3ec made the mailbox functionality optional.  However,
> some mailbox functionality was merged after that patch.  Therefore some
> mailbox functionality can be accessed on a device which did not set up
> the mailbox.

cxl_memdev_security_init() definitely needs to move out of
devm_cxl_add_memdev() and after that I do not think @mds NULL checks
need to be sprinkled everywhere. In other words something is wrong at a
higher level if we get into some of these helper functions without the
memory device state.

So definitely this uncovered a problem where cxl_memdev_security_init()
needs to move, but the rest of the mds NULL checks need clear
reproduction scenarios and expect most of them are precluded higher in
the call stack.
Re: [PATCH] cxl/memdev: Avoid mailbox functionality on device memory CXL devices
Posted by Davidlohr Bueso 2 years, 6 months ago
On Fri, 28 Jul 2023, Dan Williams wrote:

>Ira Weiny wrote:
>> Using the proposed type-2 cxl-test device[1] the following
>> splat was observed:
>>
>>   BUG: kernel NULL pointer dereference, address: 0000000000000278
>>   [...]
>>   RIP: 0010:devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]
>
>It would be useful to decode this to a line number, the rest of this
>call trace is not adding much.
>
>>   [...]
>>   Call Trace:
>>    <TASK>
>>    ? __die+0x1f/0x70
>>    ? page_fault_oops+0x149/0x420
>>    ? fixup_exception+0x22/0x310
>>    ? kernelmode_fixup_or_oops+0x84/0x110
>>    ? exc_page_fault+0x6d/0x150
>>    ? asm_exc_page_fault+0x22/0x30
>>    ? devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]
>>    cxl_mock_mem_probe+0x632/0x870 [cxl_mock_mem]
>>    platform_probe+0x40/0x90
>>    really_probe+0x19e/0x3e0
>>    ? __pfx___driver_attach+0x10/0x10
>>    __driver_probe_device+0x78/0x160
>>    driver_probe_device+0x1f/0x90
>>    __driver_attach+0xce/0x1c0
>>    bus_for_each_dev+0x63/0xa0
>>    bus_add_driver+0x112/0x210
>>    driver_register+0x55/0x100
>>    ? __pfx_cxl_mock_mem_driver_init+0x10/0x10 [cxl_mock_mem]
>>    [...]
>>
>> Commit f6b8ab32e3ec made the mailbox functionality optional.  However,
>> some mailbox functionality was merged after that patch.  Therefore some
>> mailbox functionality can be accessed on a device which did not set up
>> the mailbox.
>
>cxl_memdev_security_init() definitely needs to move out of
>devm_cxl_add_memdev() and after that I do not think @mds NULL checks
>need to be sprinkled everywhere. In other words something is wrong at a
>higher level if we get into some of these helper functions without the
>memory device state.

Right, so we can move it directly into cxl_pci_probe() - just as with other
mbox based functionality. This leaves me wondering, however, what to do about
the cxl_memdev_security_shutdown() counterpart. As with the below diff, leaving
it as is and just adding a mds nil check might still be considering a layering
violation in that it would be asymmetrical wrt to the init; but this is tightly
coupled with cxl_memdev_unregister().

Ira does the below fix the crash?

Thanks,
Davidlohr

----8<-------
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 14b547c07f54..4d1bf80c0e54 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -561,7 +561,7 @@ static void cxl_memdev_security_shutdown(struct device *dev)
  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
  	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
  
-	if (mds->security.poll)
+	if (mds && mds->security.poll)
  		cancel_delayed_work_sync(&mds->security.poll_dwork);
  }
  
@@ -1009,11 +1009,11 @@ static void put_sanitize(void *data)
  	sysfs_put(mds->security.sanitize_node);
  }
  
-static int cxl_memdev_security_init(struct cxl_memdev *cxlmd)
+int cxl_memdev_security_state_init(struct cxl_memdev_state *mds)
  {
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
-	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
-	struct device *dev = &cxlmd->dev;
+
+	struct cxl_dev_state *cxlds = &mds->cxlds;
+	struct device *dev = &cxlds->cxlmd->dev;
  	struct kernfs_node *sec;
  
  	sec = sysfs_get_dirent(dev->kobj.sd, "security");
@@ -1029,7 +1029,8 @@ static int cxl_memdev_security_init(struct cxl_memdev *cxlmd)
  	}
  
  	return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds);
- }
+}
+EXPORT_SYMBOL_NS_GPL(cxl_memdev_security_state_init, CXL);
  
  struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
  {
@@ -1059,10 +1060,6 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
  	if (rc)
  		goto err;
  
-	rc = cxl_memdev_security_init(cxlmd);
-	if (rc)
-		goto err;
-
  	rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
  	if (rc)
  		return ERR_PTR(rc);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index f86afef90c91..441270770519 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -884,6 +884,7 @@ static inline void cxl_mem_active_dec(void)
  #endif
  
  int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd);
+int cxl_memdev_security_state_init(struct cxl_memdev_state *mds);
  
  struct cxl_hdm {
  	struct cxl_component_regs regs;
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 1cb1494c28fe..5242dbf0044d 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -887,6 +887,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  	if (IS_ERR(cxlmd))
  		return PTR_ERR(cxlmd);
  
+	rc = cxl_memdev_security_state_init(mds);
+	if (rc)
+		return rc;
+
  	rc = cxl_memdev_setup_fw_upload(mds);
  	if (rc)
  		return rc;
Re: [PATCH] cxl/memdev: Avoid mailbox functionality on device memory CXL devices
Posted by Ira Weiny 2 years, 6 months ago
Davidlohr Bueso wrote:
> On Fri, 28 Jul 2023, Dan Williams wrote:
> 
> >Ira Weiny wrote:
> >> Using the proposed type-2 cxl-test device[1] the following
> >> splat was observed:
> >>
> >>   BUG: kernel NULL pointer dereference, address: 0000000000000278
> >>   [...]
> >>   RIP: 0010:devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]
> >
> >It would be useful to decode this to a line number, the rest of this
> >call trace is not adding much.
> >
> >>   [...]
> >>   Call Trace:
> >>    <TASK>
> >>    ? __die+0x1f/0x70
> >>    ? page_fault_oops+0x149/0x420
> >>    ? fixup_exception+0x22/0x310
> >>    ? kernelmode_fixup_or_oops+0x84/0x110
> >>    ? exc_page_fault+0x6d/0x150
> >>    ? asm_exc_page_fault+0x22/0x30
> >>    ? devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]
> >>    cxl_mock_mem_probe+0x632/0x870 [cxl_mock_mem]
> >>    platform_probe+0x40/0x90
> >>    really_probe+0x19e/0x3e0
> >>    ? __pfx___driver_attach+0x10/0x10
> >>    __driver_probe_device+0x78/0x160
> >>    driver_probe_device+0x1f/0x90
> >>    __driver_attach+0xce/0x1c0
> >>    bus_for_each_dev+0x63/0xa0
> >>    bus_add_driver+0x112/0x210
> >>    driver_register+0x55/0x100
> >>    ? __pfx_cxl_mock_mem_driver_init+0x10/0x10 [cxl_mock_mem]
> >>    [...]
> >>
> >> Commit f6b8ab32e3ec made the mailbox functionality optional.  However,
> >> some mailbox functionality was merged after that patch.  Therefore some
> >> mailbox functionality can be accessed on a device which did not set up
> >> the mailbox.
> >
> >cxl_memdev_security_init() definitely needs to move out of
> >devm_cxl_add_memdev() and after that I do not think @mds NULL checks
> >need to be sprinkled everywhere. In other words something is wrong at a
> >higher level if we get into some of these helper functions without the
> >memory device state.
> 
> Right, so we can move it directly into cxl_pci_probe() - just as with other
> mbox based functionality. This leaves me wondering, however, what to do about
> the cxl_memdev_security_shutdown() counterpart. As with the below diff, leaving
> it as is and just adding a mds nil check might still be considering a layering
> violation in that it would be asymmetrical wrt to the init; but this is tightly
> coupled with cxl_memdev_unregister().
> 
> Ira does the below fix the crash?

I had to apply it by hand but yea it fixes the immediate crash.

Did you want to submit that as part of other work?

Ira

> 
> Thanks,
> Davidlohr
> 
> ----8<-------
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index 14b547c07f54..4d1bf80c0e54 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -561,7 +561,7 @@ static void cxl_memdev_security_shutdown(struct device *dev)
>   	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>   	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
>   
> -	if (mds->security.poll)
> +	if (mds && mds->security.poll)
>   		cancel_delayed_work_sync(&mds->security.poll_dwork);
>   }
>   
> @@ -1009,11 +1009,11 @@ static void put_sanitize(void *data)
>   	sysfs_put(mds->security.sanitize_node);
>   }
>   
> -static int cxl_memdev_security_init(struct cxl_memdev *cxlmd)
> +int cxl_memdev_security_state_init(struct cxl_memdev_state *mds)
>   {
> -	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> -	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
> -	struct device *dev = &cxlmd->dev;
> +
> +	struct cxl_dev_state *cxlds = &mds->cxlds;
> +	struct device *dev = &cxlds->cxlmd->dev;
>   	struct kernfs_node *sec;
>   
>   	sec = sysfs_get_dirent(dev->kobj.sd, "security");
> @@ -1029,7 +1029,8 @@ static int cxl_memdev_security_init(struct cxl_memdev *cxlmd)
>   	}
>   
>   	return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds);
> - }
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_memdev_security_state_init, CXL);
>   
>   struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
>   {
> @@ -1059,10 +1060,6 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
>   	if (rc)
>   		goto err;
>   
> -	rc = cxl_memdev_security_init(cxlmd);
> -	if (rc)
> -		goto err;
> -
>   	rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
>   	if (rc)
>   		return ERR_PTR(rc);
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index f86afef90c91..441270770519 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -884,6 +884,7 @@ static inline void cxl_mem_active_dec(void)
>   #endif
>   
>   int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd);
> +int cxl_memdev_security_state_init(struct cxl_memdev_state *mds);
>   
>   struct cxl_hdm {
>   	struct cxl_component_regs regs;
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 1cb1494c28fe..5242dbf0044d 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -887,6 +887,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>   	if (IS_ERR(cxlmd))
>   		return PTR_ERR(cxlmd);
>   
> +	rc = cxl_memdev_security_state_init(mds);
> +	if (rc)
> +		return rc;
> +
>   	rc = cxl_memdev_setup_fw_upload(mds);
>   	if (rc)
>   		return rc;
Re: [PATCH] cxl/memdev: Avoid mailbox functionality on device memory CXL devices
Posted by Dave Jiang 2 years, 6 months ago

On 7/28/23 16:00, Ira Weiny wrote:
> Using the proposed type-2 cxl-test device[1] the following
> splat was observed:
> 
>    BUG: kernel NULL pointer dereference, address: 0000000000000278
>    [...]
>    RIP: 0010:devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]
>    [...]
>    Call Trace:
>     <TASK>
>     ? __die+0x1f/0x70
>     ? page_fault_oops+0x149/0x420
>     ? fixup_exception+0x22/0x310
>     ? kernelmode_fixup_or_oops+0x84/0x110
>     ? exc_page_fault+0x6d/0x150
>     ? asm_exc_page_fault+0x22/0x30
>     ? devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]
>     cxl_mock_mem_probe+0x632/0x870 [cxl_mock_mem]
>     platform_probe+0x40/0x90
>     really_probe+0x19e/0x3e0
>     ? __pfx___driver_attach+0x10/0x10
>     __driver_probe_device+0x78/0x160
>     driver_probe_device+0x1f/0x90
>     __driver_attach+0xce/0x1c0
>     bus_for_each_dev+0x63/0xa0
>     bus_add_driver+0x112/0x210
>     driver_register+0x55/0x100
>     ? __pfx_cxl_mock_mem_driver_init+0x10/0x10 [cxl_mock_mem]
>     [...]
> 
> Commit f6b8ab32e3ec made the mailbox functionality optional.  However,
> some mailbox functionality was merged after that patch.  Therefore some
> mailbox functionality can be accessed on a device which did not set up
> the mailbox.
> 
> While no devices currently exist, commit f6b8ab32e3ec is incomplete.
> Complete the checks for memdev state to bring the code to a consistent
> state for when type-2 devices are introduced.
> 
> [1] https://lore.kernel.org/all/168592160379.1948938.12863272903570476312.stgit@dwillia2-xfh.jf.intel.com/
> 
> Fixes: f6b8ab32e3ec ("cxl/memdev: Make mailbox functionality optional")
> Cc: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>


I think you'll need to coordinate with this patch from Davidlohr?
https://lore.kernel.org/linux-cxl/20230726051940.3570-4-dave@stgolabs.net/



> ---
>   drivers/cxl/core/mbox.c   |  9 +++++++++
>   drivers/cxl/core/memdev.c | 26 ++++++++++++++++++++++++++
>   drivers/cxl/mem.c         | 18 ++++++++++--------
>   drivers/cxl/pci.c         |  5 ++++-
>   drivers/cxl/pmem.c        |  3 +++
>   5 files changed, 52 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index d6d067fbee97..eb1758fb8cdf 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -482,6 +482,9 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
>   
>   	dev_dbg(dev, "Query IOCTL\n");
>   
> +	if (!mds)
> +		return -EIO;
> +
>   	if (get_user(n_commands, &q->n_commands))
>   		return -EFAULT;
>   
> @@ -586,6 +589,9 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
>   
>   	dev_dbg(dev, "Send IOCTL\n");
>   
> +	if (!mds)
> +		return -EIO;
> +
>   	if (copy_from_user(&send, s, sizeof(send)))
>   		return -EFAULT;
>   
> @@ -1245,6 +1251,9 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
>   	int nr_records = 0;
>   	int rc;
>   
> +	if (!mds)
> +		return -EIO;
> +
>   	rc = mutex_lock_interruptible(&mds->poison.lock);
>   	if (rc)
>   		return rc;
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index f99e7ec3cc40..629e479f751b 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -201,6 +201,19 @@ static ssize_t security_erase_store(struct device *dev,
>   static struct device_attribute dev_attr_security_erase =
>   	__ATTR(erase, 0200, NULL, security_erase_store);
>   
> +static umode_t cxl_memdev_security_visible(struct kobject *kobj,
> +					   struct attribute *a, int n)
> +{
> +	struct device *dev = kobj_to_dev(kobj);
> +	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> +	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
> +
> +	if (!mds)
> +		return 0;
> +
> +	return a->mode;
> +}
> +
>   static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
>   {
>   	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> @@ -332,6 +345,9 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
>   	struct cxl_region *cxlr;
>   	int rc;
>   
> +	if (!mds)
> +		return -EIO;
> +
>   	if (!IS_ENABLED(CONFIG_DEBUG_FS))
>   		return 0;
>   
> @@ -380,6 +396,9 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
>   	struct cxl_region *cxlr;
>   	int rc;
>   
> +	if (!mds)
> +		return -EIO;
> +
>   	if (!IS_ENABLED(CONFIG_DEBUG_FS))
>   		return 0;
>   
> @@ -480,6 +499,7 @@ static struct attribute_group cxl_memdev_pmem_attribute_group = {
>   static struct attribute_group cxl_memdev_security_attribute_group = {
>   	.name = "security",
>   	.attrs = cxl_memdev_security_attributes,
> +	.is_visible = cxl_memdev_security_visible,
>   };
>   
>   static const struct attribute_group *cxl_memdev_attribute_groups[] = {
> @@ -542,6 +562,9 @@ static void cxl_memdev_security_shutdown(struct device *dev)
>   	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>   	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
>   
> +	if (!mds)
> +		return;
> +
>   	if (mds->security.poll)
>   		cancel_delayed_work_sync(&mds->security.poll_dwork);
>   }
> @@ -997,6 +1020,9 @@ static int cxl_memdev_security_init(struct cxl_memdev *cxlmd)
>   	struct device *dev = &cxlmd->dev;
>   	struct kernfs_node *sec;
>   
> +	if (!mds)
> +		return 0;
> +
>   	sec = sysfs_get_dirent(dev->kobj.sd, "security");
>   	if (!sec) {
>   		dev_err(dev, "sysfs_get_dirent 'security' failed\n");
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index 317c7548e4e9..4755a890018d 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -132,12 +132,14 @@ static int cxl_mem_probe(struct device *dev)
>   	dentry = cxl_debugfs_create_dir(dev_name(dev));
>   	debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show);
>   
> -	if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds))
> -		debugfs_create_file("inject_poison", 0200, dentry, cxlmd,
> -				    &cxl_poison_inject_fops);
> -	if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds))
> -		debugfs_create_file("clear_poison", 0200, dentry, cxlmd,
> -				    &cxl_poison_clear_fops);
> +	if (mds) {
> +		if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds))
> +			debugfs_create_file("inject_poison", 0200, dentry, cxlmd,
> +					    &cxl_poison_inject_fops);
> +		if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds))
> +			debugfs_create_file("clear_poison", 0200, dentry, cxlmd,
> +					    &cxl_poison_clear_fops);
> +	}
>   
>   	rc = devm_add_action_or_reset(dev, remove_debugfs, dentry);
>   	if (rc)
> @@ -222,8 +224,8 @@ static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
>   		struct cxl_memdev_state *mds =
>   			to_cxl_memdev_state(cxlmd->cxlds);
>   
> -		if (!test_bit(CXL_POISON_ENABLED_LIST,
> -			      mds->poison.enabled_cmds))
> +		if (!mds || !test_bit(CXL_POISON_ENABLED_LIST,
> +				      mds->poison.enabled_cmds))
>   			return 0;
>   	}
>   	return a->mode;
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 1cb1494c28fe..93f6140432cd 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -122,7 +122,7 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
>   	struct cxl_dev_state *cxlds = dev_id->cxlds;
>   	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
>   
> -	if (!cxl_mbox_background_complete(cxlds))
> +	if (!mds || !cxl_mbox_background_complete(cxlds))
>   		return IRQ_NONE;
>   
>   	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
> @@ -624,6 +624,9 @@ static irqreturn_t cxl_event_thread(int irq, void *id)
>   	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
>   	u32 status;
>   
> +	if (!mds)
> +		return IRQ_HANDLED;
> +
>   	do {
>   		/*
>   		 * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
> diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
> index 7cb8994f8809..f1adfdd1a2b3 100644
> --- a/drivers/cxl/pmem.c
> +++ b/drivers/cxl/pmem.c
> @@ -70,6 +70,9 @@ static int cxl_nvdimm_probe(struct device *dev)
>   	struct nvdimm *nvdimm;
>   	int rc;
>   
> +	if (WARN_ON_ONCE(!mds))
> +		return -EIO;
> +
>   	set_exclusive_cxl_commands(mds, exclusive_cmds);
>   	rc = devm_add_action_or_reset(dev, clear_exclusive, mds);
>   	if (rc)
> 
> ---
> base-commit: 20ea1e7d13c1b544fe67c4a8dc3943bb1ab33e6f
> change-id: 20230728-cxl-fix-devmemdev-5003ce927f68
> 
> Best regards,