drivers/pps/pps.c | 83 ++++++++++++++++++++------------------ include/linux/pps_kernel.h | 1 - 2 files changed, 44 insertions(+), 40 deletions(-)
On a board running ntpd and gpsd, I'm seeing a consistent use-after-free
in sys_exit() from gpsd when rebooting:
pps pps1: removed
------------[ cut here ]------------
kobject: '(null)' (00000000db4bec24): is not initialized, yet kobject_put() is being called.
WARNING: CPU: 2 PID: 440 at lib/kobject.c:734 kobject_put+0x120/0x150
CPU: 2 UID: 299 PID: 440 Comm: gpsd Not tainted 6.11.0-rc6-00308-gb31c44928842 #1
Hardware name: Raspberry Pi 4 Model B Rev 1.1 (DT)
pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : kobject_put+0x120/0x150
lr : kobject_put+0x120/0x150
sp : ffffffc0803d3ae0
x29: ffffffc0803d3ae0 x28: ffffff8042dc9738 x27: 0000000000000001
x26: 0000000000000000 x25: ffffff8042dc9040 x24: ffffff8042dc9440
x23: ffffff80402a4620 x22: ffffff8042ef4bd0 x21: ffffff80405cb600
x20: 000000000008001b x19: ffffff8040b3b6e0 x18: 0000000000000000
x17: 0000000000000000 x16: 0000000000000000 x15: 696e6920746f6e20
x14: 7369203a29343263 x13: 205d303434542020 x12: 0000000000000000
x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000
x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000
x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000
x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000
Call trace:
kobject_put+0x120/0x150
cdev_put+0x20/0x3c
__fput+0x2c4/0x2d8
____fput+0x1c/0x38
task_work_run+0x70/0xfc
do_exit+0x2a0/0x924
do_group_exit+0x34/0x90
get_signal+0x7fc/0x8c0
do_signal+0x128/0x13b4
do_notify_resume+0xdc/0x160
el0_svc+0xd4/0xf8
el0t_64_sync_handler+0x140/0x14c
el0t_64_sync+0x190/0x194
---[ end trace 0000000000000000 ]---
...followed by more symptoms of corruption, with similar stacks:
refcount_t: underflow; use-after-free.
kernel BUG at lib/list_debug.c:62!
Kernel panic - not syncing: Oops - BUG: Fatal exception
This happens because pps_device_destruct() frees the pps_device with the
embedded cdev immediately after calling cdev_del(), but, as the comment
above cdev_del() notes, fops for previously opened cdevs are still
callable even after cdev_del() returns. I think this bug has always
been there: I can't explain why it suddenly started happening every time
I reboot this particular board.
In commit d953e0e837e6 ("pps: Fix a use-after free bug when
unregistering a source."), George Spelvin suggested removing the
embedded cdev. That seems like the simplest way to fix this, so I've
implemented his suggestion, with pps_idr becoming the source of truth
for which minor corresponds to which device.
But now that pps_idr defines userspace visibility instead of cdev_add(),
we need to be sure the pps->dev kobject refcount can't reach zero while
userspace can still find it again. So, the idr_remove() call moves to
pps_unregister_cdev(), and pps_idr now holds a reference to the pps->dev
kobject.
pps_core: source serial1 got cdev (251:1)
<...>
pps pps1: removed
pps_core: unregistering pps1
pps_core: deallocating pps1
Fixes: d953e0e837e6 ("pps: Fix a use-after free bug when unregistering a source.")
Cc: stable@vger.kernel.org
Signed-off-by: Calvin Owens <calvin@wbinvd.org>
---
Changes in v2:
- Don't move pr_debug() from pps_device_destruct() to pps_unregister_cdev()
- Actually add stable@vger.kernel.org to CC
---
drivers/pps/pps.c | 83 ++++++++++++++++++++------------------
include/linux/pps_kernel.h | 1 -
2 files changed, 44 insertions(+), 40 deletions(-)
diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c
index 5d19baae6a38..6980ab17f314 100644
--- a/drivers/pps/pps.c
+++ b/drivers/pps/pps.c
@@ -25,7 +25,7 @@
* Local variables
*/
-static dev_t pps_devt;
+static int pps_major;
static struct class *pps_class;
static DEFINE_MUTEX(pps_idr_lock);
@@ -296,19 +296,35 @@ static long pps_cdev_compat_ioctl(struct file *file,
#define pps_cdev_compat_ioctl NULL
#endif
+static struct pps_device *pps_idr_get(unsigned long id)
+{
+ struct pps_device *pps;
+
+ mutex_lock(&pps_idr_lock);
+ pps = idr_find(&pps_idr, id);
+ if (pps)
+ kobject_get(&pps->dev->kobj);
+
+ mutex_unlock(&pps_idr_lock);
+ return pps;
+}
+
static int pps_cdev_open(struct inode *inode, struct file *file)
{
- struct pps_device *pps = container_of(inode->i_cdev,
- struct pps_device, cdev);
+ struct pps_device *pps = pps_idr_get(iminor(inode));
+
+ if (!pps)
+ return -ENODEV;
+
file->private_data = pps;
- kobject_get(&pps->dev->kobj);
return 0;
}
static int pps_cdev_release(struct inode *inode, struct file *file)
{
- struct pps_device *pps = container_of(inode->i_cdev,
- struct pps_device, cdev);
+ struct pps_device *pps = file->private_data;
+
+ WARN_ON(pps->id != iminor(inode));
kobject_put(&pps->dev->kobj);
return 0;
}
@@ -332,14 +348,7 @@ static void pps_device_destruct(struct device *dev)
{
struct pps_device *pps = dev_get_drvdata(dev);
- cdev_del(&pps->cdev);
-
- /* Now we can release the ID for re-use */
pr_debug("deallocating pps%d\n", pps->id);
- mutex_lock(&pps_idr_lock);
- idr_remove(&pps_idr, pps->id);
- mutex_unlock(&pps_idr_lock);
-
kfree(dev);
kfree(pps);
}
@@ -364,39 +373,26 @@ int pps_register_cdev(struct pps_device *pps)
goto out_unlock;
}
pps->id = err;
- mutex_unlock(&pps_idr_lock);
- devt = MKDEV(MAJOR(pps_devt), pps->id);
-
- cdev_init(&pps->cdev, &pps_cdev_fops);
- pps->cdev.owner = pps->info.owner;
-
- err = cdev_add(&pps->cdev, devt, 1);
- if (err) {
- pr_err("%s: failed to add char device %d:%d\n",
- pps->info.name, MAJOR(pps_devt), pps->id);
- goto free_idr;
- }
+ devt = MKDEV(pps_major, pps->id);
pps->dev = device_create(pps_class, pps->info.dev, devt, pps,
"pps%d", pps->id);
if (IS_ERR(pps->dev)) {
err = PTR_ERR(pps->dev);
- goto del_cdev;
+ goto free_idr;
}
/* Override the release function with our own */
pps->dev->release = pps_device_destruct;
- pr_debug("source %s got cdev (%d:%d)\n", pps->info.name,
- MAJOR(pps_devt), pps->id);
+ pr_debug("source %s got cdev (%d:%d)\n", pps->info.name, pps_major,
+ pps->id);
+ kobject_get(&pps->dev->kobj);
+ mutex_unlock(&pps_idr_lock);
return 0;
-del_cdev:
- cdev_del(&pps->cdev);
-
free_idr:
- mutex_lock(&pps_idr_lock);
idr_remove(&pps_idr, pps->id);
out_unlock:
mutex_unlock(&pps_idr_lock);
@@ -408,6 +404,12 @@ void pps_unregister_cdev(struct pps_device *pps)
pr_debug("unregistering pps%d\n", pps->id);
pps->lookup_cookie = NULL;
device_destroy(pps_class, pps->dev->devt);
+
+ /* Now we can release the ID for re-use */
+ mutex_lock(&pps_idr_lock);
+ idr_remove(&pps_idr, pps->id);
+ kobject_put(&pps->dev->kobj);
+ mutex_unlock(&pps_idr_lock);
}
/*
@@ -427,6 +429,11 @@ void pps_unregister_cdev(struct pps_device *pps)
* so that it will not be used again, even if the pps device cannot
* be removed from the idr due to pending references holding the minor
* number in use.
+ *
+ * Since pps_idr holds a reference to the kobject, the returned
+ * pps_device is guaranteed to be valid until pps_unregister_cdev() is
+ * called on it. But after calling pps_unregister_cdev(), it may be
+ * freed at any time.
*/
struct pps_device *pps_lookup_dev(void const *cookie)
{
@@ -449,13 +456,11 @@ EXPORT_SYMBOL(pps_lookup_dev);
static void __exit pps_exit(void)
{
class_destroy(pps_class);
- unregister_chrdev_region(pps_devt, PPS_MAX_SOURCES);
+ __unregister_chrdev(pps_major, 0, PPS_MAX_SOURCES, "pps");
}
static int __init pps_init(void)
{
- int err;
-
pps_class = class_create("pps");
if (IS_ERR(pps_class)) {
pr_err("failed to allocate class\n");
@@ -463,8 +468,9 @@ static int __init pps_init(void)
}
pps_class->dev_groups = pps_groups;
- err = alloc_chrdev_region(&pps_devt, 0, PPS_MAX_SOURCES, "pps");
- if (err < 0) {
+ pps_major = __register_chrdev(0, 0, PPS_MAX_SOURCES, "pps",
+ &pps_cdev_fops);
+ if (pps_major < 0) {
pr_err("failed to allocate char device region\n");
goto remove_class;
}
@@ -477,8 +483,7 @@ static int __init pps_init(void)
remove_class:
class_destroy(pps_class);
-
- return err;
+ return pps_major;
}
subsys_initcall(pps_init);
diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h
index 78c8ac4951b5..8ee312788118 100644
--- a/include/linux/pps_kernel.h
+++ b/include/linux/pps_kernel.h
@@ -56,7 +56,6 @@ struct pps_device {
unsigned int id; /* PPS source unique ID */
void const *lookup_cookie; /* For pps_lookup_dev() only */
- struct cdev cdev;
struct device *dev;
struct fasync_struct *async_queue; /* fasync method */
spinlock_t lock;
--
2.45.2
On Fri, Sep 13, 2024 at 05:24:29PM -0700, Calvin Owens wrote: > On a board running ntpd and gpsd, I'm seeing a consistent use-after-free > in sys_exit() from gpsd when rebooting: > > pps pps1: removed > ------------[ cut here ]------------ > kobject: '(null)' (00000000db4bec24): is not initialized, yet kobject_put() is being called. Something is wrong with the reference counting here... > WARNING: CPU: 2 PID: 440 at lib/kobject.c:734 kobject_put+0x120/0x150 > CPU: 2 UID: 299 PID: 440 Comm: gpsd Not tainted 6.11.0-rc6-00308-gb31c44928842 #1 > Hardware name: Raspberry Pi 4 Model B Rev 1.1 (DT) > pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) > pc : kobject_put+0x120/0x150 > lr : kobject_put+0x120/0x150 > sp : ffffffc0803d3ae0 > x29: ffffffc0803d3ae0 x28: ffffff8042dc9738 x27: 0000000000000001 > x26: 0000000000000000 x25: ffffff8042dc9040 x24: ffffff8042dc9440 > x23: ffffff80402a4620 x22: ffffff8042ef4bd0 x21: ffffff80405cb600 > x20: 000000000008001b x19: ffffff8040b3b6e0 x18: 0000000000000000 > x17: 0000000000000000 x16: 0000000000000000 x15: 696e6920746f6e20 > x14: 7369203a29343263 x13: 205d303434542020 x12: 0000000000000000 > x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 > x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 > x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 > x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 > Call trace: > kobject_put+0x120/0x150 > cdev_put+0x20/0x3c > __fput+0x2c4/0x2d8 > ____fput+0x1c/0x38 > task_work_run+0x70/0xfc > do_exit+0x2a0/0x924 > do_group_exit+0x34/0x90 > get_signal+0x7fc/0x8c0 > do_signal+0x128/0x13b4 > do_notify_resume+0xdc/0x160 > el0_svc+0xd4/0xf8 > el0t_64_sync_handler+0x140/0x14c > el0t_64_sync+0x190/0x194 > ---[ end trace 0000000000000000 ]--- > > ...followed by more symptoms of corruption, with similar stacks: > > refcount_t: underflow; use-after-free. > kernel BUG at lib/list_debug.c:62! > Kernel panic - not syncing: Oops - BUG: Fatal exception > > This happens because pps_device_destruct() frees the pps_device with the > embedded cdev immediately after calling cdev_del(), but, as the comment > above cdev_del() notes, fops for previously opened cdevs are still > callable even after cdev_del() returns. I think this bug has always > been there: I can't explain why it suddenly started happening every time > I reboot this particular board. > > In commit d953e0e837e6 ("pps: Fix a use-after free bug when > unregistering a source."), George Spelvin suggested removing the > embedded cdev. That seems like the simplest way to fix this, so I've > implemented his suggestion, with pps_idr becoming the source of truth > for which minor corresponds to which device. You remove it, but now the structure has no reference counting at all, so you should make it a real "struct device" not just containing a pointer to one. > But now that pps_idr defines userspace visibility instead of cdev_add(), > we need to be sure the pps->dev kobject refcount can't reach zero while > userspace can still find it again. So, the idr_remove() call moves to > pps_unregister_cdev(), and pps_idr now holds a reference to the pps->dev > kobject. An idr shouldn't be doing the reference counting here, the struct device should be doing it, right? > > pps_core: source serial1 got cdev (251:1) > <...> > pps pps1: removed > pps_core: unregistering pps1 > pps_core: deallocating pps1 > > Fixes: d953e0e837e6 ("pps: Fix a use-after free bug when unregistering a source.") > Cc: stable@vger.kernel.org > Signed-off-by: Calvin Owens <calvin@wbinvd.org> > --- > Changes in v2: > - Don't move pr_debug() from pps_device_destruct() to pps_unregister_cdev() > - Actually add stable@vger.kernel.org to CC > --- > drivers/pps/pps.c | 83 ++++++++++++++++++++------------------ > include/linux/pps_kernel.h | 1 - > 2 files changed, 44 insertions(+), 40 deletions(-) > > diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c > index 5d19baae6a38..6980ab17f314 100644 > --- a/drivers/pps/pps.c > +++ b/drivers/pps/pps.c > @@ -25,7 +25,7 @@ > * Local variables > */ > > -static dev_t pps_devt; > +static int pps_major; > static struct class *pps_class; > > static DEFINE_MUTEX(pps_idr_lock); > @@ -296,19 +296,35 @@ static long pps_cdev_compat_ioctl(struct file *file, > #define pps_cdev_compat_ioctl NULL > #endif > > +static struct pps_device *pps_idr_get(unsigned long id) > +{ > + struct pps_device *pps; > + > + mutex_lock(&pps_idr_lock); > + pps = idr_find(&pps_idr, id); > + if (pps) > + kobject_get(&pps->dev->kobj); A driver should never call "raw" kobject calls, this alone makes this not ok :( Please move the structure to be embedded in and then it should be simpler. thanks, greg k-h
Hi Greg, On Sunday 10/13 at 17:04 +0200, Greg Kroah-Hartman wrote: > On Fri, Sep 13, 2024 at 05:24:29PM -0700, Calvin Owens wrote: > > On a board running ntpd and gpsd, I'm seeing a consistent use-after-free > > in sys_exit() from gpsd when rebooting: > > > > pps pps1: removed > > ------------[ cut here ]------------ > > kobject: '(null)' (00000000db4bec24): is not initialized, yet kobject_put() is being called. > > Something is wrong with the reference counting here... > > > WARNING: CPU: 2 PID: 440 at lib/kobject.c:734 kobject_put+0x120/0x150 > > CPU: 2 UID: 299 PID: 440 Comm: gpsd Not tainted 6.11.0-rc6-00308-gb31c44928842 #1 > > Hardware name: Raspberry Pi 4 Model B Rev 1.1 (DT) > > pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) > > pc : kobject_put+0x120/0x150 > > lr : kobject_put+0x120/0x150 > > sp : ffffffc0803d3ae0 > > x29: ffffffc0803d3ae0 x28: ffffff8042dc9738 x27: 0000000000000001 > > x26: 0000000000000000 x25: ffffff8042dc9040 x24: ffffff8042dc9440 > > x23: ffffff80402a4620 x22: ffffff8042ef4bd0 x21: ffffff80405cb600 > > x20: 000000000008001b x19: ffffff8040b3b6e0 x18: 0000000000000000 > > x17: 0000000000000000 x16: 0000000000000000 x15: 696e6920746f6e20 > > x14: 7369203a29343263 x13: 205d303434542020 x12: 0000000000000000 > > x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 > > x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 > > x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 > > x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 > > Call trace: > > kobject_put+0x120/0x150 > > cdev_put+0x20/0x3c > > __fput+0x2c4/0x2d8 > > ____fput+0x1c/0x38 > > task_work_run+0x70/0xfc > > do_exit+0x2a0/0x924 > > do_group_exit+0x34/0x90 > > get_signal+0x7fc/0x8c0 > > do_signal+0x128/0x13b4 > > do_notify_resume+0xdc/0x160 > > el0_svc+0xd4/0xf8 > > el0t_64_sync_handler+0x140/0x14c > > el0t_64_sync+0x190/0x194 > > ---[ end trace 0000000000000000 ]--- > > > > ...followed by more symptoms of corruption, with similar stacks: > > > > refcount_t: underflow; use-after-free. > > kernel BUG at lib/list_debug.c:62! > > Kernel panic - not syncing: Oops - BUG: Fatal exception > > > > This happens because pps_device_destruct() frees the pps_device with the > > embedded cdev immediately after calling cdev_del(), but, as the comment > > above cdev_del() notes, fops for previously opened cdevs are still > > callable even after cdev_del() returns. I think this bug has always > > been there: I can't explain why it suddenly started happening every time > > I reboot this particular board. > > > > In commit d953e0e837e6 ("pps: Fix a use-after free bug when > > unregistering a source."), George Spelvin suggested removing the > > embedded cdev. That seems like the simplest way to fix this, so I've > > implemented his suggestion, with pps_idr becoming the source of truth > > for which minor corresponds to which device. > > You remove it, but now the structure has no reference counting at all, > so you should make it a real "struct device" not just containing a > pointer to one. It still uses the device refcount. I didn't change that, see the kobject_get() (which is confusing and should be get_device() as you note below) in pps_cdev_open() before my patch. > > But now that pps_idr defines userspace visibility instead of cdev_add(), > > we need to be sure the pps->dev kobject refcount can't reach zero while > > userspace can still find it again. So, the idr_remove() call moves to > > pps_unregister_cdev(), and pps_idr now holds a reference to the pps->dev > > kobject. > > An idr shouldn't be doing the reference counting here, the struct device > should be doing it, right? Right, I was trying to explain that it is now necessary to call get_device() to acquire a reference while the device minor is indexed by the idr. Before, the minor was deindexed in the idr in ->release(). But now, since it is visible the moment it is indexed by the idr, deindexing it in ->release() means this could happen during removal: CPU1 CPU2 --- --- pps_unregister_cdev() <...> sys_open() <...> idr_lookup() idr_remove() put_device() <--- ref goes to zero get_device() <--- BOOM pps_device_destruct() pps_idr_remove() By calling get_device() in pps_idr_get(), and by moving the idr deindexing from ->release() to pps_unregister_cdev(), we ensure that can't happen. > > > > pps_core: source serial1 got cdev (251:1) > > <...> > > pps pps1: removed > > pps_core: unregistering pps1 > > pps_core: deallocating pps1 > > > > Fixes: d953e0e837e6 ("pps: Fix a use-after free bug when unregistering a source.") > > Cc: stable@vger.kernel.org > > Signed-off-by: Calvin Owens <calvin@wbinvd.org> > > --- > > Changes in v2: > > - Don't move pr_debug() from pps_device_destruct() to pps_unregister_cdev() > > - Actually add stable@vger.kernel.org to CC > > --- > > drivers/pps/pps.c | 83 ++++++++++++++++++++------------------ > > include/linux/pps_kernel.h | 1 - > > 2 files changed, 44 insertions(+), 40 deletions(-) > > > > diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c > > index 5d19baae6a38..6980ab17f314 100644 > > --- a/drivers/pps/pps.c > > +++ b/drivers/pps/pps.c > > @@ -25,7 +25,7 @@ > > * Local variables > > */ > > > > -static dev_t pps_devt; > > +static int pps_major; > > static struct class *pps_class; > > > > static DEFINE_MUTEX(pps_idr_lock); > > @@ -296,19 +296,35 @@ static long pps_cdev_compat_ioctl(struct file *file, > > #define pps_cdev_compat_ioctl NULL > > #endif > > > > +static struct pps_device *pps_idr_get(unsigned long id) > > +{ > > + struct pps_device *pps; > > + > > + mutex_lock(&pps_idr_lock); > > + pps = idr_find(&pps_idr, id); > > + if (pps) > > + kobject_get(&pps->dev->kobj); > > A driver should never call "raw" kobject calls, this alone makes this > not ok :( Oh this is silly, sorry: it's simply open coding put_device() and get_device(), I'll fix that. > Please move the structure to be embedded in and then it should be > simpler. I actually tried to do that, but got hung up on the API: what's the right way to do what device_create() does for an embedded device struct? Today, the pps core does: pps = kzalloc(sizeof(*pps)); ...then later in pps_register_cdev(): idr_alloc(&newmin, ...); pps->dev = device_create(..., MKDEV(maj, newmin), ...); To embed the device struct, I guess this would become something like: pps = kzalloc(sizeof(*pps)); device_initialize(&pps->dev); idr_alloc(&newmin, ...); /* ??? */ device_register(&pps->dev); ...but the question marks are what I'm not seeing: what's the proper way to accomplish what device_create() does, but in an embedded dev struct? Obviously I could just copy device_create(), but I'm sure that's not what you want. Thanks, Calvin > thanks, > > greg k-h
On a board running ntpd and gpsd, I'm seeing a consistent use-after-free
in sys_exit() from gpsd when rebooting:
pps pps1: removed
------------[ cut here ]------------
kobject: '(null)' (00000000db4bec24): is not initialized, yet kobject_put() is being called.
WARNING: CPU: 2 PID: 440 at lib/kobject.c:734 kobject_put+0x120/0x150
CPU: 2 UID: 299 PID: 440 Comm: gpsd Not tainted 6.11.0-rc6-00308-gb31c44928842 #1
Hardware name: Raspberry Pi 4 Model B Rev 1.1 (DT)
pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : kobject_put+0x120/0x150
lr : kobject_put+0x120/0x150
sp : ffffffc0803d3ae0
x29: ffffffc0803d3ae0 x28: ffffff8042dc9738 x27: 0000000000000001
x26: 0000000000000000 x25: ffffff8042dc9040 x24: ffffff8042dc9440
x23: ffffff80402a4620 x22: ffffff8042ef4bd0 x21: ffffff80405cb600
x20: 000000000008001b x19: ffffff8040b3b6e0 x18: 0000000000000000
x17: 0000000000000000 x16: 0000000000000000 x15: 696e6920746f6e20
x14: 7369203a29343263 x13: 205d303434542020 x12: 0000000000000000
x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000
x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000
x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000
x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000
Call trace:
kobject_put+0x120/0x150
cdev_put+0x20/0x3c
__fput+0x2c4/0x2d8
____fput+0x1c/0x38
task_work_run+0x70/0xfc
do_exit+0x2a0/0x924
do_group_exit+0x34/0x90
get_signal+0x7fc/0x8c0
do_signal+0x128/0x13b4
do_notify_resume+0xdc/0x160
el0_svc+0xd4/0xf8
el0t_64_sync_handler+0x140/0x14c
el0t_64_sync+0x190/0x194
---[ end trace 0000000000000000 ]---
...followed by more symptoms of corruption, with similar stacks:
refcount_t: underflow; use-after-free.
kernel BUG at lib/list_debug.c:62!
Kernel panic - not syncing: Oops - BUG: Fatal exception
This happens because pps_device_destruct() frees the pps_device with the
embedded cdev immediately after calling cdev_del(), but, as the comment
above cdev_del() notes, fops for previously opened cdevs are still
callable even after cdev_del() returns. I think this bug has always
been there: I can't explain why it suddenly started happening every time
I reboot this particular board.
In commit d953e0e837e6 ("pps: Fix a use-after free bug when
unregistering a source."), George Spelvin suggested removing the
embedded cdev. That seems like the simplest way to fix this, so I've
implemented his suggestion, using __register_chrdev() with pps_idr
becoming the source of truth for which minor corresponds to which
device.
But now that pps_idr defines userspace visibility instead of cdev_add(),
we need to be sure the pps->dev refcount can't reach zero while
userspace can still find it again. So, the idr_remove() call moves to
pps_unregister_cdev(), and pps_idr now holds a reference to pps->dev.
pps_core: source serial1 got cdev (251:1)
<...>
pps pps1: removed
pps_core: unregistering pps1
pps_core: deallocating pps1
Fixes: d953e0e837e6 ("pps: Fix a use-after free bug when unregistering a source.")
Cc: stable@vger.kernel.org
Signed-off-by: Calvin Owens <calvin@wbinvd.org>
---
Changes in v3:
- Shorten patch title
- Embed the device struct in the pps struct
- Use foo_device(&pps->dev) instead of kobject_foo(&pps->dev.kobj)
Changes in v2:
- Don't move pr_debug() from pps_device_destruct() to pps_unregister_cdev()
- Actually add stable@vger.kernel.org to CC
---
drivers/pps/clients/pps-gpio.c | 2 +-
drivers/pps/clients/pps-ktimer.c | 4 +-
drivers/pps/clients/pps-ldisc.c | 6 +-
drivers/pps/clients/pps_parport.c | 4 +-
drivers/pps/kapi.c | 10 +--
drivers/pps/kc.c | 10 +--
drivers/pps/pps.c | 127 ++++++++++++++++--------------
include/linux/pps_kernel.h | 3 +-
8 files changed, 85 insertions(+), 81 deletions(-)
diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c
index 791fdc9326dd..a21bf56e7a87 100644
--- a/drivers/pps/clients/pps-gpio.c
+++ b/drivers/pps/clients/pps-gpio.c
@@ -214,7 +214,7 @@ static int pps_gpio_probe(struct platform_device *pdev)
return -EINVAL;
}
- dev_info(data->pps->dev, "Registered IRQ %d as PPS source\n",
+ dev_info(&data->pps->dev, "Registered IRQ %d as PPS source\n",
data->irq);
return 0;
diff --git a/drivers/pps/clients/pps-ktimer.c b/drivers/pps/clients/pps-ktimer.c
index d33106bd7a29..00cf406377a1 100644
--- a/drivers/pps/clients/pps-ktimer.c
+++ b/drivers/pps/clients/pps-ktimer.c
@@ -56,7 +56,7 @@ static struct pps_source_info pps_ktimer_info = {
static void __exit pps_ktimer_exit(void)
{
- dev_info(pps->dev, "ktimer PPS source unregistered\n");
+ dev_info(&pps->dev, "ktimer PPS source unregistered\n");
del_timer_sync(&ktimer);
pps_unregister_source(pps);
@@ -74,7 +74,7 @@ static int __init pps_ktimer_init(void)
timer_setup(&ktimer, pps_ktimer_event, 0);
mod_timer(&ktimer, jiffies + HZ);
- dev_info(pps->dev, "ktimer PPS source registered\n");
+ dev_info(&pps->dev, "ktimer PPS source registered\n");
return 0;
}
diff --git a/drivers/pps/clients/pps-ldisc.c b/drivers/pps/clients/pps-ldisc.c
index 443d6bae19d1..49ae33cca03d 100644
--- a/drivers/pps/clients/pps-ldisc.c
+++ b/drivers/pps/clients/pps-ldisc.c
@@ -32,7 +32,7 @@ static void pps_tty_dcd_change(struct tty_struct *tty, bool active)
pps_event(pps, &ts, active ? PPS_CAPTUREASSERT :
PPS_CAPTURECLEAR, NULL);
- dev_dbg(pps->dev, "PPS %s at %lu\n",
+ dev_dbg(&pps->dev, "PPS %s at %lu\n",
active ? "assert" : "clear", jiffies);
}
@@ -69,7 +69,7 @@ static int pps_tty_open(struct tty_struct *tty)
goto err_unregister;
}
- dev_info(pps->dev, "source \"%s\" added\n", info.path);
+ dev_info(&pps->dev, "source \"%s\" added\n", info.path);
return 0;
@@ -89,7 +89,7 @@ static void pps_tty_close(struct tty_struct *tty)
if (WARN_ON(!pps))
return;
- dev_info(pps->dev, "removed\n");
+ dev_info(&pps->dev, "removed\n");
pps_unregister_source(pps);
}
diff --git a/drivers/pps/clients/pps_parport.c b/drivers/pps/clients/pps_parport.c
index abaffb4e1c1c..24db06750297 100644
--- a/drivers/pps/clients/pps_parport.c
+++ b/drivers/pps/clients/pps_parport.c
@@ -81,7 +81,7 @@ static void parport_irq(void *handle)
/* check the signal (no signal means the pulse is lost this time) */
if (!signal_is_set(port)) {
local_irq_restore(flags);
- dev_err(dev->pps->dev, "lost the signal\n");
+ dev_err(&dev->pps->dev, "lost the signal\n");
goto out_assert;
}
@@ -98,7 +98,7 @@ static void parport_irq(void *handle)
/* timeout */
dev->cw_err++;
if (dev->cw_err >= CLEAR_WAIT_MAX_ERRORS) {
- dev_err(dev->pps->dev, "disabled clear edge capture after %d"
+ dev_err(&dev->pps->dev, "disabled clear edge capture after %d"
" timeouts\n", dev->cw_err);
dev->cw = 0;
dev->cw_err = 0;
diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c
index d9d566f70ed1..a76685c147ee 100644
--- a/drivers/pps/kapi.c
+++ b/drivers/pps/kapi.c
@@ -41,7 +41,7 @@ static void pps_add_offset(struct pps_ktime *ts, struct pps_ktime *offset)
static void pps_echo_client_default(struct pps_device *pps, int event,
void *data)
{
- dev_info(pps->dev, "echo %s %s\n",
+ dev_info(&pps->dev, "echo %s %s\n",
event & PPS_CAPTUREASSERT ? "assert" : "",
event & PPS_CAPTURECLEAR ? "clear" : "");
}
@@ -112,7 +112,7 @@ struct pps_device *pps_register_source(struct pps_source_info *info,
goto kfree_pps;
}
- dev_info(pps->dev, "new PPS source %s\n", info->name);
+ dev_info(&pps->dev, "new PPS source %s\n", info->name);
return pps;
@@ -166,7 +166,7 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event,
/* check event type */
BUG_ON((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0);
- dev_dbg(pps->dev, "PPS event at %lld.%09ld\n",
+ dev_dbg(&pps->dev, "PPS event at %lld.%09ld\n",
(s64)ts->ts_real.tv_sec, ts->ts_real.tv_nsec);
timespec_to_pps_ktime(&ts_real, ts->ts_real);
@@ -188,7 +188,7 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event,
/* Save the time stamp */
pps->assert_tu = ts_real;
pps->assert_sequence++;
- dev_dbg(pps->dev, "capture assert seq #%u\n",
+ dev_dbg(&pps->dev, "capture assert seq #%u\n",
pps->assert_sequence);
captured = ~0;
@@ -202,7 +202,7 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event,
/* Save the time stamp */
pps->clear_tu = ts_real;
pps->clear_sequence++;
- dev_dbg(pps->dev, "capture clear seq #%u\n",
+ dev_dbg(&pps->dev, "capture clear seq #%u\n",
pps->clear_sequence);
captured = ~0;
diff --git a/drivers/pps/kc.c b/drivers/pps/kc.c
index 50dc59af45be..fbd23295afd7 100644
--- a/drivers/pps/kc.c
+++ b/drivers/pps/kc.c
@@ -43,11 +43,11 @@ int pps_kc_bind(struct pps_device *pps, struct pps_bind_args *bind_args)
pps_kc_hardpps_mode = 0;
pps_kc_hardpps_dev = NULL;
spin_unlock_irq(&pps_kc_hardpps_lock);
- dev_info(pps->dev, "unbound kernel"
+ dev_info(&pps->dev, "unbound kernel"
" consumer\n");
} else {
spin_unlock_irq(&pps_kc_hardpps_lock);
- dev_err(pps->dev, "selected kernel consumer"
+ dev_err(&pps->dev, "selected kernel consumer"
" is not bound\n");
return -EINVAL;
}
@@ -57,11 +57,11 @@ int pps_kc_bind(struct pps_device *pps, struct pps_bind_args *bind_args)
pps_kc_hardpps_mode = bind_args->edge;
pps_kc_hardpps_dev = pps;
spin_unlock_irq(&pps_kc_hardpps_lock);
- dev_info(pps->dev, "bound kernel consumer: "
+ dev_info(&pps->dev, "bound kernel consumer: "
"edge=0x%x\n", bind_args->edge);
} else {
spin_unlock_irq(&pps_kc_hardpps_lock);
- dev_err(pps->dev, "another kernel consumer"
+ dev_err(&pps->dev, "another kernel consumer"
" is already bound\n");
return -EINVAL;
}
@@ -83,7 +83,7 @@ void pps_kc_remove(struct pps_device *pps)
pps_kc_hardpps_mode = 0;
pps_kc_hardpps_dev = NULL;
spin_unlock_irq(&pps_kc_hardpps_lock);
- dev_info(pps->dev, "unbound kernel consumer"
+ dev_info(&pps->dev, "unbound kernel consumer"
" on device removal\n");
} else
spin_unlock_irq(&pps_kc_hardpps_lock);
diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c
index 25d47907db17..6a02245ea35f 100644
--- a/drivers/pps/pps.c
+++ b/drivers/pps/pps.c
@@ -25,7 +25,7 @@
* Local variables
*/
-static dev_t pps_devt;
+static int pps_major;
static struct class *pps_class;
static DEFINE_MUTEX(pps_idr_lock);
@@ -62,7 +62,7 @@ static int pps_cdev_pps_fetch(struct pps_device *pps, struct pps_fdata *fdata)
else {
unsigned long ticks;
- dev_dbg(pps->dev, "timeout %lld.%09d\n",
+ dev_dbg(&pps->dev, "timeout %lld.%09d\n",
(long long) fdata->timeout.sec,
fdata->timeout.nsec);
ticks = fdata->timeout.sec * HZ;
@@ -80,7 +80,7 @@ static int pps_cdev_pps_fetch(struct pps_device *pps, struct pps_fdata *fdata)
/* Check for pending signals */
if (err == -ERESTARTSYS) {
- dev_dbg(pps->dev, "pending signal caught\n");
+ dev_dbg(&pps->dev, "pending signal caught\n");
return -EINTR;
}
@@ -98,7 +98,7 @@ static long pps_cdev_ioctl(struct file *file,
switch (cmd) {
case PPS_GETPARAMS:
- dev_dbg(pps->dev, "PPS_GETPARAMS\n");
+ dev_dbg(&pps->dev, "PPS_GETPARAMS\n");
spin_lock_irq(&pps->lock);
@@ -114,7 +114,7 @@ static long pps_cdev_ioctl(struct file *file,
break;
case PPS_SETPARAMS:
- dev_dbg(pps->dev, "PPS_SETPARAMS\n");
+ dev_dbg(&pps->dev, "PPS_SETPARAMS\n");
/* Check the capabilities */
if (!capable(CAP_SYS_TIME))
@@ -124,14 +124,14 @@ static long pps_cdev_ioctl(struct file *file,
if (err)
return -EFAULT;
if (!(params.mode & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR))) {
- dev_dbg(pps->dev, "capture mode unspecified (%x)\n",
+ dev_dbg(&pps->dev, "capture mode unspecified (%x)\n",
params.mode);
return -EINVAL;
}
/* Check for supported capabilities */
if ((params.mode & ~pps->info.mode) != 0) {
- dev_dbg(pps->dev, "unsupported capabilities (%x)\n",
+ dev_dbg(&pps->dev, "unsupported capabilities (%x)\n",
params.mode);
return -EINVAL;
}
@@ -144,7 +144,7 @@ static long pps_cdev_ioctl(struct file *file,
/* Restore the read only parameters */
if ((params.mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) {
/* section 3.3 of RFC 2783 interpreted */
- dev_dbg(pps->dev, "time format unspecified (%x)\n",
+ dev_dbg(&pps->dev, "time format unspecified (%x)\n",
params.mode);
pps->params.mode |= PPS_TSFMT_TSPEC;
}
@@ -165,7 +165,7 @@ static long pps_cdev_ioctl(struct file *file,
break;
case PPS_GETCAP:
- dev_dbg(pps->dev, "PPS_GETCAP\n");
+ dev_dbg(&pps->dev, "PPS_GETCAP\n");
err = put_user(pps->info.mode, iuarg);
if (err)
@@ -176,7 +176,7 @@ static long pps_cdev_ioctl(struct file *file,
case PPS_FETCH: {
struct pps_fdata fdata;
- dev_dbg(pps->dev, "PPS_FETCH\n");
+ dev_dbg(&pps->dev, "PPS_FETCH\n");
err = copy_from_user(&fdata, uarg, sizeof(struct pps_fdata));
if (err)
@@ -206,7 +206,7 @@ static long pps_cdev_ioctl(struct file *file,
case PPS_KC_BIND: {
struct pps_bind_args bind_args;
- dev_dbg(pps->dev, "PPS_KC_BIND\n");
+ dev_dbg(&pps->dev, "PPS_KC_BIND\n");
/* Check the capabilities */
if (!capable(CAP_SYS_TIME))
@@ -218,7 +218,7 @@ static long pps_cdev_ioctl(struct file *file,
/* Check for supported capabilities */
if ((bind_args.edge & ~pps->info.mode) != 0) {
- dev_err(pps->dev, "unsupported capabilities (%x)\n",
+ dev_err(&pps->dev, "unsupported capabilities (%x)\n",
bind_args.edge);
return -EINVAL;
}
@@ -227,7 +227,7 @@ static long pps_cdev_ioctl(struct file *file,
if (bind_args.tsformat != PPS_TSFMT_TSPEC ||
(bind_args.edge & ~PPS_CAPTUREBOTH) != 0 ||
bind_args.consumer != PPS_KC_HARDPPS) {
- dev_err(pps->dev, "invalid kernel consumer bind"
+ dev_err(&pps->dev, "invalid kernel consumer bind"
" parameters (%x)\n", bind_args.edge);
return -EINVAL;
}
@@ -259,7 +259,7 @@ static long pps_cdev_compat_ioctl(struct file *file,
struct pps_fdata fdata;
int err;
- dev_dbg(pps->dev, "PPS_FETCH\n");
+ dev_dbg(&pps->dev, "PPS_FETCH\n");
err = copy_from_user(&compat, uarg, sizeof(struct pps_fdata_compat));
if (err)
@@ -296,20 +296,36 @@ static long pps_cdev_compat_ioctl(struct file *file,
#define pps_cdev_compat_ioctl NULL
#endif
+static struct pps_device *pps_idr_get(unsigned long id)
+{
+ struct pps_device *pps;
+
+ mutex_lock(&pps_idr_lock);
+ pps = idr_find(&pps_idr, id);
+ if (pps)
+ get_device(&pps->dev);
+
+ mutex_unlock(&pps_idr_lock);
+ return pps;
+}
+
static int pps_cdev_open(struct inode *inode, struct file *file)
{
- struct pps_device *pps = container_of(inode->i_cdev,
- struct pps_device, cdev);
+ struct pps_device *pps = pps_idr_get(iminor(inode));
+
+ if (!pps)
+ return -ENODEV;
+
file->private_data = pps;
- kobject_get(&pps->dev->kobj);
return 0;
}
static int pps_cdev_release(struct inode *inode, struct file *file)
{
- struct pps_device *pps = container_of(inode->i_cdev,
- struct pps_device, cdev);
- kobject_put(&pps->dev->kobj);
+ struct pps_device *pps = file->private_data;
+
+ WARN_ON(pps->id != iminor(inode));
+ put_device(&pps->dev);
return 0;
}
@@ -331,22 +347,13 @@ static void pps_device_destruct(struct device *dev)
{
struct pps_device *pps = dev_get_drvdata(dev);
- cdev_del(&pps->cdev);
-
- /* Now we can release the ID for re-use */
pr_debug("deallocating pps%d\n", pps->id);
- mutex_lock(&pps_idr_lock);
- idr_remove(&pps_idr, pps->id);
- mutex_unlock(&pps_idr_lock);
-
- kfree(dev);
kfree(pps);
}
int pps_register_cdev(struct pps_device *pps)
{
int err;
- dev_t devt;
mutex_lock(&pps_idr_lock);
/*
@@ -363,40 +370,29 @@ int pps_register_cdev(struct pps_device *pps)
goto out_unlock;
}
pps->id = err;
- mutex_unlock(&pps_idr_lock);
-
- devt = MKDEV(MAJOR(pps_devt), pps->id);
-
- cdev_init(&pps->cdev, &pps_cdev_fops);
- pps->cdev.owner = pps->info.owner;
- err = cdev_add(&pps->cdev, devt, 1);
- if (err) {
- pr_err("%s: failed to add char device %d:%d\n",
- pps->info.name, MAJOR(pps_devt), pps->id);
+ pps->dev.class = pps_class;
+ pps->dev.parent = pps->info.dev;
+ pps->dev.devt = MKDEV(pps_major, pps->id);
+ dev_set_drvdata(&pps->dev, pps);
+ dev_set_name(&pps->dev, "pps%d", pps->id);
+ err = device_register(&pps->dev);
+ if (err)
goto free_idr;
- }
- pps->dev = device_create(pps_class, pps->info.dev, devt, pps,
- "pps%d", pps->id);
- if (IS_ERR(pps->dev)) {
- err = PTR_ERR(pps->dev);
- goto del_cdev;
- }
/* Override the release function with our own */
- pps->dev->release = pps_device_destruct;
+ pps->dev.release = pps_device_destruct;
- pr_debug("source %s got cdev (%d:%d)\n", pps->info.name,
- MAJOR(pps_devt), pps->id);
+ pr_debug("source %s got cdev (%d:%d)\n", pps->info.name, pps_major,
+ pps->id);
+ get_device(&pps->dev);
+ mutex_unlock(&pps_idr_lock);
return 0;
-del_cdev:
- cdev_del(&pps->cdev);
-
free_idr:
- mutex_lock(&pps_idr_lock);
idr_remove(&pps_idr, pps->id);
+ put_device(&pps->dev);
out_unlock:
mutex_unlock(&pps_idr_lock);
return err;
@@ -406,7 +402,13 @@ void pps_unregister_cdev(struct pps_device *pps)
{
pr_debug("unregistering pps%d\n", pps->id);
pps->lookup_cookie = NULL;
- device_destroy(pps_class, pps->dev->devt);
+ device_destroy(pps_class, pps->dev.devt);
+
+ /* Now we can release the ID for re-use */
+ mutex_lock(&pps_idr_lock);
+ idr_remove(&pps_idr, pps->id);
+ put_device(&pps->dev);
+ mutex_unlock(&pps_idr_lock);
}
/*
@@ -426,6 +428,11 @@ void pps_unregister_cdev(struct pps_device *pps)
* so that it will not be used again, even if the pps device cannot
* be removed from the idr due to pending references holding the minor
* number in use.
+ *
+ * Since pps_idr holds a reference to the device, the returned
+ * pps_device is guaranteed to be valid until pps_unregister_cdev() is
+ * called on it. But after calling pps_unregister_cdev(), it may be
+ * freed at any time.
*/
struct pps_device *pps_lookup_dev(void const *cookie)
{
@@ -448,13 +455,11 @@ EXPORT_SYMBOL(pps_lookup_dev);
static void __exit pps_exit(void)
{
class_destroy(pps_class);
- unregister_chrdev_region(pps_devt, PPS_MAX_SOURCES);
+ __unregister_chrdev(pps_major, 0, PPS_MAX_SOURCES, "pps");
}
static int __init pps_init(void)
{
- int err;
-
pps_class = class_create("pps");
if (IS_ERR(pps_class)) {
pr_err("failed to allocate class\n");
@@ -462,8 +467,9 @@ static int __init pps_init(void)
}
pps_class->dev_groups = pps_groups;
- err = alloc_chrdev_region(&pps_devt, 0, PPS_MAX_SOURCES, "pps");
- if (err < 0) {
+ pps_major = __register_chrdev(0, 0, PPS_MAX_SOURCES, "pps",
+ &pps_cdev_fops);
+ if (pps_major < 0) {
pr_err("failed to allocate char device region\n");
goto remove_class;
}
@@ -476,8 +482,7 @@ static int __init pps_init(void)
remove_class:
class_destroy(pps_class);
-
- return err;
+ return pps_major;
}
subsys_initcall(pps_init);
diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h
index 78c8ac4951b5..c7abce28ed29 100644
--- a/include/linux/pps_kernel.h
+++ b/include/linux/pps_kernel.h
@@ -56,8 +56,7 @@ struct pps_device {
unsigned int id; /* PPS source unique ID */
void const *lookup_cookie; /* For pps_lookup_dev() only */
- struct cdev cdev;
- struct device *dev;
+ struct device dev;
struct fasync_struct *async_queue; /* fasync method */
spinlock_t lock;
};
--
2.45.2
Hi Calvin, kernel test robot noticed the following build errors: [auto build test ERROR on linus/master] [also build test ERROR on v6.12-rc6 next-20241105] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Calvin-Owens/pps-Fix-a-use-after-free/20241105-155819 base: linus/master patch link: https://lore.kernel.org/r/abc43b18f21379c21a4d2c63372a04b2746be665.1730792731.git.calvin%40wbinvd.org patch subject: [PATCH v3] pps: Fix a use-after-free config: um-allmodconfig (https://download.01.org/0day-ci/archive/20241106/202411061205.AZijSWPc-lkp@intel.com/config) compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 639a7ac648f1e50ccd2556e17d401c04f9cce625) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241106/202411061205.AZijSWPc-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202411061205.AZijSWPc-lkp@intel.com/ All errors (new ones prefixed by >>): In file included from drivers/ptp/ptp_ocp.c:10: In file included from include/linux/pci.h:38: In file included from include/linux/interrupt.h:11: In file included from include/linux/hardirq.h:11: In file included from arch/um/include/asm/hardirq.h:5: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:14: In file included from arch/um/include/asm/io.h:24: include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 548 | val = __raw_readb(PCI_IOBASE + addr); | ~~~~~~~~~~ ^ include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 561 | val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr)); | ~~~~~~~~~~ ^ include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu' 37 | #define __le16_to_cpu(x) ((__force __u16)(__le16)(x)) | ^ In file included from drivers/ptp/ptp_ocp.c:10: In file included from include/linux/pci.h:38: In file included from include/linux/interrupt.h:11: In file included from include/linux/hardirq.h:11: In file included from arch/um/include/asm/hardirq.h:5: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:14: In file included from arch/um/include/asm/io.h:24: include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 574 | val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr)); | ~~~~~~~~~~ ^ include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu' 35 | #define __le32_to_cpu(x) ((__force __u32)(__le32)(x)) | ^ In file included from drivers/ptp/ptp_ocp.c:10: In file included from include/linux/pci.h:38: In file included from include/linux/interrupt.h:11: In file included from include/linux/hardirq.h:11: In file included from arch/um/include/asm/hardirq.h:5: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:14: In file included from arch/um/include/asm/io.h:24: include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 585 | __raw_writeb(value, PCI_IOBASE + addr); | ~~~~~~~~~~ ^ include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 595 | __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); | ~~~~~~~~~~ ^ include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 605 | __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); | ~~~~~~~~~~ ^ include/asm-generic/io.h:693:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 693 | readsb(PCI_IOBASE + addr, buffer, count); | ~~~~~~~~~~ ^ include/asm-generic/io.h:701:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 701 | readsw(PCI_IOBASE + addr, buffer, count); | ~~~~~~~~~~ ^ include/asm-generic/io.h:709:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 709 | readsl(PCI_IOBASE + addr, buffer, count); | ~~~~~~~~~~ ^ include/asm-generic/io.h:718:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 718 | writesb(PCI_IOBASE + addr, buffer, count); | ~~~~~~~~~~ ^ include/asm-generic/io.h:727:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 727 | writesw(PCI_IOBASE + addr, buffer, count); | ~~~~~~~~~~ ^ include/asm-generic/io.h:736:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 736 | writesl(PCI_IOBASE + addr, buffer, count); | ~~~~~~~~~~ ^ In file included from drivers/ptp/ptp_ocp.c:10: In file included from include/linux/pci.h:1650: In file included from include/linux/dmapool.h:14: In file included from include/linux/scatterlist.h:8: In file included from include/linux/mm.h:2213: include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion] 518 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" | ~~~~~~~~~~~ ^ ~~~ >> drivers/ptp/ptp_ocp.c:4423:23: error: passing 'struct device' to parameter of incompatible type 'struct device *'; take the address with & 4423 | ptp_ocp_symlink(bp, pps->dev, "pps"); | ^~~~~~~~ | & drivers/ptp/ptp_ocp.c:4387:52: note: passing argument to parameter 'child' here 4387 | ptp_ocp_symlink(struct ptp_ocp *bp, struct device *child, const char *link) | ^ 13 warnings and 1 error generated. vim +4423 drivers/ptp/ptp_ocp.c 773bda96492153 Jonathan Lemon 2021-08-03 4411 773bda96492153 Jonathan Lemon 2021-08-03 4412 static int 773bda96492153 Jonathan Lemon 2021-08-03 4413 ptp_ocp_complete(struct ptp_ocp *bp) 773bda96492153 Jonathan Lemon 2021-08-03 4414 { 773bda96492153 Jonathan Lemon 2021-08-03 4415 struct pps_device *pps; 773bda96492153 Jonathan Lemon 2021-08-03 4416 char buf[32]; d7875b4b078f7e Vadim Fedorenko 2024-08-29 4417 773bda96492153 Jonathan Lemon 2021-08-03 4418 sprintf(buf, "ptp%d", ptp_clock_index(bp->ptp)); 773bda96492153 Jonathan Lemon 2021-08-03 4419 ptp_ocp_link_child(bp, buf, "ptp"); 773bda96492153 Jonathan Lemon 2021-08-03 4420 773bda96492153 Jonathan Lemon 2021-08-03 4421 pps = pps_lookup_dev(bp->ptp); 773bda96492153 Jonathan Lemon 2021-08-03 4422 if (pps) 773bda96492153 Jonathan Lemon 2021-08-03 @4423 ptp_ocp_symlink(bp, pps->dev, "pps"); 773bda96492153 Jonathan Lemon 2021-08-03 4424 f67bf662d2cffa Jonathan Lemon 2021-09-14 4425 ptp_ocp_debugfs_add_device(bp); f67bf662d2cffa Jonathan Lemon 2021-09-14 4426 773bda96492153 Jonathan Lemon 2021-08-03 4427 return 0; 773bda96492153 Jonathan Lemon 2021-08-03 4428 } 773bda96492153 Jonathan Lemon 2021-08-03 4429 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Hi Calvin, kernel test robot noticed the following build errors: [auto build test ERROR on linus/master] [also build test ERROR on v6.12-rc6 next-20241105] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Calvin-Owens/pps-Fix-a-use-after-free/20241105-155819 base: linus/master patch link: https://lore.kernel.org/r/abc43b18f21379c21a4d2c63372a04b2746be665.1730792731.git.calvin%40wbinvd.org patch subject: [PATCH v3] pps: Fix a use-after-free config: i386-buildonly-randconfig-004-20241105 (https://download.01.org/0day-ci/archive/20241106/202411060043.hC5KjYAw-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241106/202411060043.hC5KjYAw-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202411060043.hC5KjYAw-lkp@intel.com/ All errors (new ones prefixed by >>): drivers/ptp/ptp_ocp.c: In function 'ptp_ocp_complete': >> drivers/ptp/ptp_ocp.c:4423:40: error: incompatible type for argument 2 of 'ptp_ocp_symlink' 4423 | ptp_ocp_symlink(bp, pps->dev, "pps"); | ~~~^~~~~ | | | struct device drivers/ptp/ptp_ocp.c:4387:52: note: expected 'struct device *' but argument is of type 'struct device' 4387 | ptp_ocp_symlink(struct ptp_ocp *bp, struct device *child, const char *link) | ~~~~~~~~~~~~~~~^~~~~ vim +/ptp_ocp_symlink +4423 drivers/ptp/ptp_ocp.c 773bda96492153 Jonathan Lemon 2021-08-03 4411 773bda96492153 Jonathan Lemon 2021-08-03 4412 static int 773bda96492153 Jonathan Lemon 2021-08-03 4413 ptp_ocp_complete(struct ptp_ocp *bp) 773bda96492153 Jonathan Lemon 2021-08-03 4414 { 773bda96492153 Jonathan Lemon 2021-08-03 4415 struct pps_device *pps; 773bda96492153 Jonathan Lemon 2021-08-03 4416 char buf[32]; d7875b4b078f7e Vadim Fedorenko 2024-08-29 4417 773bda96492153 Jonathan Lemon 2021-08-03 4418 sprintf(buf, "ptp%d", ptp_clock_index(bp->ptp)); 773bda96492153 Jonathan Lemon 2021-08-03 4419 ptp_ocp_link_child(bp, buf, "ptp"); 773bda96492153 Jonathan Lemon 2021-08-03 4420 773bda96492153 Jonathan Lemon 2021-08-03 4421 pps = pps_lookup_dev(bp->ptp); 773bda96492153 Jonathan Lemon 2021-08-03 4422 if (pps) 773bda96492153 Jonathan Lemon 2021-08-03 @4423 ptp_ocp_symlink(bp, pps->dev, "pps"); 773bda96492153 Jonathan Lemon 2021-08-03 4424 f67bf662d2cffa Jonathan Lemon 2021-09-14 4425 ptp_ocp_debugfs_add_device(bp); f67bf662d2cffa Jonathan Lemon 2021-09-14 4426 773bda96492153 Jonathan Lemon 2021-08-03 4427 return 0; 773bda96492153 Jonathan Lemon 2021-08-03 4428 } 773bda96492153 Jonathan Lemon 2021-08-03 4429 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Mon, Nov 04, 2024 at 11:56:19PM -0800, Calvin Owens wrote: > - dev_info(pps->dev, "removed\n"); > + dev_info(&pps->dev, "removed\n"); Nit, when drivers work properly, they are quiet, no need for these dev_info() calls. > static int pps_cdev_release(struct inode *inode, struct file *file) > { > - struct pps_device *pps = container_of(inode->i_cdev, > - struct pps_device, cdev); > - kobject_put(&pps->dev->kobj); > + struct pps_device *pps = file->private_data; > + > + WARN_ON(pps->id != iminor(inode)); If this can happen, handle it and move on. Don't just reboot the machine if it's something that could be triggered (remember about panic-on-warn systems.) thanks, greg k-h
On Tuesday 11/05 at 10:44 +0100, Greg Kroah-Hartman wrote: > On Mon, Nov 04, 2024 at 11:56:19PM -0800, Calvin Owens wrote: > > - dev_info(pps->dev, "removed\n"); > > + dev_info(&pps->dev, "removed\n"); > > Nit, when drivers work properly, they are quiet, no need for these > dev_info() calls. I'll change these to dev_dbg(). > > static int pps_cdev_release(struct inode *inode, struct file *file) > > { > > - struct pps_device *pps = container_of(inode->i_cdev, > > - struct pps_device, cdev); > > - kobject_put(&pps->dev->kobj); > > + struct pps_device *pps = file->private_data; > > + > > + WARN_ON(pps->id != iminor(inode)); > > If this can happen, handle it and move on. Don't just reboot the > machine if it's something that could be triggered (remember about > panic-on-warn systems.) It's a fairly paranoid WARN(): it's a bug if it happens, and I don't think it can happen. Should I remove it? The test-robot found a couple *pps->dev dereferences I missed, I'll send a v4 in the next day or two with those fixed as well. Thanks, Calvin > thanks, > > greg k-h
On a board running ntpd and gpsd, I'm seeing a consistent use-after-free
in sys_exit() from gpsd when rebooting:
pps pps1: removed
------------[ cut here ]------------
kobject: '(null)' (00000000db4bec24): is not initialized, yet kobject_put() is being called.
WARNING: CPU: 2 PID: 440 at lib/kobject.c:734 kobject_put+0x120/0x150
CPU: 2 UID: 299 PID: 440 Comm: gpsd Not tainted 6.11.0-rc6-00308-gb31c44928842 #1
Hardware name: Raspberry Pi 4 Model B Rev 1.1 (DT)
pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : kobject_put+0x120/0x150
lr : kobject_put+0x120/0x150
sp : ffffffc0803d3ae0
x29: ffffffc0803d3ae0 x28: ffffff8042dc9738 x27: 0000000000000001
x26: 0000000000000000 x25: ffffff8042dc9040 x24: ffffff8042dc9440
x23: ffffff80402a4620 x22: ffffff8042ef4bd0 x21: ffffff80405cb600
x20: 000000000008001b x19: ffffff8040b3b6e0 x18: 0000000000000000
x17: 0000000000000000 x16: 0000000000000000 x15: 696e6920746f6e20
x14: 7369203a29343263 x13: 205d303434542020 x12: 0000000000000000
x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000
x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000
x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000
x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000
Call trace:
kobject_put+0x120/0x150
cdev_put+0x20/0x3c
__fput+0x2c4/0x2d8
____fput+0x1c/0x38
task_work_run+0x70/0xfc
do_exit+0x2a0/0x924
do_group_exit+0x34/0x90
get_signal+0x7fc/0x8c0
do_signal+0x128/0x13b4
do_notify_resume+0xdc/0x160
el0_svc+0xd4/0xf8
el0t_64_sync_handler+0x140/0x14c
el0t_64_sync+0x190/0x194
---[ end trace 0000000000000000 ]---
...followed by more symptoms of corruption, with similar stacks:
refcount_t: underflow; use-after-free.
kernel BUG at lib/list_debug.c:62!
Kernel panic - not syncing: Oops - BUG: Fatal exception
This happens because pps_device_destruct() frees the pps_device with the
embedded cdev immediately after calling cdev_del(), but, as the comment
above cdev_del() notes, fops for previously opened cdevs are still
callable even after cdev_del() returns. I think this bug has always
been there: I can't explain why it suddenly started happening every time
I reboot this particular board.
In commit d953e0e837e6 ("pps: Fix a use-after free bug when
unregistering a source."), George Spelvin suggested removing the
embedded cdev. That seems like the simplest way to fix this, so I've
implemented his suggestion, using __register_chrdev() with pps_idr
becoming the source of truth for which minor corresponds to which
device.
But now that pps_idr defines userspace visibility instead of cdev_add(),
we need to be sure the pps->dev refcount can't reach zero while
userspace can still find it again. So, the idr_remove() call moves to
pps_unregister_cdev(), and pps_idr now holds a reference to pps->dev.
pps_core: source serial1 got cdev (251:1)
<...>
pps pps1: removed
pps_core: unregistering pps1
pps_core: deallocating pps1
Fixes: d953e0e837e6 ("pps: Fix a use-after free bug when unregistering a source.")
Cc: stable@vger.kernel.org
Signed-off-by: Calvin Owens <calvin@wbinvd.org>
---
Changes in v4:
- Fix compile error in ptp_ocp
- Change five dev_info() calls to dev_dbg()
Changes in v3:
- Shorten patch title
- Embed the device struct in the pps struct
- Use foo_device(&pps->dev) instead of kobject_foo(&pps->dev.kobj)
Changes in v2:
- Don't move pr_debug() from pps_device_destruct() to pps_unregister_cdev()
- Actually add stable@vger.kernel.org to CC
---
drivers/pps/clients/pps-gpio.c | 4 +-
drivers/pps/clients/pps-ktimer.c | 4 +-
drivers/pps/clients/pps-ldisc.c | 6 +-
drivers/pps/clients/pps_parport.c | 4 +-
drivers/pps/kapi.c | 10 +--
drivers/pps/kc.c | 10 +--
drivers/pps/pps.c | 127 ++++++++++++++++--------------
drivers/ptp/ptp_ocp.c | 2 +-
include/linux/pps_kernel.h | 3 +-
9 files changed, 87 insertions(+), 83 deletions(-)
diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c
index 791fdc9326dd..93e662912b53 100644
--- a/drivers/pps/clients/pps-gpio.c
+++ b/drivers/pps/clients/pps-gpio.c
@@ -214,8 +214,8 @@ static int pps_gpio_probe(struct platform_device *pdev)
return -EINVAL;
}
- dev_info(data->pps->dev, "Registered IRQ %d as PPS source\n",
- data->irq);
+ dev_dbg(&data->pps->dev, "Registered IRQ %d as PPS source\n",
+ data->irq);
return 0;
}
diff --git a/drivers/pps/clients/pps-ktimer.c b/drivers/pps/clients/pps-ktimer.c
index d33106bd7a29..2f465549b843 100644
--- a/drivers/pps/clients/pps-ktimer.c
+++ b/drivers/pps/clients/pps-ktimer.c
@@ -56,7 +56,7 @@ static struct pps_source_info pps_ktimer_info = {
static void __exit pps_ktimer_exit(void)
{
- dev_info(pps->dev, "ktimer PPS source unregistered\n");
+ dev_dbg(&pps->dev, "ktimer PPS source unregistered\n");
del_timer_sync(&ktimer);
pps_unregister_source(pps);
@@ -74,7 +74,7 @@ static int __init pps_ktimer_init(void)
timer_setup(&ktimer, pps_ktimer_event, 0);
mod_timer(&ktimer, jiffies + HZ);
- dev_info(pps->dev, "ktimer PPS source registered\n");
+ dev_dbg(&pps->dev, "ktimer PPS source registered\n");
return 0;
}
diff --git a/drivers/pps/clients/pps-ldisc.c b/drivers/pps/clients/pps-ldisc.c
index 443d6bae19d1..fa5660f3c4b7 100644
--- a/drivers/pps/clients/pps-ldisc.c
+++ b/drivers/pps/clients/pps-ldisc.c
@@ -32,7 +32,7 @@ static void pps_tty_dcd_change(struct tty_struct *tty, bool active)
pps_event(pps, &ts, active ? PPS_CAPTUREASSERT :
PPS_CAPTURECLEAR, NULL);
- dev_dbg(pps->dev, "PPS %s at %lu\n",
+ dev_dbg(&pps->dev, "PPS %s at %lu\n",
active ? "assert" : "clear", jiffies);
}
@@ -69,7 +69,7 @@ static int pps_tty_open(struct tty_struct *tty)
goto err_unregister;
}
- dev_info(pps->dev, "source \"%s\" added\n", info.path);
+ dev_dbg(&pps->dev, "source \"%s\" added\n", info.path);
return 0;
@@ -89,7 +89,7 @@ static void pps_tty_close(struct tty_struct *tty)
if (WARN_ON(!pps))
return;
- dev_info(pps->dev, "removed\n");
+ dev_info(&pps->dev, "removed\n");
pps_unregister_source(pps);
}
diff --git a/drivers/pps/clients/pps_parport.c b/drivers/pps/clients/pps_parport.c
index abaffb4e1c1c..24db06750297 100644
--- a/drivers/pps/clients/pps_parport.c
+++ b/drivers/pps/clients/pps_parport.c
@@ -81,7 +81,7 @@ static void parport_irq(void *handle)
/* check the signal (no signal means the pulse is lost this time) */
if (!signal_is_set(port)) {
local_irq_restore(flags);
- dev_err(dev->pps->dev, "lost the signal\n");
+ dev_err(&dev->pps->dev, "lost the signal\n");
goto out_assert;
}
@@ -98,7 +98,7 @@ static void parport_irq(void *handle)
/* timeout */
dev->cw_err++;
if (dev->cw_err >= CLEAR_WAIT_MAX_ERRORS) {
- dev_err(dev->pps->dev, "disabled clear edge capture after %d"
+ dev_err(&dev->pps->dev, "disabled clear edge capture after %d"
" timeouts\n", dev->cw_err);
dev->cw = 0;
dev->cw_err = 0;
diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c
index d9d566f70ed1..92d1b62ea239 100644
--- a/drivers/pps/kapi.c
+++ b/drivers/pps/kapi.c
@@ -41,7 +41,7 @@ static void pps_add_offset(struct pps_ktime *ts, struct pps_ktime *offset)
static void pps_echo_client_default(struct pps_device *pps, int event,
void *data)
{
- dev_info(pps->dev, "echo %s %s\n",
+ dev_info(&pps->dev, "echo %s %s\n",
event & PPS_CAPTUREASSERT ? "assert" : "",
event & PPS_CAPTURECLEAR ? "clear" : "");
}
@@ -112,7 +112,7 @@ struct pps_device *pps_register_source(struct pps_source_info *info,
goto kfree_pps;
}
- dev_info(pps->dev, "new PPS source %s\n", info->name);
+ dev_dbg(&pps->dev, "new PPS source %s\n", info->name);
return pps;
@@ -166,7 +166,7 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event,
/* check event type */
BUG_ON((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0);
- dev_dbg(pps->dev, "PPS event at %lld.%09ld\n",
+ dev_dbg(&pps->dev, "PPS event at %lld.%09ld\n",
(s64)ts->ts_real.tv_sec, ts->ts_real.tv_nsec);
timespec_to_pps_ktime(&ts_real, ts->ts_real);
@@ -188,7 +188,7 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event,
/* Save the time stamp */
pps->assert_tu = ts_real;
pps->assert_sequence++;
- dev_dbg(pps->dev, "capture assert seq #%u\n",
+ dev_dbg(&pps->dev, "capture assert seq #%u\n",
pps->assert_sequence);
captured = ~0;
@@ -202,7 +202,7 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event,
/* Save the time stamp */
pps->clear_tu = ts_real;
pps->clear_sequence++;
- dev_dbg(pps->dev, "capture clear seq #%u\n",
+ dev_dbg(&pps->dev, "capture clear seq #%u\n",
pps->clear_sequence);
captured = ~0;
diff --git a/drivers/pps/kc.c b/drivers/pps/kc.c
index 50dc59af45be..fbd23295afd7 100644
--- a/drivers/pps/kc.c
+++ b/drivers/pps/kc.c
@@ -43,11 +43,11 @@ int pps_kc_bind(struct pps_device *pps, struct pps_bind_args *bind_args)
pps_kc_hardpps_mode = 0;
pps_kc_hardpps_dev = NULL;
spin_unlock_irq(&pps_kc_hardpps_lock);
- dev_info(pps->dev, "unbound kernel"
+ dev_info(&pps->dev, "unbound kernel"
" consumer\n");
} else {
spin_unlock_irq(&pps_kc_hardpps_lock);
- dev_err(pps->dev, "selected kernel consumer"
+ dev_err(&pps->dev, "selected kernel consumer"
" is not bound\n");
return -EINVAL;
}
@@ -57,11 +57,11 @@ int pps_kc_bind(struct pps_device *pps, struct pps_bind_args *bind_args)
pps_kc_hardpps_mode = bind_args->edge;
pps_kc_hardpps_dev = pps;
spin_unlock_irq(&pps_kc_hardpps_lock);
- dev_info(pps->dev, "bound kernel consumer: "
+ dev_info(&pps->dev, "bound kernel consumer: "
"edge=0x%x\n", bind_args->edge);
} else {
spin_unlock_irq(&pps_kc_hardpps_lock);
- dev_err(pps->dev, "another kernel consumer"
+ dev_err(&pps->dev, "another kernel consumer"
" is already bound\n");
return -EINVAL;
}
@@ -83,7 +83,7 @@ void pps_kc_remove(struct pps_device *pps)
pps_kc_hardpps_mode = 0;
pps_kc_hardpps_dev = NULL;
spin_unlock_irq(&pps_kc_hardpps_lock);
- dev_info(pps->dev, "unbound kernel consumer"
+ dev_info(&pps->dev, "unbound kernel consumer"
" on device removal\n");
} else
spin_unlock_irq(&pps_kc_hardpps_lock);
diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c
index 25d47907db17..6a02245ea35f 100644
--- a/drivers/pps/pps.c
+++ b/drivers/pps/pps.c
@@ -25,7 +25,7 @@
* Local variables
*/
-static dev_t pps_devt;
+static int pps_major;
static struct class *pps_class;
static DEFINE_MUTEX(pps_idr_lock);
@@ -62,7 +62,7 @@ static int pps_cdev_pps_fetch(struct pps_device *pps, struct pps_fdata *fdata)
else {
unsigned long ticks;
- dev_dbg(pps->dev, "timeout %lld.%09d\n",
+ dev_dbg(&pps->dev, "timeout %lld.%09d\n",
(long long) fdata->timeout.sec,
fdata->timeout.nsec);
ticks = fdata->timeout.sec * HZ;
@@ -80,7 +80,7 @@ static int pps_cdev_pps_fetch(struct pps_device *pps, struct pps_fdata *fdata)
/* Check for pending signals */
if (err == -ERESTARTSYS) {
- dev_dbg(pps->dev, "pending signal caught\n");
+ dev_dbg(&pps->dev, "pending signal caught\n");
return -EINTR;
}
@@ -98,7 +98,7 @@ static long pps_cdev_ioctl(struct file *file,
switch (cmd) {
case PPS_GETPARAMS:
- dev_dbg(pps->dev, "PPS_GETPARAMS\n");
+ dev_dbg(&pps->dev, "PPS_GETPARAMS\n");
spin_lock_irq(&pps->lock);
@@ -114,7 +114,7 @@ static long pps_cdev_ioctl(struct file *file,
break;
case PPS_SETPARAMS:
- dev_dbg(pps->dev, "PPS_SETPARAMS\n");
+ dev_dbg(&pps->dev, "PPS_SETPARAMS\n");
/* Check the capabilities */
if (!capable(CAP_SYS_TIME))
@@ -124,14 +124,14 @@ static long pps_cdev_ioctl(struct file *file,
if (err)
return -EFAULT;
if (!(params.mode & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR))) {
- dev_dbg(pps->dev, "capture mode unspecified (%x)\n",
+ dev_dbg(&pps->dev, "capture mode unspecified (%x)\n",
params.mode);
return -EINVAL;
}
/* Check for supported capabilities */
if ((params.mode & ~pps->info.mode) != 0) {
- dev_dbg(pps->dev, "unsupported capabilities (%x)\n",
+ dev_dbg(&pps->dev, "unsupported capabilities (%x)\n",
params.mode);
return -EINVAL;
}
@@ -144,7 +144,7 @@ static long pps_cdev_ioctl(struct file *file,
/* Restore the read only parameters */
if ((params.mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) {
/* section 3.3 of RFC 2783 interpreted */
- dev_dbg(pps->dev, "time format unspecified (%x)\n",
+ dev_dbg(&pps->dev, "time format unspecified (%x)\n",
params.mode);
pps->params.mode |= PPS_TSFMT_TSPEC;
}
@@ -165,7 +165,7 @@ static long pps_cdev_ioctl(struct file *file,
break;
case PPS_GETCAP:
- dev_dbg(pps->dev, "PPS_GETCAP\n");
+ dev_dbg(&pps->dev, "PPS_GETCAP\n");
err = put_user(pps->info.mode, iuarg);
if (err)
@@ -176,7 +176,7 @@ static long pps_cdev_ioctl(struct file *file,
case PPS_FETCH: {
struct pps_fdata fdata;
- dev_dbg(pps->dev, "PPS_FETCH\n");
+ dev_dbg(&pps->dev, "PPS_FETCH\n");
err = copy_from_user(&fdata, uarg, sizeof(struct pps_fdata));
if (err)
@@ -206,7 +206,7 @@ static long pps_cdev_ioctl(struct file *file,
case PPS_KC_BIND: {
struct pps_bind_args bind_args;
- dev_dbg(pps->dev, "PPS_KC_BIND\n");
+ dev_dbg(&pps->dev, "PPS_KC_BIND\n");
/* Check the capabilities */
if (!capable(CAP_SYS_TIME))
@@ -218,7 +218,7 @@ static long pps_cdev_ioctl(struct file *file,
/* Check for supported capabilities */
if ((bind_args.edge & ~pps->info.mode) != 0) {
- dev_err(pps->dev, "unsupported capabilities (%x)\n",
+ dev_err(&pps->dev, "unsupported capabilities (%x)\n",
bind_args.edge);
return -EINVAL;
}
@@ -227,7 +227,7 @@ static long pps_cdev_ioctl(struct file *file,
if (bind_args.tsformat != PPS_TSFMT_TSPEC ||
(bind_args.edge & ~PPS_CAPTUREBOTH) != 0 ||
bind_args.consumer != PPS_KC_HARDPPS) {
- dev_err(pps->dev, "invalid kernel consumer bind"
+ dev_err(&pps->dev, "invalid kernel consumer bind"
" parameters (%x)\n", bind_args.edge);
return -EINVAL;
}
@@ -259,7 +259,7 @@ static long pps_cdev_compat_ioctl(struct file *file,
struct pps_fdata fdata;
int err;
- dev_dbg(pps->dev, "PPS_FETCH\n");
+ dev_dbg(&pps->dev, "PPS_FETCH\n");
err = copy_from_user(&compat, uarg, sizeof(struct pps_fdata_compat));
if (err)
@@ -296,20 +296,36 @@ static long pps_cdev_compat_ioctl(struct file *file,
#define pps_cdev_compat_ioctl NULL
#endif
+static struct pps_device *pps_idr_get(unsigned long id)
+{
+ struct pps_device *pps;
+
+ mutex_lock(&pps_idr_lock);
+ pps = idr_find(&pps_idr, id);
+ if (pps)
+ get_device(&pps->dev);
+
+ mutex_unlock(&pps_idr_lock);
+ return pps;
+}
+
static int pps_cdev_open(struct inode *inode, struct file *file)
{
- struct pps_device *pps = container_of(inode->i_cdev,
- struct pps_device, cdev);
+ struct pps_device *pps = pps_idr_get(iminor(inode));
+
+ if (!pps)
+ return -ENODEV;
+
file->private_data = pps;
- kobject_get(&pps->dev->kobj);
return 0;
}
static int pps_cdev_release(struct inode *inode, struct file *file)
{
- struct pps_device *pps = container_of(inode->i_cdev,
- struct pps_device, cdev);
- kobject_put(&pps->dev->kobj);
+ struct pps_device *pps = file->private_data;
+
+ WARN_ON(pps->id != iminor(inode));
+ put_device(&pps->dev);
return 0;
}
@@ -331,22 +347,13 @@ static void pps_device_destruct(struct device *dev)
{
struct pps_device *pps = dev_get_drvdata(dev);
- cdev_del(&pps->cdev);
-
- /* Now we can release the ID for re-use */
pr_debug("deallocating pps%d\n", pps->id);
- mutex_lock(&pps_idr_lock);
- idr_remove(&pps_idr, pps->id);
- mutex_unlock(&pps_idr_lock);
-
- kfree(dev);
kfree(pps);
}
int pps_register_cdev(struct pps_device *pps)
{
int err;
- dev_t devt;
mutex_lock(&pps_idr_lock);
/*
@@ -363,40 +370,29 @@ int pps_register_cdev(struct pps_device *pps)
goto out_unlock;
}
pps->id = err;
- mutex_unlock(&pps_idr_lock);
-
- devt = MKDEV(MAJOR(pps_devt), pps->id);
-
- cdev_init(&pps->cdev, &pps_cdev_fops);
- pps->cdev.owner = pps->info.owner;
- err = cdev_add(&pps->cdev, devt, 1);
- if (err) {
- pr_err("%s: failed to add char device %d:%d\n",
- pps->info.name, MAJOR(pps_devt), pps->id);
+ pps->dev.class = pps_class;
+ pps->dev.parent = pps->info.dev;
+ pps->dev.devt = MKDEV(pps_major, pps->id);
+ dev_set_drvdata(&pps->dev, pps);
+ dev_set_name(&pps->dev, "pps%d", pps->id);
+ err = device_register(&pps->dev);
+ if (err)
goto free_idr;
- }
- pps->dev = device_create(pps_class, pps->info.dev, devt, pps,
- "pps%d", pps->id);
- if (IS_ERR(pps->dev)) {
- err = PTR_ERR(pps->dev);
- goto del_cdev;
- }
/* Override the release function with our own */
- pps->dev->release = pps_device_destruct;
+ pps->dev.release = pps_device_destruct;
- pr_debug("source %s got cdev (%d:%d)\n", pps->info.name,
- MAJOR(pps_devt), pps->id);
+ pr_debug("source %s got cdev (%d:%d)\n", pps->info.name, pps_major,
+ pps->id);
+ get_device(&pps->dev);
+ mutex_unlock(&pps_idr_lock);
return 0;
-del_cdev:
- cdev_del(&pps->cdev);
-
free_idr:
- mutex_lock(&pps_idr_lock);
idr_remove(&pps_idr, pps->id);
+ put_device(&pps->dev);
out_unlock:
mutex_unlock(&pps_idr_lock);
return err;
@@ -406,7 +402,13 @@ void pps_unregister_cdev(struct pps_device *pps)
{
pr_debug("unregistering pps%d\n", pps->id);
pps->lookup_cookie = NULL;
- device_destroy(pps_class, pps->dev->devt);
+ device_destroy(pps_class, pps->dev.devt);
+
+ /* Now we can release the ID for re-use */
+ mutex_lock(&pps_idr_lock);
+ idr_remove(&pps_idr, pps->id);
+ put_device(&pps->dev);
+ mutex_unlock(&pps_idr_lock);
}
/*
@@ -426,6 +428,11 @@ void pps_unregister_cdev(struct pps_device *pps)
* so that it will not be used again, even if the pps device cannot
* be removed from the idr due to pending references holding the minor
* number in use.
+ *
+ * Since pps_idr holds a reference to the device, the returned
+ * pps_device is guaranteed to be valid until pps_unregister_cdev() is
+ * called on it. But after calling pps_unregister_cdev(), it may be
+ * freed at any time.
*/
struct pps_device *pps_lookup_dev(void const *cookie)
{
@@ -448,13 +455,11 @@ EXPORT_SYMBOL(pps_lookup_dev);
static void __exit pps_exit(void)
{
class_destroy(pps_class);
- unregister_chrdev_region(pps_devt, PPS_MAX_SOURCES);
+ __unregister_chrdev(pps_major, 0, PPS_MAX_SOURCES, "pps");
}
static int __init pps_init(void)
{
- int err;
-
pps_class = class_create("pps");
if (IS_ERR(pps_class)) {
pr_err("failed to allocate class\n");
@@ -462,8 +467,9 @@ static int __init pps_init(void)
}
pps_class->dev_groups = pps_groups;
- err = alloc_chrdev_region(&pps_devt, 0, PPS_MAX_SOURCES, "pps");
- if (err < 0) {
+ pps_major = __register_chrdev(0, 0, PPS_MAX_SOURCES, "pps",
+ &pps_cdev_fops);
+ if (pps_major < 0) {
pr_err("failed to allocate char device region\n");
goto remove_class;
}
@@ -476,8 +482,7 @@ static int __init pps_init(void)
remove_class:
class_destroy(pps_class);
-
- return err;
+ return pps_major;
}
subsys_initcall(pps_init);
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index 5feecaadde8e..120db96d9e95 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -4420,7 +4420,7 @@ ptp_ocp_complete(struct ptp_ocp *bp)
pps = pps_lookup_dev(bp->ptp);
if (pps)
- ptp_ocp_symlink(bp, pps->dev, "pps");
+ ptp_ocp_symlink(bp, &pps->dev, "pps");
ptp_ocp_debugfs_add_device(bp);
diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h
index 78c8ac4951b5..c7abce28ed29 100644
--- a/include/linux/pps_kernel.h
+++ b/include/linux/pps_kernel.h
@@ -56,8 +56,7 @@ struct pps_device {
unsigned int id; /* PPS source unique ID */
void const *lookup_cookie; /* For pps_lookup_dev() only */
- struct cdev cdev;
- struct device *dev;
+ struct device dev;
struct fasync_struct *async_queue; /* fasync method */
spinlock_t lock;
};
--
2.45.2
On 14/09/24 02:24, Calvin Owens wrote: > On a board running ntpd and gpsd, I'm seeing a consistent use-after-free > in sys_exit() from gpsd when rebooting: > > pps pps1: removed > ------------[ cut here ]------------ > kobject: '(null)' (00000000db4bec24): is not initialized, yet kobject_put() is being called. > WARNING: CPU: 2 PID: 440 at lib/kobject.c:734 kobject_put+0x120/0x150 > CPU: 2 UID: 299 PID: 440 Comm: gpsd Not tainted 6.11.0-rc6-00308-gb31c44928842 #1 > Hardware name: Raspberry Pi 4 Model B Rev 1.1 (DT) > pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) > pc : kobject_put+0x120/0x150 > lr : kobject_put+0x120/0x150 > sp : ffffffc0803d3ae0 > x29: ffffffc0803d3ae0 x28: ffffff8042dc9738 x27: 0000000000000001 > x26: 0000000000000000 x25: ffffff8042dc9040 x24: ffffff8042dc9440 > x23: ffffff80402a4620 x22: ffffff8042ef4bd0 x21: ffffff80405cb600 > x20: 000000000008001b x19: ffffff8040b3b6e0 x18: 0000000000000000 > x17: 0000000000000000 x16: 0000000000000000 x15: 696e6920746f6e20 > x14: 7369203a29343263 x13: 205d303434542020 x12: 0000000000000000 > x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 > x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 > x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 > x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 > Call trace: > kobject_put+0x120/0x150 > cdev_put+0x20/0x3c > __fput+0x2c4/0x2d8 > ____fput+0x1c/0x38 > task_work_run+0x70/0xfc > do_exit+0x2a0/0x924 > do_group_exit+0x34/0x90 > get_signal+0x7fc/0x8c0 > do_signal+0x128/0x13b4 > do_notify_resume+0xdc/0x160 > el0_svc+0xd4/0xf8 > el0t_64_sync_handler+0x140/0x14c > el0t_64_sync+0x190/0x194 > ---[ end trace 0000000000000000 ]--- > > ...followed by more symptoms of corruption, with similar stacks: > > refcount_t: underflow; use-after-free. > kernel BUG at lib/list_debug.c:62! > Kernel panic - not syncing: Oops - BUG: Fatal exception > > This happens because pps_device_destruct() frees the pps_device with the > embedded cdev immediately after calling cdev_del(), but, as the comment > above cdev_del() notes, fops for previously opened cdevs are still > callable even after cdev_del() returns. I think this bug has always > been there: I can't explain why it suddenly started happening every time > I reboot this particular board. > > In commit d953e0e837e6 ("pps: Fix a use-after free bug when > unregistering a source."), George Spelvin suggested removing the > embedded cdev. That seems like the simplest way to fix this, so I've > implemented his suggestion, with pps_idr becoming the source of truth > for which minor corresponds to which device. > > But now that pps_idr defines userspace visibility instead of cdev_add(), > we need to be sure the pps->dev kobject refcount can't reach zero while > userspace can still find it again. So, the idr_remove() call moves to > pps_unregister_cdev(), and pps_idr now holds a reference to the pps->dev > kobject. > > pps_core: source serial1 got cdev (251:1) > <...> > pps pps1: removed > pps_core: unregistering pps1 > pps_core: deallocating pps1 > > Fixes: d953e0e837e6 ("pps: Fix a use-after free bug when unregistering a source.") > Cc: stable@vger.kernel.org > Signed-off-by: Calvin Owens <calvin@wbinvd.org> > --- > Changes in v2: > - Don't move pr_debug() from pps_device_destruct() to pps_unregister_cdev() > - Actually add stable@vger.kernel.org to CC > --- > drivers/pps/pps.c | 83 ++++++++++++++++++++------------------ > include/linux/pps_kernel.h | 1 - > 2 files changed, 44 insertions(+), 40 deletions(-) > > diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c > index 5d19baae6a38..6980ab17f314 100644 > --- a/drivers/pps/pps.c > +++ b/drivers/pps/pps.c > @@ -25,7 +25,7 @@ > * Local variables > */ > > -static dev_t pps_devt; > +static int pps_major; > static struct class *pps_class; > > static DEFINE_MUTEX(pps_idr_lock); > @@ -296,19 +296,35 @@ static long pps_cdev_compat_ioctl(struct file *file, > #define pps_cdev_compat_ioctl NULL > #endif > > +static struct pps_device *pps_idr_get(unsigned long id) > +{ > + struct pps_device *pps; > + > + mutex_lock(&pps_idr_lock); > + pps = idr_find(&pps_idr, id); > + if (pps) > + kobject_get(&pps->dev->kobj); > + > + mutex_unlock(&pps_idr_lock); > + return pps; > +} > + > static int pps_cdev_open(struct inode *inode, struct file *file) > { > - struct pps_device *pps = container_of(inode->i_cdev, > - struct pps_device, cdev); > + struct pps_device *pps = pps_idr_get(iminor(inode)); > + > + if (!pps) > + return -ENODEV; > + > file->private_data = pps; > - kobject_get(&pps->dev->kobj); > return 0; > } > > static int pps_cdev_release(struct inode *inode, struct file *file) > { > - struct pps_device *pps = container_of(inode->i_cdev, > - struct pps_device, cdev); > + struct pps_device *pps = file->private_data; > + > + WARN_ON(pps->id != iminor(inode)); > kobject_put(&pps->dev->kobj); > return 0; > } > @@ -332,14 +348,7 @@ static void pps_device_destruct(struct device *dev) > { > struct pps_device *pps = dev_get_drvdata(dev); > > - cdev_del(&pps->cdev); > - > - /* Now we can release the ID for re-use */ > pr_debug("deallocating pps%d\n", pps->id); > - mutex_lock(&pps_idr_lock); > - idr_remove(&pps_idr, pps->id); > - mutex_unlock(&pps_idr_lock); > - > kfree(dev); > kfree(pps); > } > @@ -364,39 +373,26 @@ int pps_register_cdev(struct pps_device *pps) > goto out_unlock; > } > pps->id = err; > - mutex_unlock(&pps_idr_lock); > > - devt = MKDEV(MAJOR(pps_devt), pps->id); > - > - cdev_init(&pps->cdev, &pps_cdev_fops); > - pps->cdev.owner = pps->info.owner; > - > - err = cdev_add(&pps->cdev, devt, 1); > - if (err) { > - pr_err("%s: failed to add char device %d:%d\n", > - pps->info.name, MAJOR(pps_devt), pps->id); > - goto free_idr; > - } > + devt = MKDEV(pps_major, pps->id); > pps->dev = device_create(pps_class, pps->info.dev, devt, pps, > "pps%d", pps->id); > if (IS_ERR(pps->dev)) { > err = PTR_ERR(pps->dev); > - goto del_cdev; > + goto free_idr; > } > > /* Override the release function with our own */ > pps->dev->release = pps_device_destruct; > > - pr_debug("source %s got cdev (%d:%d)\n", pps->info.name, > - MAJOR(pps_devt), pps->id); > + pr_debug("source %s got cdev (%d:%d)\n", pps->info.name, pps_major, > + pps->id); > > + kobject_get(&pps->dev->kobj); > + mutex_unlock(&pps_idr_lock); > return 0; > > -del_cdev: > - cdev_del(&pps->cdev); > - > free_idr: > - mutex_lock(&pps_idr_lock); > idr_remove(&pps_idr, pps->id); > out_unlock: > mutex_unlock(&pps_idr_lock); > @@ -408,6 +404,12 @@ void pps_unregister_cdev(struct pps_device *pps) > pr_debug("unregistering pps%d\n", pps->id); > pps->lookup_cookie = NULL; > device_destroy(pps_class, pps->dev->devt); > + > + /* Now we can release the ID for re-use */ > + mutex_lock(&pps_idr_lock); > + idr_remove(&pps_idr, pps->id); > + kobject_put(&pps->dev->kobj); > + mutex_unlock(&pps_idr_lock); > } > > /* > @@ -427,6 +429,11 @@ void pps_unregister_cdev(struct pps_device *pps) > * so that it will not be used again, even if the pps device cannot > * be removed from the idr due to pending references holding the minor > * number in use. > + * > + * Since pps_idr holds a reference to the kobject, the returned > + * pps_device is guaranteed to be valid until pps_unregister_cdev() is > + * called on it. But after calling pps_unregister_cdev(), it may be > + * freed at any time. > */ > struct pps_device *pps_lookup_dev(void const *cookie) > { > @@ -449,13 +456,11 @@ EXPORT_SYMBOL(pps_lookup_dev); > static void __exit pps_exit(void) > { > class_destroy(pps_class); > - unregister_chrdev_region(pps_devt, PPS_MAX_SOURCES); > + __unregister_chrdev(pps_major, 0, PPS_MAX_SOURCES, "pps"); > } > > static int __init pps_init(void) > { > - int err; > - > pps_class = class_create("pps"); > if (IS_ERR(pps_class)) { > pr_err("failed to allocate class\n"); > @@ -463,8 +468,9 @@ static int __init pps_init(void) > } > pps_class->dev_groups = pps_groups; > > - err = alloc_chrdev_region(&pps_devt, 0, PPS_MAX_SOURCES, "pps"); > - if (err < 0) { > + pps_major = __register_chrdev(0, 0, PPS_MAX_SOURCES, "pps", > + &pps_cdev_fops); > + if (pps_major < 0) { > pr_err("failed to allocate char device region\n"); > goto remove_class; > } > @@ -477,8 +483,7 @@ static int __init pps_init(void) > > remove_class: > class_destroy(pps_class); > - > - return err; > + return pps_major; > } > > subsys_initcall(pps_init); > diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h > index 78c8ac4951b5..8ee312788118 100644 > --- a/include/linux/pps_kernel.h > +++ b/include/linux/pps_kernel.h > @@ -56,7 +56,6 @@ struct pps_device { > > unsigned int id; /* PPS source unique ID */ > void const *lookup_cookie; /* For pps_lookup_dev() only */ > - struct cdev cdev; > struct device *dev; > struct fasync_struct *async_queue; /* fasync method */ > spinlock_t lock; Acked-by: Rodolfo Giometti <giometti@enneenne.com> -- GNU/Linux Solutions e-mail: giometti@enneenne.com Linux Device Driver giometti@linux.it Embedded Systems phone: +39 349 2432127 UNIX programming
© 2016 - 2024 Red Hat, Inc.