From nobody Fri Dec 19 12:31:44 2025 Received: from szxga04-in.huawei.com (szxga04-in.huawei.com [45.249.212.190]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0BD3B7488; Wed, 20 Dec 2023 02:01:05 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=huawei.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=huawei.com Received: from mail.maildlp.com (unknown [172.19.88.234]) by szxga04-in.huawei.com (SkyGuard) with ESMTP id 4SvxZm4Cl8z29gGv; Wed, 20 Dec 2023 09:59:44 +0800 (CST) Received: from dggpeml500026.china.huawei.com (unknown [7.185.36.106]) by mail.maildlp.com (Postfix) with ESMTPS id D9C43140499; Wed, 20 Dec 2023 10:00:57 +0800 (CST) Received: from huawei.com (10.175.101.6) by dggpeml500026.china.huawei.com (7.185.36.106) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.2507.35; Wed, 20 Dec 2023 10:00:57 +0800 From: Zhengchao Shao To: , CC: , , , , , , , , , , , Subject: [PATCH v2] ipc/mqueue: fix potential sleeping issue in mqueue_flush_file Date: Wed, 20 Dec 2023 10:12:08 +0800 Message-ID: <20231220021208.2634523-1-shaozhengchao@huawei.com> X-Mailer: git-send-email 2.34.1 Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-ClientProxiedBy: dggems704-chm.china.huawei.com (10.3.19.181) To dggpeml500026.china.huawei.com (7.185.36.106) Content-Type: text/plain; charset="utf-8" I analyze the potential sleeping issue of the following processes: Thread A Thread B ... netlink_create //ref =3D 1 do_mq_notify ... sock =3D netlink_getsockbyfilp ... //ref =3D 2 info->notify_sock =3D sock; ... ... netlink_sendmsg ... skb =3D netlink_alloc_large_skb = //skb->head is vmalloced ... netlink_unicast ... sk =3D netlink_getsockbyportid = //ref =3D 3 ... netlink_sendskb ... __netlink_sendskb ... skb_queue_tail //put skb to= sk_receive_queue ... sock_put //ref =3D 2 ... ... ... netlink_release ... deferred_put_nlk_sk //ref =3D 1 mqueue_flush_file spin_lock remove_notification netlink_sendskb sock_put //ref =3D 0 sk_free ... __sk_destruct netlink_sock_destruct skb_queue_purge //get skb from sk_receive_queue ... __skb_queue_purge_reason kfree_skb_reason __kfree_skb ... skb_release_all skb_release_head_state netlink_skb_destructor vfree(skb->head) //sleeping while holding spinlo= ck In netlink_sendmsg, if the memory pointed to by skb->head is allocated by vmalloc, and is put to sk_receive_queue queue, also the skb is not freed. When the mqueue executes flush, the sleeping bug will occur. Use mutex lock instead of spin lock in mqueue_flush_file. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhengchao Shao --- v2: CCed some networking maintainer & netdev list --- ipc/mqueue.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 5eea4dc0509e..f6f92e3f82e4 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -118,9 +118,9 @@ struct posix_msg_tree_node { * Solution: use _release and _acquire barriers. * * 3) There is intentionally no barrier when setting current->state - * to TASK_INTERRUPTIBLE: spin_unlock(&info->lock) provides the + * to TASK_INTERRUPTIBLE: mutex_unlock(&info->lock) provides the * release memory barrier, and the wakeup is triggered when holding - * info->lock, i.e. spin_lock(&info->lock) provided a pairing + * info->lock, i.e. mutex_lock(&info->lock) provided a pairing * acquire memory barrier. */ =20 @@ -132,7 +132,7 @@ struct ext_wait_queue { /* queue of sleeping tasks */ }; =20 struct mqueue_inode_info { - spinlock_t lock; + struct mutex lock; struct inode vfs_inode; wait_queue_head_t wait_q; =20 @@ -312,7 +312,7 @@ static struct inode *mqueue_get_inode(struct super_bloc= k *sb, inode->i_size =3D FILENT_SIZE; /* mqueue specific info */ info =3D MQUEUE_I(inode); - spin_lock_init(&info->lock); + mutex_init(&info->lock); init_waitqueue_head(&info->wait_q); INIT_LIST_HEAD(&info->e_wait_q[0].list); INIT_LIST_HEAD(&info->e_wait_q[1].list); @@ -523,11 +523,11 @@ static void mqueue_evict_inode(struct inode *inode) =20 ipc_ns =3D get_ns_from_inode(inode); info =3D MQUEUE_I(inode); - spin_lock(&info->lock); + mutex_lock(&info->lock); while ((msg =3D msg_get(info)) !=3D NULL) list_add_tail(&msg->m_list, &tmp_msg); kfree(info->node_cache); - spin_unlock(&info->lock); + mutex_unlock(&info->lock); =20 list_for_each_entry_safe(msg, nmsg, &tmp_msg, m_list) { list_del(&msg->m_list); @@ -640,7 +640,7 @@ static ssize_t mqueue_read_file(struct file *filp, char= __user *u_data, char buffer[FILENT_SIZE]; ssize_t ret; =20 - spin_lock(&info->lock); + mutex_lock(&info->lock); snprintf(buffer, sizeof(buffer), "QSIZE:%-10lu NOTIFY:%-5d SIGNO:%-5d NOTIFY_PID:%-6d\n", info->qsize, @@ -649,7 +649,7 @@ static ssize_t mqueue_read_file(struct file *filp, char= __user *u_data, info->notify.sigev_notify =3D=3D SIGEV_SIGNAL) ? info->notify.sigev_signo : 0, pid_vnr(info->notify_owner)); - spin_unlock(&info->lock); + mutex_unlock(&info->lock); buffer[sizeof(buffer)-1] =3D '\0'; =20 ret =3D simple_read_from_buffer(u_data, count, off, buffer, @@ -665,11 +665,11 @@ static int mqueue_flush_file(struct file *filp, fl_ow= ner_t id) { struct mqueue_inode_info *info =3D MQUEUE_I(file_inode(filp)); =20 - spin_lock(&info->lock); + mutex_lock(&info->lock); if (task_tgid(current) =3D=3D info->notify_owner) remove_notification(info); =20 - spin_unlock(&info->lock); + mutex_unlock(&info->lock); return 0; } =20 @@ -680,13 +680,13 @@ static __poll_t mqueue_poll_file(struct file *filp, s= truct poll_table_struct *po =20 poll_wait(filp, &info->wait_q, poll_tab); =20 - spin_lock(&info->lock); + mutex_lock(&info->lock); if (info->attr.mq_curmsgs) retval =3D EPOLLIN | EPOLLRDNORM; =20 if (info->attr.mq_curmsgs < info->attr.mq_maxmsg) retval |=3D EPOLLOUT | EPOLLWRNORM; - spin_unlock(&info->lock); + mutex_unlock(&info->lock); =20 return retval; } @@ -724,7 +724,7 @@ static int wq_sleep(struct mqueue_inode_info *info, int= sr, /* memory barrier not required, we hold info->lock */ __set_current_state(TASK_INTERRUPTIBLE); =20 - spin_unlock(&info->lock); + mutex_unlock(&info->lock); time =3D schedule_hrtimeout_range_clock(timeout, 0, HRTIMER_MODE_ABS, CLOCK_REALTIME); =20 @@ -734,7 +734,7 @@ static int wq_sleep(struct mqueue_inode_info *info, int= sr, retval =3D 0; goto out; } - spin_lock(&info->lock); + mutex_lock(&info->lock); =20 /* we hold info->lock, so no memory barrier required */ if (READ_ONCE(ewp->state) =3D=3D STATE_READY) { @@ -752,7 +752,7 @@ static int wq_sleep(struct mqueue_inode_info *info, int= sr, } list_del(&ewp->list); out_unlock: - spin_unlock(&info->lock); + mutex_unlock(&info->lock); out: return retval; } @@ -1125,7 +1125,7 @@ static int do_mq_timedsend(mqd_t mqdes, const char __= user *u_msg_ptr, if (!info->node_cache) new_leaf =3D kmalloc(sizeof(*new_leaf), GFP_KERNEL); =20 - spin_lock(&info->lock); + mutex_lock(&info->lock); =20 if (!info->node_cache && new_leaf) { /* Save our speculative allocation into the cache */ @@ -1166,7 +1166,7 @@ static int do_mq_timedsend(mqd_t mqdes, const char __= user *u_msg_ptr, simple_inode_init_ts(inode); } out_unlock: - spin_unlock(&info->lock); + mutex_unlock(&info->lock); wake_up_q(&wake_q); out_free: if (ret) @@ -1230,7 +1230,7 @@ static int do_mq_timedreceive(mqd_t mqdes, char __use= r *u_msg_ptr, if (!info->node_cache) new_leaf =3D kmalloc(sizeof(*new_leaf), GFP_KERNEL); =20 - spin_lock(&info->lock); + mutex_lock(&info->lock); =20 if (!info->node_cache && new_leaf) { /* Save our speculative allocation into the cache */ @@ -1242,7 +1242,7 @@ static int do_mq_timedreceive(mqd_t mqdes, char __use= r *u_msg_ptr, =20 if (info->attr.mq_curmsgs =3D=3D 0) { if (f.file->f_flags & O_NONBLOCK) { - spin_unlock(&info->lock); + mutex_unlock(&info->lock); ret =3D -EAGAIN; } else { wait.task =3D current; @@ -1261,7 +1261,7 @@ static int do_mq_timedreceive(mqd_t mqdes, char __use= r *u_msg_ptr, =20 /* There is now free space in queue. */ pipelined_receive(&wake_q, info); - spin_unlock(&info->lock); + mutex_unlock(&info->lock); wake_up_q(&wake_q); ret =3D 0; } @@ -1391,7 +1391,7 @@ static int do_mq_notify(mqd_t mqdes, const struct sig= event *notification) info =3D MQUEUE_I(inode); =20 ret =3D 0; - spin_lock(&info->lock); + mutex_lock(&info->lock); if (notification =3D=3D NULL) { if (info->notify_owner =3D=3D task_tgid(current)) { remove_notification(info); @@ -1424,7 +1424,7 @@ static int do_mq_notify(mqd_t mqdes, const struct sig= event *notification) info->notify_user_ns =3D get_user_ns(current_user_ns()); inode_set_atime_to_ts(inode, inode_set_ctime_current(inode)); } - spin_unlock(&info->lock); + mutex_unlock(&info->lock); out_fput: fdput(f); out: @@ -1470,7 +1470,7 @@ static int do_mq_getsetattr(int mqdes, struct mq_attr= *new, struct mq_attr *old) inode =3D file_inode(f.file); info =3D MQUEUE_I(inode); =20 - spin_lock(&info->lock); + mutex_lock(&info->lock); =20 if (old) { *old =3D info->attr; @@ -1488,7 +1488,7 @@ static int do_mq_getsetattr(int mqdes, struct mq_attr= *new, struct mq_attr *old) inode_set_atime_to_ts(inode, inode_set_ctime_current(inode)); } =20 - spin_unlock(&info->lock); + mutex_unlock(&info->lock); fdput(f); return 0; } --=20 2.34.1