[Xen-devel] [PATCH] xen/xenbus: fix self-deadlock after killing user process

Juergen Gross posted 1 patch 2 weeks ago
Failed in applying to current master (apply log)
drivers/xen/xenbus/xenbus_dev_frontend.c | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)

[Xen-devel] [PATCH] xen/xenbus: fix self-deadlock after killing user process

Posted by Juergen Gross 2 weeks ago
If a user process using xenbus has open transactions and is killed,
e.g. via ctrl-C, the subsequent cleanup of the allocated resources might
result in a deadlock, because the xenbus worker thread itself tries to
end a transaction:

[ 2551.474706] INFO: task xenbus:37 blocked for more than 120 seconds.
[ 2551.492215]       Tainted: P           OE     5.0.0-29-generic #5
[ 2551.510263] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 2551.528585] xenbus          D    0    37      2 0x80000080
[ 2551.528590] Call Trace:
[ 2551.528603]  __schedule+0x2c0/0x870
[ 2551.528606]  ? _cond_resched+0x19/0x40
[ 2551.528632]  schedule+0x2c/0x70
[ 2551.528637]  xs_talkv+0x1ec/0x2b0
[ 2551.528642]  ? wait_woken+0x80/0x80
[ 2551.528645]  xs_single+0x53/0x80
[ 2551.528648]  xenbus_transaction_end+0x3b/0x70
[ 2551.528651]  xenbus_file_free+0x5a/0x160
[ 2551.528654]  xenbus_dev_queue_reply+0xc4/0x220
[ 2551.528657]  xenbus_thread+0x7de/0x880
[ 2551.528660]  ? wait_woken+0x80/0x80
[ 2551.528665]  kthread+0x121/0x140
[ 2551.528667]  ? xb_read+0x1d0/0x1d0
[ 2551.528670]  ? kthread_park+0x90/0x90
[ 2551.528673]  ret_from_fork+0x35/0x40

Fix this by doing the cleanup via a workqueue instead.

Reported-by: James Dingwall <james@dingwall.me.uk>
Fixes: fd8aa9095a95c ("xen: optimize xenbus driver for multiple concurrent xenstore accesses")
Cc: <stable@vger.kernel.org> # 4.11
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xenbus/xenbus_dev_frontend.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 08adc590f631..597af455a522 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -55,6 +55,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/miscdevice.h>
+#include <linux/workqueue.h>
 
 #include <xen/xenbus.h>
 #include <xen/xen.h>
@@ -116,6 +117,8 @@ struct xenbus_file_priv {
 	wait_queue_head_t read_waitq;
 
 	struct kref kref;
+
+	struct work_struct wq;
 };
 
 /* Read out any raw xenbus messages queued up. */
@@ -300,14 +303,14 @@ static void watch_fired(struct xenbus_watch *watch,
 	mutex_unlock(&adap->dev_data->reply_mutex);
 }
 
-static void xenbus_file_free(struct kref *kref)
+static void xenbus_worker(struct work_struct *wq)
 {
 	struct xenbus_file_priv *u;
 	struct xenbus_transaction_holder *trans, *tmp;
 	struct watch_adapter *watch, *tmp_watch;
 	struct read_buffer *rb, *tmp_rb;
 
-	u = container_of(kref, struct xenbus_file_priv, kref);
+	u = container_of(wq, struct xenbus_file_priv, wq);
 
 	/*
 	 * No need for locking here because there are no other users,
@@ -333,6 +336,18 @@ static void xenbus_file_free(struct kref *kref)
 	kfree(u);
 }
 
+static void xenbus_file_free(struct kref *kref)
+{
+	struct xenbus_file_priv *u;
+
+	/*
+	 * We might be called in xenbus_thread().
+	 * Use workqueue to avoid deadlock.
+	 */
+	u = container_of(kref, struct xenbus_file_priv, kref);
+	schedule_work(&u->wq);
+}
+
 static struct xenbus_transaction_holder *xenbus_get_transaction(
 	struct xenbus_file_priv *u, uint32_t tx_id)
 {
@@ -650,6 +665,7 @@ static int xenbus_file_open(struct inode *inode, struct file *filp)
 	INIT_LIST_HEAD(&u->watches);
 	INIT_LIST_HEAD(&u->read_buffers);
 	init_waitqueue_head(&u->read_waitq);
+	INIT_WORK(&u->wq, xenbus_worker);
 
 	mutex_init(&u->reply_mutex);
 	mutex_init(&u->msgbuffer_mutex);
-- 
2.16.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH] xen/xenbus: fix self-deadlock after killing user process

Posted by Boris Ostrovsky 2 weeks ago
On 10/1/19 11:03 AM, Juergen Gross wrote:
> In case a user process using xenbus has open transactions and is killed
> e.g. via ctrl-C the following cleanup of the allocated resources might
> result in a deadlock due to trying to end a transaction in the xenbus
> worker thread:
>
> [ 2551.474706] INFO: task xenbus:37 blocked for more than 120 seconds.
> [ 2551.492215]       Tainted: P           OE     5.0.0-29-generic #5
> [ 2551.510263] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [ 2551.528585] xenbus          D    0    37      2 0x80000080
> [ 2551.528590] Call Trace:
> [ 2551.528603]  __schedule+0x2c0/0x870
> [ 2551.528606]  ? _cond_resched+0x19/0x40
> [ 2551.528632]  schedule+0x2c/0x70
> [ 2551.528637]  xs_talkv+0x1ec/0x2b0
> [ 2551.528642]  ? wait_woken+0x80/0x80
> [ 2551.528645]  xs_single+0x53/0x80
> [ 2551.528648]  xenbus_transaction_end+0x3b/0x70
> [ 2551.528651]  xenbus_file_free+0x5a/0x160
> [ 2551.528654]  xenbus_dev_queue_reply+0xc4/0x220
> [ 2551.528657]  xenbus_thread+0x7de/0x880
> [ 2551.528660]  ? wait_woken+0x80/0x80
> [ 2551.528665]  kthread+0x121/0x140
> [ 2551.528667]  ? xb_read+0x1d0/0x1d0
> [ 2551.528670]  ? kthread_park+0x90/0x90
> [ 2551.528673]  ret_from_fork+0x35/0x40
>
> Fix this by doing the cleanup via a workqueue instead.
>
> Reported-by: James Dingwall <james@dingwall.me.uk>
> Fixes: fd8aa9095a95c ("xen: optimize xenbus driver for multiple concurrent xenstore accesses")
> Cc: <stable@vger.kernel.org> # 4.11
> Signed-off-by: Juergen Gross <jgross@suse.com>

Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH] xen/xenbus: fix self-deadlock after killing user process

Posted by James Dingwall 1 week ago
On Tue, Oct 01, 2019 at 01:37:24PM -0400, Boris Ostrovsky wrote:
> On 10/1/19 11:03 AM, Juergen Gross wrote:
> > In case a user process using xenbus has open transactions and is killed
> > e.g. via ctrl-C the following cleanup of the allocated resources might
> > result in a deadlock due to trying to end a transaction in the xenbus
> > worker thread:
> >
> > [ 2551.474706] INFO: task xenbus:37 blocked for more than 120 seconds.
> > [ 2551.492215]       Tainted: P           OE     5.0.0-29-generic #5
> > [ 2551.510263] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> > [ 2551.528585] xenbus          D    0    37      2 0x80000080
> > [ 2551.528590] Call Trace:
> > [ 2551.528603]  __schedule+0x2c0/0x870
> > [ 2551.528606]  ? _cond_resched+0x19/0x40
> > [ 2551.528632]  schedule+0x2c/0x70
> > [ 2551.528637]  xs_talkv+0x1ec/0x2b0
> > [ 2551.528642]  ? wait_woken+0x80/0x80
> > [ 2551.528645]  xs_single+0x53/0x80
> > [ 2551.528648]  xenbus_transaction_end+0x3b/0x70
> > [ 2551.528651]  xenbus_file_free+0x5a/0x160
> > [ 2551.528654]  xenbus_dev_queue_reply+0xc4/0x220
> > [ 2551.528657]  xenbus_thread+0x7de/0x880
> > [ 2551.528660]  ? wait_woken+0x80/0x80
> > [ 2551.528665]  kthread+0x121/0x140
> > [ 2551.528667]  ? xb_read+0x1d0/0x1d0
> > [ 2551.528670]  ? kthread_park+0x90/0x90
> > [ 2551.528673]  ret_from_fork+0x35/0x40
> >
> > Fix this by doing the cleanup via a workqueue instead.
> >
> > Reported-by: James Dingwall <james@dingwall.me.uk>
> > Fixes: fd8aa9095a95c ("xen: optimize xenbus driver for multiple concurrent xenstore accesses")
> > Cc: <stable@vger.kernel.org> # 4.11
> > Signed-off-by: Juergen Gross <jgross@suse.com>
> 
> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> 

Tested-by: James Dingwall <james@dingwall.me.uk>

This patch does resolve the observed issue, although for my test case
(extreme and not representative of our normal workload) the worker still
gets blocked for some time if the xenstore-rm is interrupted and no
concurrent xenstore commands can run.  I assume that the worker
completes the rm and then does a rollback in the background, rather than
being interrupted early as a result of the userspace program being
terminated.

Thanks,
James

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel