[PATCH v2] virtio_console: fix race between hvc put_chars and virtqueue teardown on freeze

Sungho Bae posted 1 patch 5 days, 8 hours ago
drivers/char/virtio_console.c | 80 ++++++++++++++++++++++++++++++++++-
1 file changed, 78 insertions(+), 2 deletions(-)
[PATCH v2] virtio_console: fix race between hvc put_chars and virtqueue teardown on freeze
Posted by Sungho Bae 5 days, 8 hours ago
From: Sungho Bae <baver.bae@lge.com>

With no_console_suspend enabled, hvc console output can continue while
virtio_console is freezing. In that window, put_chars() can still enqueue
buffers to the output virtqueue while virtcons_freeze() is tearing queues
down, triggering a BUG_ON() in virtqueue_detach_unused_buf_split():

  BUG_ON(vq->vq.num_free != vq->split.vring.num)

Add a pm_freezing flag to ports_device. Set it via smp_store_release()
at the start of virtcons_freeze(); put_chars() and __send_to_port() read
it via smp_load_acquire() and drop output while it is set.

The check in __send_to_port() is placed under outvq_lock, making the
check and the following virtqueue_add_outbuf() atomic with respect to
remove_port_data(), which also acquires outvq_lock. Once
remove_port_data() returns for a given port, no concurrent
__send_to_port() can add buffers before remove_vqs() tears down the vq.

After setting pm_freezing, acquire and release outvq_lock for each port
before calling virtio_reset_device(). A TX thread that already passed
the pm_freezing check may still hold outvq_lock while spinning for host
acknowledgment (the nonblock=false hvc path); this lock/unlock pass
acts as a drain, ensuring all such threads have completed before the
device is reset.

Clear pm_freezing in virtcons_restore() only after all port->out_vq
pointers have been reassigned to the newly allocated virtqueues,
preventing TX paths from dereferencing freed vqs during restore.

Also fix two races in __send_to_port() uncovered by this work: load
port->portdev via READ_ONCE() and check for NULL to guard against
concurrent hot-unplug, and move out_vq = port->out_vq inside
outvq_lock after the pm_freezing check to avoid a stale pointer.

Link: https://sashiko.dev/#/patchset/20260515225259.1054-1-baver.bae%40gmail.com
Signed-off-by: Sungho Bae <baver.bae@lge.com>
---
 drivers/char/virtio_console.c | 80 ++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 2 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 9a33217c68d9..fad673f733a8 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -157,6 +157,12 @@ struct ports_device {
 
 	/* Major number for this device.  Ports will be created as minors. */
 	int chr_major;
+
+	/*
+	 * Set to true during PM freeze to block TX paths that may race
+	 * with virtqueue teardown (e.g. hvc put_chars with no_console_suspend).
+	 */
+	bool pm_freezing;
 };
 
 struct port_stats {
@@ -601,11 +607,30 @@ static ssize_t __send_to_port(struct port *port, struct scatterlist *sg,
 	int err;
 	unsigned long flags;
 	unsigned int len;
-
-	out_vq = port->out_vq;
+	struct ports_device *portdev;
 
 	spin_lock_irqsave(&port->outvq_lock, flags);
 
+	portdev = READ_ONCE(port->portdev);
+
+	if (!portdev) {
+		in_count = 0;
+		goto done;
+	}
+
+	/*
+	 * Check freeze flag under the lock so that the flag check and
+	 * virtqueue_add_outbuf() are atomic with respect to
+	 * remove_port_data() which also takes outvq_lock.  This
+	 * guarantees that once remove_port_data() returns, no new
+	 * buffers can be added before remove_vqs() tears down the vq.
+	 * Pairs with smp_store_release() in virtcons_freeze/restore.
+	 */
+	if (smp_load_acquire(&portdev->pm_freezing)) /* pairs with freeze/restore */
+		goto done;
+
+	out_vq = port->out_vq;
+
 	reclaim_consumed_buffers(port);
 
 	err = virtqueue_add_outbuf(out_vq, sg, nents, data, GFP_ATOMIC);
@@ -1110,11 +1135,36 @@ static ssize_t put_chars(u32 vtermno, const u8 *buf, size_t count)
 	struct scatterlist sg[1];
 	void *data;
 	int ret;
+	struct ports_device *portdev;
 
 	port = find_port_by_vtermno(vtermno);
 	if (!port)
 		return -EPIPE;
 
+	/*
+	 * Silently drop output in two cases, both by returning count so
+	 * that the hvc layer does not spin-retry:
+	 *
+	 *  1. Device hot-unplug (!portdev): portdev was NULLed by
+	 *     unplug_port() after hvc_remove() was already called, so
+	 *     the hvc layer will stop invoking put_chars() very soon.
+	 *     Returning count avoids a pointless retry loop in the
+	 *     interim.
+	 *
+	 *  2. PM freeze (pm_freezing): the hvc console stays active
+	 *     under no_console_suspend but virtqueues are being torn
+	 *     down.  Drop the output silently so the hvc layer does not
+	 *     stall suspend.
+	 *
+	 * This early check avoids a pointless GFP_ATOMIC allocation;
+	 * __send_to_port() rechecks under outvq_lock for correctness.
+	 * Pairs with smp_store_release() in virtcons_freeze/restore.
+	 */
+	portdev = READ_ONCE(port->portdev);
+	if (!portdev ||
+	    smp_load_acquire(&portdev->pm_freezing)) /* pairs with freeze/restore */
+		return count;
+
 	data = kmemdup(buf, count, GFP_ATOMIC);
 	if (!data)
 		return -ENOMEM;
@@ -1972,6 +2022,7 @@ static int virtcons_probe(struct virtio_device *vdev)
 	/* Attach this portdev to this virtio_device, and vice-versa. */
 	portdev->vdev = vdev;
 	vdev->priv = portdev;
+	portdev->pm_freezing = false;
 
 	portdev->chr_major = register_chrdev(0, "virtio-portsdev",
 					     &portdev_fops);
@@ -2092,6 +2143,24 @@ static int virtcons_freeze(struct virtio_device *vdev)
 
 	portdev = vdev->priv;
 
+	/*
+	 * Block TX paths (put_chars, __send_to_port) before resetting the
+	 * device and tearing down virtqueues.  This prevents races with
+	 * hvc console writes that remain active under no_console_suspend.
+	 */
+	smp_store_release(&portdev->pm_freezing, true);
+
+	/*
+	 * Synchronize with any concurrent __send_to_port() that may have
+	 * passed the pm_freezing check. By acquiring and releasing the
+	 * outvq_lock for each port, we ensure all active TX paths have
+	 * completed before we reset the device.
+	 */
+	list_for_each_entry(port, &portdev->ports, list) {
+		spin_lock_irq(&port->outvq_lock);
+		spin_unlock_irq(&port->outvq_lock);
+	}
+
 	virtio_reset_device(vdev);
 
 	if (use_multiport(portdev))
@@ -2153,6 +2222,13 @@ static int virtcons_restore(struct virtio_device *vdev)
 		if (port->guest_connected)
 			send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 1);
 	}
+
+	/*
+	 * Allow TX paths only after all port->out_vq pointers have
+	 * been reassigned to the newly allocated virtqueues.
+	 */
+	smp_store_release(&portdev->pm_freezing, false);
+
 	return 0;
 }
 #endif
-- 
2.43.0