xen/arch/arm/livepatch.c | 5 +++++ xen/arch/x86/livepatch.c | 40 ++++++++++++++++++++++++++++++++++++++++ xen/common/livepatch.c | 8 ++++++++ xen/include/xen/livepatch.h | 1 + 4 files changed, 54 insertions(+)
The safety of livepatching depends on every stack having been unwound, but
there is one corner case where this is not true. The Sharing/Paging/Monitor
infrastructure may use waitqueues, which copy the stack frame sideways and
longjmp() to a different vcpu.
This case is rare, and can be worked around by pausing the offending
domain(s), waiting for their rings to drain, then performing a livepatch.
In the case that there is an active waitqueue, fail the livepatch attempt with
-EBUSY, which is preferable to the fireworks which occur from trying to unwind
the old stack frame at a later point.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
CC: Ross Lagerwall <ross.lagerwall@citrix.com>
CC: Juergen Gross <jgross@suse.com>
This fix wants backporting, and is long overdue for posting upstream.
v1.5:
* Send out a non-stale patch this time.
---
xen/arch/arm/livepatch.c | 5 +++++
xen/arch/x86/livepatch.c | 40 ++++++++++++++++++++++++++++++++++++++++
xen/common/livepatch.c | 8 ++++++++
xen/include/xen/livepatch.h | 1 +
4 files changed, 54 insertions(+)
diff --git a/xen/arch/arm/livepatch.c b/xen/arch/arm/livepatch.c
index 00c5e2bc45..915e9d926a 100644
--- a/xen/arch/arm/livepatch.c
+++ b/xen/arch/arm/livepatch.c
@@ -18,6 +18,11 @@
void *vmap_of_xen_text;
+int arch_livepatch_safety_check(void)
+{
+ return 0;
+}
+
int arch_livepatch_quiesce(void)
{
mfn_t text_mfn;
diff --git a/xen/arch/x86/livepatch.c b/xen/arch/x86/livepatch.c
index c82cf53b9e..2749cbc5cf 100644
--- a/xen/arch/x86/livepatch.c
+++ b/xen/arch/x86/livepatch.c
@@ -10,10 +10,50 @@
#include <xen/vmap.h>
#include <xen/livepatch_elf.h>
#include <xen/livepatch.h>
+#include <xen/sched.h>
#include <asm/nmi.h>
#include <asm/livepatch.h>
+static bool has_active_waitqueue(const struct vm_event_domain *ved)
+{
+ /* ved may be xzalloc()'d without INIT_LIST_HEAD() yet. */
+ return (ved && !list_head_is_null(&ved->wq.list) &&
+ !list_empty(&ved->wq.list));
+}
+
+/*
+ * x86's implementation of waitqueue violates the livepatching safety principle
+ * of having unwound every CPU's stack before modifying live content.
+ *
+ * Search through every domain and check that no vCPUs have an active
+ * waitqueue.
+ */
+int arch_livepatch_safety_check(void)
+{
+ struct domain *d;
+
+ for_each_domain ( d )
+ {
+#ifdef CONFIG_MEM_SHARING
+ if ( has_active_waitqueue(d->vm_event_share) )
+ goto fail;
+#endif
+#ifdef CONFIG_MEM_PAGING
+ if ( has_active_waitqueue(d->vm_event_paging) )
+ goto fail;
+#endif
+ if ( has_active_waitqueue(d->vm_event_monitor) )
+ goto fail;
+ }
+
+ return 0;
+
+ fail:
+ printk(XENLOG_ERR LIVEPATCH "%pd found with active waitqueue\n", d);
+ return -EBUSY;
+}
+
int arch_livepatch_quiesce(void)
{
/* Disable WP to allow changes to read-only pages. */
diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
index 962647616a..8386e611f2 100644
--- a/xen/common/livepatch.c
+++ b/xen/common/livepatch.c
@@ -1060,6 +1060,14 @@ static int apply_payload(struct payload *data)
unsigned int i;
int rc;
+ rc = arch_livepatch_safety_check();
+ if ( rc )
+ {
+ printk(XENLOG_ERR LIVEPATCH "%s: Safety checks failed: %d\n",
+ data->name, rc);
+ return rc;
+ }
+
printk(XENLOG_INFO LIVEPATCH "%s: Applying %u functions\n",
data->name, data->nfuncs);
diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h
index 1b1817ca0d..69ede75d20 100644
--- a/xen/include/xen/livepatch.h
+++ b/xen/include/xen/livepatch.h
@@ -104,6 +104,7 @@ static inline int livepatch_verify_distance(const struct livepatch_func *func)
* These functions are called around the critical region patching live code,
* for an architecture to take make appropratie global state adjustments.
*/
+int arch_livepatch_safety_check(void);
int arch_livepatch_quiesce(void);
void arch_livepatch_revive(void);
--
2.11.0
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
On 05.11.19 20:49, Andrew Cooper wrote: > The safety of livepatching depends on every stack having been unwound, but > there is one corner case where this is not true. The Sharing/Paging/Monitor > infrastructure may use waitqueues, which copy the stack frame sideways and > longjmp() to a different vcpu. > > This case is rare, and can be worked around by pausing the offending > domain(s), waiting for their rings to drain, then performing a livepatch. > > In the case that there is an active waitqueue, fail the livepatch attempt with > -EBUSY, which is preforable to the fireworks which occur from trying to unwind > the old stack frame at a later point. > > Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Release-acked-by: Juergen Gross <jgross@suse.com> Juergen _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 11/5/19 11:49 AM, Andrew Cooper wrote: > The safety of livepatching depends on every stack having been unwound, but > there is one corner case where this is not true. The Sharing/Paging/Monitor > infrastructure may use waitqueues, which copy the stack frame sideways and > longjmp() to a different vcpu. > > This case is rare, and can be worked around by pausing the offending > domain(s), waiting for their rings to drain, then performing a livepatch. > > In the case that there is an active waitqueue, fail the livepatch attempt with > -EBUSY, which is preforable to the fireworks which occur from trying to unwind > the old stack frame at a later point. > > Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> > --- > CC: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > CC: Ross Lagerwall <ross.lagerwall@citrix.com> > CC: Juergen Gross <jgross@suse.com> > > This fix wants backporting, and is long overdue for posting upstream. > > v1.5: > * Send out a non-stale patch this time. > --- > xen/arch/arm/livepatch.c | 5 +++++ > xen/arch/x86/livepatch.c | 40 ++++++++++++++++++++++++++++++++++++++++ > xen/common/livepatch.c | 8 ++++++++ > xen/include/xen/livepatch.h | 1 + > 4 files changed, 54 insertions(+) > > diff --git a/xen/arch/arm/livepatch.c b/xen/arch/arm/livepatch.c > index 00c5e2bc45..915e9d926a 100644 > --- a/xen/arch/arm/livepatch.c > +++ b/xen/arch/arm/livepatch.c > @@ -18,6 +18,11 @@ > > void *vmap_of_xen_text; > > +int arch_livepatch_safety_check(void) > +{ > + return 0; > +} > + > int arch_livepatch_quiesce(void) > { > mfn_t text_mfn; > diff --git a/xen/arch/x86/livepatch.c b/xen/arch/x86/livepatch.c > index c82cf53b9e..2749cbc5cf 100644 > --- a/xen/arch/x86/livepatch.c > +++ b/xen/arch/x86/livepatch.c > @@ -10,10 +10,50 @@ > #include <xen/vmap.h> > #include <xen/livepatch_elf.h> > #include <xen/livepatch.h> > +#include <xen/sched.h> > > #include <asm/nmi.h> > #include <asm/livepatch.h> > > +static bool has_active_waitqueue(const struct vm_event_domain 
*ved) > +{ > + /* ved may be xzalloc()'d without INIT_LIST_HEAD() yet. */ > + return (ved && !list_head_is_null(&ved->wq.list) && > + !list_empty(&ved->wq.list)); > +} > + > +/* > + * x86's implementation of waitqueue violates the livepatching safey principle > + * of having unwound every CPUs stack before modifying live content. > + * > + * Search through every domain and check that no vCPUs have an active > + * waitqueue. > + */ > +int arch_livepatch_safety_check(void) > +{ > + struct domain *d; > + > + for_each_domain ( d ) > + { > +#ifdef CONFIG_MEM_SHARING > + if ( has_active_waitqueue(d->vm_event_share) ) > + goto fail; > +#endif > +#ifdef CONFIG_MEM_PAGING > + if ( has_active_waitqueue(d->vm_event_paging) )' Is this supposed to be CONFIG_HAS_MEM_PAGING? _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 11/5/19 11:49 AM, Andrew Cooper wrote: > The safety of livepatching depends on every stack having been unwound, but > there is one corner case where this is not true. The Sharing/Paging/Monitor > infrastructure may use waitqueues, which copy the stack frame sideways and > longjmp() to a different vcpu. > > This case is rare, and can be worked around by pausing the offending > domain(s), waiting for their rings to drain, then performing a livepatch. > > In the case that there is an active waitqueue, fail the livepatch attempt with > -EBUSY, which is preforable to the fireworks which occur from trying to unwind > the old stack frame at a later point. > > Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> > --- > CC: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > CC: Ross Lagerwall <ross.lagerwall@citrix.com> > CC: Juergen Gross <jgross@suse.com> > > This fix wants backporting, and is long overdue for posting upstream. > > v1.5: > * Send out a non-stale patch this time. > --- > xen/arch/arm/livepatch.c | 5 +++++ > xen/arch/x86/livepatch.c | 40 ++++++++++++++++++++++++++++++++++++++++ > xen/common/livepatch.c | 8 ++++++++ > xen/include/xen/livepatch.h | 1 + > 4 files changed, 54 insertions(+) > > diff --git a/xen/arch/arm/livepatch.c b/xen/arch/arm/livepatch.c > index 00c5e2bc45..915e9d926a 100644 > --- a/xen/arch/arm/livepatch.c > +++ b/xen/arch/arm/livepatch.c > @@ -18,6 +18,11 @@ > > void *vmap_of_xen_text; > > +int arch_livepatch_safety_check(void) > +{ > + return 0; > +} > + > int arch_livepatch_quiesce(void) > { > mfn_t text_mfn; > diff --git a/xen/arch/x86/livepatch.c b/xen/arch/x86/livepatch.c > index c82cf53b9e..2749cbc5cf 100644 > --- a/xen/arch/x86/livepatch.c > +++ b/xen/arch/x86/livepatch.c > @@ -10,10 +10,50 @@ > #include <xen/vmap.h> > #include <xen/livepatch_elf.h> > #include <xen/livepatch.h> > +#include <xen/sched.h> > > #include <asm/nmi.h> > #include <asm/livepatch.h> > > +static bool has_active_waitqueue(const struct vm_event_domain 
*ved) > +{ > + /* ved may be xzalloc()'d without INIT_LIST_HEAD() yet. */ > + return (ved && !list_head_is_null(&ved->wq.list) && > + !list_empty(&ved->wq.list)); > +} > + > +/* > + * x86's implementation of waitqueue violates the livepatching safey principle > + * of having unwound every CPUs stack before modifying live content. > + * > + * Search through every domain and check that no vCPUs have an active > + * waitqueue. > + */ > +int arch_livepatch_safety_check(void) > +{ > + struct domain *d; > + > + for_each_domain ( d ) > + { > +#ifdef CONFIG_MEM_SHARING > + if ( has_active_waitqueue(d->vm_event_share) ) > + goto fail; > +#endif > +#ifdef CONFIG_MEM_PAGING > + if ( has_active_waitqueue(d->vm_event_paging) ) > + goto fail; > +#endif > + if ( has_active_waitqueue(d->vm_event_monitor) ) > + goto fail; > + } > + > + return 0; > + > + fail: > + printk(XENLOG_ERR LIVEPATCH "%pd found with active waitqueue\n", d); > + return -EBUSY; > +} > + > int arch_livepatch_quiesce(void) > { > /* Disable WP to allow changes to read-only pages. */ > diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c > index 962647616a..8386e611f2 100644 > --- a/xen/common/livepatch.c > +++ b/xen/common/livepatch.c > @@ -1060,6 +1060,14 @@ static int apply_payload(struct payload *data) > unsigned int i; > int rc; > > + rc = arch_livepatch_safety_check(); > + if ( rc ) > + { > + printk(XENLOG_ERR LIVEPATCH "%s: Safety checks failed: %d\n", > + data->name, rc); > + return rc; > + } > + Would it make sense to call arch_livepatch_safety_check from arch_livepatch_quiesce rather than directly, so that arch_livepatch_safety_check is also called from revert_payload? --Sarah _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
© 2016 - 2024 Red Hat, Inc.