Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage
of several hypercalls:
* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST
* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX
* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE
* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX
Pick the most efficient hypercalls available.
Signed-off-by: Wei Liu <liuwe@microsoft.com>
---
v3:
1. Address more comments.
2. Fix usage of max_vp_index.
3. Use the fill_gva_list algorithm from Linux.
v2:
1. Address Roger and Jan's comments re types etc.
2. Fix pointer arithmetic.
3. Misc improvement to code.
---
xen/arch/x86/guest/hyperv/Makefile | 1 +
xen/arch/x86/guest/hyperv/private.h | 9 ++
xen/arch/x86/guest/hyperv/tlb.c | 173 +++++++++++++++++++++++++++-
xen/arch/x86/guest/hyperv/util.c | 74 ++++++++++++
4 files changed, 256 insertions(+), 1 deletion(-)
create mode 100644 xen/arch/x86/guest/hyperv/util.c
diff --git a/xen/arch/x86/guest/hyperv/Makefile b/xen/arch/x86/guest/hyperv/Makefile
index 18902c33e9..0e39410968 100644
--- a/xen/arch/x86/guest/hyperv/Makefile
+++ b/xen/arch/x86/guest/hyperv/Makefile
@@ -1,2 +1,3 @@
obj-y += hyperv.o
obj-y += tlb.o
+obj-y += util.o
diff --git a/xen/arch/x86/guest/hyperv/private.h b/xen/arch/x86/guest/hyperv/private.h
index 509bedaafa..79a77930a0 100644
--- a/xen/arch/x86/guest/hyperv/private.h
+++ b/xen/arch/x86/guest/hyperv/private.h
@@ -24,12 +24,21 @@
#include <xen/cpumask.h>
#include <xen/percpu.h>
+#include <xen/types.h>
DECLARE_PER_CPU(void *, hv_input_page);
DECLARE_PER_CPU(void *, hv_vp_assist);
DECLARE_PER_CPU(unsigned int, hv_vp_index);
+static inline unsigned int hv_vp_index(unsigned int cpu)
+{
+ return per_cpu(hv_vp_index, cpu);
+}
+
int hyperv_flush_tlb(const cpumask_t *mask, const void *va,
unsigned int flags);
+/* Returns number of banks, -ev if error */
+int cpumask_to_vpset(struct hv_vpset *vpset, const cpumask_t *mask);
+
#endif /* __XEN_HYPERV_PRIVIATE_H__ */
diff --git a/xen/arch/x86/guest/hyperv/tlb.c b/xen/arch/x86/guest/hyperv/tlb.c
index 48f527229e..8cd1c6f0ed 100644
--- a/xen/arch/x86/guest/hyperv/tlb.c
+++ b/xen/arch/x86/guest/hyperv/tlb.c
@@ -19,17 +19,188 @@
* Copyright (c) 2020 Microsoft.
*/
+#include <xen/cpu.h>
#include <xen/cpumask.h>
#include <xen/errno.h>
+#include <asm/guest/hyperv.h>
+#include <asm/guest/hyperv-hcall.h>
+#include <asm/guest/hyperv-tlfs.h>
+
#include "private.h"
+/*
+ * It is possible to encode up to 4096 pages using the lower 12 bits
+ * in an element of gva_list
+ */
+#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
+
+static unsigned int fill_gva_list(uint64_t *gva_list, const void *va,
+ unsigned int order)
+{
+ unsigned long cur = (unsigned long)va;
+ /* end is 1 past the range to be flushed */
+ unsigned long end = cur + (PAGE_SIZE << order);
+ unsigned int n = 0;
+
+ do {
+ unsigned long diff = end - cur;
+
+ gva_list[n] = cur & PAGE_MASK;
+
+ /*
+ * Use lower 12 bits to encode the number of additional pages
+ * to flush
+ */
+ if ( diff >= HV_TLB_FLUSH_UNIT )
+ {
+ gva_list[n] |= ~PAGE_MASK;
+ cur += HV_TLB_FLUSH_UNIT;
+ }
+ else
+ {
+ gva_list[n] |= (diff - 1) >> PAGE_SHIFT;
+ cur = end;
+ }
+
+ n++;
+ } while ( cur < end );
+
+ return n;
+}
+
+static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va,
+ unsigned int flags)
+{
+ struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page);
+ int nr_banks;
+ unsigned int max_gvas, order = flags & FLUSH_ORDER_MASK;
+ uint64_t *gva_list;
+
+ if ( !flush || local_irq_is_enabled() )
+ {
+ ASSERT_UNREACHABLE();
+ return ~0ULL;
+ }
+
+ if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) )
+ return ~0ULL;
+
+ flush->address_space = 0;
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ if ( !(flags & FLUSH_TLB_GLOBAL) )
+ flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+
+ nr_banks = cpumask_to_vpset(&flush->hv_vp_set, mask);
+ if ( nr_banks < 0 )
+ return ~0ULL;
+
+ max_gvas =
+ (PAGE_SIZE - sizeof(*flush) - nr_banks *
+ sizeof(flush->hv_vp_set.bank_contents[0])) /
+ sizeof(uint64_t); /* gva is represented as uint64_t */
+
+ /*
+ * Flush the entire address space if va is NULL or if there is not
+ * enough space for gva_list.
+ */
+ if ( !va || (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT > max_gvas )
+ return hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0,
+ nr_banks, virt_to_maddr(flush), 0);
+
+ /*
+ * The calculation of gva_list address requires the structure to
+ * be 64 bits aligned.
+ */
+ BUILD_BUG_ON(sizeof(*flush) % sizeof(uint64_t));
+ gva_list = (uint64_t *)flush + sizeof(*flush) / sizeof(uint64_t) + nr_banks;
+
+ return hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
+ fill_gva_list(gva_list, va, order),
+ nr_banks, virt_to_maddr(flush), 0);
+}
+
+/* Maximum number of gvas for hv_tlb_flush */
+#define MAX_GVAS ((PAGE_SIZE - sizeof(struct hv_tlb_flush)) / sizeof(uint64_t))
+
int hyperv_flush_tlb(const cpumask_t *mask, const void *va,
unsigned int flags)
{
- return -EOPNOTSUPP;
+ unsigned long irq_flags;
+ struct hv_tlb_flush *flush = this_cpu(hv_input_page);
+ unsigned int order = flags & FLUSH_ORDER_MASK;
+ uint64_t ret;
+
+ if ( !flush || cpumask_empty(mask) )
+ {
+ ASSERT_UNREACHABLE();
+ return -EINVAL;
+ }
+
+ local_irq_save(irq_flags);
+
+ flush->address_space = 0;
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush->processor_mask = 0;
+ if ( !(flags & FLUSH_TLB_GLOBAL) )
+ flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+
+ if ( cpumask_equal(mask, &cpu_online_map) )
+ flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+ else
+ {
+ unsigned int cpu;
+
+ /*
+ * Normally VP indices are in ascending order and match Xen's
+ * idea of CPU ids. Check the last index to see if VP index is
+ * >= 64. If so, we can skip setting up parameters for
+ * non-applicable hypercalls without looking further.
+ */
+ if ( hv_vp_index(cpumask_last(mask)) >= 64 )
+ goto do_ex_hypercall;
+
+ for_each_cpu ( cpu, mask )
+ {
+ unsigned int vpid = hv_vp_index(cpu);
+
+ if ( vpid >= ms_hyperv.max_vp_index )
+ {
+ local_irq_restore(irq_flags);
+ return -ENXIO;
+ }
+
+ if ( vpid >= 64 )
+ goto do_ex_hypercall;
+
+ __set_bit(vpid, &flush->processor_mask);
+ }
+ }
+
+ /*
+ * Flush the entire address space if va is NULL or if there is not
+ * enough space for gva_list.
+ */
+ if ( !va || (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT > MAX_GVAS )
+ ret = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+ virt_to_maddr(flush), 0);
+ else
+ ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
+ fill_gva_list(flush->gva_list, va, order),
+ 0, virt_to_maddr(flush), 0);
+ goto done;
+
+ do_ex_hypercall:
+ ret = flush_tlb_ex(mask, va, flags);
+
+ done:
+ local_irq_restore(irq_flags);
+
+ return ret & HV_HYPERCALL_RESULT_MASK ? -ENXIO : 0;
}
+#undef MAX_GVAS
+
/*
* Local variables:
* mode: C
diff --git a/xen/arch/x86/guest/hyperv/util.c b/xen/arch/x86/guest/hyperv/util.c
new file mode 100644
index 0000000000..0abb37b05f
--- /dev/null
+++ b/xen/arch/x86/guest/hyperv/util.c
@@ -0,0 +1,74 @@
+/******************************************************************************
+ * arch/x86/guest/hyperv/util.c
+ *
+ * Hyper-V utility functions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2020 Microsoft.
+ */
+
+#include <xen/cpu.h>
+#include <xen/cpumask.h>
+#include <xen/errno.h>
+
+#include <asm/guest/hyperv.h>
+#include <asm/guest/hyperv-tlfs.h>
+
+#include "private.h"
+
+int cpumask_to_vpset(struct hv_vpset *vpset,
+ const cpumask_t *mask)
+{
+ int nr = 1;
+ unsigned int cpu, vcpu_bank, vcpu_offset;
+ unsigned int max_banks = ms_hyperv.max_vp_index / 64;
+
+ /* Up to 64 banks can be represented by valid_bank_mask */
+ if ( max_banks > 64 )
+ return -E2BIG;
+
+ /* Clear all banks to avoid flushing unwanted CPUs */
+ for ( vcpu_bank = 0; vcpu_bank < max_banks; vcpu_bank++ )
+ vpset->bank_contents[vcpu_bank] = 0;
+
+ vpset->valid_bank_mask = 0;
+ vpset->format = HV_GENERIC_SET_SPARSE_4K;
+
+ for_each_cpu ( cpu, mask )
+ {
+ unsigned int vcpu = hv_vp_index(cpu);
+
+ vcpu_bank = vcpu / 64;
+ vcpu_offset = vcpu % 64;
+
+ __set_bit(vcpu_offset, &vpset->bank_contents[vcpu_bank]);
+ __set_bit(vcpu_bank, &vpset->valid_bank_mask);
+
+ if ( vcpu_bank >= nr )
+ nr = vcpu_bank + 1;
+ }
+
+ return nr;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--
2.20.1
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
On Mon, Feb 17, 2020 at 01:55:17PM +0000, Wei Liu wrote:
> Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage
> of several hypercalls:
>
> * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST
> * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX
> * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE
> * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX
>
> Pick the most efficient hypercalls available.
>
> Signed-off-by: Wei Liu <liuwe@microsoft.com>
Just two comments below.
> ---
> v3:
> 1. Address more comments.
> 2. Fix usage of max_vp_index.
> 3. Use the fill_gva_list algorithm from Linux.
>
> v2:
> 1. Address Roger and Jan's comments re types etc.
> 2. Fix pointer arithmetic.
> 3. Misc improvement to code.
> ---
> xen/arch/x86/guest/hyperv/Makefile | 1 +
> xen/arch/x86/guest/hyperv/private.h | 9 ++
> xen/arch/x86/guest/hyperv/tlb.c | 173 +++++++++++++++++++++++++++-
> xen/arch/x86/guest/hyperv/util.c | 74 ++++++++++++
> 4 files changed, 256 insertions(+), 1 deletion(-)
> create mode 100644 xen/arch/x86/guest/hyperv/util.c
>
> diff --git a/xen/arch/x86/guest/hyperv/Makefile b/xen/arch/x86/guest/hyperv/Makefile
> index 18902c33e9..0e39410968 100644
> --- a/xen/arch/x86/guest/hyperv/Makefile
> +++ b/xen/arch/x86/guest/hyperv/Makefile
> @@ -1,2 +1,3 @@
> obj-y += hyperv.o
> obj-y += tlb.o
> +obj-y += util.o
> diff --git a/xen/arch/x86/guest/hyperv/private.h b/xen/arch/x86/guest/hyperv/private.h
> index 509bedaafa..79a77930a0 100644
> --- a/xen/arch/x86/guest/hyperv/private.h
> +++ b/xen/arch/x86/guest/hyperv/private.h
> @@ -24,12 +24,21 @@
>
> #include <xen/cpumask.h>
> #include <xen/percpu.h>
> +#include <xen/types.h>
Do you still need to include types.h?
None of the additions to this header done in this patch seems to
require it AFAICT.
>
> DECLARE_PER_CPU(void *, hv_input_page);
> DECLARE_PER_CPU(void *, hv_vp_assist);
> DECLARE_PER_CPU(unsigned int, hv_vp_index);
>
> +static inline unsigned int hv_vp_index(unsigned int cpu)
> +{
> + return per_cpu(hv_vp_index, cpu);
> +}
> +
> int hyperv_flush_tlb(const cpumask_t *mask, const void *va,
> unsigned int flags);
>
> +/* Returns number of banks, -ev if error */
> +int cpumask_to_vpset(struct hv_vpset *vpset, const cpumask_t *mask);
> +
> #endif /* __XEN_HYPERV_PRIVIATE_H__ */
> diff --git a/xen/arch/x86/guest/hyperv/tlb.c b/xen/arch/x86/guest/hyperv/tlb.c
> index 48f527229e..8cd1c6f0ed 100644
> --- a/xen/arch/x86/guest/hyperv/tlb.c
> +++ b/xen/arch/x86/guest/hyperv/tlb.c
> @@ -19,17 +19,188 @@
> * Copyright (c) 2020 Microsoft.
> */
>
> +#include <xen/cpu.h>
> #include <xen/cpumask.h>
> #include <xen/errno.h>
>
> +#include <asm/guest/hyperv.h>
> +#include <asm/guest/hyperv-hcall.h>
> +#include <asm/guest/hyperv-tlfs.h>
> +
> #include "private.h"
>
> +/*
> + * It is possible to encode up to 4096 pages using the lower 12 bits
> + * in an element of gva_list
> + */
> +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
> +
> +static unsigned int fill_gva_list(uint64_t *gva_list, const void *va,
> + unsigned int order)
> +{
> + unsigned long cur = (unsigned long)va;
> + /* end is 1 past the range to be flushed */
> + unsigned long end = cur + (PAGE_SIZE << order);
> + unsigned int n = 0;
> +
> + do {
> + unsigned long diff = end - cur;
> +
> + gva_list[n] = cur & PAGE_MASK;
> +
> + /*
> + * Use lower 12 bits to encode the number of additional pages
> + * to flush
> + */
> + if ( diff >= HV_TLB_FLUSH_UNIT )
> + {
> + gva_list[n] |= ~PAGE_MASK;
> + cur += HV_TLB_FLUSH_UNIT;
> + }
> + else
> + {
> + gva_list[n] |= (diff - 1) >> PAGE_SHIFT;
> + cur = end;
> + }
> +
> + n++;
> + } while ( cur < end );
> +
> + return n;
> +}
> +
> +static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va,
> + unsigned int flags)
> +{
> + struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page);
> + int nr_banks;
> + unsigned int max_gvas, order = flags & FLUSH_ORDER_MASK;
> + uint64_t *gva_list;
> +
> + if ( !flush || local_irq_is_enabled() )
> + {
> + ASSERT_UNREACHABLE();
> + return ~0ULL;
> + }
> +
> + if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) )
> + return ~0ULL;
> +
> + flush->address_space = 0;
> + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
> + if ( !(flags & FLUSH_TLB_GLOBAL) )
> + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
> +
> + nr_banks = cpumask_to_vpset(&flush->hv_vp_set, mask);
> + if ( nr_banks < 0 )
> + return ~0ULL;
> +
> + max_gvas =
> + (PAGE_SIZE - sizeof(*flush) - nr_banks *
> + sizeof(flush->hv_vp_set.bank_contents[0])) /
> + sizeof(uint64_t); /* gva is represented as uint64_t */
> +
> + /*
> + * Flush the entire address space if va is NULL or if there is not
> + * enough space for gva_list.
> + */
> + if ( !va || (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT > max_gvas )
> + return hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0,
> + nr_banks, virt_to_maddr(flush), 0);
> +
> + /*
> + * The calculation of gva_list address requires the structure to
> + * be 64 bits aligned.
> + */
> + BUILD_BUG_ON(sizeof(*flush) % sizeof(uint64_t));
> + gva_list = (uint64_t *)flush + sizeof(*flush) / sizeof(uint64_t) + nr_banks;
> +
> + return hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
> + fill_gva_list(gva_list, va, order),
> + nr_banks, virt_to_maddr(flush), 0);
> +}
> +
> +/* Maximum number of gvas for hv_tlb_flush */
> +#define MAX_GVAS ((PAGE_SIZE - sizeof(struct hv_tlb_flush)) / sizeof(uint64_t))
> +
> int hyperv_flush_tlb(const cpumask_t *mask, const void *va,
> unsigned int flags)
> {
> - return -EOPNOTSUPP;
> + unsigned long irq_flags;
> + struct hv_tlb_flush *flush = this_cpu(hv_input_page);
> + unsigned int order = flags & FLUSH_ORDER_MASK;
I think you need a - 1 here, as FLUSH_ORDER(x) is defined as ((x)+1).
So if a user has specified order 0 here you would get order 1 instead.
unsigned int order = (flags - 1) & FLUSH_ORDER_MASK;
Sorry for not noticing this earlier.
> + uint64_t ret;
> +
> + if ( !flush || cpumask_empty(mask) )
> + {
> + ASSERT_UNREACHABLE();
> + return -EINVAL;
> + }
> +
> + local_irq_save(irq_flags);
I think you disable interrupts in order to prevent re-entering this
function, and hence avoid an interrupt from triggering in the middle
and also attempting to do a TLB flush using the same per-CPU input
page.
As pointed out to me by Jan, we can also get #MC and #NMI which will
still happen despite interrupts being disabled, and hence you might
want to assert that you are not in #MC or #NMI context before
accessing the per-CPU hv_input_page (or else just return an error
and avoid using the assisted flush). I have a patch that will
hopefully be able to signal when in #MC or #NMI context.
Thanks, Roger.
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
On Mon, Feb 17, 2020 at 06:34:12PM +0100, Roger Pau Monné wrote:
> On Mon, Feb 17, 2020 at 01:55:17PM +0000, Wei Liu wrote:
> > Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage
> > of several hypercalls:
> >
> > * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST
> > * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX
> > * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE
> > * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX
> >
> > Pick the most efficient hypercalls available.
> >
> > Signed-off-by: Wei Liu <liuwe@microsoft.com>
>
> Just two comments below.
>
> > ---
> > v3:
> > 1. Address more comments.
> > 2. Fix usage of max_vp_index.
> > 3. Use the fill_gva_list algorithm from Linux.
> >
> > v2:
> > 1. Address Roger and Jan's comments re types etc.
> > 2. Fix pointer arithmetic.
> > 3. Misc improvement to code.
> > ---
> > xen/arch/x86/guest/hyperv/Makefile | 1 +
> > xen/arch/x86/guest/hyperv/private.h | 9 ++
> > xen/arch/x86/guest/hyperv/tlb.c | 173 +++++++++++++++++++++++++++-
> > xen/arch/x86/guest/hyperv/util.c | 74 ++++++++++++
> > 4 files changed, 256 insertions(+), 1 deletion(-)
> > create mode 100644 xen/arch/x86/guest/hyperv/util.c
> >
> > diff --git a/xen/arch/x86/guest/hyperv/Makefile b/xen/arch/x86/guest/hyperv/Makefile
> > index 18902c33e9..0e39410968 100644
> > --- a/xen/arch/x86/guest/hyperv/Makefile
> > +++ b/xen/arch/x86/guest/hyperv/Makefile
> > @@ -1,2 +1,3 @@
> > obj-y += hyperv.o
> > obj-y += tlb.o
> > +obj-y += util.o
> > diff --git a/xen/arch/x86/guest/hyperv/private.h b/xen/arch/x86/guest/hyperv/private.h
> > index 509bedaafa..79a77930a0 100644
> > --- a/xen/arch/x86/guest/hyperv/private.h
> > +++ b/xen/arch/x86/guest/hyperv/private.h
> > @@ -24,12 +24,21 @@
> >
> > #include <xen/cpumask.h>
> > #include <xen/percpu.h>
> > +#include <xen/types.h>
>
> Do you still need to include types.h?
>
Not anymore.
> None of the additions to this header done in this patch seems to
> require it AFAICT.
>
> >
> > DECLARE_PER_CPU(void *, hv_input_page);
> > DECLARE_PER_CPU(void *, hv_vp_assist);
> > DECLARE_PER_CPU(unsigned int, hv_vp_index);
> >
> > +static inline unsigned int hv_vp_index(unsigned int cpu)
> > +{
> > + return per_cpu(hv_vp_index, cpu);
> > +}
> > +
> > int hyperv_flush_tlb(const cpumask_t *mask, const void *va,
> > unsigned int flags);
> >
> > +/* Returns number of banks, -ev if error */
> > +int cpumask_to_vpset(struct hv_vpset *vpset, const cpumask_t *mask);
> > +
> > #endif /* __XEN_HYPERV_PRIVIATE_H__ */
> > diff --git a/xen/arch/x86/guest/hyperv/tlb.c b/xen/arch/x86/guest/hyperv/tlb.c
> > index 48f527229e..8cd1c6f0ed 100644
> > --- a/xen/arch/x86/guest/hyperv/tlb.c
> > +++ b/xen/arch/x86/guest/hyperv/tlb.c
> > @@ -19,17 +19,188 @@
> > * Copyright (c) 2020 Microsoft.
> > */
> >
> > +#include <xen/cpu.h>
> > #include <xen/cpumask.h>
> > #include <xen/errno.h>
> >
> > +#include <asm/guest/hyperv.h>
> > +#include <asm/guest/hyperv-hcall.h>
> > +#include <asm/guest/hyperv-tlfs.h>
> > +
> > #include "private.h"
> >
> > +/*
> > + * It is possible to encode up to 4096 pages using the lower 12 bits
> > + * in an element of gva_list
> > + */
> > +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
> > +
> > +static unsigned int fill_gva_list(uint64_t *gva_list, const void *va,
> > + unsigned int order)
> > +{
> > + unsigned long cur = (unsigned long)va;
> > + /* end is 1 past the range to be flushed */
> > + unsigned long end = cur + (PAGE_SIZE << order);
> > + unsigned int n = 0;
> > +
> > + do {
> > + unsigned long diff = end - cur;
> > +
> > + gva_list[n] = cur & PAGE_MASK;
> > +
> > + /*
> > + * Use lower 12 bits to encode the number of additional pages
> > + * to flush
> > + */
> > + if ( diff >= HV_TLB_FLUSH_UNIT )
> > + {
> > + gva_list[n] |= ~PAGE_MASK;
> > + cur += HV_TLB_FLUSH_UNIT;
> > + }
> > + else
> > + {
> > + gva_list[n] |= (diff - 1) >> PAGE_SHIFT;
> > + cur = end;
> > + }
> > +
> > + n++;
> > + } while ( cur < end );
> > +
> > + return n;
> > +}
> > +
> > +static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va,
> > + unsigned int flags)
> > +{
> > + struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page);
> > + int nr_banks;
> > + unsigned int max_gvas, order = flags & FLUSH_ORDER_MASK;
> > + uint64_t *gva_list;
> > +
> > + if ( !flush || local_irq_is_enabled() )
> > + {
> > + ASSERT_UNREACHABLE();
> > + return ~0ULL;
> > + }
> > +
> > + if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) )
> > + return ~0ULL;
> > +
> > + flush->address_space = 0;
> > + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
> > + if ( !(flags & FLUSH_TLB_GLOBAL) )
> > + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
> > +
> > + nr_banks = cpumask_to_vpset(&flush->hv_vp_set, mask);
> > + if ( nr_banks < 0 )
> > + return ~0ULL;
> > +
> > + max_gvas =
> > + (PAGE_SIZE - sizeof(*flush) - nr_banks *
> > + sizeof(flush->hv_vp_set.bank_contents[0])) /
> > + sizeof(uint64_t); /* gva is represented as uint64_t */
> > +
> > + /*
> > + * Flush the entire address space if va is NULL or if there is not
> > + * enough space for gva_list.
> > + */
> > + if ( !va || (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT > max_gvas )
> > + return hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0,
> > + nr_banks, virt_to_maddr(flush), 0);
> > +
> > + /*
> > + * The calculation of gva_list address requires the structure to
> > + * be 64 bits aligned.
> > + */
> > + BUILD_BUG_ON(sizeof(*flush) % sizeof(uint64_t));
> > + gva_list = (uint64_t *)flush + sizeof(*flush) / sizeof(uint64_t) + nr_banks;
> > +
> > + return hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
> > + fill_gva_list(gva_list, va, order),
> > + nr_banks, virt_to_maddr(flush), 0);
> > +}
> > +
> > +/* Maximum number of gvas for hv_tlb_flush */
> > +#define MAX_GVAS ((PAGE_SIZE - sizeof(struct hv_tlb_flush)) / sizeof(uint64_t))
> > +
> > int hyperv_flush_tlb(const cpumask_t *mask, const void *va,
> > unsigned int flags)
> > {
> > - return -EOPNOTSUPP;
> > + unsigned long irq_flags;
> > + struct hv_tlb_flush *flush = this_cpu(hv_input_page);
> > + unsigned int order = flags & FLUSH_ORDER_MASK;
>
> I think you need a - 1 here, as FLUSH_ORDER(x) is defined as ((x)+1).
> So if a user has specified order 0 here you would get order 1 instead.
>
> unsigned int order = (flags - 1) & FLUSH_ORDER_MASK;
Yes, indeed. That's what flush_area_local does. I will fix this.
BTW, I think your series also needs fixing — specifically the patch that
introduced the hypervisor_flush_tlb hook. I took the snippet from that patch directly.
>
> Sorry for not noticing this earlier.
Thanks for noticing this. :-)
>
> > + uint64_t ret;
> > +
> > + if ( !flush || cpumask_empty(mask) )
> > + {
> > + ASSERT_UNREACHABLE();
> > + return -EINVAL;
> > + }
> > +
> > + local_irq_save(irq_flags);
>
> I think you disable interrupts in order to prevent re-entering this
> function, and hence avoid an interrupt from triggering in the middle
> and also attempting to do a TLB flush using the same per-CPU input
> page.
>
> As pointed out to me by Jan, we can also get #MC and #NMI which will
> still happen despite interrupts being disabled, and hence you might
> want to assert that you are not in #MC or #NMI context before
> accessing the per-CPU hv_input_page (or else just return an error
> and avoid using the assisted flush). I have a patch that will
> hopefully be able to signal when in #MC or #NMI context.
>
This function should return an error in that case. It is better to fall
back to native path than crashing.
Wei.
> Thanks, Roger.
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
On Tue, Feb 18, 2020 at 10:40:29AM +0000, Wei Liu wrote:
> >
> > > + uint64_t ret;
> > > +
> > > + if ( !flush || cpumask_empty(mask) )
> > > + {
> > > + ASSERT_UNREACHABLE();
> > > + return -EINVAL;
> > > + }
> > > +
> > > + local_irq_save(irq_flags);
> >
> > I think you disable interrupts in order to prevent re-entering this
> > function, and hence avoid an interrupt from triggering in the middle
> > and also attempting to do a TLB flush using the same per-CPU input
> > page.
> >
> > As pointed out to me by Jan, we can also get #MC and #NMI which will
> > still happen despite interrupts being disabled, and hence you might
> > want to assert that you are not in #MC or #NMI context before
> > accessing the per-CPU hv_input_page (or else just return an error
> > and avoid using the assisted flush). I have a patch that will
> > hopefully be able to signal when in #MC or #NMI context.
> >
>
> This function should return an error in that case. It is better to fall
> back to native path than crashing.
>
I briefly read through the other thread about what is allowed in #NMI or
#MC context. The discussion centred around if some operation should be
allowed to happen in those contexts in the first place.
For now I will just add a comment in the Hyper-V code. Once that
discussion is resolved, Hyper-V code can follow suit where applicable.
Wei.
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
© 2016 - 2026 Red Hat, Inc.