[PATCH v2 0/7] x86: build adjustments
Posted by Jan Beulich 3 years, 8 months ago
This is an in part only loosely connected set of changes, in particular
aiming at further reducing the shim binary size.

One patch of v1 did go in. Besides dropping that one, there was a small
adjustment to what is now patch 4 (based on feedback), and I noticed an
omission in patch 1.

1: x86/EFI: sanitize build logic
2: x86: don't build with EFI support in shim-exclusive mode
3: x86: shrink struct arch_{vcpu,domain} when !HVM
4: bitmap: move to/from xenctl_bitmap conversion helpers
5: x86: move domain_cpu_policy_changed()
6: x86: move cpu_{up,down}_helper()
7: x86: don't include domctl and alike in shim-exclusive builds

Jan

[PATCH v2 1/7] x86/EFI: sanitize build logic
Posted by Jan Beulich 3 years, 8 months ago
With changes done over time and as far as linking goes, the only special
thing about building with EFI support enabled is the need for the dummy
relocations object for xen.gz uniformly in all build stages. All other
efi/*.o can be consumed from the built_in*.o files.

In efi/Makefile, besides moving relocs-dummy.o to "extra", also properly
split between obj-y and obj-bin-y.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Drop a now stale piece of Makefile logic.
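
For illustration, a minimal standalone sketch of what the efi/Makefile split
amounts to (hypothetical object list, not the real file; the obj-y /
obj-bin-y / extra-y roles are summarized only roughly from xen/Rules.mk):

    # obj-y objects end up in built_in.o; obj-bin-y objects are the ones
    # kept out of LTO (they only get split off into built_in_bin.o for LTO
    # builds); extra-y items are built as prerequisites but not linked into
    # either of those.
    EFIOBJ := boot.init.o compat.o runtime.o

    obj-y     := $(filter-out %.init.o,$(EFIOBJ))   # compat.o runtime.o
    obj-bin-y := $(filter %.init.o,$(EFIOBJ))       # boot.init.o
    extra-y   += buildid.o relocs-dummy.o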

--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -113,28 +113,35 @@ $(TARGET): $(TARGET)-syms $(efi-y) boot/
 		{ echo "No Multiboot2 header found" >&2; false; }
 	mv $(TMP) $(TARGET)
 
+# Check if the compiler supports the MS ABI.
+export XEN_BUILD_EFI := $(shell $(CC) $(XEN_CFLAGS) -c efi/check.c -o efi/check.o 2>/dev/null && echo y)
+# Check if the linker supports PE.
+XEN_BUILD_PE := $(if $(XEN_BUILD_EFI),$(shell $(LD) -mi386pep --subsystem=10 -o efi/check.efi efi/check.o 2>/dev/null && echo y))
+CFLAGS-$(XEN_BUILD_EFI) += -DXEN_BUILD_EFI
+
 ALL_OBJS := $(BASEDIR)/arch/x86/boot/built_in.o $(BASEDIR)/arch/x86/efi/built_in.o $(ALL_OBJS)
+EFI_OBJS-$(XEN_BUILD_EFI) := efi/relocs-dummy.o
 
 ifeq ($(CONFIG_LTO),y)
 # Gather all LTO objects together
 prelink_lto.o: $(ALL_OBJS)
 	$(LD_LTO) -r -o $@ $^
 
-prelink-efi_lto.o: $(ALL_OBJS) efi/runtime.o efi/compat.o
-	$(LD_LTO) -r -o $@ $(filter-out %/efi/built_in.o,$^)
+prelink-efi_lto.o: $(ALL_OBJS)
+	$(LD_LTO) -r -o $@ $^
 
 # Link it with all the binary objects
-prelink.o: $(patsubst %/built_in.o,%/built_in_bin.o,$(ALL_OBJS)) prelink_lto.o
+prelink.o: $(patsubst %/built_in.o,%/built_in_bin.o,$(ALL_OBJS)) prelink_lto.o $(EFI_OBJS-y)
 	$(LD) $(XEN_LDFLAGS) -r -o $@ $^
 
-prelink-efi.o: $(patsubst %/built_in.o,%/built_in_bin.o,$(ALL_OBJS)) prelink-efi_lto.o efi/boot.init.o
+prelink-efi.o: $(patsubst %/built_in.o,%/built_in_bin.o,$(ALL_OBJS)) prelink-efi_lto.o
 	$(LD) $(XEN_LDFLAGS) -r -o $@ $^
 else
-prelink.o: $(ALL_OBJS)
+prelink.o: $(ALL_OBJS) $(EFI_OBJS-y)
 	$(LD) $(XEN_LDFLAGS) -r -o $@ $^
 
-prelink-efi.o: $(ALL_OBJS) efi/boot.init.o efi/runtime.o efi/compat.o
-	$(LD) $(XEN_LDFLAGS) -r -o $@ $(filter-out %/efi/built_in.o,$^)
+prelink-efi.o: $(ALL_OBJS)
+	$(LD) $(XEN_LDFLAGS) -r -o $@ $^
 endif
 
 $(TARGET)-syms: prelink.o xen.lds
@@ -171,12 +178,6 @@ EFI_LDFLAGS += --minor-image-version=$(X
 EFI_LDFLAGS += --major-os-version=2 --minor-os-version=0
 EFI_LDFLAGS += --major-subsystem-version=2 --minor-subsystem-version=0
 
-# Check if the compiler supports the MS ABI.
-export XEN_BUILD_EFI := $(shell $(CC) $(XEN_CFLAGS) -c efi/check.c -o efi/check.o 2>/dev/null && echo y)
-# Check if the linker supports PE.
-XEN_BUILD_PE := $(if $(XEN_BUILD_EFI),$(shell $(LD) -mi386pep --subsystem=10 -o efi/check.efi efi/check.o 2>/dev/null && echo y))
-CFLAGS-$(XEN_BUILD_EFI) += -DXEN_BUILD_EFI
-
 $(TARGET).efi: VIRT_BASE = 0x$(shell $(NM) efi/relocs-dummy.o | sed -n 's, A VIRT_START$$,,p')
 $(TARGET).efi: ALT_BASE = 0x$(shell $(NM) efi/relocs-dummy.o | sed -n 's, A ALT_START$$,,p')
 
@@ -223,9 +224,6 @@ $(TARGET).efi: FORCE
 	echo '$(if $(filter y,$(XEN_BUILD_EFI)),xen.efi generation,EFI support) disabled'
 endif
 
-efi/boot.init.o efi/runtime.o efi/compat.o efi/buildid.o efi/relocs-dummy.o: $(BASEDIR)/arch/x86/efi/built_in.o
-efi/boot.init.o efi/runtime.o efi/compat.o efi/buildid.o efi/relocs-dummy.o: ;
-
 asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(BASEDIR)/include/asm-x86/asm-macros.h
 	$(CC) $(filter-out -Wa$(comma)% -flto,$(c_flags)) -S -o $@ $<
 
--- a/xen/arch/x86/efi/Makefile
+++ b/xen/arch/x86/efi/Makefile
@@ -14,6 +14,7 @@ $(call cc-option-add,cflags-stack-bounda
 $(EFIOBJ): CFLAGS-stack-boundary := $(cflags-stack-boundary)
 
 obj-y := stub.o
-obj-$(XEN_BUILD_EFI) := $(EFIOBJ) relocs-dummy.o
-extra-$(XEN_BUILD_EFI) += buildid.o
+obj-$(XEN_BUILD_EFI) := $(filter-out %.init.o,$(EFIOBJ))
+obj-bin-$(XEN_BUILD_EFI) := $(filter %.init.o,$(EFIOBJ))
+extra-$(XEN_BUILD_EFI) += buildid.o relocs-dummy.o
 nocov-$(XEN_BUILD_EFI) += stub.o


Re: [PATCH v2 1/7] x86/EFI: sanitize build logic
Posted by Andrew Cooper 3 years, 8 months ago
On 07/08/2020 12:32, Jan Beulich wrote:
> With changes done over time and as far as linking goes, the only special
> thing about building with EFI support enabled is the need for the dummy
> relocations object for xen.gz uniformly in all build stages. All other
> efi/*.o can be consumed from the built_in*.o files.
>
> In efi/Makefile, besides moving relocs-dummy.o to "extra", also properly
> split between obj-y and obj-bin-y.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

I'd prefer to see this all in Kconfig, but this is a clear improvement
in its own right.

[PATCH v2 2/7] x86: don't build with EFI support in shim-exclusive mode
Posted by Jan Beulich 3 years, 8 months ago
There's no need for xen.efi at all, and there's also no need for EFI
support in xen.gz, since the shim runs in PVH mode, i.e. without any
firmware (and hence by implication also without an EFI one).

The slightly odd looking use of $(space) is to ensure the new ifneq()
evaluates consistently between "build" and "install" invocations of
make.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
There are further anomalies associated with the need to use $(space)
here:
- xen.efi rebuilding gets suppressed when installing (typically as
  root) from a non-root-owned tree. I think we should similarly suppress
  re-building of xen.gz in this case, as the tool chains available may
  vary (and hence a partial or full re-build may mistakenly occur).
- xen.lds (re-)generation has a dependency issue: The value of
  XEN_BUILD_EFI changing between builds (as would happen on a pre-built
  tree with a shim-exclusive config, onto which this patch then gets
  applied) does not cause it to be re-built. Anthony's switching to
  Linux's build system will address this afaict, so I didn't see a need
  to supply a separate patch.
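
To illustrate why the trailing $(space) matters, here's a minimal standalone
makefile sketch (not the actual build logic; $(space) is defined with the
usual idiom rather than taken from xen's config fragments):

    empty :=
    space := $(empty) $(empty)

    # During "install" the $(shell ...) below prints nothing; without the
    # trailing $(space), efi-y would then be empty and the ifneq body would
    # be skipped, unlike during "build".  A whitespace-only value still
    # counts as non-empty for ifneq, so the conditional evaluates the same
    # way in both invocations ...
    efi-y := $(shell false) $(space)
    # ... and only the explicit shim-exclusive override makes it truly empty.
    efi-$(CONFIG_PV_SHIM_EXCLUSIVE) :=

    ifneq ($(efi-y),)
    $(info EFI tool chain checks would run here)
    else
    $(info EFI support disabled)
    endif

    all: ;   # dummy target so the sketch can be run with plain "make"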

--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -80,7 +80,9 @@ x86_emulate.o: x86_emulate/x86_emulate.c
 
 efi-y := $(shell if [ ! -r $(BASEDIR)/include/xen/compile.h -o \
                       -O $(BASEDIR)/include/xen/compile.h ]; then \
-                         echo '$(TARGET).efi'; fi)
+                         echo '$(TARGET).efi'; fi) \
+         $(space)
+efi-$(CONFIG_PV_SHIM_EXCLUSIVE) :=
 
 ifneq ($(build_id_linker),)
 notes_phdrs = --notes
@@ -113,11 +115,13 @@ $(TARGET): $(TARGET)-syms $(efi-y) boot/
 		{ echo "No Multiboot2 header found" >&2; false; }
 	mv $(TMP) $(TARGET)
 
+ifneq ($(efi-y),)
 # Check if the compiler supports the MS ABI.
 export XEN_BUILD_EFI := $(shell $(CC) $(XEN_CFLAGS) -c efi/check.c -o efi/check.o 2>/dev/null && echo y)
 # Check if the linker supports PE.
 XEN_BUILD_PE := $(if $(XEN_BUILD_EFI),$(shell $(LD) -mi386pep --subsystem=10 -o efi/check.efi efi/check.o 2>/dev/null && echo y))
 CFLAGS-$(XEN_BUILD_EFI) += -DXEN_BUILD_EFI
+endif
 
 ALL_OBJS := $(BASEDIR)/arch/x86/boot/built_in.o $(BASEDIR)/arch/x86/efi/built_in.o $(ALL_OBJS)
 EFI_OBJS-$(XEN_BUILD_EFI) := efi/relocs-dummy.o


[PATCH v2 3/7] x86: shrink struct arch_{vcpu,domain} when !HVM
Posted by Jan Beulich 3 years, 8 months ago
While this won't affect overall memory overhead (struct vcpu as well as
struct domain get allocated as single pages) nor code size (the offsets
into the base structures are too large to be representable as signed 8-
bit displacements), it'll allow the tail of struct pv_{domain,vcpu} to
share a cache line with subsequent struct arch_{domain,vcpu} fields.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
RFC: There is a risk associated with this: If we still have code
     somewhere accessing the HVM parts of the structures without a prior
     type check of the guest, this is going to end up worse than the so far
     not uncommon case of the access simply going to space unused by PV.
     We may therefore want to consider whether to further restrict when
     this conversion to union gets done.
     And of course there's also the risk of future compilers complaining
     about this abuse of unions. But this is limited to code that's dead
     in !HVM configs, so only an apparent problem.
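
Purely to illustrate the construct itself, a standalone sketch with made-up
members (not the real hvm_domain); in the !HVM case the members overlap, so
the type's size collapses to that of the largest member:

    #include <stdio.h>

    /* #define CONFIG_HVM */   /* define to get the normal struct layout */

    typedef
    #ifdef CONFIG_HVM
    struct
    #else
    union
    #endif
    demo_hvm {
        unsigned long a[4];
        unsigned long b[8];
        unsigned int  c;
    } demo_hvm_t;

    int main(void)
    {
        /* union: 8 longs; struct: 12 longs plus c (and padding) */
        printf("sizeof(demo_hvm_t) = %zu\n", sizeof(demo_hvm_t));
        return 0;
    }

In the real structures the pv/hvm union embedded in struct arch_{domain,vcpu}
shrinks accordingly, which is what lets the tail of the PV part share a cache
line with the subsequent fields, as described above.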

--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -709,7 +709,7 @@ long arch_do_domctl(
         unsigned int fmp = domctl->u.ioport_mapping.first_mport;
         unsigned int np = domctl->u.ioport_mapping.nr_ports;
         unsigned int add = domctl->u.ioport_mapping.add_mapping;
-        struct hvm_domain *hvm;
+        hvm_domain_t *hvm;
         struct g2m_ioport *g2m_ioport;
         int found = 0;
 
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -319,7 +319,7 @@ struct arch_domain
 
     union {
         struct pv_domain pv;
-        struct hvm_domain hvm;
+        hvm_domain_t hvm;
     };
 
     struct paging_domain paging;
@@ -582,7 +582,7 @@ struct arch_vcpu
     /* Virtual Machine Extensions */
     union {
         struct pv_vcpu pv;
-        struct hvm_vcpu hvm;
+        hvm_vcpu_t hvm;
     };
 
     /*
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -99,7 +99,13 @@ struct hvm_pi_ops {
 
 #define MAX_NR_IOREQ_SERVERS 8
 
-struct hvm_domain {
+typedef
+#ifdef CONFIG_HVM
+struct
+#else
+union
+#endif
+hvm_domain {
     /* Guest page range used for non-default ioreq servers */
     struct {
         unsigned long base;
@@ -203,7 +209,7 @@ struct hvm_domain {
 #ifdef CONFIG_MEM_SHARING
     struct mem_sharing_domain mem_sharing;
 #endif
-};
+} hvm_domain_t;
 
 #endif /* __ASM_X86_HVM_DOMAIN_H__ */
 
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -149,7 +149,13 @@ struct altp2mvcpu {
 
 #define vcpu_altp2m(v) ((v)->arch.hvm.avcpu)
 
-struct hvm_vcpu {
+typedef
+#ifdef CONFIG_HVM
+struct
+#else
+union
+#endif
+hvm_vcpu {
     /* Guest control-register and EFER values, just as the guest sees them. */
     unsigned long       guest_cr[5];
     unsigned long       guest_efer;
@@ -213,7 +219,7 @@ struct hvm_vcpu {
     struct x86_event     inject_event;
 
     struct viridian_vcpu *viridian;
-};
+} hvm_vcpu_t;
 
 #endif /* __ASM_X86_HVM_VCPU_H__ */
 


Re: [PATCH v2 3/7] x86: shrink struct arch_{vcpu,domain} when !HVM
Posted by Andrew Cooper 3 years, 8 months ago
On 07/08/2020 12:33, Jan Beulich wrote:
> While this won't affect overall memory overhead (struct vcpu as well as
> struct domain get allocated as single pages) nor code size (the offsets
> into the base structures are too large to be representable as signed 8-
> bit displacements), it'll allow the tail of struct pv_{domain,vcpu} to
> share a cache line with subsequent struct arch_{domain,vcpu} fields.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> RFC: There is a risk associated with this: If we still have code
>      somewhere accessing the HVM parts of the structures without a prior
>      type check of the guest, this going to end up worse than the so far
>      not uncommon case of the access simply going to space unused by PV.
>      We may therefore want to consider whether to further restrict when
>      this conversion to union gets done.
>      And of course there's also the risk of future compilers complaining
>      about this abuse of unions. But this is limited to code that's dead
>      in !HVM configs, so only an apparent problem.
>
> --- a/xen/include/asm-x86/hvm/domain.h
> +++ b/xen/include/asm-x86/hvm/domain.h
> @@ -99,7 +99,13 @@ struct hvm_pi_ops {
>  
>  #define MAX_NR_IOREQ_SERVERS 8
>  
> -struct hvm_domain {
> +typedef
> +#ifdef CONFIG_HVM
> +struct
> +#else
> +union
> +#endif
> +hvm_domain {
>      /* Guest page range used for non-default ioreq servers */
>      struct {
>          unsigned long base;
> @@ -203,7 +209,7 @@ struct hvm_domain {
>  #ifdef CONFIG_MEM_SHARING
>      struct mem_sharing_domain mem_sharing;
>  #endif
> -};
> +} hvm_domain_t;

Honestly, I'd say no to this patch even if it resulted in a 100x speedup,
because I am totally lost for words about this construct, and the effect
it has on the comprehensibility of our code.

Seeing as improved cache locality appears to be the sole benefit,
marginal as it is, you can achieve that in a better way by rearranging
struct domain/vcpu to put the pv/hvm union at the end.
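
A minimal sketch of that rearrangement (heavily simplified, made-up field
selection) - the variably-interpreted part goes last, so its HVM-sized tail
no longer pushes the remaining fields onto later cache lines:

    struct arch_domain {
        /* ... fields accessed independently of the guest type ... */
        struct paging_domain paging;

        /* Guest-type dependent state last. */
        union {
            struct pv_domain pv;
            struct hvm_domain hvm;
        };
    };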

~Andrew

Re: [PATCH v2 3/7] x86: shrink struct arch_{vcpu,domain} when !HVM
Posted by Jan Beulich 3 years, 8 months ago
On 07.08.2020 19:14, Andrew Cooper wrote:
> On 07/08/2020 12:33, Jan Beulich wrote:
>> While this won't affect overall memory overhead (struct vcpu as well as
>> struct domain get allocated as single pages) nor code size (the offsets
>> into the base structures are too large to be representable as signed 8-
>> bit displacements), it'll allow the tail of struct pv_{domain,vcpu} to
>> share a cache line with subsequent struct arch_{domain,vcpu} fields.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> RFC: There is a risk associated with this: If we still have code
>>      somewhere accessing the HVM parts of the structures without a prior
>>      type check of the guest, this going to end up worse than the so far
>>      not uncommon case of the access simply going to space unused by PV.
>>      We may therefore want to consider whether to further restrict when
>>      this conversion to union gets done.
>>      And of course there's also the risk of future compilers complaining
>>      about this abuse of unions. But this is limited to code that's dead
>>      in !HVM configs, so only an apparent problem.
>>
>> --- a/xen/include/asm-x86/hvm/domain.h
>> +++ b/xen/include/asm-x86/hvm/domain.h
>> @@ -99,7 +99,13 @@ struct hvm_pi_ops {
>>  
>>  #define MAX_NR_IOREQ_SERVERS 8
>>  
>> -struct hvm_domain {
>> +typedef
>> +#ifdef CONFIG_HVM
>> +struct
>> +#else
>> +union
>> +#endif
>> +hvm_domain {
>>      /* Guest page range used for non-default ioreq servers */
>>      struct {
>>          unsigned long base;
>> @@ -203,7 +209,7 @@ struct hvm_domain {
>>  #ifdef CONFIG_MEM_SHARING
>>      struct mem_sharing_domain mem_sharing;
>>  #endif
>> -};
>> +} hvm_domain_t;
> 
> Honestly, I'd say no to this patch even it resulted in a 100x speedup,
> because I am totally lost for words about this construct, and the effect
> it comprehensibility of our code.

Okay - away it goes.

Jan

[PATCH v2 4/7] bitmap: move to/from xenctl_bitmap conversion helpers
Posted by Jan Beulich 3 years, 8 months ago
A subsequent change will exclude domctl.c from getting built for a
particular configuration, yet the two functions get used from elsewhere.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Move function decls to xen/bitmap.h.
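
As a worked example of the copy-length / trailing-bit handling in
xenctl_bitmap_to_bitmap() (standalone arithmetic only, using a plain
ternary in place of min_t(), no guest access involved):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* Guest passes a 12-bit map; hypervisor-side bitmap holds 32 bits. */
        unsigned int nr_bits = 12, nbits = 32;
        unsigned int guest_bytes = (nr_bits + 7) / 8;               /* 2 */
        unsigned int copy_bytes  = guest_bytes < (nbits + 7) / 8
                                   ? guest_bytes : (nbits + 7) / 8; /* 2 */
        uint8_t last = 0xff;   /* pretend last byte copied from the guest */

        /* Clear the bits beyond nr_bits in the final byte, so they can't
         * leak into the hypervisor-side bitmap. */
        if ( (nr_bits & 7) && guest_bytes == copy_bytes )
            last &= ~(0xff << (nr_bits & 7));                       /* 0x0f */

        printf("guest_bytes=%u copy_bytes=%u last=%#x\n",
               guest_bytes, copy_bytes, last);
        return 0;
    }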

--- a/xen/common/bitmap.c
+++ b/xen/common/bitmap.c
@@ -9,6 +9,8 @@
 #include <xen/errno.h>
 #include <xen/bitmap.h>
 #include <xen/bitops.h>
+#include <xen/cpumask.h>
+#include <xen/guest_access.h>
 #include <asm/byteorder.h>
 
 /*
@@ -384,3 +386,87 @@ void bitmap_byte_to_long(unsigned long *
 }
 
 #endif
+
+int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
+                            const unsigned long *bitmap, unsigned int nbits)
+{
+    unsigned int guest_bytes, copy_bytes, i;
+    uint8_t zero = 0;
+    int err = 0;
+    uint8_t *bytemap = xmalloc_array(uint8_t, (nbits + 7) / 8);
+
+    if ( !bytemap )
+        return -ENOMEM;
+
+    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
+    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
+
+    bitmap_long_to_byte(bytemap, bitmap, nbits);
+
+    if ( copy_bytes != 0 )
+        if ( copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
+            err = -EFAULT;
+
+    for ( i = copy_bytes; !err && i < guest_bytes; i++ )
+        if ( copy_to_guest_offset(xenctl_bitmap->bitmap, i, &zero, 1) )
+            err = -EFAULT;
+
+    xfree(bytemap);
+
+    return err;
+}
+
+int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
+                            const struct xenctl_bitmap *xenctl_bitmap,
+                            unsigned int nbits)
+{
+    unsigned int guest_bytes, copy_bytes;
+    int err = 0;
+    uint8_t *bytemap = xzalloc_array(uint8_t, (nbits + 7) / 8);
+
+    if ( !bytemap )
+        return -ENOMEM;
+
+    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
+    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
+
+    if ( copy_bytes != 0 )
+    {
+        if ( copy_from_guest(bytemap, xenctl_bitmap->bitmap, copy_bytes) )
+            err = -EFAULT;
+        if ( (xenctl_bitmap->nr_bits & 7) && (guest_bytes == copy_bytes) )
+            bytemap[guest_bytes-1] &= ~(0xff << (xenctl_bitmap->nr_bits & 7));
+    }
+
+    if ( !err )
+        bitmap_byte_to_long(bitmap, bytemap, nbits);
+
+    xfree(bytemap);
+
+    return err;
+}
+
+int cpumask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_cpumap,
+                             const cpumask_t *cpumask)
+{
+    return bitmap_to_xenctl_bitmap(xenctl_cpumap, cpumask_bits(cpumask),
+                                   nr_cpu_ids);
+}
+
+int xenctl_bitmap_to_cpumask(cpumask_var_t *cpumask,
+                             const struct xenctl_bitmap *xenctl_cpumap)
+{
+    int err = 0;
+
+    if ( alloc_cpumask_var(cpumask) ) {
+        err = xenctl_bitmap_to_bitmap(cpumask_bits(*cpumask), xenctl_cpumap,
+                                      nr_cpu_ids);
+        /* In case of error, cleanup is up to us, as the caller won't care! */
+        if ( err )
+            free_cpumask_var(*cpumask);
+    }
+    else
+        err = -ENOMEM;
+
+    return err;
+}
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -34,91 +34,6 @@
 
 static DEFINE_SPINLOCK(domctl_lock);
 
-static int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
-                                   const unsigned long *bitmap,
-                                   unsigned int nbits)
-{
-    unsigned int guest_bytes, copy_bytes, i;
-    uint8_t zero = 0;
-    int err = 0;
-    uint8_t *bytemap = xmalloc_array(uint8_t, (nbits + 7) / 8);
-
-    if ( !bytemap )
-        return -ENOMEM;
-
-    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
-    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
-
-    bitmap_long_to_byte(bytemap, bitmap, nbits);
-
-    if ( copy_bytes != 0 )
-        if ( copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
-            err = -EFAULT;
-
-    for ( i = copy_bytes; !err && i < guest_bytes; i++ )
-        if ( copy_to_guest_offset(xenctl_bitmap->bitmap, i, &zero, 1) )
-            err = -EFAULT;
-
-    xfree(bytemap);
-
-    return err;
-}
-
-int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
-                            const struct xenctl_bitmap *xenctl_bitmap,
-                            unsigned int nbits)
-{
-    unsigned int guest_bytes, copy_bytes;
-    int err = 0;
-    uint8_t *bytemap = xzalloc_array(uint8_t, (nbits + 7) / 8);
-
-    if ( !bytemap )
-        return -ENOMEM;
-
-    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
-    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
-
-    if ( copy_bytes != 0 )
-    {
-        if ( copy_from_guest(bytemap, xenctl_bitmap->bitmap, copy_bytes) )
-            err = -EFAULT;
-        if ( (xenctl_bitmap->nr_bits & 7) && (guest_bytes == copy_bytes) )
-            bytemap[guest_bytes-1] &= ~(0xff << (xenctl_bitmap->nr_bits & 7));
-    }
-
-    if ( !err )
-        bitmap_byte_to_long(bitmap, bytemap, nbits);
-
-    xfree(bytemap);
-
-    return err;
-}
-
-int cpumask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_cpumap,
-                             const cpumask_t *cpumask)
-{
-    return bitmap_to_xenctl_bitmap(xenctl_cpumap, cpumask_bits(cpumask),
-                                   nr_cpu_ids);
-}
-
-int xenctl_bitmap_to_cpumask(cpumask_var_t *cpumask,
-                             const struct xenctl_bitmap *xenctl_cpumap)
-{
-    int err = 0;
-
-    if ( alloc_cpumask_var(cpumask) ) {
-        err = xenctl_bitmap_to_bitmap(cpumask_bits(*cpumask), xenctl_cpumap,
-                                      nr_cpu_ids);
-        /* In case of error, cleanup is up to us, as the caller won't care! */
-        if ( err )
-            free_cpumask_var(*cpumask);
-    }
-    else
-        err = -ENOMEM;
-
-    return err;
-}
-
 static int nodemask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_nodemap,
                                      const nodemask_t *nodemask)
 {
--- a/xen/include/xen/bitmap.h
+++ b/xen/include/xen/bitmap.h
@@ -273,6 +273,13 @@ static inline void bitmap_clear(unsigned
 void bitmap_long_to_byte(uint8_t *bp, const unsigned long *lp, int nbits);
 void bitmap_byte_to_long(unsigned long *lp, const uint8_t *bp, int nbits);
 
+struct xenctl_bitmap;
+int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
+                            const struct xenctl_bitmap *xenctl_bitmap,
+                            unsigned int nbits);
+int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
+                            const unsigned long *bitmap, unsigned int nbits);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __XEN_BITMAP_H */
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -27,9 +27,6 @@ struct xen_domctl_getdomaininfo;
 void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info);
 void arch_get_domain_info(const struct domain *d,
                           struct xen_domctl_getdomaininfo *info);
-int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
-                            const struct xenctl_bitmap *xenctl_bitmap,
-                            unsigned int nbits);
 
 /*
  * Arch-specifics.


Re: [PATCH v2 4/7] bitmap: move to/from xenctl_bitmap conversion helpers
Posted by Andrew Cooper 3 years, 8 months ago
On 07/08/2020 12:33, Jan Beulich wrote:
> --- a/xen/common/bitmap.c
> +++ b/xen/common/bitmap.c
> @@ -384,3 +386,87 @@ void bitmap_byte_to_long(unsigned long *
>  }
>  
>  #endif
> +
> +int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
> +                            const unsigned long *bitmap, unsigned int nbits)
> +{
> +    unsigned int guest_bytes, copy_bytes, i;
> +    uint8_t zero = 0;
> +    int err = 0;
> +    uint8_t *bytemap = xmalloc_array(uint8_t, (nbits + 7) / 8);
> +
> +    if ( !bytemap )
> +        return -ENOMEM;
> +
> +    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
> +    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
> +
> +    bitmap_long_to_byte(bytemap, bitmap, nbits);
> +
> +    if ( copy_bytes != 0 )
> +        if ( copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
> +            err = -EFAULT;
> +
> +    for ( i = copy_bytes; !err && i < guest_bytes; i++ )
> +        if ( copy_to_guest_offset(xenctl_bitmap->bitmap, i, &zero, 1) )
> +            err = -EFAULT;
> +
> +    xfree(bytemap);
> +
> +    return err;
> +}
> +
> +int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
> +                            const struct xenctl_bitmap *xenctl_bitmap,
> +                            unsigned int nbits)
> +{
> +    unsigned int guest_bytes, copy_bytes;
> +    int err = 0;
> +    uint8_t *bytemap = xzalloc_array(uint8_t, (nbits + 7) / 8);
> +
> +    if ( !bytemap )
> +        return -ENOMEM;
> +
> +    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
> +    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
> +
> +    if ( copy_bytes != 0 )
> +    {
> +        if ( copy_from_guest(bytemap, xenctl_bitmap->bitmap, copy_bytes) )
> +            err = -EFAULT;
> +        if ( (xenctl_bitmap->nr_bits & 7) && (guest_bytes == copy_bytes) )
> +            bytemap[guest_bytes-1] &= ~(0xff << (xenctl_bitmap->nr_bits & 7));
> +    }
> +
> +    if ( !err )
> +        bitmap_byte_to_long(bitmap, bytemap, nbits);
> +
> +    xfree(bytemap);
> +
> +    return err;
> +}
> +
> +int cpumask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_cpumap,
> +                             const cpumask_t *cpumask)
> +{
> +    return bitmap_to_xenctl_bitmap(xenctl_cpumap, cpumask_bits(cpumask),
> +                                   nr_cpu_ids);
> +}
> +
> +int xenctl_bitmap_to_cpumask(cpumask_var_t *cpumask,
> +                             const struct xenctl_bitmap *xenctl_cpumap)
> +{
> +    int err = 0;
> +
> +    if ( alloc_cpumask_var(cpumask) ) {

At a minimum, please fix this style during the move.

However, the more I look at this code, the more concerned I get.

There is absolutely no need to allocate(/doubly allocate) memory or
double/triple bounce buffer the data.  All that is necessary is some
careful handling of the copy length, and trailing zeroing.

The cpumask variants should be trivial static inline wrappers.  The fact
that they're not suggests an API error.

In reality, these are just data-shuffling helpers, and would probably
live better in a guest-access.c if we had a suitable one to hand, but I
guess bitmap.c will have to do for now.

~Andrew

Re: [PATCH v2 4/7] bitmap: move to/from xenctl_bitmap conversion helpers
Posted by Jan Beulich 3 years, 8 months ago
On 07.08.2020 19:50, Andrew Cooper wrote:
> On 07/08/2020 12:33, Jan Beulich wrote:
>> --- a/xen/common/bitmap.c
>> +++ b/xen/common/bitmap.c
>> @@ -384,3 +386,87 @@ void bitmap_byte_to_long(unsigned long *
>>  }
>>  
>>  #endif
>> +
>> +int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
>> +                            const unsigned long *bitmap, unsigned int nbits)
>> +{
>> +    unsigned int guest_bytes, copy_bytes, i;
>> +    uint8_t zero = 0;
>> +    int err = 0;
>> +    uint8_t *bytemap = xmalloc_array(uint8_t, (nbits + 7) / 8);
>> +
>> +    if ( !bytemap )
>> +        return -ENOMEM;
>> +
>> +    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
>> +    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
>> +
>> +    bitmap_long_to_byte(bytemap, bitmap, nbits);
>> +
>> +    if ( copy_bytes != 0 )
>> +        if ( copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
>> +            err = -EFAULT;
>> +
>> +    for ( i = copy_bytes; !err && i < guest_bytes; i++ )
>> +        if ( copy_to_guest_offset(xenctl_bitmap->bitmap, i, &zero, 1) )
>> +            err = -EFAULT;
>> +
>> +    xfree(bytemap);
>> +
>> +    return err;
>> +}
>> +
>> +int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
>> +                            const struct xenctl_bitmap *xenctl_bitmap,
>> +                            unsigned int nbits)
>> +{
>> +    unsigned int guest_bytes, copy_bytes;
>> +    int err = 0;
>> +    uint8_t *bytemap = xzalloc_array(uint8_t, (nbits + 7) / 8);
>> +
>> +    if ( !bytemap )
>> +        return -ENOMEM;
>> +
>> +    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
>> +    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
>> +
>> +    if ( copy_bytes != 0 )
>> +    {
>> +        if ( copy_from_guest(bytemap, xenctl_bitmap->bitmap, copy_bytes) )
>> +            err = -EFAULT;
>> +        if ( (xenctl_bitmap->nr_bits & 7) && (guest_bytes == copy_bytes) )
>> +            bytemap[guest_bytes-1] &= ~(0xff << (xenctl_bitmap->nr_bits & 7));
>> +    }
>> +
>> +    if ( !err )
>> +        bitmap_byte_to_long(bitmap, bytemap, nbits);
>> +
>> +    xfree(bytemap);
>> +
>> +    return err;
>> +}
>> +
>> +int cpumask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_cpumap,
>> +                             const cpumask_t *cpumask)
>> +{
>> +    return bitmap_to_xenctl_bitmap(xenctl_cpumap, cpumask_bits(cpumask),
>> +                                   nr_cpu_ids);
>> +}
>> +
>> +int xenctl_bitmap_to_cpumask(cpumask_var_t *cpumask,
>> +                             const struct xenctl_bitmap *xenctl_cpumap)
>> +{
>> +    int err = 0;
>> +
>> +    if ( alloc_cpumask_var(cpumask) ) {
> 
> At a minimum, please fix this style during the move.

Oh, I should have noticed this. I've also spotted a 2nd style issue.

> However, the more I look at this code, the more concerned I get.
> 
> There is absolutely no need to allocate(/doubly allocate) memory or
> double/triple bounce buffer the data.  All that is necessary is some
> careful handling of the copy length, and trailing zeroing.
> 
> The cpumask variants should be trivial static inline wrapper.  The fact
> that they're not suggests an API error.
> 
> In reality, these are just data-shuffling helpers, and would probably
> live better in a guest-access.c if we had a suitable one to hand, but I
> guess bitmap.c will have to do for now.

But changing the implementation is certainly way beyond the purpose
here. (I also can't spot any pointless double allocation - the one
in xenctl_bitmap_to_cpumask() allocates an output of the function.)

Jan

Re: [PATCH v2 4/7] bitmap: move to/from xenctl_bitmap conversion helpers
Posted by Julien Grall 3 years, 8 months ago
Hi,

On 07/08/2020 12:33, Jan Beulich wrote:
> A subsequent change will exclude domctl.c from getting built for a
> particular configuration, yet the two functions get used from elsewhere.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Julien Grall <jgrall@amazon.com>

Cheers,

> ---
> v2: Move function decls to xen/bitmap.h.
> 
> --- a/xen/common/bitmap.c
> +++ b/xen/common/bitmap.c
> @@ -9,6 +9,8 @@
>   #include <xen/errno.h>
>   #include <xen/bitmap.h>
>   #include <xen/bitops.h>
> +#include <xen/cpumask.h>
> +#include <xen/guest_access.h>
>   #include <asm/byteorder.h>
>   
>   /*
> @@ -384,3 +386,87 @@ void bitmap_byte_to_long(unsigned long *
>   }
>   
>   #endif
> +
> +int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
> +                            const unsigned long *bitmap, unsigned int nbits)
> +{
> +    unsigned int guest_bytes, copy_bytes, i;
> +    uint8_t zero = 0;
> +    int err = 0;
> +    uint8_t *bytemap = xmalloc_array(uint8_t, (nbits + 7) / 8);
> +
> +    if ( !bytemap )
> +        return -ENOMEM;
> +
> +    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
> +    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
> +
> +    bitmap_long_to_byte(bytemap, bitmap, nbits);
> +
> +    if ( copy_bytes != 0 )
> +        if ( copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
> +            err = -EFAULT;
> +
> +    for ( i = copy_bytes; !err && i < guest_bytes; i++ )
> +        if ( copy_to_guest_offset(xenctl_bitmap->bitmap, i, &zero, 1) )
> +            err = -EFAULT;
> +
> +    xfree(bytemap);
> +
> +    return err;
> +}
> +
> +int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
> +                            const struct xenctl_bitmap *xenctl_bitmap,
> +                            unsigned int nbits)
> +{
> +    unsigned int guest_bytes, copy_bytes;
> +    int err = 0;
> +    uint8_t *bytemap = xzalloc_array(uint8_t, (nbits + 7) / 8);
> +
> +    if ( !bytemap )
> +        return -ENOMEM;
> +
> +    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
> +    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
> +
> +    if ( copy_bytes != 0 )
> +    {
> +        if ( copy_from_guest(bytemap, xenctl_bitmap->bitmap, copy_bytes) )
> +            err = -EFAULT;
> +        if ( (xenctl_bitmap->nr_bits & 7) && (guest_bytes == copy_bytes) )
> +            bytemap[guest_bytes-1] &= ~(0xff << (xenctl_bitmap->nr_bits & 7));
> +    }
> +
> +    if ( !err )
> +        bitmap_byte_to_long(bitmap, bytemap, nbits);
> +
> +    xfree(bytemap);
> +
> +    return err;
> +}
> +
> +int cpumask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_cpumap,
> +                             const cpumask_t *cpumask)
> +{
> +    return bitmap_to_xenctl_bitmap(xenctl_cpumap, cpumask_bits(cpumask),
> +                                   nr_cpu_ids);
> +}
> +
> +int xenctl_bitmap_to_cpumask(cpumask_var_t *cpumask,
> +                             const struct xenctl_bitmap *xenctl_cpumap)
> +{
> +    int err = 0;
> +
> +    if ( alloc_cpumask_var(cpumask) ) {
> +        err = xenctl_bitmap_to_bitmap(cpumask_bits(*cpumask), xenctl_cpumap,
> +                                      nr_cpu_ids);
> +        /* In case of error, cleanup is up to us, as the caller won't care! */
> +        if ( err )
> +            free_cpumask_var(*cpumask);
> +    }
> +    else
> +        err = -ENOMEM;
> +
> +    return err;
> +}
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -34,91 +34,6 @@
>   
>   static DEFINE_SPINLOCK(domctl_lock);
>   
> -static int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
> -                                   const unsigned long *bitmap,
> -                                   unsigned int nbits)
> -{
> -    unsigned int guest_bytes, copy_bytes, i;
> -    uint8_t zero = 0;
> -    int err = 0;
> -    uint8_t *bytemap = xmalloc_array(uint8_t, (nbits + 7) / 8);
> -
> -    if ( !bytemap )
> -        return -ENOMEM;
> -
> -    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
> -    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
> -
> -    bitmap_long_to_byte(bytemap, bitmap, nbits);
> -
> -    if ( copy_bytes != 0 )
> -        if ( copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
> -            err = -EFAULT;
> -
> -    for ( i = copy_bytes; !err && i < guest_bytes; i++ )
> -        if ( copy_to_guest_offset(xenctl_bitmap->bitmap, i, &zero, 1) )
> -            err = -EFAULT;
> -
> -    xfree(bytemap);
> -
> -    return err;
> -}
> -
> -int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
> -                            const struct xenctl_bitmap *xenctl_bitmap,
> -                            unsigned int nbits)
> -{
> -    unsigned int guest_bytes, copy_bytes;
> -    int err = 0;
> -    uint8_t *bytemap = xzalloc_array(uint8_t, (nbits + 7) / 8);
> -
> -    if ( !bytemap )
> -        return -ENOMEM;
> -
> -    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
> -    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
> -
> -    if ( copy_bytes != 0 )
> -    {
> -        if ( copy_from_guest(bytemap, xenctl_bitmap->bitmap, copy_bytes) )
> -            err = -EFAULT;
> -        if ( (xenctl_bitmap->nr_bits & 7) && (guest_bytes == copy_bytes) )
> -            bytemap[guest_bytes-1] &= ~(0xff << (xenctl_bitmap->nr_bits & 7));
> -    }
> -
> -    if ( !err )
> -        bitmap_byte_to_long(bitmap, bytemap, nbits);
> -
> -    xfree(bytemap);
> -
> -    return err;
> -}
> -
> -int cpumask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_cpumap,
> -                             const cpumask_t *cpumask)
> -{
> -    return bitmap_to_xenctl_bitmap(xenctl_cpumap, cpumask_bits(cpumask),
> -                                   nr_cpu_ids);
> -}
> -
> -int xenctl_bitmap_to_cpumask(cpumask_var_t *cpumask,
> -                             const struct xenctl_bitmap *xenctl_cpumap)
> -{
> -    int err = 0;
> -
> -    if ( alloc_cpumask_var(cpumask) ) {
> -        err = xenctl_bitmap_to_bitmap(cpumask_bits(*cpumask), xenctl_cpumap,
> -                                      nr_cpu_ids);
> -        /* In case of error, cleanup is up to us, as the caller won't care! */
> -        if ( err )
> -            free_cpumask_var(*cpumask);
> -    }
> -    else
> -        err = -ENOMEM;
> -
> -    return err;
> -}
> -
>   static int nodemask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_nodemap,
>                                        const nodemask_t *nodemask)
>   {
> --- a/xen/include/xen/bitmap.h
> +++ b/xen/include/xen/bitmap.h
> @@ -273,6 +273,13 @@ static inline void bitmap_clear(unsigned
>   void bitmap_long_to_byte(uint8_t *bp, const unsigned long *lp, int nbits);
>   void bitmap_byte_to_long(unsigned long *lp, const uint8_t *bp, int nbits);
>   
> +struct xenctl_bitmap;
> +int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
> +                            const struct xenctl_bitmap *xenctl_bitmap,
> +                            unsigned int nbits);
> +int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
> +                            const unsigned long *bitmap, unsigned int nbits);
> +
>   #endif /* __ASSEMBLY__ */
>   
>   #endif /* __XEN_BITMAP_H */
> --- a/xen/include/xen/domain.h
> +++ b/xen/include/xen/domain.h
> @@ -27,9 +27,6 @@ struct xen_domctl_getdomaininfo;
>   void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info);
>   void arch_get_domain_info(const struct domain *d,
>                             struct xen_domctl_getdomaininfo *info);
> -int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
> -                            const struct xenctl_bitmap *xenctl_bitmap,
> -                            unsigned int nbits);
>   
>   /*
>    * Arch-specifics.
> 

-- 
Julien Grall

[PATCH v2 5/7] x86: move domain_cpu_policy_changed()
Posted by Jan Beulich 3 years, 8 months ago
This is in preparation of making the building of domctl.c conditional.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -294,6 +294,173 @@ void update_guest_memory_policy(struct v
     }
 }
 
+void domain_cpu_policy_changed(struct domain *d)
+{
+    const struct cpuid_policy *p = d->arch.cpuid;
+    struct vcpu *v;
+
+    if ( is_pv_domain(d) )
+    {
+        if ( ((levelling_caps & LCAP_1cd) == LCAP_1cd) )
+        {
+            uint64_t mask = cpuidmask_defaults._1cd;
+            uint32_t ecx = p->basic._1c;
+            uint32_t edx = p->basic._1d;
+
+            /*
+             * Must expose hosts HTT and X2APIC value so a guest using native
+             * CPUID can correctly interpret other leaves which cannot be
+             * masked.
+             */
+            if ( cpu_has_x2apic )
+                ecx |= cpufeat_mask(X86_FEATURE_X2APIC);
+            if ( cpu_has_htt )
+                edx |= cpufeat_mask(X86_FEATURE_HTT);
+
+            switch ( boot_cpu_data.x86_vendor )
+            {
+            case X86_VENDOR_INTEL:
+                /*
+                 * Intel masking MSRs are documented as AND masks.
+                 * Experimentally, they are applied after OSXSAVE and APIC
+                 * are fast-forwarded from real hardware state.
+                 */
+                mask &= ((uint64_t)edx << 32) | ecx;
+
+                if ( ecx & cpufeat_mask(X86_FEATURE_XSAVE) )
+                    ecx = cpufeat_mask(X86_FEATURE_OSXSAVE);
+                else
+                    ecx = 0;
+                edx = cpufeat_mask(X86_FEATURE_APIC);
+
+                mask |= ((uint64_t)edx << 32) | ecx;
+                break;
+
+            case X86_VENDOR_AMD:
+            case X86_VENDOR_HYGON:
+                mask &= ((uint64_t)ecx << 32) | edx;
+
+                /*
+                 * AMD masking MSRs are documented as overrides.
+                 * Experimentally, fast-forwarding of the OSXSAVE and APIC
+                 * bits from real hardware state only occurs if the MSR has
+                 * the respective bits set.
+                 */
+                if ( ecx & cpufeat_mask(X86_FEATURE_XSAVE) )
+                    ecx = cpufeat_mask(X86_FEATURE_OSXSAVE);
+                else
+                    ecx = 0;
+                edx = cpufeat_mask(X86_FEATURE_APIC);
+
+                /*
+                 * If the Hypervisor bit is set in the policy, we can also
+                 * forward it into real CPUID.
+                 */
+                if ( p->basic.hypervisor )
+                    ecx |= cpufeat_mask(X86_FEATURE_HYPERVISOR);
+
+                mask |= ((uint64_t)ecx << 32) | edx;
+                break;
+            }
+
+            d->arch.pv.cpuidmasks->_1cd = mask;
+        }
+
+        if ( ((levelling_caps & LCAP_6c) == LCAP_6c) )
+        {
+            uint64_t mask = cpuidmask_defaults._6c;
+
+            if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+                mask &= (~0ULL << 32) | p->basic.raw[6].c;
+
+            d->arch.pv.cpuidmasks->_6c = mask;
+        }
+
+        if ( ((levelling_caps & LCAP_7ab0) == LCAP_7ab0) )
+        {
+            uint64_t mask = cpuidmask_defaults._7ab0;
+
+            /*
+             * Leaf 7[0].eax is max_subleaf, not a feature mask.  Take it
+             * wholesale from the policy, but clamp the features in 7[0].ebx
+             * per usual.
+             */
+            if ( boot_cpu_data.x86_vendor &
+                 (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
+                mask = (((uint64_t)p->feat.max_subleaf << 32) |
+                        ((uint32_t)mask & p->feat._7b0));
+
+            d->arch.pv.cpuidmasks->_7ab0 = mask;
+        }
+
+        if ( ((levelling_caps & LCAP_Da1) == LCAP_Da1) )
+        {
+            uint64_t mask = cpuidmask_defaults.Da1;
+            uint32_t eax = p->xstate.Da1;
+
+            if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+                mask &= (~0ULL << 32) | eax;
+
+            d->arch.pv.cpuidmasks->Da1 = mask;
+        }
+
+        if ( ((levelling_caps & LCAP_e1cd) == LCAP_e1cd) )
+        {
+            uint64_t mask = cpuidmask_defaults.e1cd;
+            uint32_t ecx = p->extd.e1c;
+            uint32_t edx = p->extd.e1d;
+
+            /*
+             * Must expose hosts CMP_LEGACY value so a guest using native
+             * CPUID can correctly interpret other leaves which cannot be
+             * masked.
+             */
+            if ( cpu_has_cmp_legacy )
+                ecx |= cpufeat_mask(X86_FEATURE_CMP_LEGACY);
+
+            /*
+             * If not emulating AMD or Hygon, clear the duplicated features
+             * in e1d.
+             */
+            if ( !(p->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) )
+                edx &= ~CPUID_COMMON_1D_FEATURES;
+
+            switch ( boot_cpu_data.x86_vendor )
+            {
+            case X86_VENDOR_INTEL:
+                mask &= ((uint64_t)edx << 32) | ecx;
+                break;
+
+            case X86_VENDOR_AMD:
+            case X86_VENDOR_HYGON:
+                mask &= ((uint64_t)ecx << 32) | edx;
+
+                /*
+                 * Fast-forward bits - Must be set in the masking MSR for
+                 * fast-forwarding to occur in hardware.
+                 */
+                ecx = 0;
+                edx = cpufeat_mask(X86_FEATURE_APIC);
+
+                mask |= ((uint64_t)ecx << 32) | edx;
+                break;
+            }
+
+            d->arch.pv.cpuidmasks->e1cd = mask;
+        }
+    }
+
+    for_each_vcpu ( d, v )
+    {
+        cpuid_policy_updated(v);
+
+        /* If PMU version is zero then the guest doesn't have VPMU */
+        if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+             p->basic.pmu_version == 0 )
+            vpmu_destroy(v);
+    }
+}
+
 #ifndef CONFIG_BIGMEM
 /*
  * The hole may be at or above the 44-bit boundary, so we need to determine
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -49,173 +49,6 @@ static int gdbsx_guest_mem_io(domid_t do
 }
 #endif
 
-void domain_cpu_policy_changed(struct domain *d)
-{
-    const struct cpuid_policy *p = d->arch.cpuid;
-    struct vcpu *v;
-
-    if ( is_pv_domain(d) )
-    {
-        if ( ((levelling_caps & LCAP_1cd) == LCAP_1cd) )
-        {
-            uint64_t mask = cpuidmask_defaults._1cd;
-            uint32_t ecx = p->basic._1c;
-            uint32_t edx = p->basic._1d;
-
-            /*
-             * Must expose hosts HTT and X2APIC value so a guest using native
-             * CPUID can correctly interpret other leaves which cannot be
-             * masked.
-             */
-            if ( cpu_has_x2apic )
-                ecx |= cpufeat_mask(X86_FEATURE_X2APIC);
-            if ( cpu_has_htt )
-                edx |= cpufeat_mask(X86_FEATURE_HTT);
-
-            switch ( boot_cpu_data.x86_vendor )
-            {
-            case X86_VENDOR_INTEL:
-                /*
-                 * Intel masking MSRs are documented as AND masks.
-                 * Experimentally, they are applied after OSXSAVE and APIC
-                 * are fast-forwarded from real hardware state.
-                 */
-                mask &= ((uint64_t)edx << 32) | ecx;
-
-                if ( ecx & cpufeat_mask(X86_FEATURE_XSAVE) )
-                    ecx = cpufeat_mask(X86_FEATURE_OSXSAVE);
-                else
-                    ecx = 0;
-                edx = cpufeat_mask(X86_FEATURE_APIC);
-
-                mask |= ((uint64_t)edx << 32) | ecx;
-                break;
-
-            case X86_VENDOR_AMD:
-            case X86_VENDOR_HYGON:
-                mask &= ((uint64_t)ecx << 32) | edx;
-
-                /*
-                 * AMD masking MSRs are documented as overrides.
-                 * Experimentally, fast-forwarding of the OSXSAVE and APIC
-                 * bits from real hardware state only occurs if the MSR has
-                 * the respective bits set.
-                 */
-                if ( ecx & cpufeat_mask(X86_FEATURE_XSAVE) )
-                    ecx = cpufeat_mask(X86_FEATURE_OSXSAVE);
-                else
-                    ecx = 0;
-                edx = cpufeat_mask(X86_FEATURE_APIC);
-
-                /*
-                 * If the Hypervisor bit is set in the policy, we can also
-                 * forward it into real CPUID.
-                 */
-                if ( p->basic.hypervisor )
-                    ecx |= cpufeat_mask(X86_FEATURE_HYPERVISOR);
-
-                mask |= ((uint64_t)ecx << 32) | edx;
-                break;
-            }
-
-            d->arch.pv.cpuidmasks->_1cd = mask;
-        }
-
-        if ( ((levelling_caps & LCAP_6c) == LCAP_6c) )
-        {
-            uint64_t mask = cpuidmask_defaults._6c;
-
-            if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
-                mask &= (~0ULL << 32) | p->basic.raw[6].c;
-
-            d->arch.pv.cpuidmasks->_6c = mask;
-        }
-
-        if ( ((levelling_caps & LCAP_7ab0) == LCAP_7ab0) )
-        {
-            uint64_t mask = cpuidmask_defaults._7ab0;
-
-            /*
-             * Leaf 7[0].eax is max_subleaf, not a feature mask.  Take it
-             * wholesale from the policy, but clamp the features in 7[0].ebx
-             * per usual.
-             */
-            if ( boot_cpu_data.x86_vendor &
-                 (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
-                mask = (((uint64_t)p->feat.max_subleaf << 32) |
-                        ((uint32_t)mask & p->feat._7b0));
-
-            d->arch.pv.cpuidmasks->_7ab0 = mask;
-        }
-
-        if ( ((levelling_caps & LCAP_Da1) == LCAP_Da1) )
-        {
-            uint64_t mask = cpuidmask_defaults.Da1;
-            uint32_t eax = p->xstate.Da1;
-
-            if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
-                mask &= (~0ULL << 32) | eax;
-
-            d->arch.pv.cpuidmasks->Da1 = mask;
-        }
-
-        if ( ((levelling_caps & LCAP_e1cd) == LCAP_e1cd) )
-        {
-            uint64_t mask = cpuidmask_defaults.e1cd;
-            uint32_t ecx = p->extd.e1c;
-            uint32_t edx = p->extd.e1d;
-
-            /*
-             * Must expose hosts CMP_LEGACY value so a guest using native
-             * CPUID can correctly interpret other leaves which cannot be
-             * masked.
-             */
-            if ( cpu_has_cmp_legacy )
-                ecx |= cpufeat_mask(X86_FEATURE_CMP_LEGACY);
-
-            /*
-             * If not emulating AMD or Hygon, clear the duplicated features
-             * in e1d.
-             */
-            if ( !(p->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) )
-                edx &= ~CPUID_COMMON_1D_FEATURES;
-
-            switch ( boot_cpu_data.x86_vendor )
-            {
-            case X86_VENDOR_INTEL:
-                mask &= ((uint64_t)edx << 32) | ecx;
-                break;
-
-            case X86_VENDOR_AMD:
-            case X86_VENDOR_HYGON:
-                mask &= ((uint64_t)ecx << 32) | edx;
-
-                /*
-                 * Fast-forward bits - Must be set in the masking MSR for
-                 * fast-forwarding to occur in hardware.
-                 */
-                ecx = 0;
-                edx = cpufeat_mask(X86_FEATURE_APIC);
-
-                mask |= ((uint64_t)ecx << 32) | edx;
-                break;
-            }
-
-            d->arch.pv.cpuidmasks->e1cd = mask;
-        }
-    }
-
-    for_each_vcpu ( d, v )
-    {
-        cpuid_policy_updated(v);
-
-        /* If PMU version is zero then the guest doesn't have VPMU */
-        if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-             p->basic.pmu_version == 0 )
-            vpmu_destroy(v);
-    }
-}
-
 static int update_domain_cpu_policy(struct domain *d,
                                     xen_domctl_cpu_policy_t *xdpc)
 {


Re: [PATCH v2 5/7] x86: move domain_cpu_policy_changed()
Posted by Andrew Cooper 3 years, 8 months ago
On 07/08/2020 12:34, Jan Beulich wrote:
> This is in preparation of making the building of domctl.c conditional.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

[PATCH v2 6/7] x86: move cpu_{up,down}_helper()
Posted by Jan Beulich 3 years, 8 months ago
This is in preparation of making the building of sysctl.c conditional.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -22,6 +22,7 @@
 #include <asm/hardirq.h>
 #include <asm/hpet.h>
 #include <asm/hvm/support.h>
+#include <asm/setup.h>
 #include <irq_vectors.h>
 #include <mach_apic.h>
 
@@ -396,3 +397,36 @@ void call_function_interrupt(struct cpu_
     perfc_incr(ipis);
     smp_call_function_interrupt();
 }
+
+long cpu_up_helper(void *data)
+{
+    unsigned int cpu = (unsigned long)data;
+    int ret = cpu_up(cpu);
+
+    /* Have one more go on EBUSY. */
+    if ( ret == -EBUSY )
+        ret = cpu_up(cpu);
+
+    if ( !ret && !opt_smt &&
+         cpu_data[cpu].compute_unit_id == INVALID_CUID &&
+         cpumask_weight(per_cpu(cpu_sibling_mask, cpu)) > 1 )
+    {
+        ret = cpu_down_helper(data);
+        if ( ret )
+            printk("Could not re-offline CPU%u (%d)\n", cpu, ret);
+        else
+            ret = -EPERM;
+    }
+
+    return ret;
+}
+
+long cpu_down_helper(void *data)
+{
+    int cpu = (unsigned long)data;
+    int ret = cpu_down(cpu);
+    /* Have one more go on EBUSY. */
+    if ( ret == -EBUSY )
+        ret = cpu_down(cpu);
+    return ret;
+}
--- a/xen/arch/x86/sysctl.c
+++ b/xen/arch/x86/sysctl.c
@@ -79,39 +79,6 @@ static void l3_cache_get(void *arg)
         l3_info->size = info.size / 1024; /* in KB unit */
 }
 
-long cpu_up_helper(void *data)
-{
-    unsigned int cpu = (unsigned long)data;
-    int ret = cpu_up(cpu);
-
-    /* Have one more go on EBUSY. */
-    if ( ret == -EBUSY )
-        ret = cpu_up(cpu);
-
-    if ( !ret && !opt_smt &&
-         cpu_data[cpu].compute_unit_id == INVALID_CUID &&
-         cpumask_weight(per_cpu(cpu_sibling_mask, cpu)) > 1 )
-    {
-        ret = cpu_down_helper(data);
-        if ( ret )
-            printk("Could not re-offline CPU%u (%d)\n", cpu, ret);
-        else
-            ret = -EPERM;
-    }
-
-    return ret;
-}
-
-long cpu_down_helper(void *data)
-{
-    int cpu = (unsigned long)data;
-    int ret = cpu_down(cpu);
-    /* Have one more go on EBUSY. */
-    if ( ret == -EBUSY )
-        ret = cpu_down(cpu);
-    return ret;
-}
-
 static long smt_up_down_helper(void *data)
 {
     bool up = (bool)data;


Re: [PATCH v2 6/7] x86: move cpu_{up,down}_helper()
Posted by Andrew Cooper 3 years, 8 months ago
On 07/08/2020 12:34, Jan Beulich wrote:
> This is in preparation of making the building of sysctl.c conditional.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

[PATCH v2 7/7] x86: don't include domctl and alike in shim-exclusive builds
Posted by Jan Beulich 3 years, 8 months ago
There is no need for platform-wide, system-wide, or per-domain control
in this case. Hence avoid including this dead code in the build.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -23,7 +23,6 @@ obj-$(CONFIG_GDBSX) += debug.o
 obj-y += delay.o
 obj-y += desc.o
 obj-bin-y += dmi_scan.init.o
-obj-y += domctl.o
 obj-y += domain.o
 obj-bin-y += dom0_build.init.o
 obj-y += domain_page.o
@@ -51,7 +50,6 @@ obj-y += numa.o
 obj-y += pci.o
 obj-y += percpu.o
 obj-y += physdev.o x86_64/physdev.o
-obj-y += platform_hypercall.o x86_64/platform_hypercall.o
 obj-y += psr.o
 obj-y += setup.o
 obj-y += shutdown.o
@@ -60,7 +58,6 @@ obj-y += smpboot.o
 obj-y += spec_ctrl.o
 obj-y += srat.o
 obj-y += string.o
-obj-y += sysctl.o
 obj-y += time.o
 obj-y += trace.o
 obj-y += traps.o
@@ -71,6 +68,13 @@ obj-$(CONFIG_TBOOT) += tboot.o
 obj-y += hpet.o
 obj-y += vm_event.o
 obj-y += xstate.o
+
+ifneq ($(CONFIG_PV_SHIM_EXCLUSIVE),y)
+obj-y += domctl.o
+obj-y += platform_hypercall.o x86_64/platform_hypercall.o
+obj-y += sysctl.o
+endif
+
 extra-y += asm-macros.i
 
 ifneq ($(CONFIG_HVM),y)
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -47,6 +47,8 @@
 /* Per-CPU variable for enforcing the lock ordering */
 DEFINE_PER_CPU(int, mm_lock_level);
 
+#ifndef CONFIG_PV_SHIM_EXCLUSIVE
+
 /************************************************/
 /*              LOG DIRTY SUPPORT               */
 /************************************************/
@@ -628,6 +630,8 @@ void paging_log_dirty_init(struct domain
     d->arch.paging.log_dirty.ops = ops;
 }
 
+#endif /* CONFIG_PV_SHIM_EXCLUSIVE */
+
 /************************************************/
 /*           CODE FOR PAGING SUPPORT            */
 /************************************************/
@@ -667,7 +671,7 @@ void paging_vcpu_init(struct vcpu *v)
         shadow_vcpu_init(v);
 }
 
-
+#ifndef CONFIG_PV_SHIM_EXCLUSIVE
 int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
                   XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
                   bool_t resuming)
@@ -788,6 +792,7 @@ long paging_domctl_continuation(XEN_GUES
 
     return ret;
 }
+#endif /* CONFIG_PV_SHIM_EXCLUSIVE */
 
 /* Call when destroying a domain */
 int paging_teardown(struct domain *d)
@@ -803,10 +808,12 @@ int paging_teardown(struct domain *d)
     if ( preempted )
         return -ERESTART;
 
+#ifndef CONFIG_PV_SHIM_EXCLUSIVE
     /* clean up log dirty resources. */
     rc = paging_free_log_dirty_bitmap(d, 0);
     if ( rc == -ERESTART )
         return rc;
+#endif
 
     /* Move populate-on-demand cache back to domain_list for destruction */
     rc = p2m_pod_empty_cache(d);
--- a/xen/arch/x86/pv/hypercall.c
+++ b/xen/arch/x86/pv/hypercall.c
@@ -42,7 +42,9 @@ const hypercall_table_t pv_hypercall_tab
     COMPAT_CALL(set_callbacks),
     HYPERCALL(fpu_taskswitch),
     HYPERCALL(sched_op_compat),
+#ifndef CONFIG_PV_SHIM_EXCLUSIVE
     COMPAT_CALL(platform_op),
+#endif
     HYPERCALL(set_debugreg),
     HYPERCALL(get_debugreg),
     COMPAT_CALL(update_descriptor),
@@ -72,8 +74,10 @@ const hypercall_table_t pv_hypercall_tab
 #endif
     HYPERCALL(event_channel_op),
     COMPAT_CALL(physdev_op),
+#ifndef CONFIG_PV_SHIM_EXCLUSIVE
     HYPERCALL(sysctl),
     HYPERCALL(domctl),
+#endif
 #ifdef CONFIG_KEXEC
     COMPAT_CALL(kexec_op),
 #endif
@@ -89,7 +93,9 @@ const hypercall_table_t pv_hypercall_tab
     HYPERCALL(hypfs_op),
 #endif
     HYPERCALL(mca),
+#ifndef CONFIG_PV_SHIM_EXCLUSIVE
     HYPERCALL(arch_1),
+#endif
 };
 
 #undef do_arch_1
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -6,7 +6,6 @@ obj-$(CONFIG_CORE_PARKING) += core_parki
 obj-y += cpu.o
 obj-$(CONFIG_DEBUG_TRACE) += debugtrace.o
 obj-$(CONFIG_HAS_DEVICE_TREE) += device_tree.o
-obj-y += domctl.o
 obj-y += domain.o
 obj-y += event_2l.o
 obj-y += event_channel.o
@@ -26,7 +25,6 @@ obj-$(CONFIG_NEEDS_LIST_SORT) += list_so
 obj-$(CONFIG_LIVEPATCH) += livepatch.o livepatch_elf.o
 obj-$(CONFIG_MEM_ACCESS) += mem_access.o
 obj-y += memory.o
-obj-y += monitor.o
 obj-y += multicall.o
 obj-y += notifier.o
 obj-y += page_alloc.o
@@ -47,7 +45,6 @@ obj-y += spinlock.o
 obj-y += stop_machine.o
 obj-y += string.o
 obj-y += symbols.o
-obj-y += sysctl.o
 obj-y += tasklet.o
 obj-y += time.o
 obj-y += timer.o
@@ -66,6 +63,12 @@ obj-bin-$(CONFIG_X86) += $(foreach n,dec
 
 obj-$(CONFIG_COMPAT) += $(addprefix compat/,domain.o kernel.o memory.o multicall.o xlat.o)
 
+ifneq ($(CONFIG_PV_SHIM_EXCLUSIVE),y)
+obj-y += domctl.o
+obj-y += monitor.o
+obj-y += sysctl.o
+endif
+
 extra-y := symbols-dummy.o
 
 obj-$(CONFIG_COVERAGE) += coverage/
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -154,6 +154,8 @@ struct paging_mode {
 /*****************************************************************************
  * Log dirty code */
 
+#ifndef CONFIG_PV_SHIM_EXCLUSIVE
+
 /* get the dirty bitmap for a specific range of pfns */
 void paging_log_dirty_range(struct domain *d,
                             unsigned long begin_pfn,
@@ -202,6 +204,15 @@ struct sh_dirty_vram {
     s_time_t last_dirty;
 };
 
+#else /* !CONFIG_PV_SHIM_EXCLUSIVE */
+
+static inline void paging_log_dirty_init(struct domain *d,
+                                         const struct log_dirty_ops *ops) {}
+static inline void paging_mark_dirty(struct domain *d, mfn_t gmfn) {}
+static inline void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn) {}
+
+#endif /* CONFIG_PV_SHIM_EXCLUSIVE */
+
 /*****************************************************************************
  * Entry points into the paging-assistance code */
 
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -125,6 +125,10 @@ struct vnuma_info {
     struct xen_vmemrange *vmemrange;
 };
 
+#ifndef CONFIG_PV_SHIM_EXCLUSIVE
 void vnuma_destroy(struct vnuma_info *vnuma);
+#else
+static inline void vnuma_destroy(struct vnuma_info *vnuma) { ASSERT(!vnuma); }
+#endif
 
 #endif /* __XEN_DOMAIN_H__ */