Implement memcmp(), memcpy(), and memset() to override the compiler's
built-in versions in order to guarantee that the compiler won't generate
out-of-line calls to external functions via the PLT. This allows the
helpers to be safely used in guest code, as KVM selftests don't support
dynamic loading of guest code.
Steal the implementations from the kernel's generic versions, sans the
optimizations in memcmp() for unaligned accesses.
Put the utilities in a separate compilation unit and build with
-ffreestanding to fudge around a gcc "feature" where it will optimize
memset(), memcpy(), etc... by generating a recursive call. I.e. the
compiler optimizes itself into infinite recursion. Alternatively, the
individual functions could be tagged with
optimize("no-tree-loop-distribute-patterns"), but using "optimize" for
anything but debug is discouraged, and Linus NAK'd the use of the flag
in the kernel proper[*].
https://lore.kernel.org/lkml/CAHk-=wik-oXnUpfZ6Hw37uLykc-_P0Apyn2XuX-odh-3Nzop8w@mail.gmail.com
Cc: Andrew Jones <andrew.jones@linux.dev>
Cc: Anup Patel <anup@brainfault.org>
Cc: Atish Patra <atishp@atishpatra.org>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
tools/testing/selftests/kvm/Makefile | 8 ++++-
.../selftests/kvm/include/kvm_util_base.h | 10 ++++++
tools/testing/selftests/kvm/lib/kvm_string.c | 33 +++++++++++++++++++
3 files changed, 50 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/kvm/lib/kvm_string.c
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 4c122f1b1737..92a0c05645b5 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -48,6 +48,8 @@ LIBKVM += lib/rbtree.c
LIBKVM += lib/sparsebit.c
LIBKVM += lib/test_util.c
+LIBKVM_STRING += lib/kvm_string.c
+
LIBKVM_x86_64 += lib/x86_64/apic.c
LIBKVM_x86_64 += lib/x86_64/handlers.S
LIBKVM_x86_64 += lib/x86_64/perf_test_util.c
@@ -220,7 +222,8 @@ LIBKVM_C := $(filter %.c,$(LIBKVM))
LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
+LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.*
@@ -231,6 +234,9 @@ $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
+
x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 24fde97f6121..bdb751f4825c 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -173,6 +173,16 @@ struct vm_guest_mode_params {
};
extern const struct vm_guest_mode_params vm_guest_mode_params[];
+/*
+ * Override the "basic" built-in string helpers so that they can be used in
+ * guest code. KVM selftests don't support dynamic loading in guest code and
+ * will jump into the weeds if the compiler decides to insert an out-of-line
+ * call via the PLT.
+ */
+int memcmp(const void *cs, const void *ct, size_t count);
+void *memcpy(void *dest, const void *src, size_t count);
+void *memset(void *s, int c, size_t count);
+
int open_path_or_exit(const char *path, int flags);
int open_kvm_dev_path_or_exit(void);
unsigned int kvm_check_cap(long cap);
diff --git a/tools/testing/selftests/kvm/lib/kvm_string.c b/tools/testing/selftests/kvm/lib/kvm_string.c
new file mode 100644
index 000000000000..a60d56d4e5b8
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_string.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "kvm_util.h"
+
+int memcmp(const void *cs, const void *ct, size_t count)
+{
+ const unsigned char *su1, *su2;
+ int res = 0;
+
+ for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) {
+ if ((res = *su1 - *su2) != 0)
+ break;
+ }
+ return res;
+}
+
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ char *tmp = dest;
+ const char *s = src;
+
+ while (count--)
+ *tmp++ = *s++;
+ return dest;
+}
+
+void *memset(void *s, int c, size_t count)
+{
+ char *xs = s;
+
+ while (count--)
+ *xs++ = c;
+ return s;
+}
--
2.37.2.789.g6183377224-goog
On Thu, Sep 08, 2022 at 11:31:30PM +0000, Sean Christopherson wrote:
> Implement memcmp(), memcpy(), and memset() to override the compiler's
> built-in versions in order to guarantee that the compiler won't generate
> out-of-line calls to external functions via the PLT. This allows the
> helpers to be safely used in guest code, as KVM selftests don't support
> dynamic loading of guest code.
>
> Steal the implementations from the kernel's generic versions, sans the
> optimizations in memcmp() for unaligned accesses.
>
> Put the utilities in a separate compilation unit and build with
> -ffreestanding to fudge around a gcc "feature" where it will optimize
> memset(), memcpy(), etc... by generating a recursive call. I.e. the
> compiler optimizes itself into infinite recursion. Alternatively, the
> individual functions could be tagged with
> optimize("no-tree-loop-distribute-patterns"), but using "optimize" for
> anything but debug is discouraged, and Linus NAK'd the use of the flag
> in the kernel proper[*].
>
> https://lore.kernel.org/lkml/CAHk-=wik-oXnUpfZ6Hw37uLykc-_P0Apyn2XuX-odh-3Nzop8w@mail.gmail.com
>
> Cc: Andrew Jones <andrew.jones@linux.dev>
> Cc: Anup Patel <anup@brainfault.org>
> Cc: Atish Patra <atishp@atishpatra.org>
> Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
> Cc: Janosch Frank <frankja@linux.ibm.com>
> Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
> tools/testing/selftests/kvm/Makefile | 8 ++++-
> .../selftests/kvm/include/kvm_util_base.h | 10 ++++++
> tools/testing/selftests/kvm/lib/kvm_string.c | 33 +++++++++++++++++++
> 3 files changed, 50 insertions(+), 1 deletion(-)
> create mode 100644 tools/testing/selftests/kvm/lib/kvm_string.c
>
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index 4c122f1b1737..92a0c05645b5 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -48,6 +48,8 @@ LIBKVM += lib/rbtree.c
> LIBKVM += lib/sparsebit.c
> LIBKVM += lib/test_util.c
>
> +LIBKVM_STRING += lib/kvm_string.c
Can this file be named lib/string.c instead? This file has nothing to do
with KVM per-se.
> +
> LIBKVM_x86_64 += lib/x86_64/apic.c
> LIBKVM_x86_64 += lib/x86_64/handlers.S
> LIBKVM_x86_64 += lib/x86_64/perf_test_util.c
> @@ -220,7 +222,8 @@ LIBKVM_C := $(filter %.c,$(LIBKVM))
> LIBKVM_S := $(filter %.S,$(LIBKVM))
> LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
> LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
> -LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
> +LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
> +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
>
> EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.*
>
> @@ -231,6 +234,9 @@ $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
> $(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
> $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
>
> +$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
> + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
A comment here would be helpful to document why LIBKVM_STRING_OBJ needs
a special case.
> +
> x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
> $(TEST_GEN_PROGS): $(LIBKVM_OBJS)
> $(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
> diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
> index 24fde97f6121..bdb751f4825c 100644
> --- a/tools/testing/selftests/kvm/include/kvm_util_base.h
> +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
> @@ -173,6 +173,16 @@ struct vm_guest_mode_params {
> };
> extern const struct vm_guest_mode_params vm_guest_mode_params[];
>
> +/*
> + * Override the "basic" built-in string helpers so that they can be used in
> + * guest code. KVM selftests don't support dynamic loading in guest code and
> + * will jump into the weeds if the compiler decides to insert an out-of-line
> + * call via the PLT.
> + */
> +int memcmp(const void *cs, const void *ct, size_t count);
> +void *memcpy(void *dest, const void *src, size_t count);
> +void *memset(void *s, int c, size_t count);
> +
> int open_path_or_exit(const char *path, int flags);
> int open_kvm_dev_path_or_exit(void);
> unsigned int kvm_check_cap(long cap);
> diff --git a/tools/testing/selftests/kvm/lib/kvm_string.c b/tools/testing/selftests/kvm/lib/kvm_string.c
> new file mode 100644
> index 000000000000..a60d56d4e5b8
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/lib/kvm_string.c
> @@ -0,0 +1,33 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +#include "kvm_util.h"
Is this include necesary?
> +
> +int memcmp(const void *cs, const void *ct, size_t count)
> +{
> + const unsigned char *su1, *su2;
> + int res = 0;
> +
> + for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) {
> + if ((res = *su1 - *su2) != 0)
> + break;
> + }
> + return res;
> +}
> +
> +void *memcpy(void *dest, const void *src, size_t count)
> +{
> + char *tmp = dest;
> + const char *s = src;
> +
> + while (count--)
> + *tmp++ = *s++;
> + return dest;
> +}
> +
> +void *memset(void *s, int c, size_t count)
> +{
> + char *xs = s;
> +
> + while (count--)
> + *xs++ = c;
> + return s;
> +}
> --
> 2.37.2.789.g6183377224-goog
>
On Thu, Sep 22, 2022, David Matlack wrote:
> > +LIBKVM_STRING += lib/kvm_string.c
>
> Can this file be named lib/string.c instead? This file has nothing to do
> with KVM per-se.
Yes and no. I deliberately chose kvm_string to avoid confusion with
tools/lib/string.c and tools/include/nolibc/string.h. The implementations
themselves aren't KVM specific, but the reason the file _exists_ is 100% unique
to KVM as there is no other environment where tools and/or selftests link to
glibc but need to override the string ops.
I'm not completely opposed to calling it string.c, but my preference is to keep
it kvm_string.c so that it's slightly more obvious that KVM selftests are a
special snowflake.
> > diff --git a/tools/testing/selftests/kvm/lib/kvm_string.c b/tools/testing/selftests/kvm/lib/kvm_string.c
> > new file mode 100644
> > index 000000000000..a60d56d4e5b8
> > --- /dev/null
> > +++ b/tools/testing/selftests/kvm/lib/kvm_string.c
> > @@ -0,0 +1,33 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +#include "kvm_util.h"
>
> Is this include necesary?
Nope, I added the include because I also added declarations in kvm_util_base.h,
but that's unnecessary because stddef.h also provides the declarations, and those
_must_ match the prototypes of the definitions. So yeah, this is better written as:
// SPDX-License-Identifier: GPL-2.0-only
#include <stddef.h>
/*
* Override the "basic" built-in string helpers so that they can be used in
* guest code. KVM selftests don't support dynamic loading in guest code and
* will jump into the weeds if the compiler decides to insert an out-of-line
* call via the PLT.
*/
int memcmp(const void *cs, const void *ct, size_t count)
{
const unsigned char *su1, *su2;
int res = 0;
for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) {
if ((res = *su1 - *su2) != 0)
break;
}
return res;
}
void *memcpy(void *dest, const void *src, size_t count)
{
char *tmp = dest;
const char *s = src;
while (count--)
*tmp++ = *s++;
return dest;
}
void *memset(void *s, int c, size_t count)
{
char *xs = s;
while (count--)
*xs++ = c;
return s;
}
On Thu, Sep 22, 2022 at 10:40 AM Sean Christopherson <seanjc@google.com> wrote:
>
> On Thu, Sep 22, 2022, David Matlack wrote:
> > > +LIBKVM_STRING += lib/kvm_string.c
> >
> > Can this file be named lib/string.c instead? This file has nothing to do
> > with KVM per-se.
>
> Yes and no. I deliberately chose kvm_string to avoid confusion with
> tools/lib/string.c and tools/include/nolibc/string.h. The implementations
> themselves aren't KVM specific, but the reason the file _exists_ is 100% unique
> to KVM as there is no other environment where tools and/or selftests link to
> glibc but need to override the string ops.
>
> I'm not completely opposed to calling it string.c, but my preference is to keep
> it kvm_string.c so that it's slightly more obvious that KVM selftests are a
> special snowflake.
Makes sense, the "kvm" prefix just made me do a double-take. Perhaps
string_overrides.c? That would make it clear that this file is
overriding string functions. And the fact that this file is in the KVM
selftests directory signals that the overrides are specific to the KVM
selftests.
>
> > > diff --git a/tools/testing/selftests/kvm/lib/kvm_string.c b/tools/testing/selftests/kvm/lib/kvm_string.c
> > > new file mode 100644
> > > index 000000000000..a60d56d4e5b8
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/kvm/lib/kvm_string.c
> > > @@ -0,0 +1,33 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +#include "kvm_util.h"
> >
> > Is this include necesary?
>
> Nope, I added the include because I also added declarations in kvm_util_base.h,
> but that's unnecessary because stddef.h also provides the declarations, and those
> _must_ match the prototypes of the definitions. So yeah, this is better written as:
>
> // SPDX-License-Identifier: GPL-2.0-only
> #include <stddef.h>
>
> /*
> * Override the "basic" built-in string helpers so that they can be used in
> * guest code. KVM selftests don't support dynamic loading in guest code and
> * will jump into the weeds if the compiler decides to insert an out-of-line
> * call via the PLT.
> */
> int memcmp(const void *cs, const void *ct, size_t count)
> {
> const unsigned char *su1, *su2;
> int res = 0;
>
> for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) {
> if ((res = *su1 - *su2) != 0)
> break;
> }
> return res;
> }
>
> void *memcpy(void *dest, const void *src, size_t count)
> {
> char *tmp = dest;
> const char *s = src;
>
> while (count--)
> *tmp++ = *s++;
> return dest;
> }
>
> void *memset(void *s, int c, size_t count)
> {
> char *xs = s;
>
> while (count--)
> *xs++ = c;
> return s;
> }
On Thu, Sep 22, 2022, David Matlack wrote: > On Thu, Sep 22, 2022 at 10:40 AM Sean Christopherson <seanjc@google.com> wrote: > > > > On Thu, Sep 22, 2022, David Matlack wrote: > > > > +LIBKVM_STRING += lib/kvm_string.c > > > > > > Can this file be named lib/string.c instead? This file has nothing to do > > > with KVM per-se. > > > > Yes and no. I deliberately chose kvm_string to avoid confusion with > > tools/lib/string.c and tools/include/nolibc/string.h. The implementations > > themselves aren't KVM specific, but the reason the file _exists_ is 100% unique > > to KVM as there is no other environment where tools and/or selftests link to > > glibc but need to override the string ops. > > > > I'm not completely opposed to calling it string.c, but my preference is to keep > > it kvm_string.c so that it's slightly more obvious that KVM selftests are a > > special snowflake. > > Makes sense, the "kvm" prefix just made me do a double-take. Perhaps > string_overrides.c? That would make it clear that this file is > overriding string functions. And the fact that this file is in the KVM > selftests directory signals that the overrides are specific to the KVM > selftests. I like that, string_overrides.c it is.
© 2016 - 2026 Red Hat, Inc.