tools/misc/xen-hvmcrash.c | 77 +++++++-------------------------------- 1 file changed, 13 insertions(+), 64 deletions(-)
xen-hvmcrash would previously save records, overwrite the instruction
pointer with a bogus value, and then restore them to crash a domain
just enough to cause the guest OS to memdump.
This approach is found to be unreliable when tested on a guest running
Windows 10 x64, with some executions doing nothing at all.
Another approach would be to trigger NMIs. This approach is found to be
unreliable when tested on Linux (Ubuntu 22.04), as Linux will ignore
NMIs if it is not configured to handle such.
Injecting a double fault abort to all vCPUs is found to be more
reliable at crashing and invoking memdumps from Windows and Linux
domains.
This patch modifies the xen-hvmcrash tool to inject #DF to all vCPUs
belonging to the specified domain, instead of overwriting RIP.
Signed-off-by: Matthew Barnes <matthew.barnes@cloud.com>
---
tools/misc/xen-hvmcrash.c | 77 +++++++--------------------------------
1 file changed, 13 insertions(+), 64 deletions(-)
diff --git a/tools/misc/xen-hvmcrash.c b/tools/misc/xen-hvmcrash.c
index 1d058fa40a47..8ef1beb388f8 100644
--- a/tools/misc/xen-hvmcrash.c
+++ b/tools/misc/xen-hvmcrash.c
@@ -38,22 +38,21 @@
#include <sys/stat.h>
#include <arpa/inet.h>
+#define XC_WANT_COMPAT_DEVICEMODEL_API
#include <xenctrl.h>
#include <xen/xen.h>
#include <xen/domctl.h>
#include <xen/hvm/save.h>
+#define X86_ABORT_DF 8
+
int
main(int argc, char **argv)
{
int domid;
xc_interface *xch;
xc_domaininfo_t dominfo;
- int ret;
- uint32_t len;
- uint8_t *buf;
- uint32_t off;
- struct hvm_save_descriptor *descriptor;
+ int vcpu_id, ret;
if (argc != 2 || !argv[1] || (domid = atoi(argv[1])) < 0) {
fprintf(stderr, "usage: %s <domid>\n", argv[0]);
@@ -77,66 +76,16 @@ main(int argc, char **argv)
exit(1);
}
- ret = xc_domain_pause(xch, domid);
- if (ret < 0) {
- perror("xc_domain_pause");
- exit(-1);
- }
-
- /*
- * Calling with zero buffer length should return the buffer length
- * required.
- */
- ret = xc_domain_hvm_getcontext(xch, domid, 0, 0);
- if (ret < 0) {
- perror("xc_domain_hvm_getcontext");
- exit(1);
- }
-
- len = ret;
- buf = malloc(len);
- if (buf == NULL) {
- perror("malloc");
- exit(1);
- }
-
- ret = xc_domain_hvm_getcontext(xch, domid, buf, len);
- if (ret < 0) {
- perror("xc_domain_hvm_getcontext");
- exit(1);
- }
-
- off = 0;
-
- while (off < len) {
- descriptor = (struct hvm_save_descriptor *)(buf + off);
-
- off += sizeof (struct hvm_save_descriptor);
-
- if (descriptor->typecode == HVM_SAVE_CODE(CPU)) {
- HVM_SAVE_TYPE(CPU) *cpu;
-
- /* Overwrite EIP/RIP with some recognisable but bogus value */
- cpu = (HVM_SAVE_TYPE(CPU) *)(buf + off);
- printf("CPU[%d]: RIP = %" PRIx64 "\n", descriptor->instance, cpu->rip);
- cpu->rip = 0xf001;
- } else if (descriptor->typecode == HVM_SAVE_CODE(END)) {
- break;
+ for (vcpu_id = 0; vcpu_id <= dominfo.max_vcpu_id; vcpu_id++) {
+ printf("Injecting #DF to vcpu ID #%d...\n", vcpu_id);
+ ret = xc_hvm_inject_trap(xch, domid, vcpu_id,
+ X86_ABORT_DF,
+ XEN_DMOP_EVENT_hw_exc, 0,
+ NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Could not inject #DF to vcpu ID #%d\n", vcpu_id);
+ perror("xc_hvm_inject_trap");
}
-
- off += descriptor->length;
- }
-
- ret = xc_domain_hvm_setcontext(xch, domid, buf, len);
- if (ret < 0) {
- perror("xc_domain_hvm_setcontext");
- exit(1);
- }
-
- ret = xc_domain_unpause(xch, domid);
- if (ret < 0) {
- perror("xc_domain_unpause");
- exit(1);
}
return 0;
--
2.34.1
On 03/06/2024 3:59 pm, Matthew Barnes wrote: > xen-hvmcrash would previously save records, overwrite the instruction > pointer with a bogus value, and then restore them to crash a domain > just enough to cause the guest OS to memdump. > > This approach is found to be unreliable when tested on a guest running > Windows 10 x64, with some executions doing nothing at all. > > Another approach would be to trigger NMIs. This approach is found to be > unreliable when tested on Linux (Ubuntu 22.04), as Linux will ignore > NMIs if it is not configured to handle such. > > Injecting a double fault abort to all vCPUs is found to be more > reliable at crashing and invoking memdumps from Windows and Linux > domains. Why every CPU? We never did that before, and I don't see any reason it ought to be necessary now either. > diff --git a/tools/misc/xen-hvmcrash.c b/tools/misc/xen-hvmcrash.c > index 1d058fa40a47..8ef1beb388f8 100644 > --- a/tools/misc/xen-hvmcrash.c > +++ b/tools/misc/xen-hvmcrash.c > @@ -38,22 +38,21 @@ > #include <sys/stat.h> > #include <arpa/inet.h> > > +#define XC_WANT_COMPAT_DEVICEMODEL_API Please don't introduce this. We want to purge it from the codebase, not propagate it. You want to open and use a libxendevicemodel handle. (Sadly you also need a xenctrl handle too, until we sort out the userspace ABIs). > #include <xenctrl.h> > #include <xen/xen.h> > #include <xen/domctl.h> > #include <xen/hvm/save.h> > > +#define X86_ABORT_DF 8 #include <xen/asm/x86-defns.h> and use X86_EXC_DF. ~Andrew
On Tue, Jun 25, 2024 at 10:02:42PM +0100, Andrew Cooper wrote: > On 03/06/2024 3:59 pm, Matthew Barnes wrote: > > xen-hvmcrash would previously save records, overwrite the instruction > > pointer with a bogus value, and then restore them to crash a domain > > just enough to cause the guest OS to memdump. > > > > This approach is found to be unreliable when tested on a guest running > > Windows 10 x64, with some executions doing nothing at all. > > > > Another approach would be to trigger NMIs. This approach is found to be > > unreliable when tested on Linux (Ubuntu 22.04), as Linux will ignore > > NMIs if it is not configured to handle such. > > > > Injecting a double fault abort to all vCPUs is found to be more > > reliable at crashing and invoking memdumps from Windows and Linux > > domains. > > Why every CPU? > > We never did that before, and I don't see any reason it ought to be necessary > now either. We do: at the moment, xen-hvmcrash iterates through hvm_save_descriptors after pausing the domain, overwriting the EIP/RIP of each cpu it finds. Is there a reason not to inject #DF into each domain vCPU? Wouldn't that crash the domain more reliably? > > diff --git a/tools/misc/xen-hvmcrash.c b/tools/misc/xen-hvmcrash.c > > index 1d058fa40a47..8ef1beb388f8 100644 > > --- a/tools/misc/xen-hvmcrash.c > > +++ b/tools/misc/xen-hvmcrash.c > > @@ -38,22 +38,21 @@ > > #include <sys/stat.h> > > #include <arpa/inet.h> > > > > +#define XC_WANT_COMPAT_DEVICEMODEL_API > > Please don't introduce this. We want to purge it from the codebase, not > propagate it. > > You want to open and use a libxendevicemodel handle. (Sadly you also > need a xenctrl handle too, until we sort out the userspace ABIs). > > > #include <xenctrl.h> > > #include <xen/xen.h> > > #include <xen/domctl.h> > > #include <xen/hvm/save.h> > > > > +#define X86_ABORT_DF 8 > > #include <xen/asm/x86-defns.h> > > and use X86_EXC_DF. Understood: this will be reflected in patch v2. Matt
On Mon, Jun 03, 2024 at 03:59:18PM +0100, Matthew Barnes wrote: > diff --git a/tools/misc/xen-hvmcrash.c b/tools/misc/xen-hvmcrash.c > index 1d058fa40a47..8ef1beb388f8 100644 > --- a/tools/misc/xen-hvmcrash.c > +++ b/tools/misc/xen-hvmcrash.c > @@ -38,22 +38,21 @@ > #include <sys/stat.h> > #include <arpa/inet.h> > > +#define XC_WANT_COMPAT_DEVICEMODEL_API > #include <xenctrl.h> > #include <xen/xen.h> > #include <xen/domctl.h> > #include <xen/hvm/save.h> There are lots of headers that aren't used by the new code and can be removed. (There were probably way too many headers included when this utility was introduced.) > + for (vcpu_id = 0; vcpu_id <= dominfo.max_vcpu_id; vcpu_id++) { > + printf("Injecting #DF to vcpu ID #%d...\n", vcpu_id); > + ret = xc_hvm_inject_trap(xch, domid, vcpu_id, > + X86_ABORT_DF, In the definition of xendevicemodel_inject_event(), the comment says to look at "enum x86_event_type" for possible event "type", but there's no "#DF" type, can we add this new one there before using it? (It's going to be something like X86_EVENTTYPE_*) > + XEN_DMOP_EVENT_hw_exc, 0, > + NULL, NULL); The new code doesn't build, "NULL" aren't integers. > + if (ret < 0) { > + fprintf(stderr, "Could not inject #DF to vcpu ID #%d\n", vcpu_id); > + perror("xc_hvm_inject_trap"); Are you meant to print two error lines when there's an error? You can also use strerror() to convert an "errno" to a string. Also, perror() might print an error from fprintf() if that last one failed. Are you meant to keep injecting into other vcpus even if one has failed? Should `xen-hvmcrash` return success when it failed to inject the double fault to all vcpus? Thanks, -- Anthony Perard | Vates XCP-ng Developer XCP-ng & Xen Orchestra - Vates solutions web: https://vates.tech
© 2016 - 2024 Red Hat, Inc.