[RFC] xen/arm64: livepatch: enable attaching callbacks

Ryo Takakura posted 1 patch 5 days, 18 hours ago
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/20260629020128.30561-1-takakura@valinux.co.jp
xen/arch/arm/arm64/livepatch.c      | 104 +++++++++++++++++++++++++++-
xen/common/livepatch.c              |  40 +++++++++--
xen/include/public/sysctl.h         |   3 +-
xen/include/xen/livepatch.h         |  13 +++-
xen/include/xen/livepatch_payload.h |   2 +
5 files changed, 150 insertions(+), 12 deletions(-)
[RFC] xen/arm64: livepatch: enable attaching callbacks
Posted by Ryo Takakura 5 days, 18 hours ago
Linux ftrace allows registering callbacks which is useful
for debugging and tracing events. On Linux, it is done by
reserving function entry points at compile time which can
later be patched to branch to a trampoline.

This patch implements similar callback feature, but with
different approach using existing livepatch infrastructure.
Instead of reserving function entry points at compile time,
the traced function will be livepatched so that it branches
to the trampoline.

The role of the trampoline(illustrated below) is to preserve
the context while jumping to the tracer function, and return
back to the traced function with its context restored.

trampoline:
    Save regs
    Call tracer function
    Restore regs
    old_addr
    return old_addr + 4

One can request the feature by setting @trampoline_buf to 1
which will allocate a buffer for trampoline.

Signed-off-by: Ryo Takakura <takakura@valinux.co.jp>
---

Hi!

For the future, I'm thinking of linux-like extensions
which help tracing and debugging by passing:
- saved registers
- caller information
- private data
- and so on ...

I would appreciate any advice or suggestion.
Thanks!

Example payload file:

#include <xen/lib.h>
#include <xen/livepatch.h>

static void my_tracer(void)
{
    printk("livepatch: do_domctl was called\n");
}

static struct livepatch_func funcs[]
    __attribute__((section(".livepatch.funcs"))) =
{
    {
        .name = "do_domctl",
        .old_size = 4572,
        .new_addr = my_tracer,
        .new_size = 32,
        .trampoline_buf = (void *)1,
        .version = LIVEPATCH_PAYLOAD_VERSION,
    }
};

Sample output:

$ tools/misc/xen-livepatch list
 ID                                     | status     | metadata
----------------------------------------+------------+---------------
trace_do_domctl                         | APPLIED    |
$ xl vcpu-list Domain-0
Name                                ID  VCPU   CPU State   Time(s) Affinity (Hard / Soft)
(XEN) livepatch: do_domctl was called
(XEN) livepatch: do_domctl was called
(XEN) livepatch: do_domctl was called
(XEN) livepatch: do_domctl was called
(XEN) livepatch: do_domctl was called
(XEN) livepatch: do_domctl was called
(XEN) livepatch: do_domctl was called
(XEN) livepatch: do_domctl was called
(XEN) livepatch: do_domctl was called
Domain-0                             0     0    1   -b-      67.7  all / all
Domain-0                             0     1    3   -b-     457.2  all / all
Domain-0                             0     2    2   -b-      42.4  all / all
Domain-0                             0     3    0   r--      32.4  all / all

Sincerely,
Ryo Takakura

---
 xen/arch/arm/arm64/livepatch.c      | 104 +++++++++++++++++++++++++++-
 xen/common/livepatch.c              |  40 +++++++++--
 xen/include/public/sysctl.h         |   3 +-
 xen/include/xen/livepatch.h         |  13 +++-
 xen/include/xen/livepatch_payload.h |   2 +
 5 files changed, 150 insertions(+), 12 deletions(-)

diff --git a/xen/arch/arm/arm64/livepatch.c b/xen/arch/arm/arm64/livepatch.c
index e135bd5bf9..b7c9aba94e 100644
--- a/xen/arch/arm/arm64/livepatch.c
+++ b/xen/arch/arm/arm64/livepatch.c
@@ -15,6 +15,29 @@
 #include <asm/insn.h>
 #include <asm/livepatch.h>
 
+
+#define AARCH64_REG_SP 31
+
+static uint32_t aarch64_insn_gen_stp_pre(unsigned int rt,
+                                         unsigned int rt2)
+{
+    return 0xa9800000 |
+           (((-16 / 8) & 0x7f) << 15) |
+           (rt2 << 10) |
+           (AARCH64_REG_SP << 5) |
+           rt;
+}
+
+static uint32_t aarch64_insn_gen_ldp_post(unsigned int rt,
+                                          unsigned int rt2)
+{
+    return 0xa8c00000 |
+           (((16 / 8) & 0x7f) << 15) |
+           (rt2 << 10) |
+           (AARCH64_REG_SP << 5) |
+           rt;
+}
+
 void arch_livepatch_apply(const struct livepatch_func *func,
                           struct livepatch_fstate *state)
 {
@@ -34,12 +57,87 @@ void arch_livepatch_apply(const struct livepatch_func *func,
     /* Save old ones. */
     memcpy(state->insn_buffer, func->old_addr, len);
 
-    if ( func->new_addr )
+    if ( !func->new_addr )
+    {
+        insn = aarch64_insn_gen_nop();
+    }
+    else if ( func->trampoline_buf )
+    {
+        int rc;
+        uint32_t *trampoline = func->trampoline_buf;
+        uint32_t *tp = trampoline;
+        void *orig_cont_addr = (void *)func->old_addr + len;
+        unsigned int trampoline_code_size = len + 12 * ARCH_PATCH_INSN_SIZE;
+        unsigned long trampoline_start = (unsigned long)trampoline & PAGE_MASK;
+        unsigned long trampoline_end =
+            PAGE_ALIGN((unsigned long)trampoline + trampoline_code_size);
+
+        /*
+         * Make the payload text area writeable while generating
+         * the trampoline instructions.
+         */
+        rc = modify_xen_mappings(trampoline_start, trampoline_end,
+                                 PAGE_HYPERVISOR);
+        if ( rc )
+        {
+            printk(XENLOG_ERR LIVEPATCH
+                   "Failed to make trampoline writable: %d\n", rc);
+            return;
+        }
+
+        /* Save state before calling the tracer. */
+        *tp++ = aarch64_insn_gen_stp_pre(0, 1);
+        *tp++ = aarch64_insn_gen_stp_pre(2, 3);
+        *tp++ = aarch64_insn_gen_stp_pre(4, 5);
+        *tp++ = aarch64_insn_gen_stp_pre(6, 7);
+        *tp++ = aarch64_insn_gen_stp_pre(29, 30);
+
+        /* Call user's tracing function. */
+        insn = aarch64_insn_gen_branch_imm(
+            (unsigned long)tp,
+            (unsigned long)func->new_addr,
+            AARCH64_INSN_BRANCH_LINK);
+        *tp++ = insn;
+
+        /* Restore state before continuing original function. */
+        *tp++ = aarch64_insn_gen_ldp_post(29, 30);
+        *tp++ = aarch64_insn_gen_ldp_post(6, 7);
+        *tp++ = aarch64_insn_gen_ldp_post(4, 5);
+        *tp++ = aarch64_insn_gen_ldp_post(2, 3);
+        *tp++ = aarch64_insn_gen_ldp_post(0, 1);
+
+        /* Original instruction. */
+        memcpy(tp, state->insn_buffer, len);
+        tp += len / ARCH_PATCH_INSN_SIZE;
+
+        /* Branch back to original function. */
+        insn = aarch64_insn_gen_branch_imm(
+            (unsigned long)tp,
+            (unsigned long)orig_cont_addr,
+            AARCH64_INSN_BRANCH_NOLINK);
+        *tp++ = insn;
+
+        clean_and_invalidate_dcache_va_range(trampoline, trampoline_code_size);
+
+        rc = modify_xen_mappings(trampoline_start, trampoline_end,
+                                 PAGE_HYPERVISOR_RX);
+        if ( rc )
+        {
+            printk(XENLOG_ERR LIVEPATCH
+                   "Failed to restore trampoline RX mapping: %d\n", rc);
+            return;
+        }
+
+        /* Branch from original function to trampoline. */
+        insn = aarch64_insn_gen_branch_imm(
+            (unsigned long)func->old_addr,
+            (unsigned long)func->trampoline_buf,
+            AARCH64_INSN_BRANCH_NOLINK);
+    }
+    else if ( func->new_addr )
         insn = aarch64_insn_gen_branch_imm((unsigned long)func->old_addr,
                                            (unsigned long)func->new_addr,
                                            AARCH64_INSN_BRANCH_NOLINK);
-    else
-        insn = aarch64_insn_gen_nop();
 
     /* Verified in livepatch_verify_distance. */
     ASSERT(insn != AARCH64_BREAK_FAULT);
diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
index 7515a040ad..8863ad5ca3 100644
--- a/xen/common/livepatch.c
+++ b/xen/common/livepatch.c
@@ -280,10 +280,30 @@ static int move_payload(struct payload *payload, struct livepatch_elf *elf)
 {
     void *text_buf, *ro_buf, *rw_buf;
     unsigned int i, rw_buf_sec, rw_buf_cnt = 0;
-    size_t size = 0;
+    const struct livepatch_elf_sec *sec;
+    const struct livepatch_func *funcs;
+    unsigned int nfuncs, trampolines_needed = 0;
+    size_t size = 0, trampoline_size = 0;
     unsigned int *offset;
     int rc = 0;
 
+    sec = livepatch_elf_sec_by_name(elf, ELF_LIVEPATCH_FUNC);
+    if ( sec )
+    {
+        funcs = sec->addr;
+        nfuncs = sec->sec->sh_size / sizeof(*funcs);
+
+        for ( i = 0; i < nfuncs; ++i )
+            if ( funcs[i].trampoline_buf == (void *)1 )
+                trampolines_needed++;
+
+        if ( trampolines_needed )
+        {
+            payload->n_trampolines = trampolines_needed;
+            trampoline_size = trampolines_needed * LIVEPATCH_TRAMPOLINE_SIZE;
+        }
+    }
+
     offset = xmalloc_array(unsigned int, elf->hdr->e_shnum);
     if ( !offset )
         return -ENOMEM;
@@ -323,8 +343,8 @@ static int move_payload(struct payload *payload, struct livepatch_elf *elf)
      * them on separate pages. The last one will by default fall on its
      * own page.
      */
-    size = PAGE_ALIGN(payload->text_size) + PAGE_ALIGN(payload->rw_size) +
-                      payload->ro_size;
+    size = PAGE_ALIGN(payload->text_size + trampoline_size) +
+           PAGE_ALIGN(payload->rw_size) + payload->ro_size;
 
     size = PFN_UP(size); /* Nr of pages. */
     text_buf = vmalloc_xen(size * PAGE_SIZE);
@@ -335,9 +355,12 @@ static int move_payload(struct payload *payload, struct livepatch_elf *elf)
         rc = -ENOMEM;
         goto out;
     }
-    rw_buf = text_buf + PAGE_ALIGN(payload->text_size);
+    rw_buf = text_buf + PAGE_ALIGN(payload->text_size + trampoline_size);
     ro_buf = rw_buf + PAGE_ALIGN(payload->rw_size);
 
+    if ( trampoline_size )
+        payload->trampoline_addr = text_buf + payload->text_size;
+
     payload->pages = size;
     payload->text_addr = text_buf;
     payload->rw_addr = rw_buf;
@@ -690,7 +713,7 @@ static int prepare_payload(struct payload *payload,
 {
     const struct livepatch_elf_sec *sec;
     const struct payload *data;
-    unsigned int i;
+    unsigned int i, trampoline_idx = 0;
     struct livepatch_func *funcs;
     struct livepatch_func *f;
     struct virtual_region *region;
@@ -737,6 +760,13 @@ static int prepare_payload(struct payload *payload,
             if ( rc )
                 return rc;
 
+            if ( f->trampoline_buf == (void *)1 )
+            {
+                f->trampoline_buf = (char *)payload->trampoline_addr +
+                                    trampoline_idx * LIVEPATCH_TRAMPOLINE_SIZE;
+                trampoline_idx++;
+            }
+
             rc = livepatch_verify_distance(f);
             if ( rc )
                 return rc;
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index c7cd9b4eb0..e79615d7c9 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -1010,10 +1010,11 @@ struct livepatch_func {
     const char *name;       /* Name of function to be patched. */
     void *new_addr;
     void *old_addr;
+    void *trampoline_buf;   /* Trampoline buffer when set to (void *)1. */
     uint32_t new_size;
     uint32_t old_size;
     uint8_t version;        /* MUST be LIVEPATCH_PAYLOAD_VERSION. */
-    uint8_t _pad[39];
+    uint8_t _pad[31];
     livepatch_expectation_t expect;
 };
 typedef struct livepatch_func livepatch_func_t;
diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h
index 45c8924f34..7a81763cf2 100644
--- a/xen/include/xen/livepatch.h
+++ b/xen/include/xen/livepatch.h
@@ -48,6 +48,8 @@ struct xen_sysctl_livepatch_op;
 #define ELF_LIVEPATCH_POSTREVERT_HOOK ".livepatch.hooks.postrevert"
 /* Arbitrary limit for payload size and .bss section size. */
 #define LIVEPATCH_MAX_SIZE     MB(2)
+/* Size of a trampoline used for function tracing */
+#define LIVEPATCH_TRAMPOLINE_SIZE 128
 
 struct livepatch_symbol {
     const char *name;
@@ -109,13 +111,18 @@ unsigned int livepatch_insn_len(const struct livepatch_func *func,
 
 static inline int livepatch_verify_distance(const struct livepatch_func *func)
 {
+    const void *target;
     long offset;
     long range = ARCH_LIVEPATCH_RANGE;
 
-    if ( !func->new_addr ) /* Ignore NOPs. */
-        return 0;
+    if ( func->trampoline_buf )
+	target = func->trampoline_buf;
+    else if ( func->new_addr )
+	target = func->new_addr;
+    else
+	return 0; /* Ignore NOPs. */
 
-    offset = func->old_addr - func->new_addr;
+    offset = func->old_addr - target;
     if ( offset < -range || offset >= range )
         return -EOVERFLOW;
 
diff --git a/xen/include/xen/livepatch_payload.h b/xen/include/xen/livepatch_payload.h
index c6dc7cb5fa..7ed8be3fd6 100644
--- a/xen/include/xen/livepatch_payload.h
+++ b/xen/include/xen/livepatch_payload.h
@@ -52,6 +52,8 @@ struct payload {
     size_t ro_size;                      /* .. and its size (if any). */
     unsigned int pages;                  /* Total pages for [text,rw,ro]_addr */
     struct list_head applied_list;       /* Linked to 'applied_list'. */
+    void *trampoline_addr;               /* Virtual address of trampoline area. */
+    unsigned int n_trampolines;          /* Number of trampolines to be allocated */
     const struct livepatch_func *funcs;  /* The array of functions to patch. */
     struct livepatch_fstate *fstate;     /* State of patched functions. */
     unsigned int nfuncs;                 /* Nr of functions to patch. */
-- 
2.34.1
Re: [RFC] xen/arm64: livepatch: enable attaching callbacks
Posted by Roger Pau Monné 4 days, 11 hours ago
Hello,

On Mon, Jun 29, 2026 at 11:01:28AM +0900, Ryo Takakura wrote:
> Linux ftrace allows registering callbacks which is useful
> for debugging and tracing events. On Linux, it is done by
> reserving function entry points at compile time which can
> later be patched to branch to a trampoline.
> 
> This patch implements similar callback feature, but with
> different approach using existing livepatch infrastructure.
> Instead of reserving function entry points at compile time,
> the traced function will be livepatched so that it branches
> to the trampoline.

While this is an interesting usage of the livepatch logic in new ways,
may I ask why not do as Linux and add an empty function preamble that
can be replaced at run-time with calls to hooks?

You could still re-use most of the livepatch logic for handling the
addition of the hook calls, but it would be nicer in that we won't
need to move the original function.

> The role of the trampoline(illustrated below) is to preserve
> the context while jumping to the tracer function, and return
> back to the traced function with its context restored.

Alternatively - why not use livepatch-build-tools against a build with
the added hooks to generate a proper livepatch?  This looks a bit
fragile to me (see the question from Andrew about fixing up
instruction pointer relative references).

On x86 at least we would also need to adjust the bug frames and
exception table contents, and the contents of the symbol table to
account for the function being moved.

IOW: it looks like overall this is a lot more work than possibly
reserving a function preamble to add hook calls?

> trampoline:
>     Save regs
>     Call tracer function
>     Restore regs
>     old_addr
>     return old_addr + 4
> 
> One can request the feature by setting @trampoline_buf to 1
> which will allocate a buffer for trampoline.
> 
> Signed-off-by: Ryo Takakura <takakura@valinux.co.jp>
> ---
> 
> Hi!
> 
> For the future, I'm thinking of linux-like extensions
> which help tracing and debugging by passing:
> - saved registers
> - caller information
> - private data
> - and so on ...
> 
> I would appreciate any advice or suggestion.
> Thanks!
> 
> Example payload file:
> 
> #include <xen/lib.h>
> #include <xen/livepatch.h>
> 
> static void my_tracer(void)
> {
>     printk("livepatch: do_domctl was called\n");
> }
> 
> static struct livepatch_func funcs[]
>     __attribute__((section(".livepatch.funcs"))) =
> {
>     {
>         .name = "do_domctl",
>         .old_size = 4572,
>         .new_addr = my_tracer,
>         .new_size = 32,
>         .trampoline_buf = (void *)1,
>         .version = LIVEPATCH_PAYLOAD_VERSION,
>     }
> };
> 
> Sample output:
> 
> $ tools/misc/xen-livepatch list
>  ID                                     | status     | metadata
> ----------------------------------------+------------+---------------
> trace_do_domctl                         | APPLIED    |
> $ xl vcpu-list Domain-0
> Name                                ID  VCPU   CPU State   Time(s) Affinity (Hard / Soft)
> (XEN) livepatch: do_domctl was called
> (XEN) livepatch: do_domctl was called
> (XEN) livepatch: do_domctl was called
> (XEN) livepatch: do_domctl was called
> (XEN) livepatch: do_domctl was called
> (XEN) livepatch: do_domctl was called
> (XEN) livepatch: do_domctl was called
> (XEN) livepatch: do_domctl was called
> (XEN) livepatch: do_domctl was called
> Domain-0                             0     0    1   -b-      67.7  all / all
> Domain-0                             0     1    3   -b-     457.2  all / all
> Domain-0                             0     2    2   -b-      42.4  all / all
> Domain-0                             0     3    0   r--      32.4  all / all
> 
> Sincerely,
> Ryo Takakura
> 
> ---
>  xen/arch/arm/arm64/livepatch.c      | 104 +++++++++++++++++++++++++++-
>  xen/common/livepatch.c              |  40 +++++++++--
>  xen/include/public/sysctl.h         |   3 +-
>  xen/include/xen/livepatch.h         |  13 +++-
>  xen/include/xen/livepatch_payload.h |   2 +
>  5 files changed, 150 insertions(+), 12 deletions(-)
> 
> diff --git a/xen/arch/arm/arm64/livepatch.c b/xen/arch/arm/arm64/livepatch.c
> index e135bd5bf9..b7c9aba94e 100644
> --- a/xen/arch/arm/arm64/livepatch.c
> +++ b/xen/arch/arm/arm64/livepatch.c
> @@ -15,6 +15,29 @@
>  #include <asm/insn.h>
>  #include <asm/livepatch.h>
>  
> +
> +#define AARCH64_REG_SP 31
> +
> +static uint32_t aarch64_insn_gen_stp_pre(unsigned int rt,
> +                                         unsigned int rt2)
> +{
> +    return 0xa9800000 |
> +           (((-16 / 8) & 0x7f) << 15) |
> +           (rt2 << 10) |
> +           (AARCH64_REG_SP << 5) |
> +           rt;
> +}
> +
> +static uint32_t aarch64_insn_gen_ldp_post(unsigned int rt,
> +                                          unsigned int rt2)
> +{
> +    return 0xa8c00000 |
> +           (((16 / 8) & 0x7f) << 15) |
> +           (rt2 << 10) |
> +           (AARCH64_REG_SP << 5) |
> +           rt;
> +}
> +
>  void arch_livepatch_apply(const struct livepatch_func *func,
>                            struct livepatch_fstate *state)
>  {
> @@ -34,12 +57,87 @@ void arch_livepatch_apply(const struct livepatch_func *func,
>      /* Save old ones. */
>      memcpy(state->insn_buffer, func->old_addr, len);
>  
> -    if ( func->new_addr )
> +    if ( !func->new_addr )
> +    {
> +        insn = aarch64_insn_gen_nop();
> +    }
> +    else if ( func->trampoline_buf )
> +    {
> +        int rc;
> +        uint32_t *trampoline = func->trampoline_buf;
> +        uint32_t *tp = trampoline;
> +        void *orig_cont_addr = (void *)func->old_addr + len;
> +        unsigned int trampoline_code_size = len + 12 * ARCH_PATCH_INSN_SIZE;
> +        unsigned long trampoline_start = (unsigned long)trampoline & PAGE_MASK;
> +        unsigned long trampoline_end =
> +            PAGE_ALIGN((unsigned long)trampoline + trampoline_code_size);
> +
> +        /*
> +         * Make the payload text area writeable while generating
> +         * the trampoline instructions.
> +         */
> +        rc = modify_xen_mappings(trampoline_start, trampoline_end,
> +                                 PAGE_HYPERVISOR);
> +        if ( rc )
> +        {
> +            printk(XENLOG_ERR LIVEPATCH
> +                   "Failed to make trampoline writable: %d\n", rc);
> +            return;
> +        }
> +
> +        /* Save state before calling the tracer. */
> +        *tp++ = aarch64_insn_gen_stp_pre(0, 1);
> +        *tp++ = aarch64_insn_gen_stp_pre(2, 3);
> +        *tp++ = aarch64_insn_gen_stp_pre(4, 5);
> +        *tp++ = aarch64_insn_gen_stp_pre(6, 7);
> +        *tp++ = aarch64_insn_gen_stp_pre(29, 30);
> +
> +        /* Call user's tracing function. */
> +        insn = aarch64_insn_gen_branch_imm(
> +            (unsigned long)tp,
> +            (unsigned long)func->new_addr,
> +            AARCH64_INSN_BRANCH_LINK);
> +        *tp++ = insn;
> +
> +        /* Restore state before continuing original function. */
> +        *tp++ = aarch64_insn_gen_ldp_post(29, 30);
> +        *tp++ = aarch64_insn_gen_ldp_post(6, 7);
> +        *tp++ = aarch64_insn_gen_ldp_post(4, 5);
> +        *tp++ = aarch64_insn_gen_ldp_post(2, 3);
> +        *tp++ = aarch64_insn_gen_ldp_post(0, 1);
> +
> +        /* Original instruction. */
> +        memcpy(tp, state->insn_buffer, len);
> +        tp += len / ARCH_PATCH_INSN_SIZE;
> +
> +        /* Branch back to original function. */
> +        insn = aarch64_insn_gen_branch_imm(
> +            (unsigned long)tp,
> +            (unsigned long)orig_cont_addr,
> +            AARCH64_INSN_BRANCH_NOLINK);
> +        *tp++ = insn;
> +
> +        clean_and_invalidate_dcache_va_range(trampoline, trampoline_code_size);
> +
> +        rc = modify_xen_mappings(trampoline_start, trampoline_end,
> +                                 PAGE_HYPERVISOR_RX);
> +        if ( rc )
> +        {
> +            printk(XENLOG_ERR LIVEPATCH
> +                   "Failed to restore trampoline RX mapping: %d\n", rc);
> +            return;
> +        }
> +
> +        /* Branch from original function to trampoline. */
> +        insn = aarch64_insn_gen_branch_imm(
> +            (unsigned long)func->old_addr,
> +            (unsigned long)func->trampoline_buf,
> +            AARCH64_INSN_BRANCH_NOLINK);
> +    }
> +    else if ( func->new_addr )
>          insn = aarch64_insn_gen_branch_imm((unsigned long)func->old_addr,
>                                             (unsigned long)func->new_addr,
>                                             AARCH64_INSN_BRANCH_NOLINK);
> -    else
> -        insn = aarch64_insn_gen_nop();

If we want to go this route, and use livepatching for this purpose, we
need to branch the use-cases in common code, and have arches provide
both a replacement and a preface addition hooks IMO.

>  
>      /* Verified in livepatch_verify_distance. */
>      ASSERT(insn != AARCH64_BREAK_FAULT);
> diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
> index 7515a040ad..8863ad5ca3 100644
> --- a/xen/common/livepatch.c
> +++ b/xen/common/livepatch.c
> @@ -280,10 +280,30 @@ static int move_payload(struct payload *payload, struct livepatch_elf *elf)
>  {
>      void *text_buf, *ro_buf, *rw_buf;
>      unsigned int i, rw_buf_sec, rw_buf_cnt = 0;
> -    size_t size = 0;
> +    const struct livepatch_elf_sec *sec;
> +    const struct livepatch_func *funcs;
> +    unsigned int nfuncs, trampolines_needed = 0;
> +    size_t size = 0, trampoline_size = 0;
>      unsigned int *offset;
>      int rc = 0;
>  
> +    sec = livepatch_elf_sec_by_name(elf, ELF_LIVEPATCH_FUNC);
> +    if ( sec )
> +    {
> +        funcs = sec->addr;
> +        nfuncs = sec->sec->sh_size / sizeof(*funcs);
> +
> +        for ( i = 0; i < nfuncs; ++i )
> +            if ( funcs[i].trampoline_buf == (void *)1 )
> +                trampolines_needed++;
> +
> +        if ( trampolines_needed )
> +        {
> +            payload->n_trampolines = trampolines_needed;
> +            trampoline_size = trampolines_needed * LIVEPATCH_TRAMPOLINE_SIZE;
> +        }
> +    }
> +
>      offset = xmalloc_array(unsigned int, elf->hdr->e_shnum);
>      if ( !offset )
>          return -ENOMEM;
> @@ -323,8 +343,8 @@ static int move_payload(struct payload *payload, struct livepatch_elf *elf)
>       * them on separate pages. The last one will by default fall on its
>       * own page.
>       */
> -    size = PAGE_ALIGN(payload->text_size) + PAGE_ALIGN(payload->rw_size) +
> -                      payload->ro_size;
> +    size = PAGE_ALIGN(payload->text_size + trampoline_size) +
> +           PAGE_ALIGN(payload->rw_size) + payload->ro_size;
>  
>      size = PFN_UP(size); /* Nr of pages. */
>      text_buf = vmalloc_xen(size * PAGE_SIZE);
> @@ -335,9 +355,12 @@ static int move_payload(struct payload *payload, struct livepatch_elf *elf)
>          rc = -ENOMEM;
>          goto out;
>      }
> -    rw_buf = text_buf + PAGE_ALIGN(payload->text_size);
> +    rw_buf = text_buf + PAGE_ALIGN(payload->text_size + trampoline_size);
>      ro_buf = rw_buf + PAGE_ALIGN(payload->rw_size);
>  
> +    if ( trampoline_size )
> +        payload->trampoline_addr = text_buf + payload->text_size;
> +
>      payload->pages = size;
>      payload->text_addr = text_buf;
>      payload->rw_addr = rw_buf;
> @@ -690,7 +713,7 @@ static int prepare_payload(struct payload *payload,
>  {
>      const struct livepatch_elf_sec *sec;
>      const struct payload *data;
> -    unsigned int i;
> +    unsigned int i, trampoline_idx = 0;
>      struct livepatch_func *funcs;
>      struct livepatch_func *f;
>      struct virtual_region *region;
> @@ -737,6 +760,13 @@ static int prepare_payload(struct payload *payload,
>              if ( rc )
>                  return rc;
>  
> +            if ( f->trampoline_buf == (void *)1 )
> +            {
> +                f->trampoline_buf = (char *)payload->trampoline_addr +

You don't need to cast to char *, the type of trampoline_addr is void
*, and we use the GNU extension to allow void pointer arithmetic by
treating the size of a void or of a function as 1.

> +                                    trampoline_idx * LIVEPATCH_TRAMPOLINE_SIZE;
> +                trampoline_idx++;
> +            }
> +
>              rc = livepatch_verify_distance(f);
>              if ( rc )
>                  return rc;
> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> index c7cd9b4eb0..e79615d7c9 100644
> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -1010,10 +1010,11 @@ struct livepatch_func {
>      const char *name;       /* Name of function to be patched. */
>      void *new_addr;
>      void *old_addr;
> +    void *trampoline_buf;   /* Trampoline buffer when set to (void *)1. */
>      uint32_t new_size;
>      uint32_t old_size;
>      uint8_t version;        /* MUST be LIVEPATCH_PAYLOAD_VERSION. */
> -    uint8_t _pad[39];
> +    uint8_t _pad[31];

New fields should be preferably added at the tail of the structure,
and the change here needs to be propagated into livepatch-build-tools
livepatch_patch_func structure.  See:

https://xenbits.xen.org/gitweb/?p=livepatch-build-tools.git;a=blob;f=common.h;h=7f3a82ffdb29d2d1d117c1ccb20cc328bdb0529a;hb=HEAD#l135

This is sadly all very fragile.

>      livepatch_expectation_t expect;
>  };
>  typedef struct livepatch_func livepatch_func_t;
> diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h
> index 45c8924f34..7a81763cf2 100644
> --- a/xen/include/xen/livepatch.h
> +++ b/xen/include/xen/livepatch.h
> @@ -48,6 +48,8 @@ struct xen_sysctl_livepatch_op;
>  #define ELF_LIVEPATCH_POSTREVERT_HOOK ".livepatch.hooks.postrevert"
>  /* Arbitrary limit for payload size and .bss section size. */
>  #define LIVEPATCH_MAX_SIZE     MB(2)
> +/* Size of a trampoline used for function tracing */
> +#define LIVEPATCH_TRAMPOLINE_SIZE 128
>  
>  struct livepatch_symbol {
>      const char *name;
> @@ -109,13 +111,18 @@ unsigned int livepatch_insn_len(const struct livepatch_func *func,
>  
>  static inline int livepatch_verify_distance(const struct livepatch_func *func)
>  {
> +    const void *target;
>      long offset;
>      long range = ARCH_LIVEPATCH_RANGE;
>  
> -    if ( !func->new_addr ) /* Ignore NOPs. */
> -        return 0;
> +    if ( func->trampoline_buf )
> +	target = func->trampoline_buf;
> +    else if ( func->new_addr )
> +	target = func->new_addr;
> +    else
> +	return 0; /* Ignore NOPs. */

FWIW, indentation is wrong here, you are adding hard tabs.

Thanks, Roger.
Re: [RFC] xen/arm64: livepatch: enable attaching callbacks
Posted by Ryo Takakura 3 days, 11 hours ago
Hi Roger and Andrew,

On Tue, 30 Jun 2026 08:47:14 +0000, Roger Pau Monné wrote:
>On Mon, Jun 29, 2026 at 11:01:28AM +0900, Ryo Takakura wrote:
>> Linux ftrace allows registering callbacks which is useful
>> for debugging and tracing events. On Linux, it is done by
>> reserving function entry points at compile time which can
>> later be patched to branch to a trampoline.
>> 
>> This patch implements similar callback feature, but with
>> different approach using existing livepatch infrastructure.
>> Instead of reserving function entry points at compile time,
>> the traced function will be livepatched so that it branches
>> to the trampoline.
>
>While this is an interesting usage of the livepatch logic in new ways,
>may I ask why not do as Linux and add an empty function preamble that
>can be replaced at run-time with calls to hooks?
>
>You could still re-use most of the livepatch logic for handling the
>addition of the hook calls, but it would be nicer in that we won't
>need to move the original function.

I agree that we can reuse a lot of livepatch logic.

>> The role of the trampoline(illustrated below) is to preserve
>> the context while jumping to the tracer function, and return
>> back to the traced function with its context restored.
>
>Alternatively - why not use livepatch-build-tools against a build with
>the added hooks to generate a proper livepatch?  This looks a bit
>fragile to me (see the question from Andrew about fixing up
>instruction pointer relative references).

Yes, I think proper livepatching would still be preferred
given all the concerns Andrew and Roger raised.

>On x86 at least we would also need to adjust the bug frames and
>exception table contents, and the contents of the symbol table to
>account for the function being moved.
>
>IOW: it looks like overall this is a lot more work than possibly
>reserving a function preamble to add hook calls?

Yes, I agree.
(I wasn't aware of this additional work when I replied
to Andrew yesterday, thanks!)

And if I were to summarize the discussion so far, assuming we
still want to add a tracing-feature, I think we are in agreement
adding an empty function preamble like Linux.
(I personally would still like to see a framework on Xen
that is more convenient and tracing-friendly which can be used
reliably at the same time)

If this sounds reasonable, I will try preparing one based on
the feedbacks I was given so far:
- Use of Linux-like reserved function preamble
- Use of __attribute__((no_caller_saved_registers))
- Split replacement and preamble-hook handling in common code

Let me know your thoughts!

>> trampoline:
>>     Save regs
>>     Call tracer function
>>     Restore regs
>>     old_addr
>>     return old_addr + 4
>> 
>> One can request the feature by setting @trampoline_buf to 1
>> which will allocate a buffer for trampoline.
>> 
>> Signed-off-by: Ryo Takakura <takakura@xxxxxxxxxxxxx>
>> ---
>> 
>> Hi!
>> 
>> For the future, I'm thinking of linux-like extensions
>> which help tracing and debugging by passing:
>> - saved registers
>> - caller information
>> - private data
>> - and so on ...
>> 
>> I would appreciate any advice or suggestion.
>> Thanks!
>> 
>> Example payload file:
>> 
>> #include <xen/lib.h>
>> #include <xen/livepatch.h>
>> 
>> static void my_tracer(void)
>> {
>>     printk("livepatch: do_domctl was called\n");
>> }
>> 
>> static struct livepatch_func funcs[]
>>     __attribute__((section(".livepatch.funcs"))) =
>> {
>>     {
>>         .name = "do_domctl",
>>         .old_size = 4572,
>>         .new_addr = my_tracer,
>>         .new_size = 32,
>>         .trampoline_buf = (void *)1,
>>         .version = LIVEPATCH_PAYLOAD_VERSION,
>>     }
>> };
>> 
>> Sample output:
>> 
>> $ tools/misc/xen-livepatch list
>>  ID                                     | status     | metadata
>> ----------------------------------------+------------+---------------
>> trace_do_domctl                         | APPLIED    |
>> $ xl vcpu-list Domain-0
>> Name                                ID  VCPU   CPU State   Time(s) Affinity 
>> (Hard / Soft)
>> (XEN) livepatch: do_domctl was called
>> (XEN) livepatch: do_domctl was called
>> (XEN) livepatch: do_domctl was called
>> (XEN) livepatch: do_domctl was called
>> (XEN) livepatch: do_domctl was called
>> (XEN) livepatch: do_domctl was called
>> (XEN) livepatch: do_domctl was called
>> (XEN) livepatch: do_domctl was called
>> (XEN) livepatch: do_domctl was called
>> Domain-0                             0     0    1   -b-      67.7  all / all
>> Domain-0                             0     1    3   -b-     457.2  all / all
>> Domain-0                             0     2    2   -b-      42.4  all / all
>> Domain-0                             0     3    0   r--      32.4  all / all
>> 
>> Sincerely,
>> Ryo Takakura
>> 
>> ---
>>  xen/arch/arm/arm64/livepatch.c      | 104 +++++++++++++++++++++++++++-
>>  xen/common/livepatch.c              |  40 +++++++++--
>>  xen/include/public/sysctl.h         |   3 +-
>>  xen/include/xen/livepatch.h         |  13 +++-
>>  xen/include/xen/livepatch_payload.h |   2 +
>>  5 files changed, 150 insertions(+), 12 deletions(-)
>> 
>> diff --git a/xen/arch/arm/arm64/livepatch.c b/xen/arch/arm/arm64/livepatch.c
>> index e135bd5bf9..b7c9aba94e 100644
>> --- a/xen/arch/arm/arm64/livepatch.c
>> +++ b/xen/arch/arm/arm64/livepatch.c
>> @@ -15,6 +15,29 @@
>>  #include <asm/insn.h>
>>  #include <asm/livepatch.h>
>>  
>> +
>> +#define AARCH64_REG_SP 31
>> +
>> +static uint32_t aarch64_insn_gen_stp_pre(unsigned int rt,
>> +                                         unsigned int rt2)
>> +{
>> +    return 0xa9800000 |
>> +           (((-16 / 8) & 0x7f) << 15) |
>> +           (rt2 << 10) |
>> +           (AARCH64_REG_SP << 5) |
>> +           rt;
>> +}
>> +
>> +static uint32_t aarch64_insn_gen_ldp_post(unsigned int rt,
>> +                                          unsigned int rt2)
>> +{
>> +    return 0xa8c00000 |
>> +           (((16 / 8) & 0x7f) << 15) |
>> +           (rt2 << 10) |
>> +           (AARCH64_REG_SP << 5) |
>> +           rt;
>> +}
>> +
>>  void arch_livepatch_apply(const struct livepatch_func *func,
>>                            struct livepatch_fstate *state)
>>  {
>> @@ -34,12 +57,87 @@ void arch_livepatch_apply(const struct livepatch_func 
>> *func,
>>      /* Save old ones. */
>>      memcpy(state->insn_buffer, func->old_addr, len);
>>  
>> -    if ( func->new_addr )
>> +    if ( !func->new_addr )
>> +    {
>> +        insn = aarch64_insn_gen_nop();
>> +    }
>> +    else if ( func->trampoline_buf )
>> +    {
>> +        int rc;
>> +        uint32_t *trampoline = func->trampoline_buf;
>> +        uint32_t *tp = trampoline;
>> +        void *orig_cont_addr = (void *)func->old_addr + len;
>> +        unsigned int trampoline_code_size = len + 12 * ARCH_PATCH_INSN_SIZE;
>> +        unsigned long trampoline_start = (unsigned long)trampoline & 
>> PAGE_MASK;
>> +        unsigned long trampoline_end =
>> +            PAGE_ALIGN((unsigned long)trampoline + trampoline_code_size);
>> +
>> +        /*
>> +         * Make the payload text area writeable while generating
>> +         * the trampoline instructions.
>> +         */
>> +        rc = modify_xen_mappings(trampoline_start, trampoline_end,
>> +                                 PAGE_HYPERVISOR);
>> +        if ( rc )
>> +        {
>> +            printk(XENLOG_ERR LIVEPATCH
>> +                   "Failed to make trampoline writable: %d\n", rc);
>> +            return;
>> +        }
>> +
>> +        /* Save state before calling the tracer. */
>> +        *tp++ = aarch64_insn_gen_stp_pre(0, 1);
>> +        *tp++ = aarch64_insn_gen_stp_pre(2, 3);
>> +        *tp++ = aarch64_insn_gen_stp_pre(4, 5);
>> +        *tp++ = aarch64_insn_gen_stp_pre(6, 7);
>> +        *tp++ = aarch64_insn_gen_stp_pre(29, 30);
>> +
>> +        /* Call user's tracing function. */
>> +        insn = aarch64_insn_gen_branch_imm(
>> +            (unsigned long)tp,
>> +            (unsigned long)func->new_addr,
>> +            AARCH64_INSN_BRANCH_LINK);
>> +        *tp++ = insn;
>> +
>> +        /* Restore state before continuing original function. */
>> +        *tp++ = aarch64_insn_gen_ldp_post(29, 30);
>> +        *tp++ = aarch64_insn_gen_ldp_post(6, 7);
>> +        *tp++ = aarch64_insn_gen_ldp_post(4, 5);
>> +        *tp++ = aarch64_insn_gen_ldp_post(2, 3);
>> +        *tp++ = aarch64_insn_gen_ldp_post(0, 1);
>> +
>> +        /* Original instruction. */
>> +        memcpy(tp, state->insn_buffer, len);
>> +        tp += len / ARCH_PATCH_INSN_SIZE;
>> +
>> +        /* Branch back to original function. */
>> +        insn = aarch64_insn_gen_branch_imm(
>> +            (unsigned long)tp,
>> +            (unsigned long)orig_cont_addr,
>> +            AARCH64_INSN_BRANCH_NOLINK);
>> +        *tp++ = insn;
>> +
>> +        clean_and_invalidate_dcache_va_range(trampoline, 
>> trampoline_code_size);
>> +
>> +        rc = modify_xen_mappings(trampoline_start, trampoline_end,
>> +                                 PAGE_HYPERVISOR_RX);
>> +        if ( rc )
>> +        {
>> +            printk(XENLOG_ERR LIVEPATCH
>> +                   "Failed to restore trampoline RX mapping: %d\n", rc);
>> +            return;
>> +        }
>> +
>> +        /* Branch from original function to trampoline. */
>> +        insn = aarch64_insn_gen_branch_imm(
>> +            (unsigned long)func->old_addr,
>> +            (unsigned long)func->trampoline_buf,
>> +            AARCH64_INSN_BRANCH_NOLINK);
>> +    }
>> +    else if ( func->new_addr )
>>          insn = aarch64_insn_gen_branch_imm((unsigned long)func->old_addr,
>>                                             (unsigned long)func->new_addr,
>>                                             AARCH64_INSN_BRANCH_NOLINK);
>> -    else
>> -        insn = aarch64_insn_gen_nop();
>
>If we want to go this route, and use livepatching for this purpose, we
>need to branch the use-cases in common code, and have arches provide
>both a replacement and a preface addition hooks IMO.

I think that is a good idea.
I'll take this into account for the next.

>>  
>>      /* Verified in livepatch_verify_distance. */
>>      ASSERT(insn != AARCH64_BREAK_FAULT);
>> diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
>> index 7515a040ad..8863ad5ca3 100644
>> --- a/xen/common/livepatch.c
>> +++ b/xen/common/livepatch.c
>> @@ -280,10 +280,30 @@ static int move_payload(struct payload *payload, struct 
>> livepatch_elf *elf)
>>  {
>>      void *text_buf, *ro_buf, *rw_buf;
>>      unsigned int i, rw_buf_sec, rw_buf_cnt = 0;
>> -    size_t size = 0;
>> +    const struct livepatch_elf_sec *sec;
>> +    const struct livepatch_func *funcs;
>> +    unsigned int nfuncs, trampolines_needed = 0;
>> +    size_t size = 0, trampoline_size = 0;
>>      unsigned int *offset;
>>      int rc = 0;
>>  
>> +    sec = livepatch_elf_sec_by_name(elf, ELF_LIVEPATCH_FUNC);
>> +    if ( sec )
>> +    {
>> +        funcs = sec->addr;
>> +        nfuncs = sec->sec->sh_size / sizeof(*funcs);
>> +
>> +        for ( i = 0; i < nfuncs; ++i )
>> +            if ( funcs[i].trampoline_buf == (void *)1 )
>> +                trampolines_needed++;
>> +
>> +        if ( trampolines_needed )
>> +        {
>> +            payload->n_trampolines = trampolines_needed;
>> +            trampoline_size = trampolines_needed * LIVEPATCH_TRAMPOLINE_SIZE;
>> +        }
>> +    }
>> +
>>      offset = xmalloc_array(unsigned int, elf->hdr->e_shnum);
>>      if ( !offset )
>>          return -ENOMEM;
>> @@ -323,8 +343,8 @@ static int move_payload(struct payload *payload, struct 
>> livepatch_elf *elf)
>>       * them on separate pages. The last one will by default fall on its
>>       * own page.
>>       */
>> -    size = PAGE_ALIGN(payload->text_size) + PAGE_ALIGN(payload->rw_size) +
>> -                      payload->ro_size;
>> +    size = PAGE_ALIGN(payload->text_size + trampoline_size) +
>> +           PAGE_ALIGN(payload->rw_size) + payload->ro_size;
>>  
>>      size = PFN_UP(size); /* Nr of pages. */
>>      text_buf = vmalloc_xen(size * PAGE_SIZE);
>> @@ -335,9 +355,12 @@ static int move_payload(struct payload *payload, struct 
>> livepatch_elf *elf)
>>          rc = -ENOMEM;
>>          goto out;
>>      }
>> -    rw_buf = text_buf + PAGE_ALIGN(payload->text_size);
>> +    rw_buf = text_buf + PAGE_ALIGN(payload->text_size + trampoline_size);
>>      ro_buf = rw_buf + PAGE_ALIGN(payload->rw_size);
>>  
>> +    if ( trampoline_size )
>> +        payload->trampoline_addr = text_buf + payload->text_size;
>> +
>>      payload->pages = size;
>>      payload->text_addr = text_buf;
>>      payload->rw_addr = rw_buf;
>> @@ -690,7 +713,7 @@ static int prepare_payload(struct payload *payload,
>>  {
>>      const struct livepatch_elf_sec *sec;
>>      const struct payload *data;
>> -    unsigned int i;
>> +    unsigned int i, trampoline_idx = 0;
>>      struct livepatch_func *funcs;
>>      struct livepatch_func *f;
>>      struct virtual_region *region;
>> @@ -737,6 +760,13 @@ static int prepare_payload(struct payload *payload,
>>              if ( rc )
>>                  return rc;
>>  
>> +            if ( f->trampoline_buf == (void *)1 )
>> +            {
>> +                f->trampoline_buf = (char *)payload->trampoline_addr +
>
>You don't need to cast to char *, the type of trampoline_addr is void
>*, and we use the GNU extension to allow void pointer arithmetic by
>treating the size of a void or of a function as 1.

Oh, thanks for the advice!
I'll keep it in mind.

>> +                                    trampoline_idx * 
>> LIVEPATCH_TRAMPOLINE_SIZE;
>> +                trampoline_idx++;
>> +            }
>> +
>>              rc = livepatch_verify_distance(f);
>>              if ( rc )
>>                  return rc;
>> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
>> index c7cd9b4eb0..e79615d7c9 100644
>> --- a/xen/include/public/sysctl.h
>> +++ b/xen/include/public/sysctl.h
>> @@ -1010,10 +1010,11 @@ struct livepatch_func {
>>      const char *name;       /* Name of function to be patched. */
>>      void *new_addr;
>>      void *old_addr;
>> +    void *trampoline_buf;   /* Trampoline buffer when set to (void *)1. */
>>      uint32_t new_size;
>>      uint32_t old_size;
>>      uint8_t version;        /* MUST be LIVEPATCH_PAYLOAD_VERSION. */
>> -    uint8_t _pad[39];
>> +    uint8_t _pad[31];
>
>New fields should be preferably added at the tail of the structure,
>and the change here needs to be propagated into livepatch-build-tools
>livepatch_patch_func structure.  See:
>
>https://xenbits.xen.org/gitweb/?p=livepatch-build-tools.git;a=blob;f=common.h;h=7f3a82ffdb29d2d1d117c1ccb20cc328bdb0529a;hb=HEAD#l135
>
>This is sadly all very fragile.

Thanks. I will make sure to update livepatch-build-tools if needed.
(But I believe the change here won't be needed as pointed by Andrew [1])

>>      livepatch_expectation_t expect;
>>  };
>>  typedef struct livepatch_func livepatch_func_t;
>> diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h
>> index 45c8924f34..7a81763cf2 100644
>> --- a/xen/include/xen/livepatch.h
>> +++ b/xen/include/xen/livepatch.h
>> @@ -48,6 +48,8 @@ struct xen_sysctl_livepatch_op;
>>  #define ELF_LIVEPATCH_POSTREVERT_HOOK ".livepatch.hooks.postrevert"
>>  /* Arbitrary limit for payload size and .bss section size. */
>>  #define LIVEPATCH_MAX_SIZE     MB(2)
>> +/* Size of a trampoline used for function tracing */
>> +#define LIVEPATCH_TRAMPOLINE_SIZE 128
>>  
>>  struct livepatch_symbol {
>>      const char *name;
>> @@ -109,13 +111,18 @@ unsigned int livepatch_insn_len(const struct 
>> livepatch_func *func,
>>  
>>  static inline int livepatch_verify_distance(const struct livepatch_func 
>> *func)
>>  {
>> +    const void *target;
>>      long offset;
>>      long range = ARCH_LIVEPATCH_RANGE;
>>  
>> -    if ( !func->new_addr ) /* Ignore NOPs. */
>> -        return 0;
>> +    if ( func->trampoline_buf )
>> +     target = func->trampoline_buf;
>> +    else if ( func->new_addr )
>> +     target = func->new_addr;
>> +    else
>> +     return 0; /* Ignore NOPs. */
>
>FWIW, indentation is wrong here, you are adding hard tabs.

My mistake. I'll fix this.

Sincerely,
Ryo Takakura

[1] https://lists.xen.org/archives/html/xen-devel/2026-06/msg01583.html

>Thanks, Roger.

Re: [RFC] xen/arm64: livepatch: enable attaching callbacks
Posted by Roger Pau Monné 3 days, 6 hours ago
On Wed, Jul 01, 2026 at 06:09:08PM +0900, Ryo Takakura wrote:
> Hi Roger and Andrew,
> 
> On Tue, 30 Jun 2026 08:47:14 +0000, Roger Pau Monné wrote:
> >On Mon, Jun 29, 2026 at 11:01:28AM +0900, Ryo Takakura wrote:
> >> Linux ftrace allows registering callbacks which is useful
> >> for debugging and tracing events. On Linux, it is done by
> >> reserving function entry points at compile time which can
> >> later be patched to branch to a trampoline.
> >> 
> >> This patch implements similar callback feature, but with
> >> different approach using existing livepatch infrastructure.
> >> Instead of reserving function entry points at compile time,
> >> the traced function will be livepatched so that it branches
> >> to the trampoline.
> >
> >While this is an interesting usage of the livepatch logic in new ways,
> >may I ask why not do as Linux and add an empty function preamble that
> >can be replaced at run-time with calls to hooks?
> >
> >You could still re-use most of the livepatch logic for handling the
> >addition of the hook calls, but it would be nicer in that we won't
> >need to move the original function.
> 
> I agree that we can reuse a lot of livepatch logic.
> 
> >> The role of the trampoline(illustrated below) is to preserve
> >> the context while jumping to the tracer function, and return
> >> back to the traced function with its context restored.
> >
> >Alternatively - why not use livepatch-build-tools against a build with
> >the added hooks to generate a proper livepatch?  This looks a bit
> >fragile to me (see the question from Andrew about fixing up
> >instruction pointer relative references).
> 
> Yes, I think proper livepatching would still be preferred
> given all the concerns Andrew and Roger raised.
> 
> >On x86 at least we would also need to adjust the bug frames and
> >exception table contents, and the contents of the symbol table to
> >account for the function being moved.
> >
> >IOW: it looks like overall this is a lot more work than possibly
> >reserving a function preamble to add hook calls?
> 
> Yes, I agree.
> (I wasn't aware of this additional work when I replied
> to Andrew yesterday, thanks!)
> 
> And if I were to summarize the discussion so far, assuming we
> still want to add a tracing-feature, I think we are in agreement
> adding an empty function preamble like Linux.
> (I personally would still like to see a framework on Xen
> that is more convenient and tracing-friendly which can be used
> reliably at the same time)
> 
> If this sounds reasonable, I will try preparing one based on
> the feedbacks I was given so far:
> - Use of Linux-like reserved function preamble
> - Use of __attribute__((no_caller_saved_registers))
> - Split replacement and preamble-hook handling in common code
> 
> Let me know your thoughts!

I think the above approach is easier to implement, and more reliable
than the current proposal.

Do you also have the intention to integrate this with existing tracing
tools used by Linux or other OSes?  That would be very useful IMO.

Thanks, Roger.

Re: [RFC] xen/arm64: livepatch: enable attaching callbacks
Posted by Ryo Takakura 2 days, 17 hours ago
On Wed, 1 Jul 2026 15:50:59 +0200, Roger Pau Monné wrote:
>On Wed, Jul 01, 2026 at 06:09:08PM +0900, Ryo Takakura wrote:
>> On Tue, 30 Jun 2026 08:47:14 +0000, Roger Pau Monné wrote:
>> >On Mon, Jun 29, 2026 at 11:01:28AM +0900, Ryo Takakura wrote:
>> >> Linux ftrace allows registering callbacks which is useful
>> >> for debugging and tracing events. On Linux, it is done by
>> >> reserving function entry points at compile time which can
>> >> later be patched to branch to a trampoline.
>> >> 
>> >> This patch implements similar callback feature, but with
>> >> different approach using existing livepatch infrastructure.
>> >> Instead of reserving function entry points at compile time,
>> >> the traced function will be livepatched so that it branches
>> >> to the trampoline.
>> >
>> >While this is an interesting usage of the livepatch logic in new ways,
>> >may I ask why not do as Linux and add an empty function preamble that
>> >can be replaced at run-time with calls to hooks?
>> >
>> >You could still re-use most of the livepatch logic for handling the
>> >addition of the hook calls, but it would be nicer in that we won't
>> >need to move the original function.
>> 
>> I agree that we can reuse a lot of livepatch logic.
>> 
>> >> The role of the trampoline(illustrated below) is to preserve
>> >> the context while jumping to the tracer function, and return
>> >> back to the traced function with its context restored.
>> >
>> >Alternatively - why not use livepatch-build-tools against a build with
>> >the added hooks to generate a proper livepatch?  This looks a bit
>> >fragile to me (see the question from Andrew about fixing up
>> >instruction pointer relative references).
>> 
>> Yes, I think proper livepatching would still be preferred
>> given all the concerns Andrew and Roger raised.
>> 
>> >On x86 at least we would also need to adjust the bug frames and
>> >exception table contents, and the contents of the symbol table to
>> >account for the function being moved.
>> >
>> >IOW: it looks like overall this is a lot more work than possibly
>> >reserving a function preamble to add hook calls?
>> 
>> Yes, I agree.
>> (I wasn't aware of this additional work when I replied
>> to Andrew yesterday, thanks!)
>> 
>> And if I were to summarize the discussion so far, assuming we
>> still want to add a tracing-feature, I think we are in agreement
>> adding an empty function preamble like Linux.
>> (I personally would still like to see a framework on Xen
>> that is more convenient and tracing-friendly which can be used
>> reliably at the same time)
>> 
>> If this sounds reasonable, I will try preparing one based on
>> the feedbacks I was given so far:
>> - Use of Linux-like reserved function preamble
>> - Use of __attribute__((no_caller_saved_registers))
>> - Split replacement and preamble-hook handling in common code
>> 
>> Let me know your thoughts!
>
>I think the above approach is easier to implement, and more reliable
>than the current proposal.

Good!

>Do you also have the intention to integrate this with existing tracing
>tools used by Linux or other OSes?  That would be very useful IMO.

Although I don't have anything specific in mind yet,
I also hope for more tools to be integrated in the future :-)

Sincerely,
Ryo Takakura

Re: [RFC] xen/arm64: livepatch: enable attaching callbacks
Posted by Andrew Cooper 5 days, 3 hours ago
On 29/06/2026 3:01 am, Ryo Takakura wrote:
> Linux ftrace allows registering callbacks which is useful
> for debugging and tracing events. On Linux, it is done by
> reserving function entry points at compile time which can
> later be patched to branch to a trampoline.
>
> This patch implements similar callback feature, but with
> different approach using existing livepatch infrastructure.
> Instead of reserving function entry points at compile time,
> the traced function will be livepatched so that it branches
> to the trampoline.
>
> The role of the trampoline(illustrated below) is to preserve
> the context while jumping to the tracer function, and return
> back to the traced function with its context restored.
>
> trampoline:
>     Save regs
>     Call tracer function
>     Restore regs
>     old_addr
>     return old_addr + 4
>
> One can request the feature by setting @trampoline_buf to 1
> which will allocate a buffer for trampoline.
>
> Signed-off-by: Ryo Takakura <takakura@valinux.co.jp>

Having something a bit more like Linux tracing would be nice.  But, this
is very different to the other livepatching functionality and a few bits
don't match nicely.

First, you write a lot of the trampoline manually.  You can do most of
this in the target function with
__attribute__((no_caller_saved_registers)), avoiding the need to do it
by hand.   This would require a minimum GCC of 7 (where our baseline is
5) but it's acceptable for new features to require a newer compiler.

Secondly, what happens if the instruction at old_addr is an ADRP, or a
branch?  Right now, there's no case where we move an instruction; we
only produce new code, and branch from old to new.

When you're moving the instruction at old_addr, you must compensate for
any IP-relative component.  Also you can in principle have a conditional
branch as the first instruction, which gives you two branches to fix up
at the end of the trampoline, rather than one.

On x86, you've got an additional problem that it's generally more than
one instruction, and rarely an exect number of instructions overwritten
at old_addr.

Some high level comments, leaving aside the details until the above
questions are better understood.

> diff --git a/xen/arch/arm/arm64/livepatch.c b/xen/arch/arm/arm64/livepatch.c
> index e135bd5bf9..b7c9aba94e 100644
> --- a/xen/arch/arm/arm64/livepatch.c
> +++ b/xen/arch/arm/arm64/livepatch.c
> @@ -34,12 +57,87 @@ void arch_livepatch_apply(const struct livepatch_func *func,
>      /* Save old ones. */
>      memcpy(state->insn_buffer, func->old_addr, len);
>  
> -    if ( func->new_addr )
> +    if ( !func->new_addr )
> +    {
> +        insn = aarch64_insn_gen_nop();
> +    }
> +    else if ( func->trampoline_buf )
> +    {
> +        int rc;
> +        uint32_t *trampoline = func->trampoline_buf;
> +        uint32_t *tp = trampoline;
> +        void *orig_cont_addr = (void *)func->old_addr + len;
> +        unsigned int trampoline_code_size = len + 12 * ARCH_PATCH_INSN_SIZE;
> +        unsigned long trampoline_start = (unsigned long)trampoline & PAGE_MASK;
> +        unsigned long trampoline_end =
> +            PAGE_ALIGN((unsigned long)trampoline + trampoline_code_size);
> +
> +        /*
> +         * Make the payload text area writeable while generating
> +         * the trampoline instructions.
> +         */
> +        rc = modify_xen_mappings(trampoline_start, trampoline_end,
> +                                 PAGE_HYPERVISOR);
> +        if ( rc )
> +        {
> +            printk(XENLOG_ERR LIVEPATCH
> +                   "Failed to make trampoline writable: %d\n", rc);
> +            return;
> +        }

This ought not to be necessary.

The trampoline is executable code, so should have space reserved for it
in .text of the livepatch.

Then, you can identify it simply by references in a new section, without
having to have a pointer with a sentinel value (void *)1 in (which MISRA
will have a fit at).

> +
> +        /* Save state before calling the tracer. */
> +        *tp++ = aarch64_insn_gen_stp_pre(0, 1);
> +        *tp++ = aarch64_insn_gen_stp_pre(2, 3);
> +        *tp++ = aarch64_insn_gen_stp_pre(4, 5);
> +        *tp++ = aarch64_insn_gen_stp_pre(6, 7);
> +        *tp++ = aarch64_insn_gen_stp_pre(29, 30);
> +
> +        /* Call user's tracing function. */
> +        insn = aarch64_insn_gen_branch_imm(
> +            (unsigned long)tp,
> +            (unsigned long)func->new_addr,
> +            AARCH64_INSN_BRANCH_LINK);
> +        *tp++ = insn;
> +
> +        /* Restore state before continuing original function. */
> +        *tp++ = aarch64_insn_gen_ldp_post(29, 30);
> +        *tp++ = aarch64_insn_gen_ldp_post(6, 7);
> +        *tp++ = aarch64_insn_gen_ldp_post(4, 5);
> +        *tp++ = aarch64_insn_gen_ldp_post(2, 3);
> +        *tp++ = aarch64_insn_gen_ldp_post(0, 1);
> +
> +        /* Original instruction. */
> +        memcpy(tp, state->insn_buffer, len);
> +        tp += len / ARCH_PATCH_INSN_SIZE;
> +
> +        /* Branch back to original function. */
> +        insn = aarch64_insn_gen_branch_imm(
> +            (unsigned long)tp,
> +            (unsigned long)orig_cont_addr,
> +            AARCH64_INSN_BRANCH_NOLINK);
> +        *tp++ = insn;
> +
> +        clean_and_invalidate_dcache_va_range(trampoline, trampoline_code_size);
> +
> +        rc = modify_xen_mappings(trampoline_start, trampoline_end,
> +                                 PAGE_HYPERVISOR_RX);
> +        if ( rc )
> +        {
> +            printk(XENLOG_ERR LIVEPATCH
> +                   "Failed to restore trampoline RX mapping: %d\n", rc);
> +            return;
> +        }
> +
> +        /* Branch from original function to trampoline. */
> +        insn = aarch64_insn_gen_branch_imm(
> +            (unsigned long)func->old_addr,
> +            (unsigned long)func->trampoline_buf,
> +            AARCH64_INSN_BRANCH_NOLINK);

This entire block wants breaking out into a function for writing the
trampoline.  It does not want to live inline in arch_livepatch_apply().

> diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h
> index 45c8924f34..7a81763cf2 100644
> --- a/xen/include/xen/livepatch.h
> +++ b/xen/include/xen/livepatch.h
> @@ -48,6 +48,8 @@ struct xen_sysctl_livepatch_op;
>  #define ELF_LIVEPATCH_POSTREVERT_HOOK ".livepatch.hooks.postrevert"
>  /* Arbitrary limit for payload size and .bss section size. */
>  #define LIVEPATCH_MAX_SIZE     MB(2)
> +/* Size of a trampoline used for function tracing */
> +#define LIVEPATCH_TRAMPOLINE_SIZE 128

This is a common header.  How have you calculate 128?

At best, it's an Aarch64 specific number, but if you reserve space
properly in .text then it won't even matter, I don't think.

~Andrew

Re: [RFC] xen/arm64: livepatch: enable attaching callbacks
Posted by Ryo Takakura 4 days, 9 hours ago
Hi Andrew and Roger,

(I'm still looking into the feedback from
Roger but will reply shortly, thanks!)

On Mon, 29 Jun 2026 17:33:40 +0100, Andrew Cooper wrote:
>On 29/06/2026 3:01 am, Ryo Takakura wrote:
>> Linux ftrace allows registering callbacks which is useful
>> for debugging and tracing events. On Linux, it is done by
>> reserving function entry points at compile time which can
>> later be patched to branch to a trampoline.
>>
>> This patch implements similar callback feature, but with
>> different approach using existing livepatch infrastructure.
>> Instead of reserving function entry points at compile time,
>> the traced function will be livepatched so that it branches
>> to the trampoline.
>>
>> The role of the trampoline(illustrated below) is to preserve
>> the context while jumping to the tracer function, and return
>> back to the traced function with its context restored.
>>
>> trampoline:
>>     Save regs
>>     Call tracer function
>>     Restore regs
>>     old_addr
>>     return old_addr + 4
>>
>> One can request the feature by setting @trampoline_buf to 1
>> which will allocate a buffer for trampoline.
>>
>> Signed-off-by: Ryo Takakura <takakura@xxxxxxxxxxxxx>
>
>Having something a bit more like Linux tracing would be nice.  But, this
>is very different to the other livepatching functionality and a few bits
>don't match nicely.
>
>First, you write a lot of the trampoline manually.  You can do most of
>this in the target function with
>__attribute__((no_caller_saved_registers)), avoiding the need to do it
>by hand.   This would require a minimum GCC of 7 (where our baseline is
>5) but it's acceptable for new features to require a newer compiler.

I wasn't aware of the attribute. I agree using it for the sake of
avoiding manual save/restore. I'll look into using it.

>Secondly, what happens if the instruction at old_addr is an ADRP, or a
>branch?  Right now, there's no case where we move an instruction; we
>only produce new code, and branch from old to new.
>
>When you're moving the instruction at old_addr, you must compensate for
>any IP-relative component.  Also you can in principle have a conditional
>branch as the first instruction, which gives you two branches to fix up
>at the end of the trampoline, rather than one.
>
>On x86, you've got an additional problem that it's generally more than
>one instruction, and rarely an exect number of instructions overwritten
>at old_addr.
>
>Some high level comments, leaving aside the details until the above
>questions are better understood.

Thanks for the suggestion here!
I can come up with two solutions based on my understanding:

1. Fix the instruction when copying to trampoline

2. Reserve an empty function preamble(just like Linux)

I think the 1st approach can be rather easily implemented on arm
given its fixed instruction length. (Or maybe we can simply check
if its safe to be copied and reject otherwise?)

But I believe x86 with varying instruction size wouldn't be
as easy as arm. So as suggestted by Roger on the following email,
maybe better to add an empty function preamble?

>> diff --git a/xen/arch/arm/arm64/livepatch.c b/xen/arch/arm/arm64/livepatch.c
>> index e135bd5bf9..b7c9aba94e 100644
>> --- a/xen/arch/arm/arm64/livepatch.c
>> +++ b/xen/arch/arm/arm64/livepatch.c
>> @@ -34,12 +57,87 @@ void arch_livepatch_apply(const struct livepatch_func
>> *func,
>>      /* Save old ones. */
>>      memcpy(state->insn_buffer, func->old_addr, len);
>>
>> -    if ( func->new_addr )
>> +    if ( !func->new_addr )
>> +    {
>> +        insn = aarch64_insn_gen_nop();
>> +    }
>> +    else if ( func->trampoline_buf )
>> +    {
>> +        int rc;
>> +        uint32_t *trampoline = func->trampoline_buf;
>> +        uint32_t *tp = trampoline;
>> +        void *orig_cont_addr = (void *)func->old_addr + len;
>> +        unsigned int trampoline_code_size = len + 12 * ARCH_PATCH_INSN_SIZE;
>> +        unsigned long trampoline_start = (unsigned long)trampoline &
>> PAGE_MASK;
>> +        unsigned long trampoline_end =
>> +            PAGE_ALIGN((unsigned long)trampoline + trampoline_code_size);
>> +
>> +        /*
>> +         * Make the payload text area writeable while generating
>> +         * the trampoline instructions.
>> +         */
>> +        rc = modify_xen_mappings(trampoline_start, trampoline_end,
>> +                                 PAGE_HYPERVISOR);
>> +        if ( rc )
>> +        {
>> +            printk(XENLOG_ERR LIVEPATCH
>> +                   "Failed to make trampoline writable: %d\n", rc);
>> +            return;
>> +        }
>
>This ought not to be necessary.
>
>The trampoline is executable code, so should have space reserved for it
>in .text of the livepatch.
>
>Then, you can identify it simply by references in a new section, without
>having to have a pointer with a sentinel value (void *)1 in (which MISRA
>will have a fit at).

I like this idea as well! I'll try this together with the earlier
suggestion using __attribute__((no_caller_saved_registers)).

>> +
>> +        /* Save state before calling the tracer. */
>> +        *tp++ = aarch64_insn_gen_stp_pre(0, 1);
>> +        *tp++ = aarch64_insn_gen_stp_pre(2, 3);
>> +        *tp++ = aarch64_insn_gen_stp_pre(4, 5);
>> +        *tp++ = aarch64_insn_gen_stp_pre(6, 7);
>> +        *tp++ = aarch64_insn_gen_stp_pre(29, 30);
>> +
>> +        /* Call user's tracing function. */
>> +        insn = aarch64_insn_gen_branch_imm(
>> +            (unsigned long)tp,
>> +            (unsigned long)func->new_addr,
>> +            AARCH64_INSN_BRANCH_LINK);
>> +        *tp++ = insn;
>> +
>> +        /* Restore state before continuing original function. */
>> +        *tp++ = aarch64_insn_gen_ldp_post(29, 30);
>> +        *tp++ = aarch64_insn_gen_ldp_post(6, 7);
>> +        *tp++ = aarch64_insn_gen_ldp_post(4, 5);
>> +        *tp++ = aarch64_insn_gen_ldp_post(2, 3);
>> +        *tp++ = aarch64_insn_gen_ldp_post(0, 1);
>> +
>> +        /* Original instruction. */
>> +        memcpy(tp, state->insn_buffer, len);
>> +        tp += len / ARCH_PATCH_INSN_SIZE;
>> +
>> +        /* Branch back to original function. */
>> +        insn = aarch64_insn_gen_branch_imm(
>> +            (unsigned long)tp,
>> +            (unsigned long)orig_cont_addr,
>> +            AARCH64_INSN_BRANCH_NOLINK);
>> +        *tp++ = insn;
>> +
>> +        clean_and_invalidate_dcache_va_range(trampoline,
>> trampoline_code_size);
>> +
>> +        rc = modify_xen_mappings(trampoline_start, trampoline_end,
>> +                                 PAGE_HYPERVISOR_RX);
>> +        if ( rc )
>> +        {
>> +            printk(XENLOG_ERR LIVEPATCH
>> +                   "Failed to restore trampoline RX mapping: %d\n", rc);
>> +            return;
>> +        }
>> +
>> +        /* Branch from original function to trampoline. */
>> +        insn = aarch64_insn_gen_branch_imm(
>> +            (unsigned long)func->old_addr,
>> +            (unsigned long)func->trampoline_buf,
>> +            AARCH64_INSN_BRANCH_NOLINK);
>
>This entire block wants breaking out into a function for writing the
>trampoline.  It does not want to live inline in arch_livepatch_apply().

I'll fix this.

>> diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h
>> index 45c8924f34..7a81763cf2 100644
>> --- a/xen/include/xen/livepatch.h
>> +++ b/xen/include/xen/livepatch.h
>> @@ -48,6 +48,8 @@ struct xen_sysctl_livepatch_op;
>>  #define ELF_LIVEPATCH_POSTREVERT_HOOK ".livepatch.hooks.postrevert"
>>  /* Arbitrary limit for payload size and .bss section size. */
>>  #define LIVEPATCH_MAX_SIZE     MB(2)
>> +/* Size of a trampoline used for function tracing */
>> +#define LIVEPATCH_TRAMPOLINE_SIZE 128
>
>This is a common header.  How have you calculate 128?
>
>At best, it's an Aarch64 specific number, but if you reserve space
>properly in .text then it won't even matter, I don't think.

The value was arbitrary which i thought would be enough
for buffer... This should be taken care with the suggested
approach as said.

Sincerely,
Ryo Takakura

>~Andrew

________________________________________
差出人: Andrew Cooper <andrew.cooper3@citrix.com>
送信日時: 2026年6月30日 1:33
宛先: Ryo Takakura; xen-devel@lists.xenproject.org
CC: Andrew Cooper; roger.pau@citrix.com; ross.lagerwall@citrix.com; sstabellini@kernel.org; julien@xen.org; bertrand.marquis@arm.com; michal.orzel@amd.com; Volodymyr_Babchuk@epam.com; anthony.perard@vates.tech; jbeulich@suse.com; Hirokazu Takahashi; Koichiro Den
件名: Re: [RFC] xen/arm64: livepatch: enable attaching callbacks

On 29/06/2026 3:01 am, Ryo Takakura wrote:
> Linux ftrace allows registering callbacks which is useful
> for debugging and tracing events. On Linux, it is done by
> reserving function entry points at compile time which can
> later be patched to branch to a trampoline.
>
> This patch implements similar callback feature, but with
> different approach using existing livepatch infrastructure.
> Instead of reserving function entry points at compile time,
> the traced function will be livepatched so that it branches
> to the trampoline.
>
> The role of the trampoline(illustrated below) is to preserve
> the context while jumping to the tracer function, and return
> back to the traced function with its context restored.
>
> trampoline:
>     Save regs
>     Call tracer function
>     Restore regs
>     old_addr
>     return old_addr + 4
>
> One can request the feature by setting @trampoline_buf to 1
> which will allocate a buffer for trampoline.
>
> Signed-off-by: Ryo Takakura <takakura@valinux.co.jp>

Having something a bit more like Linux tracing would be nice.  But, this
is very different to the other livepatching functionality and a few bits
don't match nicely.

First, you write a lot of the trampoline manually.  You can do most of
this in the target function with
__attribute__((no_caller_saved_registers)), avoiding the need to do it
by hand.   This would require a minimum GCC of 7 (where our baseline is
5) but it's acceptable for new features to require a newer compiler.

Secondly, what happens if the instruction at old_addr is an ADRP, or a
branch?  Right now, there's no case where we move an instruction; we
only produce new code, and branch from old to new.

When you're moving the instruction at old_addr, you must compensate for
any IP-relative component.  Also you can in principle have a conditional
branch as the first instruction, which gives you two branches to fix up
at the end of the trampoline, rather than one.

On x86, you've got an additional problem that it's generally more than
one instruction, and rarely an exect number of instructions overwritten
at old_addr.

Some high level comments, leaving aside the details until the above
questions are better understood.

> diff --git a/xen/arch/arm/arm64/livepatch.c b/xen/arch/arm/arm64/livepatch.c
> index e135bd5bf9..b7c9aba94e 100644
> --- a/xen/arch/arm/arm64/livepatch.c
> +++ b/xen/arch/arm/arm64/livepatch.c
> @@ -34,12 +57,87 @@ void arch_livepatch_apply(const struct livepatch_func *func,
>      /* Save old ones. */
>      memcpy(state->insn_buffer, func->old_addr, len);
>
> -    if ( func->new_addr )
> +    if ( !func->new_addr )
> +    {
> +        insn = aarch64_insn_gen_nop();
> +    }
> +    else if ( func->trampoline_buf )
> +    {
> +        int rc;
> +        uint32_t *trampoline = func->trampoline_buf;
> +        uint32_t *tp = trampoline;
> +        void *orig_cont_addr = (void *)func->old_addr + len;
> +        unsigned int trampoline_code_size = len + 12 * ARCH_PATCH_INSN_SIZE;
> +        unsigned long trampoline_start = (unsigned long)trampoline & PAGE_MASK;
> +        unsigned long trampoline_end =
> +            PAGE_ALIGN((unsigned long)trampoline + trampoline_code_size);
> +
> +        /*
> +         * Make the payload text area writeable while generating
> +         * the trampoline instructions.
> +         */
> +        rc = modify_xen_mappings(trampoline_start, trampoline_end,
> +                                 PAGE_HYPERVISOR);
> +        if ( rc )
> +        {
> +            printk(XENLOG_ERR LIVEPATCH
> +                   "Failed to make trampoline writable: %d\n", rc);
> +            return;
> +        }

This ought not to be necessary.

The trampoline is executable code, so should have space reserved for it
in .text of the livepatch.

Then, you can identify it simply by references in a new section, without
having to have a pointer with a sentinel value (void *)1 in (which MISRA
will have a fit at).

> +
> +        /* Save state before calling the tracer. */
> +        *tp++ = aarch64_insn_gen_stp_pre(0, 1);
> +        *tp++ = aarch64_insn_gen_stp_pre(2, 3);
> +        *tp++ = aarch64_insn_gen_stp_pre(4, 5);
> +        *tp++ = aarch64_insn_gen_stp_pre(6, 7);
> +        *tp++ = aarch64_insn_gen_stp_pre(29, 30);
> +
> +        /* Call user's tracing function. */
> +        insn = aarch64_insn_gen_branch_imm(
> +            (unsigned long)tp,
> +            (unsigned long)func->new_addr,
> +            AARCH64_INSN_BRANCH_LINK);
> +        *tp++ = insn;
> +
> +        /* Restore state before continuing original function. */
> +        *tp++ = aarch64_insn_gen_ldp_post(29, 30);
> +        *tp++ = aarch64_insn_gen_ldp_post(6, 7);
> +        *tp++ = aarch64_insn_gen_ldp_post(4, 5);
> +        *tp++ = aarch64_insn_gen_ldp_post(2, 3);
> +        *tp++ = aarch64_insn_gen_ldp_post(0, 1);
> +
> +        /* Original instruction. */
> +        memcpy(tp, state->insn_buffer, len);
> +        tp += len / ARCH_PATCH_INSN_SIZE;
> +
> +        /* Branch back to original function. */
> +        insn = aarch64_insn_gen_branch_imm(
> +            (unsigned long)tp,
> +            (unsigned long)orig_cont_addr,
> +            AARCH64_INSN_BRANCH_NOLINK);
> +        *tp++ = insn;
> +
> +        clean_and_invalidate_dcache_va_range(trampoline, trampoline_code_size);
> +
> +        rc = modify_xen_mappings(trampoline_start, trampoline_end,
> +                                 PAGE_HYPERVISOR_RX);
> +        if ( rc )
> +        {
> +            printk(XENLOG_ERR LIVEPATCH
> +                   "Failed to restore trampoline RX mapping: %d\n", rc);
> +            return;
> +        }
> +
> +        /* Branch from original function to trampoline. */
> +        insn = aarch64_insn_gen_branch_imm(
> +            (unsigned long)func->old_addr,
> +            (unsigned long)func->trampoline_buf,
> +            AARCH64_INSN_BRANCH_NOLINK);

This entire block wants breaking out into a function for writing the
trampoline.  It does not want to live inline in arch_livepatch_apply().

> diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h
> index 45c8924f34..7a81763cf2 100644
> --- a/xen/include/xen/livepatch.h
> +++ b/xen/include/xen/livepatch.h
> @@ -48,6 +48,8 @@ struct xen_sysctl_livepatch_op;
>  #define ELF_LIVEPATCH_POSTREVERT_HOOK ".livepatch.hooks.postrevert"
>  /* Arbitrary limit for payload size and .bss section size. */
>  #define LIVEPATCH_MAX_SIZE     MB(2)
> +/* Size of a trampoline used for function tracing */
> +#define LIVEPATCH_TRAMPOLINE_SIZE 128

This is a common header.  How have you calculate 128?

At best, it's an Aarch64 specific number, but if you reserve space
properly in .text then it won't even matter, I don't think.

~Andrew
Re: [RFC] xen/arm64: livepatch: enable attaching callbacks
Posted by Andrew Cooper 4 days, 9 hours ago
On 30/06/2026 11:43 am, Ryo Takakura wrote:
>>> diff --git a/xen/arch/arm/arm64/livepatch.c b/xen/arch/arm/arm64/livepatch.c
>>> index e135bd5bf9..b7c9aba94e 100644
>>> --- a/xen/arch/arm/arm64/livepatch.c
>>> +++ b/xen/arch/arm/arm64/livepatch.c
>>> @@ -34,12 +57,87 @@ void arch_livepatch_apply(const struct livepatch_func
>>> *func,
>>>      /* Save old ones. */
>>>      memcpy(state->insn_buffer, func->old_addr, len);
>>>
>>> -    if ( func->new_addr )
>>> +    if ( !func->new_addr )
>>> +    {
>>> +        insn = aarch64_insn_gen_nop();
>>> +    }
>>> +    else if ( func->trampoline_buf )
>>> +    {
>>> +        int rc;
>>> +        uint32_t *trampoline = func->trampoline_buf;
>>> +        uint32_t *tp = trampoline;
>>> +        void *orig_cont_addr = (void *)func->old_addr + len;
>>> +        unsigned int trampoline_code_size = len + 12 * ARCH_PATCH_INSN_SIZE;
>>> +        unsigned long trampoline_start = (unsigned long)trampoline &
>>> PAGE_MASK;
>>> +        unsigned long trampoline_end =
>>> +            PAGE_ALIGN((unsigned long)trampoline + trampoline_code_size);
>>> +
>>> +        /*
>>> +         * Make the payload text area writeable while generating
>>> +         * the trampoline instructions.
>>> +         */
>>> +        rc = modify_xen_mappings(trampoline_start, trampoline_end,
>>> +                                 PAGE_HYPERVISOR);
>>> +        if ( rc )
>>> +        {
>>> +            printk(XENLOG_ERR LIVEPATCH
>>> +                   "Failed to make trampoline writable: %d\n", rc);
>>> +            return;
>>> +        }
>> This ought not to be necessary.
>>
>> The trampoline is executable code, so should have space reserved for it
>> in .text of the livepatch.
>>
>> Then, you can identify it simply by references in a new section, without
>> having to have a pointer with a sentinel value (void *)1 in (which MISRA
>> will have a fit at).
> I like this idea as well! I'll try this together with the earlier
> suggestion using __attribute__((no_caller_saved_registers)).

If you reserve space in the function preamble, and use
__attribute__((no_caller_saved_registers)), then you don't need
trampolines at all.

The preamble just needs to turn into `call newfunc` when the callback
function is attached, and then it's regular return will do the right thing.

~Andrew
Re: [RFC] xen/arm64: livepatch: enable attaching callbacks
Posted by Ryo Takakura 3 days, 10 hours ago
Hi Andrew,

On Tue, 30 Jun 2026 10:57:38 +0000, Andrew Cooper wrote:
>On 30/06/2026 11:43 am, Ryo Takakura wrote:
>>>> diff --git a/xen/arch/arm/arm64/livepatch.c b/xen/arch/arm/arm64/livepatch.c
>>>> index e135bd5bf9..b7c9aba94e 100644
>>>> --- a/xen/arch/arm/arm64/livepatch.c
>>>> +++ b/xen/arch/arm/arm64/livepatch.c
>>>> @@ -34,12 +57,87 @@ void arch_livepatch_apply(const struct livepatch_func
>>>> *func,
>>>>      /* Save old ones. */
>>>>      memcpy(state->insn_buffer, func->old_addr, len);
>>>>
>>>> -    if ( func->new_addr )
>>>> +    if ( !func->new_addr )
>>>> +    {
>>>> +        insn = aarch64_insn_gen_nop();
>>>> +    }
>>>> +    else if ( func->trampoline_buf )
>>>> +    {
>>>> +        int rc;
>>>> +        uint32_t *trampoline = func->trampoline_buf;
>>>> +        uint32_t *tp = trampoline;
>>>> +        void *orig_cont_addr = (void *)func->old_addr + len;
>>>> +        unsigned int trampoline_code_size = len + 12 * 
>>>> ARCH_PATCH_INSN_SIZE;
>>>> +        unsigned long trampoline_start = (unsigned long)trampoline &
>>>> PAGE_MASK;
>>>> +        unsigned long trampoline_end =
>>>> +            PAGE_ALIGN((unsigned long)trampoline + trampoline_code_size);
>>>> +
>>>> +        /*
>>>> +         * Make the payload text area writeable while generating
>>>> +         * the trampoline instructions.
>>>> +         */
>>>> +        rc = modify_xen_mappings(trampoline_start, trampoline_end,
>>>> +                                 PAGE_HYPERVISOR);
>>>> +        if ( rc )
>>>> +        {
>>>> +            printk(XENLOG_ERR LIVEPATCH
>>>> +                   "Failed to make trampoline writable: %d\n", rc);
>>>> +            return;
>>>> +        }
>>> This ought not to be necessary.
>>>
>>> The trampoline is executable code, so should have space reserved for it
>>> in .text of the livepatch.
>>>
>>> Then, you can identify it simply by references in a new section, without
>>> having to have a pointer with a sentinel value (void *)1 in (which MISRA
>>> will have a fit at).
>> I like this idea as well! I'll try this together with the earlier
>> suggestion using __attribute__((no_caller_saved_registers)).
>
>If you reserve space in the function preamble, and use
>__attribute__((no_caller_saved_registers)), then you don't need
>trampolines at all.
>
>The preamble just needs to turn into `call newfunc` when the callback
>function is attached, and then it's regular return will do the right thing.

Understood!

Sincerely,
Ryo Takakura

>~Andrew