[Qemu-devel] [PULL 0/4] TCG queued patches
Posted by Richard Henderson, 5 days ago
This includes a fix for the tcg/arm bug exposed by the ppc64 code change
for comparisons.

It also includes improvements to tcg/arm and tcg/ppc to allow for larger
CPUFooState structures, as exposed by expanding CPUARMState for 2048-bit
vector registers.

Note that tcg/mips has the exact same problem.  However, the MIPS ISA
makes it more difficult to fix up.  I'd like someone with hardware to
make this change.



r~



The following changes since commit 7398166ddf7c6dbbc9cae6ac69bb2feda14b40ac:

  Merge remote-tracking branch 'remotes/kraxel/tags/vnc-20180112-pull-request' into staging (2018-01-12 16:01:30 +0000)

are available in the Git repository at:

  git://github.com/rth7680/qemu.git tags/pull-tcg-20180112

for you to fetch changes up to bb08c35b17b7245c696bd12e527453e624e77da3:

  tcg/ppc: Allow a 32-bit offset to the constant pool (2018-01-12 12:50:36 -0800)

----------------------------------------------------------------
Queued tcg patches

----------------------------------------------------------------
Richard Henderson (4):
      tcg/arm: Fix double-word comparisons
      tcg/arm: Support tlb offsets larger than 64k
      tcg/ppc: Support tlb offsets larger than 64k
      tcg/ppc: Allow a 32-bit offset to the constant pool

 tcg/arm/tcg-target.inc.c | 142 ++++++++++++++++++++++++++++++++---------------
 tcg/ppc/tcg-target.inc.c |  84 ++++++++++++++++------------
 2 files changed, 144 insertions(+), 82 deletions(-)

Re: [Qemu-devel] [PULL 0/4] TCG queued patches
Posted by Aurelien Jarno, 5 days ago
On 2018-01-12 13:06, Richard Henderson wrote:
> This includes a fix for the tcg/arm bug exposed by the ppc64 code change
> for comparisons.
> 
> It also includes improvements to tcg/arm and tcg/ppc to allow for larger
> CPUFooState structures, as exposed by expanding CPUARMState for 2048-bit
> vector registers.
> 
> Note that tcg/mips has the exact same problem.  However, the MIPS ISA
> makes it more difficult to fix up.  I'd like someone with hardware to
> make this change.

Ok, I'll try to have a look at that.
 
-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                 http://www.aurel32.net

Re: [Qemu-devel] [PULL 0/4] TCG queued patches
Posted by Peter Maydell, 3 days ago
On 12 January 2018 at 21:06, Richard Henderson
<richard.henderson@linaro.org> wrote:
> This includes a fix for the tcg/arm bug exposed by the ppc64 code change
> for comparisons.
>
> It also includes improvements to tcg/arm and tcg/ppc to allow for larger
> CPUFooState structures, as exposed by expanding CPUARMState for 2048-bit
> vector registers.

> ----------------------------------------------------------------
> Queued tcg patches
>
> ----------------------------------------------------------------
> Richard Henderson (4):
>       tcg/arm: Fix double-word comparisons
>       tcg/arm: Support tlb offsets larger than 64k
>       tcg/ppc: Support tlb offsets larger than 64k
>       tcg/ppc: Allow a 32-bit offset to the constant pool

This seems to crash on arm32 hosts with a sparc64 guest:

$ ./sparc64-softmmu/qemu-system-sparc64  -display none
Segmentation fault

Here's a backtrace:

Thread 3 "qemu-system-spa" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0xe8cb8e10 (LWP 25876)]
0x000325a0 in tcg_out32 (s=0xe8300470, v=<optimised out>) at
/home/peter.maydell/qemu/tcg/tcg.c:193
193             *s->code_ptr++ = v;
(gdb) bt
#0  0x000325a0 in tcg_out32 (s=0xe8300470, v=<optimised out>) at
/home/peter.maydell/qemu/tcg/tcg.c:193
#1  tcg_out_dat_imm (im=<optimised out>, rn=<optimised out>, rd=2,
opc=8388608, cond=14, s=0xe8300470)
    at /home/peter.maydell/qemu/tcg/arm/tcg-target.inc.c:451
#2  tcg_out_tlb_read (s=s@entry=0xe8300470,
addrlo=addrlo@entry=TCG_REG_R4, addrhi=addrhi@entry=TCG_REG_R5,
opc=opc@entry=MO_8,
    mem_index=mem_index@entry=5, is_load=is_load@entry=true) at
/home/peter.maydell/qemu/tcg/arm/tcg-target.inc.c:1320
#3  0x00033144 in tcg_out_qemu_ld (s=0xe8300470, args=<optimised out>,
is64=<optimised out>)
    at /home/peter.maydell/qemu/tcg/arm/tcg-target.inc.c:1648
#4  0x00033b06 in tcg_out_op (s=s@entry=0xe8300470, opc=<optimised
out>, args=args@entry=0xe8cb85fc,
    const_args=const_args@entry=0xe8cb863c) at
/home/peter.maydell/qemu/tcg/arm/tcg-target.inc.c:2059
#5  0x000360b6 in tcg_reg_alloc_op (op=0xe83076b8, s=<optimised out>)
at /home/peter.maydell/qemu/tcg/tcg.c:2893
#6  tcg_gen_code (s=<optimised out>, tb=tb@entry=0xe8e9d680
<code_gen_buffer+1636>) at /home/peter.maydell/qemu/tcg/tcg.c:3279
#7  0x000757b8 in tb_gen_code (cpu=cpu@entry=0xaf3ee8,
pc=2198754869620, cs_base=cs_base@entry=2198754869624,
flags=flags@entry=69,
    cflags=0) at /home/peter.maydell/qemu/accel/tcg/translate-all.c:1319
#8  0x000747fc in tb_find (cf_mask=<optimised out>, tb_exit=<optimised
out>, last_tb=0x0, cpu=0xf000c178)
    at /home/peter.maydell/qemu/accel/tcg/cpu-exec.c:404
#9  cpu_exec (cpu=cpu@entry=0xaf3ee8) at
/home/peter.maydell/qemu/accel/tcg/cpu-exec.c:731
#10 0x00052f60 in tcg_cpu_exec (cpu=0xaf3ee8) at
/home/peter.maydell/qemu/cpus.c:1300
#11 qemu_tcg_rr_cpu_thread_fn (arg=<optimised out>) at
/home/peter.maydell/qemu/cpus.c:1396
#12 0xec9235b4 in start_thread (arg=0x0) at pthread_create.c:335
#13 0xec8c4bec in ?? () at ../sysdeps/unix/sysv/linux/arm/clone.S:89
from /lib/arm-linux-gnueabihf/libc.so.6

Writing off the end of the codegen buffer?

thanks
-- PMM

[Qemu-devel] [PULL 1/4] tcg/arm: Fix double-word comparisons
Posted by Richard Henderson, 5 days ago
From: Richard Henderson <rth@twiddle.net>

The code sequence we were generating was only good for unsigned
comparisons.  For signed comparisons, use the sequence from gcc.

This fixes booting of ppc64 firmware, in combination with a patch
that changes the code sequence used for ppc comparisons.

Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
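A minimal standalone C sketch of why the old sequence is wrong for
the signed orderings (illustrative only, not QEMU code): when the
high halves are equal, the low halves must be compared unsigned, but
reusing the signed condition tests them signed as well.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Old sequence: cmp ah,bh; cmpeq al,bl; branch on LT.  When
       ah == bh, the LT condition sees the low-half flags, so the
       low halves are wrongly treated as signed.  */
    static bool old_lt(int32_t ah, uint32_t al, int32_t bh, uint32_t bl)
    {
        return ah != bh ? ah < bh : (int32_t)al < (int32_t)bl;
    }

    /* Reference: a true signed 64-bit comparison.  */
    static bool ref_lt(int32_t ah, uint32_t al, int32_t bh, uint32_t bl)
    {
        int64_t a = ((int64_t)ah << 32) | al;
        int64_t b = ((int64_t)bh << 32) | bl;
        return a < b;
    }

    int main(void)
    {
        /* High halves equal; low halves straddle 0x80000000.  */
        assert(ref_lt(0, 1, 0, 0x80000000));   /* 1 < 2^31 */
        assert(!old_lt(0, 1, 0, 0x80000000));  /* the old answer */
        return 0;
    }

The gcc sequence used below instead performs a full double-word
subtraction, with CMP on the low half feeding SBCS on the high half,
so the final flags are valid for the signed conditions.
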
 tcg/arm/tcg-target.inc.c | 112 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 80 insertions(+), 32 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 98a12535a5..b9890c8bd8 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -239,10 +239,11 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
     }
 }
 
-#define TCG_CT_CONST_ARM  0x100
-#define TCG_CT_CONST_INV  0x200
-#define TCG_CT_CONST_NEG  0x400
-#define TCG_CT_CONST_ZERO 0x800
+#define TCG_CT_CONST_ARM     0x0100
+#define TCG_CT_CONST_INV     0x0200
+#define TCG_CT_CONST_NEG     0x0400
+#define TCG_CT_CONST_INVNEG  0x0800
+#define TCG_CT_CONST_ZERO    0x1000
 
 /* parse target specific constraints */
 static const char *target_parse_constraint(TCGArgConstraint *ct,
@@ -258,6 +259,9 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
     case 'N': /* The gcc constraint letter is L, already used here.  */
         ct->ct |= TCG_CT_CONST_NEG;
         break;
+    case 'M':
+        ct->ct |= TCG_CT_CONST_INVNEG;
+        break;
     case 'Z':
         ct->ct |= TCG_CT_CONST_ZERO;
         break;
@@ -351,8 +355,7 @@ static inline int check_fit_imm(uint32_t imm)
 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                          const TCGArgConstraint *arg_ct)
 {
-    int ct;
-    ct = arg_ct->ct;
+    int ct = arg_ct->ct;
     if (ct & TCG_CT_CONST) {
         return 1;
     } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
@@ -361,6 +364,9 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
         return 1;
     } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
         return 1;
+    } else if ((ct & TCG_CT_CONST_INVNEG)
+               && check_fit_imm(~val) && check_fit_imm(-val)) {
+        return 1;
     } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
         return 1;
     } else {
@@ -1103,6 +1109,64 @@ static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
     }
 }
 
+static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
+                            const int *const_args)
+{
+    TCGReg al = args[0];
+    TCGReg ah = args[1];
+    TCGArg bl = args[2];
+    TCGArg bh = args[3];
+    TCGCond cond = args[4];
+    int const_bl = const_args[2];
+    int const_bh = const_args[3];
+
+    switch (cond) {
+    case TCG_COND_EQ:
+    case TCG_COND_NE:
+    case TCG_COND_LTU:
+    case TCG_COND_LEU:
+    case TCG_COND_GTU:
+    case TCG_COND_GEU:
+        /* We perform a conditional comparison.  If the high half is
+           equal, then overwrite the flags with the comparison of the
+           low half.  The resulting flags cover the whole.  */
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, ah, bh, const_bh);
+        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0, al, bl, const_bl);
+        return cond;
+
+    case TCG_COND_LT:
+    case TCG_COND_GE:
+        /* We perform a double-word subtraction and examine the result.
+           We do not actually need the result of the subtract, so the
+           low part "subtract" is a compare.  For the high half we have
+           no choice but to compute into a temporary.  */
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, al, bl, const_bl);
+        tcg_out_dat_rIK(s, COND_AL, ARITH_SBC | TO_CPSR, ARITH_ADC | TO_CPSR,
+                        TCG_REG_TMP, ah, bh, const_bh);
+        return cond;
+
+    case TCG_COND_LE:
+    case TCG_COND_GT:
+        /* Similar, but with swapped arguments.  And of course we must
+           force the immediates into a register.  */
+        if (const_bl) {
+            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP, bl);
+            bl = TCG_REG_TMP;
+        }
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, bl, al, 0);
+        if (const_bh) {
+            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP, bh);
+            bh = TCG_REG_TMP;
+        }
+        tcg_out_dat_rIK(s, COND_AL, ARITH_SBC | TO_CPSR, ARITH_ADC | TO_CPSR,
+                        TCG_REG_TMP, bh, ah, 0);
+        return tcg_swap_cond(cond);
+
+    default:
+        g_assert_not_reached();
+    }
+}
+
 #ifdef CONFIG_SOFTMMU
 #include "tcg-ldst.inc.c"
 
@@ -1964,22 +2028,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
                            arg_label(args[3]));
         break;
-    case INDEX_op_brcond2_i32:
-        /* The resulting conditions are:
-         * TCG_COND_EQ    -->  a0 == a2 && a1 == a3,
-         * TCG_COND_NE    --> (a0 != a2 && a1 == a3) ||  a1 != a3,
-         * TCG_COND_LT(U) --> (a0 <  a2 && a1 == a3) ||  a1 <  a3,
-         * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
-         * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
-         * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 >  a3,
-         */
-        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-                        args[1], args[3], const_args[3]);
-        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
-                        args[0], args[2], const_args[2]);
-        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
-                           arg_label(args[5]));
-        break;
     case INDEX_op_setcond_i32:
         tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                         args[1], args[2], const_args[2]);
@@ -1988,15 +2036,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
                         ARITH_MOV, args[0], 0, 0);
         break;
+
+    case INDEX_op_brcond2_i32:
+        c = tcg_out_cmp2(s, args, const_args);
+        tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5]));
+        break;
     case INDEX_op_setcond2_i32:
-        /* See brcond2_i32 comment */
-        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-                        args[2], args[4], const_args[4]);
-        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
-                        args[1], args[3], const_args[3]);
-        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
-                        ARITH_MOV, args[0], 0, 1);
-        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
+        c = tcg_out_cmp2(s, args + 1, const_args + 1);
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1);
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
                         ARITH_MOV, args[0], 0, 0);
         break;
 
@@ -2093,9 +2141,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
     static const TCGTargetOpDef sub2
         = { .args_ct_str = { "r", "r", "rI", "rI", "rIN", "rIK" } };
     static const TCGTargetOpDef br2
-        = { .args_ct_str = { "r", "r", "rIN", "rIN" } };
+        = { .args_ct_str = { "r", "r", "rIM", "rIM" } };
     static const TCGTargetOpDef setc2
-        = { .args_ct_str = { "r", "r", "r", "rIN", "rIN" } };
+        = { .args_ct_str = { "r", "r", "r", "rIM", "rIM" } };
 
     switch (op) {
     case INDEX_op_goto_ptr:
-- 
2.14.3


[Qemu-devel] [PULL 2/4] tcg/arm: Support tlb offsets larger than 64k
Posted by Richard Henderson, 5 days ago
AArch64 with SVE has an offset of 80k to the 8th TLB, which exceeds
the 64k that the current code assumes.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
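As a sketch of the chunking idea (standalone C; the constants and the
cmp_off-based loop bound are a simplification of the loop below): an
ARM data-processing immediate is an 8-bit value rotated right by an
even amount, so an over-large TLB offset can be peeled off in aligned
8-bit chunks until the remainder fits the load's offset field.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t cmp_off = 0x14018;  /* ~80k, as with SVE's 8th TLB */
        uint32_t mask_off = 0xff;    /* ldrd allows an 8-bit offset */

        while (cmp_off > mask_off) {
            /* Lowest set bit above the mask, rounded down to an even
               position, since ARM immediate rotations are even.  */
            int shift = __builtin_ctz(cmp_off & ~mask_off) & ~1;
            uint32_t chunk = cmp_off & (0xffu << shift);
            printf("add tmp, base, #0x%x\n", chunk);
            cmp_off -= chunk;
        }
        printf("final load offset: #0x%x\n", cmp_off);
        return 0;
    }

In the emitted code each chunk becomes one ADD into TCG_REG_R2, and
cmp_off and add_off are both reduced by the same addend.
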
 tcg/arm/tcg-target.inc.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index b9890c8bd8..4bd465732b 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1261,12 +1261,6 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
 /* We're expecting to use an 8-bit immediate and to mask.  */
 QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
 
-/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
-   Using the offset of the second entry in the last tlb table ensures
-   that we can index all of the elements of the first entry.  */
-QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
-                  > 0xffff);
-
 /* Load and compare a TLB entry, leaving the flags set.  Returns the register
    containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */
 
@@ -1279,6 +1273,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
          ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
          : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
     int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+    int mask_off;
     unsigned s_bits = opc & MO_SIZE;
     unsigned a_bits = get_alignment_bits(opc);
 
@@ -1310,16 +1305,25 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                         0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
     }
 
-    /* We checked that the offset is contained within 16 bits above.  */
-    if (add_off > 0xfff
-        || (use_armv6_instructions && TARGET_LONG_BITS == 64
-            && cmp_off > 0xff)) {
+    /* Add portions of the offset until the memory access is in range.
+     * If we plan on using ldrd, reduce to an 8-bit offset; otherwise
+     * we can use a 12-bit offset.  */
+    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+        mask_off = 0xff;
+    } else {
+        mask_off = 0xfff;
+    }
+    while (add_off > mask_off) {
+        int shift = ctz32(cmp_off & ~mask_off) & ~1;
+        int rot = ((32 - shift) << 7) & 0xf00;
+        int addend = cmp_off & (0xff << shift);
         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
-                        (24 << 7) | (cmp_off >> 8));
+                        rot | ((cmp_off >> shift) & 0xff));
         base = TCG_REG_R2;
-        add_off -= cmp_off & 0xff00;
-        cmp_off &= 0xff;
+        add_off -= addend;
+        cmp_off -= addend;
     }
+
     if (!use_armv7_instructions) {
         tcg_out_dat_imm(s, COND_AL, ARITH_AND,
                         TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
-- 
2.14.3


[Qemu-devel] [PULL 3/4] tcg/ppc: Support tlb offsets larger than 64k
Posted by Richard Henderson, 5 days ago
AArch64 with SVE has an offset of 80k to the 8th TLB, which exceeds
the 64k that the current code assumes.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
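The high/low split used here can be sketched standalone (C, with
illustrative values): take the low 16 bits sign-extended, then fold
the borrow into the high part, so that high + low reproduces the
offset exactly as ADDIS plus a signed 16-bit displacement computes it.

    #include <assert.h>
    #include <stdint.h>

    static void split(int32_t off, int32_t *high, int32_t *low)
    {
        *low = (int16_t)off;   /* sign-extended, like the D field */
        *high = off - *low;    /* always a multiple of 0x10000 */
        assert((*high & 0xffff) == 0);
        assert(*high + *low == off);
    }

    int main(void)
    {
        int32_t hi, lo;
        split(0x14018, &hi, &lo);    /* ~80k TLB offset */
        assert(hi == 0x10000 && lo == 0x4018);
        split(0x1c000, &hi, &lo);    /* bit 15 set in the low half */
        assert(hi == 0x20000 && lo == -0x4000);
        return 0;
    }

ADDIS adds the high part to the base register, and the remaining
displacements on the comparison and addend loads stay within signed
16-bit range, which is what the asserts in the patch verify.
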
 tcg/ppc/tcg-target.inc.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 879885b68b..74f9b4aa34 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -1524,16 +1524,15 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
 
     /* Compensate for very large offsets.  */
     if (add_off >= 0x8000) {
-        /* Most target env are smaller than 32k; none are larger than 64k.
-           Simplify the logic here merely to offset by 0x7ff0, giving us a
-           range just shy of 64k.  Check this assumption.  */
-        QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
-                                   tlb_table[NB_MMU_MODES - 1][1])
-                          > 0x7ff0 + 0x7fff);
-        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0));
+        int low = (int16_t)cmp_off;
+        int high = cmp_off - low;
+        assert((high & 0xffff) == 0);
+        assert(cmp_off - high == (int16_t)(cmp_off - high));
+        assert(add_off - high == (int16_t)(add_off - high));
+        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, base, high >> 16));
         base = TCG_REG_TMP1;
-        cmp_off -= 0x7ff0;
-        add_off -= 0x7ff0;
+        cmp_off -= high;
+        add_off -= high;
     }
 
     /* Extraction and shifting, part 2.  */
-- 
2.14.3


[Qemu-devel] [PULL 4/4] tcg/ppc: Allow a 32-bit offset to the constant pool
Posted by Richard Henderson, 5 days ago
We recently relaxed the limit on the number of opcodes that can
appear in a TranslationBlock.  In certain cases this has resulted
in relocation overflow: a signed 16-bit displacement from TCG_REG_TB
can no longer reach the constant pool.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
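A standalone sketch of the pair-patching scheme (C; the NOP encoding
and field positions match the ppc backend, deposit32 is a local
stand-in for QEMU's helper): the pool reference is emitted as
ADDIS + LD, and relocation either nops out the ADDIS when a 16-bit
displacement suffices or fills in both 16-bit halves.

    #include <stdint.h>

    #define NOP 0x60000000u   /* ppc: ori r0,r0,0 */

    static uint32_t deposit32(uint32_t v, int pos, int len, uint32_t field)
    {
        uint32_t mask = ((1u << len) - 1) << pos;
        return (v & ~mask) | ((field << pos) & mask);
    }

    static void patch_pair(uint32_t insn[2], int32_t value, unsigned reg_tb)
    {
        if (value == (int16_t)value) {
            /* Small displacement: drop the addis and address the
               load directly off the TB register.  */
            insn[0] = NOP;
            insn[1] = deposit32(insn[1], 0, 16, (uint16_t)value);
            insn[1] = deposit32(insn[1], 16, 5, reg_tb);  /* RA */
        } else {
            int16_t lo = value;       /* sign-extended low half */
            int32_t hi = value - lo;  /* borrow folded into high */
            insn[0] = deposit32(insn[0], 0, 16, (uint32_t)hi >> 16);
            insn[1] = deposit32(insn[1], 0, 16, (uint16_t)lo);
        }
    }

For value = 0x12345, for example, the ADDIS keeps 0x1 in its immediate
field and the LD keeps 0x2345; for value = 0x40, the ADDIS becomes a
NOP and the LD addresses the pool entry directly off TCG_REG_TB.
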
 tcg/ppc/tcg-target.inc.c | 67 ++++++++++++++++++++++++++++--------------------
 1 file changed, 39 insertions(+), 28 deletions(-)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 74f9b4aa34..86f7de5f7e 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -222,33 +222,6 @@ static inline void tcg_out_bc_noaddr(TCGContext *s, int insn)
     tcg_out32(s, insn | retrans);
 }
 
-static void patch_reloc(tcg_insn_unit *code_ptr, int type,
-                        intptr_t value, intptr_t addend)
-{
-    tcg_insn_unit *target;
-    tcg_insn_unit old;
-
-    value += addend;
-    target = (tcg_insn_unit *)value;
-
-    switch (type) {
-    case R_PPC_REL14:
-        reloc_pc14(code_ptr, target);
-        break;
-    case R_PPC_REL24:
-        reloc_pc24(code_ptr, target);
-        break;
-    case R_PPC_ADDR16:
-        assert(value == (int16_t)value);
-        old = *code_ptr;
-        old = deposit32(old, 0, 16, value);
-        *code_ptr = old;
-        break;
-    default:
-        tcg_abort();
-    }
-}
-
 /* parse target specific constraints */
 static const char *target_parse_constraint(TCGArgConstraint *ct,
                                            const char *ct_str, TCGType type)
@@ -552,6 +525,43 @@ static const uint32_t tcg_to_isel[] = {
     [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
 };
 
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+                        intptr_t value, intptr_t addend)
+{
+    tcg_insn_unit *target;
+    tcg_insn_unit old;
+
+    value += addend;
+    target = (tcg_insn_unit *)value;
+
+    switch (type) {
+    case R_PPC_REL14:
+        reloc_pc14(code_ptr, target);
+        break;
+    case R_PPC_REL24:
+        reloc_pc24(code_ptr, target);
+        break;
+    case R_PPC_ADDR16:
+        /* We are abusing this relocation type.  This points to a pair
+           of insns, addis + load.  If the displacement is small, we
+           can nop out the addis.  */
+        if (value == (int16_t)value) {
+            code_ptr[0] = NOP;
+            old = deposit32(code_ptr[1], 0, 16, value);
+            code_ptr[1] = deposit32(old, 16, 5, TCG_REG_TB);
+        } else {
+            int16_t lo = value;
+            int hi = value - lo;
+            assert(hi + lo == value);
+            code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
+            code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                              TCGReg base, tcg_target_long offset);
 
@@ -690,7 +700,8 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
     if (!in_prologue && USE_REG_TB) {
         new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                        -(intptr_t)s->code_gen_ptr);
-        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
+        tcg_out32(s, ADDIS | TAI(ret, TCG_REG_TB, 0));
+        tcg_out32(s, LD | TAI(ret, ret, 0));
         return;
     }
 
-- 
2.14.3