[PATCH v4 6/9] target/ppc: Add IBM PPE42 special instructions

Glenn Miles posted 9 patches 2 weeks, 1 day ago
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, Nicholas Piggin <npiggin@gmail.com>, Chinmay Rath <rathc@linux.ibm.com>, Glenn Miles <milesg@linux.ibm.com>
There is a newer version of this series
[PATCH v4 6/9] target/ppc: Add IBM PPE42 special instructions
Posted by Glenn Miles 2 weeks, 1 day ago
Adds the following instructions exclusively for
IBM PPE42 processors:

  LSKU
  LCXU
  STSKU
  STCXU
  LVD
  LVDU
  LVDX
  STVD
  STVDU
  STVDX
  SLVD
  SRVD
  CMPWBC
  CMPLWBC
  CMPWIBC
  BNBWI
  BNBW
  CLRBWIBC
  CLRBWBC
  DCBQ
  RLDICL
  RLDICR
  RLDIMI

A PPE42 GCC compiler is available here:
https://github.com/open-power/ppe42-gcc

For more information on the PPE42 processors please visit:
https://wiki.raptorcs.com/w/images/a/a3/PPE_42X_Core_Users_Manual.pdf

Signed-off-by: Glenn Miles <milesg@linux.ibm.com>
---
Changes from v3:
  - Removed copy of CHECK_VDR
  - Refactored ld/st instructions

 target/ppc/insn32.decode            |  66 ++-
 target/ppc/translate.c              |  29 +-
 target/ppc/translate/ppe-impl.c.inc | 665 ++++++++++++++++++++++++++++
 3 files changed, 750 insertions(+), 10 deletions(-)
 create mode 100644 target/ppc/translate/ppe-impl.c.inc

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index e53fd2840d..8beb588a2a 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -58,6 +58,10 @@
 %ds_rtp         22:4   !function=times_2
 @DS_rtp         ...... ....0 ra:5 .............. ..             &D rt=%ds_rtp si=%ds_si
 
+%dd_si          3:s13
+&DD             rt ra si:int64_t
+@DD             ...... rt:5 ra:5 ............. . ..             &DD si=%dd_si
+
 &DX_b           vrt b
 %dx_b           6:10 16:5 0:1
 @DX_b           ...... vrt:5  ..... .......... ..... .          &DX_b b=%dx_b
@@ -66,6 +70,11 @@
 %dx_d           6:s10 16:5 0:1
 @DX             ...... rt:5  ..... .......... ..... .           &DX d=%dx_d
 
+%md_sh          1:1 11:5
+%md_mb          5:1 6:5
+&MD             rs ra sh mb rc
+@MD             ...... rs:5 ra:5 ..... ...... ... . rc:1        &MD sh=%md_sh mb=%md_mb
+
 &VA             vrt vra vrb rc
 @VA             ...... vrt:5 vra:5 vrb:5 rc:5 ......            &VA
 
@@ -322,6 +331,13 @@ LDUX            011111 ..... ..... ..... 0000110101 -   @X
 
 LQ              111000 ..... ..... ............ ----    @DQ_rtp
 
+LVD             000101 ..... ..... ................     @D
+LVDU            001001 ..... ..... ................     @D
+LVDX            011111 ..... ..... ..... 0000010001 -   @X
+LSKU            111010 ..... ..... ............. 0 11   @DD
+LCXU            111010 ..... ..... ............. 1 11   @DD
+
+
 ### Fixed-Point Store Instructions
 
 STB             100110 ..... ..... ................     @D
@@ -346,6 +362,11 @@ STDUX           011111 ..... ..... ..... 0010110101 -   @X
 
 STQ             111110 ..... ..... ..............10     @DS_rtp
 
+STVDU           010110 ..... ..... ................     @D
+STVDX           011111 ..... ..... ..... 0010010001 -   @X
+STSKU           111110 ..... ..... ............. 0 11   @DD
+STCXU           111110 ..... ..... ............. 1 11   @DD
+
 ### Fixed-Point Compare Instructions
 
 CMP             011111 ... - . ..... ..... 0000000000 - @X_bfl
@@ -461,8 +482,14 @@ PRTYD           011111 ..... ..... ----- 0010111010 -   @X_sa
 
 BPERMD          011111 ..... ..... ..... 0011111100 -   @X
 CFUGED          011111 ..... ..... ..... 0011011100 -   @X
-CNTLZDM         011111 ..... ..... ..... 0000111011 -   @X
-CNTTZDM         011111 ..... ..... ..... 1000111011 -   @X
+{
+  SLVD            011111 ..... ..... ..... 0000111011 .   @X_rc
+  CNTLZDM         011111 ..... ..... ..... 0000111011 -   @X
+}
+{
+  SRVD            011111 ..... ..... ..... 1000111011 .   @X_rc
+  CNTTZDM         011111 ..... ..... ..... 1000111011 -   @X
+}
 PDEPD           011111 ..... ..... ..... 0010011100 -   @X
 PEXTD           011111 ..... ..... ..... 0010111100 -   @X
 
@@ -981,8 +1008,16 @@ LXSSP           111001 ..... ..... .............. 11    @DS
 STXSSP          111101 ..... ..... .............. 11    @DS
 LXV             111101 ..... ..... ............ . 001   @DQ_TSX
 STXV            111101 ..... ..... ............ . 101   @DQ_TSX
-LXVP            000110 ..... ..... ............ 0000    @DQ_TSXP
-STXVP           000110 ..... ..... ............ 0001    @DQ_TSXP
+
+# STVD PPE instruction overlaps with the LXVP and STXVP instructions
+{
+  STVD          000110 ..... ..... ................     @D
+  [
+    LXVP        000110 ..... ..... ............ 0000    @DQ_TSXP
+    STXVP       000110 ..... ..... ............ 0001    @DQ_TSXP
+  ]
+}
+
 LXVX            011111 ..... ..... ..... 0100 - 01100 . @X_TSX
 STXVX           011111 ..... ..... ..... 0110001100 .   @X_TSX
 LXVPX           011111 ..... ..... ..... 0101001101 -   @X_TSXP
@@ -1300,3 +1335,26 @@ CLRBHRB         011111 ----- ----- ----- 0110101110 -
 ## Misc POWER instructions
 
 ATTN            000000 00000 00000 00000 0100000000 0
+
+# Fused compare-branch instructions for PPE only
+%fcb_bdx        1:s10  !function=times_4
+&FCB            px:bool ra rb:uint64_t bdx lk:bool
+@FCB            ...... .. px:1 .. ra:5 rb:5 .......... lk:1       &FCB bdx=%fcb_bdx
+&FCB_bix        px:bool bix ra rb:uint64_t bdx lk:bool
+@FCB_bix        ...... .. px:1 bix:2 ra:5 rb:5 .......... lk:1    &FCB_bix bdx=%fcb_bdx
+
+CMPWBC          000001 00 . .. ..... ..... .......... .     @FCB_bix
+CMPLWBC         000001 01 . .. ..... ..... .......... .     @FCB_bix
+CMPWIBC         000001 10 . .. ..... ..... .......... .     @FCB_bix
+BNBWI           000001 11 . 00 ..... ..... .......... .     @FCB
+BNBW            000001 11 . 01 ..... ..... .......... .     @FCB
+CLRBWIBC        000001 11 . 10 ..... ..... .......... .     @FCB
+CLRBWBC         000001 11 . 11 ..... ..... .......... .     @FCB
+
+# Data Cache Block Query for PPE only
+DCBQ            011111 ..... ..... ..... 0110010110 -       @X
+
+# Rotate Doubleword Instructions for PPE only (if TARGET_PPC64 not defined)
+RLDICL          011110 ..... ..... ..... ...... 000 . .     @MD
+RLDICR          011110 ..... ..... ..... ...... 001 . .     @MD
+RLDIMI          011110 ..... ..... ..... ...... 011 . .     @MD
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index fc817dab54..d422789a1d 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -209,6 +209,11 @@ struct DisasContext {
 #define DISAS_CHAIN        DISAS_TARGET_2  /* lookup next tb, pc updated */
 #define DISAS_CHAIN_UPDATE DISAS_TARGET_3  /* lookup next tb, pc stale */
 
+static inline bool is_ppe(const DisasContext *ctx)
+{
+    return !!(ctx->flags & POWERPC_FLAG_PPE42);
+}
+
 /* Return true iff byteswap is needed in a scalar memop */
 static inline bool need_byteswap(const DisasContext *ctx)
 {
@@ -556,11 +561,8 @@ void spr_access_nop(DisasContext *ctx, int sprn, int gprn)
 
 #endif
 
-/* SPR common to all PowerPC */
-/* XER */
-void spr_read_xer(DisasContext *ctx, int gprn, int sprn)
+static void gen_get_xer(DisasContext *ctx, TCGv dst)
 {
-    TCGv dst = cpu_gpr[gprn];
     TCGv t0 = tcg_temp_new();
     TCGv t1 = tcg_temp_new();
     TCGv t2 = tcg_temp_new();
@@ -579,9 +581,16 @@ void spr_read_xer(DisasContext *ctx, int gprn, int sprn)
     }
 }
 
-void spr_write_xer(DisasContext *ctx, int sprn, int gprn)
+/* SPR common to all PowerPC */
+/* XER */
+void spr_read_xer(DisasContext *ctx, int gprn, int sprn)
+{
+    TCGv dst = cpu_gpr[gprn];
+    gen_get_xer(ctx, dst);
+}
+
+static void gen_set_xer(DisasContext *ctx, TCGv src)
 {
-    TCGv src = cpu_gpr[gprn];
     /* Write all flags, while reading back check for isa300 */
     tcg_gen_andi_tl(cpu_xer, src,
                     ~((1u << XER_SO) |
@@ -594,6 +603,12 @@ void spr_write_xer(DisasContext *ctx, int sprn, int gprn)
     tcg_gen_extract_tl(cpu_ca, src, XER_CA, 1);
 }
 
+void spr_write_xer(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv src = cpu_gpr[gprn];
+    gen_set_xer(ctx, src);
+}
+
 /* LR */
 void spr_read_lr(DisasContext *ctx, int gprn, int sprn)
 {
@@ -5755,6 +5770,8 @@ static bool resolve_PLS_D(DisasContext *ctx, arg_D *d, arg_PLS_D *a)
 
 #include "translate/bhrb-impl.c.inc"
 
+#include "translate/ppe-impl.c.inc"
+
 /* Handles lfdp */
 static void gen_dform39(DisasContext *ctx)
 {
diff --git a/target/ppc/translate/ppe-impl.c.inc b/target/ppc/translate/ppe-impl.c.inc
new file mode 100644
index 0000000000..792103d7c2
--- /dev/null
+++ b/target/ppc/translate/ppe-impl.c.inc
@@ -0,0 +1,665 @@
+/*
+ * IBM PPE Instructions
+ *
+ * Copyright (c) 2025, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+
+#if !defined(TARGET_PPC64)
+static bool vdr_is_valid(uint32_t vdr)
+{
+    const uint32_t valid_bitmap = 0xf00003ff;
+    return !!((1ul << (vdr & 0x1f)) & valid_bitmap);
+}
+
+static bool ppe_gpr_is_valid(uint32_t reg)
+{
+    const uint32_t valid_bitmap = 0xf00027ff;
+    return !!((1ul << (reg & 0x1f)) & valid_bitmap);
+}
+#endif
+
+#define CHECK_VDR(CTX, VDR)                             \
+    do {                                                \
+        if (unlikely(!vdr_is_valid(VDR))) {             \
+            gen_invalid(CTX);                           \
+            return true;                                \
+        }                                               \
+    } while (0)
+
+#define CHECK_PPE_GPR(CTX, REG)                         \
+    do {                                                \
+        if (unlikely(!ppe_gpr_is_valid(REG))) {         \
+            gen_invalid(CTX);                           \
+            return true;                                \
+        }                                               \
+    } while (0)
+
+#define VDR_PAIR_REG(VDR) (((VDR) + 1) & 0x1f)
+
+#define CHECK_PPE_LEVEL(CTX, LVL)                       \
+    do {                                                \
+        if (unlikely(!((CTX)->insns_flags2 & (LVL)))) { \
+            gen_invalid(CTX);                           \
+            return true;                                \
+        }                                               \
+    } while (0)
+
+static bool trans_LCXU(DisasContext *ctx, arg_LCXU *a)
+{
+#if defined(TARGET_PPC64)
+    return false;
+#else
+    int i;
+    TCGv base, EA;
+    TCGv lo, hi;
+    TCGv_i64 t8;
+    const uint8_t vd_list[] = {9, 7, 5, 3, 0};
+
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
+    CHECK_PPE_GPR(ctx, a->rt);
+
+    if (unlikely((a->rt != a->ra) || (a->ra == 0) || (a->si < 0xB))) {
+        gen_invalid(ctx);
+        return true;
+    }
+
+    EA = tcg_temp_new();
+    base = tcg_temp_new();
+
+    tcg_gen_addi_tl(base, cpu_gpr[a->ra], a->si * 8);
+    gen_store_spr(SPR_PPE42_EDR, base);
+
+    t8 = tcg_temp_new_i64();
+
+    tcg_gen_addi_tl(EA, base, -8);
+    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+    tcg_gen_extr_i64_tl(cpu_gpr[31], cpu_gpr[30], t8);
+
+    tcg_gen_addi_tl(EA, EA, -8);
+    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+    tcg_gen_extr_i64_tl(cpu_gpr[29], cpu_gpr[28], t8);
+
+    lo = tcg_temp_new();
+    hi = tcg_temp_new();
+
+    tcg_gen_addi_tl(EA, EA, -8);
+    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+    tcg_gen_extr_i64_tl(lo, hi, t8);
+    gen_store_spr(SPR_SRR0, hi);
+    gen_store_spr(SPR_SRR1, lo);
+
+    tcg_gen_addi_tl(EA, EA, -8);
+    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+    tcg_gen_extr_i64_tl(lo, hi, t8);
+    gen_set_xer(ctx, hi);
+    tcg_gen_mov_tl(cpu_ctr, lo);
+
+    for (i = 0; i < sizeof(vd_list); i++) {
+        int vd = vd_list[i];
+        tcg_gen_addi_tl(EA, EA, -8);
+        tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+        tcg_gen_extr_i64_tl(cpu_gpr[VDR_PAIR_REG(vd)], cpu_gpr[vd], t8);
+    }
+
+    tcg_gen_addi_tl(EA, EA, -8);
+    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+    tcg_gen_extr_i64_tl(lo, hi, t8);
+    tcg_gen_shri_tl(hi, hi, 28);
+    tcg_gen_trunc_tl_i32(cpu_crf[0], hi);
+    gen_store_spr(SPR_SPRG0, lo);
+
+    tcg_gen_addi_tl(EA, base, 4);
+    tcg_gen_qemu_ld_tl(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN);
+    tcg_gen_mov_tl(cpu_gpr[a->ra], base);
+    return true;
+#endif
+}
+
+static bool trans_LSKU(DisasContext *ctx, arg_LSKU *a)
+{
+#if defined(TARGET_PPC64)
+    return false;
+#else
+    int64_t n;
+    TCGv base, EA;
+    TCGv_i32 lo, hi;
+    TCGv_i64 t8;
+
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+
+    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
+    CHECK_PPE_GPR(ctx, a->rt);
+
+    if (unlikely((a->rt != a->ra) || (a->ra == 0) ||
+                 (a->si & PPC_BIT(0)) || (a->si == 0))) {
+        gen_invalid(ctx);
+        return true;
+    }
+
+    EA = tcg_temp_new();
+    base = tcg_temp_new();
+    gen_addr_register(ctx, base);
+
+
+    tcg_gen_addi_tl(base, base, a->si * 8);
+    gen_store_spr(SPR_PPE42_EDR, base);
+
+    n = a->si - 1;
+    t8 = tcg_temp_new_i64();
+    if (n > 0) {
+        tcg_gen_addi_tl(EA, base, -8);
+        tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+        hi = cpu_gpr[30];
+        lo = cpu_gpr[31];
+        tcg_gen_extr_i64_i32(lo, hi, t8);
+    }
+    if (n > 1) {
+        tcg_gen_addi_tl(EA, base, -16);
+        tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+        hi = cpu_gpr[28];
+        lo = cpu_gpr[29];
+        tcg_gen_extr_i64_i32(lo, hi, t8);
+    }
+    tcg_gen_addi_tl(EA, base, 4);
+    tcg_gen_qemu_ld_i32(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN);
+    tcg_gen_mov_tl(cpu_gpr[a->ra], base);
+    return true;
+#endif
+}
+
+static bool trans_STCXU(DisasContext *ctx, arg_STCXU *a)
+{
+#if defined(TARGET_PPC64)
+    return false;
+#else
+    TCGv EA;
+    TCGv lo, hi;
+    TCGv_i64 t8;
+    int i;
+    const uint8_t vd_list[] = {9, 7, 5, 3, 0};
+
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+
+    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
+    CHECK_PPE_GPR(ctx, a->rt);
+
+    if (unlikely((a->rt != a->ra) || (a->ra == 0) || !(a->si & PPC_BIT(0)))) {
+        gen_invalid(ctx);
+        return true;
+    }
+
+    EA = tcg_temp_new();
+    tcg_gen_addi_tl(EA, cpu_gpr[a->ra], 4);
+    tcg_gen_qemu_st_i32(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN);
+
+    gen_store_spr(SPR_PPE42_EDR, cpu_gpr[a->ra]);
+
+    t8 = tcg_temp_new_i64();
+
+    tcg_gen_concat_tl_i64(t8, cpu_gpr[31], cpu_gpr[30]);
+    tcg_gen_addi_tl(EA, cpu_gpr[a->ra], -8);
+    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+
+    tcg_gen_concat_tl_i64(t8, cpu_gpr[29], cpu_gpr[28]);
+    tcg_gen_addi_tl(EA, EA, -8);
+    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+
+    lo = tcg_temp_new();
+    hi = tcg_temp_new();
+
+    gen_load_spr(hi, SPR_SRR0);
+    gen_load_spr(lo, SPR_SRR1);
+    tcg_gen_concat_tl_i64(t8, lo, hi);
+    tcg_gen_addi_tl(EA, EA, -8);
+    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+
+    gen_get_xer(ctx, hi);
+    tcg_gen_mov_tl(lo, cpu_ctr);
+    tcg_gen_concat_tl_i64(t8, lo, hi);
+    tcg_gen_addi_tl(EA, EA, -8);
+    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+
+    for (i = 0; i < sizeof(vd_list); i++) {
+        int vd = vd_list[i];
+        tcg_gen_concat_tl_i64(t8, cpu_gpr[VDR_PAIR_REG(vd)], cpu_gpr[vd]);
+        tcg_gen_addi_tl(EA, EA, -8);
+        tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+    }
+
+    gen_load_spr(lo, SPR_SPRG0);
+    tcg_gen_extu_i32_tl(hi, cpu_crf[0]);
+    tcg_gen_shli_tl(hi, hi, 28);
+    tcg_gen_concat_tl_i64(t8, lo, hi);
+    tcg_gen_addi_tl(EA, EA, -8);
+    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+
+    tcg_gen_addi_tl(EA, cpu_gpr[a->ra], a->si * 8);
+    tcg_gen_qemu_st_i32(cpu_gpr[a->rt], EA, ctx->mem_idx, DEF_MEMOP(MO_32) |
+                                                          MO_ALIGN);
+    tcg_gen_mov_tl(cpu_gpr[a->ra], EA);
+    return true;
+#endif
+}
+
+static bool trans_STSKU(DisasContext *ctx, arg_STSKU *a)
+{
+#if defined(TARGET_PPC64)
+    return false;
+#else
+    int64_t n;
+    TCGv base, EA;
+    TCGv_i32 lo, hi;
+    TCGv_i64 t8;
+
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+
+    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
+    CHECK_PPE_GPR(ctx, a->rt);
+
+    if (unlikely((a->rt != a->ra) || (a->ra == 0) || !(a->si & PPC_BIT(0)))) {
+        gen_invalid(ctx);
+        return true;
+    }
+
+    EA = tcg_temp_new();
+    base = tcg_temp_new();
+    gen_addr_register(ctx, base);
+    tcg_gen_addi_tl(EA, base, 4);
+    tcg_gen_qemu_st_i32(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN);
+
+    gen_store_spr(SPR_PPE42_EDR, base);
+
+    n = ~(a->si);
+
+    t8 = tcg_temp_new_i64();
+    if (n > 0) {
+        hi = cpu_gpr[30];
+        lo = cpu_gpr[31];
+        tcg_gen_concat_i32_i64(t8, lo, hi);
+        tcg_gen_addi_tl(EA, base, -8);
+        tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+    }
+    if (n > 1) {
+        hi = cpu_gpr[28];
+        lo = cpu_gpr[29];
+        tcg_gen_concat_i32_i64(t8, lo, hi);
+        tcg_gen_addi_tl(EA, base, -16);
+        tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
+    }
+
+    tcg_gen_addi_tl(EA, base, a->si * 8);
+    tcg_gen_qemu_st_i32(cpu_gpr[a->rt], EA, ctx->mem_idx, DEF_MEMOP(MO_32) |
+                                                          MO_ALIGN);
+    tcg_gen_mov_tl(cpu_gpr[a->ra], EA);
+    return true;
+#endif
+}
+
+#if !defined(TARGET_PPC64)
+static bool do_ppe_ldst(DisasContext *ctx, int rt, int ra, TCGv disp,
+                        bool update, bool store)
+{
+    TCGv ea;
+    int rt_lo;
+    TCGv_i64 t8;
+
+    CHECK_VDR(ctx, rt);
+    CHECK_PPE_GPR(ctx, ra);
+    rt_lo = VDR_PAIR_REG(rt);
+    if (update && (ra == 0 || (!store && ((ra == rt) || (ra == rt_lo))))) {
+        gen_invalid(ctx);
+        return true;
+    }
+    gen_set_access_type(ctx, ACCESS_INT);
+
+    ea = do_ea_calc(ctx, ra, disp);
+    t8 = tcg_temp_new_i64();
+    if (store) {
+        tcg_gen_concat_i32_i64(t8, cpu_gpr[rt_lo], cpu_gpr[rt]);
+        tcg_gen_qemu_st_i64(t8, ea, ctx->mem_idx, DEF_MEMOP(MO_64));
+    } else {
+        tcg_gen_qemu_ld_i64(t8, ea, ctx->mem_idx, DEF_MEMOP(MO_64));
+        tcg_gen_extr_i64_i32(cpu_gpr[rt_lo], cpu_gpr[rt], t8);
+    }
+    if (update) {
+        tcg_gen_mov_tl(cpu_gpr[ra], ea);
+    }
+    return true;
+}
+#endif
+
+static bool do_ppe_ldst_D(DisasContext *ctx, arg_D *a, bool update, bool store)
+{
+#if defined(TARGET_PPC64)
+    return false;
+#else
+    /* Some PowerPC CPUs have a different meaning for the STVD instruction */
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+    return do_ppe_ldst(ctx, a->rt, a->ra, tcg_constant_tl(a->si), update,
+                       store);
+#endif
+}
+
+static bool do_ppe_ldst_X(DisasContext *ctx, arg_X *a, bool store)
+{
+#if defined(TARGET_PPC64)
+    return false;
+#else
+    CHECK_PPE_GPR(ctx, a->rb);
+    return do_ppe_ldst(ctx, a->rt, a->ra, cpu_gpr[a->rb], false, store);
+#endif
+}
+
+TRANS(LVD,   do_ppe_ldst_D, false, false)
+TRANS(LVDU,  do_ppe_ldst_D, true,  false)
+TRANS(STVD,  do_ppe_ldst_D, false, true)
+TRANS(STVDU, do_ppe_ldst_D, true,  true)
+TRANS(LVDX,  do_ppe_ldst_X, false)
+TRANS(STVDX, do_ppe_ldst_X, true)
+
+
+#if !defined(TARGET_PPC64)
+static bool do_fcb(DisasContext *ctx, TCGv ra_val, TCGv rb_val, int bix,
+                          int32_t bdx, bool s, bool px, bool lk)
+{
+    TCGCond cond;
+    uint32_t mask;
+    TCGLabel *no_branch;
+    target_ulong dest;
+
+    /* Update CR0 */
+    gen_op_cmp32(ra_val, rb_val, s, 0);
+
+    if (lk) {
+        gen_setlr(ctx, ctx->base.pc_next);
+    }
+
+
+    mask = PPC_BIT32(28 + bix);
+    cond = (px) ? TCG_COND_TSTEQ : TCG_COND_TSTNE;
+    no_branch = gen_new_label();
+    dest = ctx->cia + bdx;
+
+    /* Do the branch if CR0[bix] == PX */
+    tcg_gen_brcondi_i32(cond, cpu_crf[0], mask, no_branch);
+    gen_goto_tb(ctx, 0, dest);
+    gen_set_label(no_branch);
+    gen_goto_tb(ctx, 1, ctx->base.pc_next);
+    ctx->base.is_jmp = DISAS_NORETURN;
+    return true;
+}
+#endif
+
+static bool do_cmp_branch(DisasContext *ctx, arg_FCB_bix *a, bool s,
+                          bool rb_is_gpr)
+{
+#if defined(TARGET_PPC64)
+    return false;
+#else
+    TCGv old_ra;
+    TCGv rb_val;
+
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+    CHECK_PPE_GPR(ctx, a->ra);
+    if (rb_is_gpr) {
+        CHECK_PPE_GPR(ctx, a->rb);
+        rb_val = cpu_gpr[a->rb];
+    } else {
+        rb_val = tcg_constant_tl(a->rb);
+    }
+    if (a->bix == 3) {
+        old_ra = tcg_temp_new();
+        tcg_gen_mov_tl(old_ra, cpu_gpr[a->ra]);
+        tcg_gen_sub_tl(cpu_gpr[a->ra], cpu_gpr[a->ra], rb_val);
+        return do_fcb(ctx, old_ra, rb_val, 2,
+                      a->bdx, s, a->px, a->lk);
+    } else {
+        return do_fcb(ctx, cpu_gpr[a->ra], rb_val, a->bix,
+                      a->bdx, s, a->px, a->lk);
+    }
+#endif
+}
+
+TRANS(CMPWBC, do_cmp_branch, true, true)
+TRANS(CMPLWBC, do_cmp_branch, false, true)
+TRANS(CMPWIBC, do_cmp_branch, true, false)
+
+static bool do_mask_branch(DisasContext *ctx, arg_FCB * a, bool invert,
+                           bool update, bool rb_is_gpr)
+{
+#if defined(TARGET_PPC64)
+    return false;
+#else
+    TCGv r;
+    TCGv mask, shift;
+
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+    CHECK_PPE_GPR(ctx, a->ra);
+    if (rb_is_gpr) {
+        CHECK_PPE_GPR(ctx, a->rb);
+        mask = tcg_temp_new();
+        shift = tcg_temp_new();
+        tcg_gen_andi_tl(shift, cpu_gpr[a->rb], 0x1f);
+        tcg_gen_shr_tl(mask, tcg_constant_tl(0x80000000), shift);
+    } else {
+        mask = tcg_constant_tl(PPC_BIT32(a->rb));
+    }
+    if (invert) {
+        tcg_gen_not_tl(mask, mask);
+    }
+
+    /* apply mask to ra */
+    r = tcg_temp_new();
+    tcg_gen_and_tl(r, cpu_gpr[a->ra], mask);
+    if (update) {
+        tcg_gen_mov_tl(cpu_gpr[a->ra], r);
+    }
+    return do_fcb(ctx, r, tcg_constant_tl(0), 2,
+                  a->bdx, false, a->px, a->lk);
+#endif
+}
+
+TRANS(BNBWI,    do_mask_branch, false, false, false)
+TRANS(BNBW,     do_mask_branch, false, false, true)
+TRANS(CLRBWIBC, do_mask_branch, true,  true,  false)
+TRANS(CLRBWBC,  do_mask_branch, true,  true,  true)
+
+#if !defined(TARGET_PPC64)
+static void gen_set_Rc0_i64(DisasContext *ctx, TCGv_i64 reg)
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i32 t = tcg_temp_new_i32();
+
+    tcg_gen_movi_i64(t0, CRF_EQ);
+    tcg_gen_movi_i64(t1, CRF_LT);
+    tcg_gen_movcond_i64(TCG_COND_LT, t0, reg, tcg_constant_i64(0), t1, t0);
+    tcg_gen_movi_i64(t1, CRF_GT);
+    tcg_gen_movcond_i64(TCG_COND_GT, t0, reg, tcg_constant_i64(0), t1, t0);
+    tcg_gen_extrl_i64_i32(t, t0);
+    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
+    tcg_gen_or_i32(cpu_crf[0], cpu_crf[0], t);
+}
+#endif
+
+static bool do_shift64(DisasContext *ctx, arg_X_rc *a, bool left)
+{
+#if defined(TARGET_PPC64)
+    return false;
+#else
+    int rt_lo, ra_lo;
+    TCGv_i64 t0, t8;
+
+    /* Check for PPE since opcode overlaps with CNTTZDM instruction */
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
+    CHECK_VDR(ctx, a->rt);
+    CHECK_VDR(ctx, a->ra);
+    CHECK_PPE_GPR(ctx, a->rb);
+    rt_lo = VDR_PAIR_REG(a->rt);
+    ra_lo = VDR_PAIR_REG(a->ra);
+    t8 = tcg_temp_new_i64();
+
+    /* AND rt with a mask that is 0 when rb >= 0x40 */
+    t0 = tcg_temp_new_i64();
+    tcg_gen_extu_tl_i64(t0, cpu_gpr[a->rb]);
+    tcg_gen_shli_i64(t0, t0, 0x39);
+    tcg_gen_sari_i64(t0, t0, 0x3f);
+
+    /* form 64bit value from two 32bit regs */
+    tcg_gen_concat_tl_i64(t8, cpu_gpr[rt_lo], cpu_gpr[a->rt]);
+
+    /* apply mask */
+    tcg_gen_andc_i64(t8, t8, t0);
+
+    /* do the shift */
+    tcg_gen_extu_tl_i64(t0, cpu_gpr[a->rb]);
+    tcg_gen_andi_i64(t0, t0, 0x3f);
+    if (left) {
+        tcg_gen_shl_i64(t8, t8, t0);
+    } else {
+        tcg_gen_shr_i64(t8, t8, t0);
+    }
+
+    /* split the 64bit word back into two 32bit regs */
+    tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t8);
+
+    /* update CR0 if requested */
+    if (unlikely(a->rc != 0)) {
+        gen_set_Rc0_i64(ctx, t8);
+    }
+    return true;
+#endif
+}
+
+TRANS(SRVD, do_shift64, false)
+TRANS(SLVD, do_shift64, true)
+
+static bool trans_DCBQ(DisasContext *ctx, arg_DCBQ * a)
+{
+#if defined(TARGET_PPC64)
+    return false;
+#else
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+
+    CHECK_PPE_GPR(ctx, a->rt);
+    CHECK_PPE_GPR(ctx, a->ra);
+    CHECK_PPE_GPR(ctx, a->rb);
+
+    /* No cache exists, so just set RT to 0 */
+    tcg_gen_movi_tl(cpu_gpr[a->rt], 0);
+    return true;
+#endif
+}
+
+static bool trans_RLDIMI(DisasContext *ctx, arg_RLDIMI *a)
+{
+#if defined(TARGET_PPC64)
+    return false;
+#else
+    TCGv_i64 t_rs, t_ra;
+    int ra_lo, rs_lo;
+    uint32_t sh = a->sh;
+    uint32_t mb = a->mb;
+    uint32_t me = 63 - sh;
+
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
+    CHECK_VDR(ctx, a->rs);
+    CHECK_VDR(ctx, a->ra);
+
+    rs_lo = VDR_PAIR_REG(a->rs);
+    ra_lo = VDR_PAIR_REG(a->ra);
+
+    t_rs = tcg_temp_new_i64();
+    t_ra = tcg_temp_new_i64();
+
+    tcg_gen_concat_tl_i64(t_rs, cpu_gpr[rs_lo], cpu_gpr[a->rs]);
+    tcg_gen_concat_tl_i64(t_ra, cpu_gpr[ra_lo], cpu_gpr[a->ra]);
+
+    if (mb <= me) {
+        tcg_gen_deposit_i64(t_ra, t_ra, t_rs, sh, me - mb + 1);
+    } else {
+        uint64_t mask = mask_u64(mb, me);
+        TCGv_i64 t1 = tcg_temp_new_i64();
+
+        tcg_gen_rotli_i64(t1, t_rs, sh);
+        tcg_gen_andi_i64(t1, t1, mask);
+        tcg_gen_andi_i64(t_ra, t_ra, ~mask);
+        tcg_gen_or_i64(t_ra, t_ra, t1);
+    }
+
+    tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t_ra);
+
+    if (unlikely(a->rc != 0)) {
+        gen_set_Rc0_i64(ctx, t_ra);
+    }
+    return true;
+#endif
+}
+
+
+static bool gen_rldinm_i64(DisasContext *ctx, arg_MD *a, int mb, int me, int sh)
+{
+#if defined(TARGET_PPC64)
+    return false;
+#else
+    int len = me - mb + 1;
+    int rsh = (64 - sh) & 63;
+    int ra_lo, rs_lo;
+    TCGv_i64 t8;
+
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
+    CHECK_VDR(ctx, a->rs);
+    CHECK_VDR(ctx, a->ra);
+
+    rs_lo = VDR_PAIR_REG(a->rs);
+    ra_lo = VDR_PAIR_REG(a->ra);
+    t8 = tcg_temp_new_i64();
+    tcg_gen_concat_tl_i64(t8, cpu_gpr[rs_lo], cpu_gpr[a->rs]);
+    if (sh != 0 && len > 0 && me == (63 - sh)) {
+        tcg_gen_deposit_z_i64(t8, t8, sh, len);
+    } else if (me == 63 && rsh + len <= 64) {
+        tcg_gen_extract_i64(t8, t8, rsh, len);
+    } else {
+        tcg_gen_rotli_i64(t8, t8, sh);
+        tcg_gen_andi_i64(t8, t8, mask_u64(mb, me));
+    }
+    tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t8);
+    if (unlikely(a->rc != 0)) {
+        gen_set_Rc0_i64(ctx, t8);
+    }
+    return true;
+#endif
+}
+
+TRANS(RLDICL, gen_rldinm_i64, a->mb, 63, a->sh)
+TRANS(RLDICR, gen_rldinm_i64, 0, a->mb, a->sh)
+
-- 
2.43.0
Re: [PATCH v4 6/9] target/ppc: Add IBM PPE42 special instructions
Posted by Harsh Prateek Bora 1 week, 4 days ago

On 9/12/25 22:17, Glenn Miles wrote:
> Adds the following instructions exclusively for
> IBM PPE42 processors:
> 
>    LSKU
>    LCXU
>    STSKU
>    STCXU
>    LVD
>    LVDU
>    LVDX
>    STVD
>    STVDU
>    STVDX
>    SLVD
>    SRVD
>    CMPWBC
>    CMPLWBC
>    CMPWIBC
>    BNBWI
>    BNBW
>    CLRBWIBC
>    CLRBWBC
>    DCBQ
>    RLDICL
>    RLDICR
>    RLDIMI
> 
> A PPE42 GCC compiler is available here:
> https://github.com/open-power/ppe42-gcc
> 
> For more information on the PPE42 processors please visit:
> https://wiki.raptorcs.com/w/images/a/a3/PPE_42X_Core_Users_Manual.pdf
> 
> Signed-off-by: Glenn Miles <milesg@linux.ibm.com>
> ---
> Changes from v3:
>    - Removed copy of CHECK_VDR
>    - Refactored ld/st instructions
> 
>   target/ppc/insn32.decode            |  66 ++-
>   target/ppc/translate.c              |  29 +-
>   target/ppc/translate/ppe-impl.c.inc | 665 ++++++++++++++++++++++++++++
>   3 files changed, 750 insertions(+), 10 deletions(-)
>   create mode 100644 target/ppc/translate/ppe-impl.c.inc
> 

<snip>

> diff --git a/target/ppc/translate/ppe-impl.c.inc b/target/ppc/translate/ppe-impl.c.inc
> new file mode 100644
> index 0000000000..792103d7c2
> --- /dev/null
> +++ b/target/ppc/translate/ppe-impl.c.inc
> @@ -0,0 +1,665 @@
> +/*
> + * IBM PPE Instructions
> + *
> + * Copyright (c) 2025, IBM Corporation.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +
> +#if !defined(TARGET_PPC64)
> +static bool vdr_is_valid(uint32_t vdr)
> +{
> +    const uint32_t valid_bitmap = 0xf00003ff;
> +    return !!((1ul << (vdr & 0x1f)) & valid_bitmap);
> +}
> +
> +static bool ppe_gpr_is_valid(uint32_t reg)
> +{
> +    const uint32_t valid_bitmap = 0xf00027ff;
> +    return !!((1ul << (reg & 0x1f)) & valid_bitmap);
> +}
> +#endif
> +
> +#define CHECK_VDR(CTX, VDR)                             \
> +    do {                                                \
> +        if (unlikely(!vdr_is_valid(VDR))) {             \
> +            gen_invalid(CTX);                           \
> +            return true;                                \
> +        }                                               \
> +    } while (0)
> +
> +#define CHECK_PPE_GPR(CTX, REG)                         \
> +    do {                                                \
> +        if (unlikely(!ppe_gpr_is_valid(REG))) {         \
> +            gen_invalid(CTX);                           \
> +            return true;                                \
> +        }                                               \
> +    } while (0)
> +
> +#define VDR_PAIR_REG(VDR) (((VDR) + 1) & 0x1f)
> +
> +#define CHECK_PPE_LEVEL(CTX, LVL)                       \
> +    do {                                                \
> +        if (unlikely(!((CTX)->insns_flags2 & (LVL)))) { \
> +            gen_invalid(CTX);                           \
> +            return true;                                \
> +        }                                               \
> +    } while (0)
> +
> +static bool trans_LCXU(DisasContext *ctx, arg_LCXU *a)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else

If we are building the PPE42 instructions only for !TARGET_PPC64, are
they still usable with qemu-system-ppc64?
If not, we may want to use the conditional compilation in
hreg_compute_hflags_value() as well. Otherwise, we may need a run-time
check to identify if it's a PPC32 machine to support 32-bit machines with
qemu-system-ppc64.

regards,
Harsh
> +    int i;
> +    TCGv base, EA;
> +    TCGv lo, hi;
> +    TCGv_i64 t8;
> +    const uint8_t vd_list[] = {9, 7, 5, 3, 0};
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_PPE_GPR(ctx, a->rt);
> +
> +    if (unlikely((a->rt != a->ra) || (a->ra == 0) || (a->si < 0xB))) {
> +        gen_invalid(ctx);
> +        return true;
> +    }
> +
> +    EA = tcg_temp_new();
> +    base = tcg_temp_new();
> +
> +    tcg_gen_addi_tl(base, cpu_gpr[a->ra], a->si * 8);
> +    gen_store_spr(SPR_PPE42_EDR, base);
> +
> +    t8 = tcg_temp_new_i64();
> +
> +    tcg_gen_addi_tl(EA, base, -8);
> +    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    tcg_gen_extr_i64_tl(cpu_gpr[31], cpu_gpr[30], t8);
> +
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    tcg_gen_extr_i64_tl(cpu_gpr[29], cpu_gpr[28], t8);
> +
> +    lo = tcg_temp_new();
> +    hi = tcg_temp_new();
> +
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    tcg_gen_extr_i64_tl(lo, hi, t8);
> +    gen_store_spr(SPR_SRR0, hi);
> +    gen_store_spr(SPR_SRR1, lo);
> +
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    tcg_gen_extr_i64_tl(lo, hi, t8);
> +    gen_set_xer(ctx, hi);
> +    tcg_gen_mov_tl(cpu_ctr, lo);
> +
> +    for (i = 0; i < sizeof(vd_list); i++) {
> +        int vd = vd_list[i];
> +        tcg_gen_addi_tl(EA, EA, -8);
> +        tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +        tcg_gen_extr_i64_tl(cpu_gpr[VDR_PAIR_REG(vd)], cpu_gpr[vd], t8);
> +    }
> +
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    tcg_gen_extr_i64_tl(lo, hi, t8);
> +    tcg_gen_shri_tl(hi, hi, 28);
> +    tcg_gen_trunc_tl_i32(cpu_crf[0], hi);
> +    gen_store_spr(SPR_SPRG0, lo);
> +
> +    tcg_gen_addi_tl(EA, base, 4);
> +    tcg_gen_qemu_ld_tl(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN);
> +    tcg_gen_mov_tl(cpu_gpr[a->ra], base);
> +    return true;
> +#endif
> +}
> +
> +static bool trans_LSKU(DisasContext *ctx, arg_LSKU *a)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    int64_t n;
> +    TCGv base, EA;
> +    TCGv_i32 lo, hi;
> +    TCGv_i64 t8;
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_PPE_GPR(ctx, a->rt);
> +
> +    if (unlikely((a->rt != a->ra) || (a->ra == 0) ||
> +                 (a->si & PPC_BIT(0)) || (a->si == 0))) {
> +        gen_invalid(ctx);
> +        return true;
> +    }
> +
> +    EA = tcg_temp_new();
> +    base = tcg_temp_new();
> +    gen_addr_register(ctx, base);
> +
> +
> +    tcg_gen_addi_tl(base, base, a->si * 8);
> +    gen_store_spr(SPR_PPE42_EDR, base);
> +
> +    n = a->si - 1;
> +    t8 = tcg_temp_new_i64();
> +    if (n > 0) {
> +        tcg_gen_addi_tl(EA, base, -8);
> +        tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +        hi = cpu_gpr[30];
> +        lo = cpu_gpr[31];
> +        tcg_gen_extr_i64_i32(lo, hi, t8);
> +    }
> +    if (n > 1) {
> +        tcg_gen_addi_tl(EA, base, -16);
> +        tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +        hi = cpu_gpr[28];
> +        lo = cpu_gpr[29];
> +        tcg_gen_extr_i64_i32(lo, hi, t8);
> +    }
> +    tcg_gen_addi_tl(EA, base, 4);
> +    tcg_gen_qemu_ld_i32(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN);
> +    tcg_gen_mov_tl(cpu_gpr[a->ra], base);
> +    return true;
> +#endif
> +}
> +
> +static bool trans_STCXU(DisasContext *ctx, arg_STCXU *a)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    TCGv EA;
> +    TCGv lo, hi;
> +    TCGv_i64 t8;
> +    int i;
> +    const uint8_t vd_list[] = {9, 7, 5, 3, 0};
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_PPE_GPR(ctx, a->rt);
> +
> +    if (unlikely((a->rt != a->ra) || (a->ra == 0) || !(a->si & PPC_BIT(0)))) {
> +        gen_invalid(ctx);
> +        return true;
> +    }
> +
> +    EA = tcg_temp_new();
> +    tcg_gen_addi_tl(EA, cpu_gpr[a->ra], 4);
> +    tcg_gen_qemu_st_i32(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN);
> +
> +    gen_store_spr(SPR_PPE42_EDR, cpu_gpr[a->ra]);
> +
> +    t8 = tcg_temp_new_i64();
> +
> +    tcg_gen_concat_tl_i64(t8, cpu_gpr[31], cpu_gpr[30]);
> +    tcg_gen_addi_tl(EA, cpu_gpr[a->ra], -8);
> +    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +
> +    tcg_gen_concat_tl_i64(t8, cpu_gpr[29], cpu_gpr[28]);
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +
> +    lo = tcg_temp_new();
> +    hi = tcg_temp_new();
> +
> +    gen_load_spr(hi, SPR_SRR0);
> +    gen_load_spr(lo, SPR_SRR1);
> +    tcg_gen_concat_tl_i64(t8, lo, hi);
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +
> +    gen_get_xer(ctx, hi);
> +    tcg_gen_mov_tl(lo, cpu_ctr);
> +    tcg_gen_concat_tl_i64(t8, lo, hi);
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +
> +    for (i = 0; i < sizeof(vd_list); i++) {
> +        int vd = vd_list[i];
> +        tcg_gen_concat_tl_i64(t8, cpu_gpr[VDR_PAIR_REG(vd)], cpu_gpr[vd]);
> +        tcg_gen_addi_tl(EA, EA, -8);
> +        tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    }
> +
> +    gen_load_spr(lo, SPR_SPRG0);
> +    tcg_gen_extu_i32_tl(hi, cpu_crf[0]);
> +    tcg_gen_shli_tl(hi, hi, 28);
> +    tcg_gen_concat_tl_i64(t8, lo, hi);
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +
> +    tcg_gen_addi_tl(EA, cpu_gpr[a->ra], a->si * 8);
> +    tcg_gen_qemu_st_i32(cpu_gpr[a->rt], EA, ctx->mem_idx, DEF_MEMOP(MO_32) |
> +                                                          MO_ALIGN);
> +    tcg_gen_mov_tl(cpu_gpr[a->ra], EA);
> +    return true;
> +#endif
> +}
> +
> +static bool trans_STSKU(DisasContext *ctx, arg_STSKU *a)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    int64_t n;
> +    TCGv base, EA;
> +    TCGv_i32 lo, hi;
> +    TCGv_i64 t8;
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_PPE_GPR(ctx, a->rt);
> +
> +    if (unlikely((a->rt != a->ra) || (a->ra == 0) || !(a->si & PPC_BIT(0)))) {
> +        gen_invalid(ctx);
> +        return true;
> +    }
> +
> +    EA = tcg_temp_new();
> +    base = tcg_temp_new();
> +    gen_addr_register(ctx, base);
> +    tcg_gen_addi_tl(EA, base, 4);
> +    tcg_gen_qemu_st_i32(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN);
> +
> +    gen_store_spr(SPR_PPE42_EDR, base);
> +
> +    n = ~(a->si);
> +
> +    t8 = tcg_temp_new_i64();
> +    if (n > 0) {
> +        hi = cpu_gpr[30];
> +        lo = cpu_gpr[31];
> +        tcg_gen_concat_i32_i64(t8, lo, hi);
> +        tcg_gen_addi_tl(EA, base, -8);
> +        tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    }
> +    if (n > 1) {
> +        hi = cpu_gpr[28];
> +        lo = cpu_gpr[29];
> +        tcg_gen_concat_i32_i64(t8, lo, hi);
> +        tcg_gen_addi_tl(EA, base, -16);
> +        tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    }
> +
> +    tcg_gen_addi_tl(EA, base, a->si * 8);
> +    tcg_gen_qemu_st_i32(cpu_gpr[a->rt], EA, ctx->mem_idx, DEF_MEMOP(MO_32) |
> +                                                          MO_ALIGN);
> +    tcg_gen_mov_tl(cpu_gpr[a->ra], EA);
> +    return true;
> +#endif
> +}
> +
> +#if !defined(TARGET_PPC64)
> +static bool do_ppe_ldst(DisasContext *ctx, int rt, int ra, TCGv disp,
> +                        bool update, bool store)
> +{
> +    TCGv ea;
> +    int rt_lo;
> +    TCGv_i64 t8;
> +
> +    CHECK_VDR(ctx, rt);
> +    CHECK_PPE_GPR(ctx, ra);
> +    rt_lo = VDR_PAIR_REG(rt);
> +    if (update && (ra == 0 || (!store && ((ra == rt) || (ra == rt_lo))))) {
> +        gen_invalid(ctx);
> +        return true;
> +    }
> +    gen_set_access_type(ctx, ACCESS_INT);
> +
> +    ea = do_ea_calc(ctx, ra, disp);
> +    t8 = tcg_temp_new_i64();
> +    if (store) {
> +        tcg_gen_concat_i32_i64(t8, cpu_gpr[rt_lo], cpu_gpr[rt]);
> +        tcg_gen_qemu_st_i64(t8, ea, ctx->mem_idx, DEF_MEMOP(MO_64));
> +    } else {
> +        tcg_gen_qemu_ld_i64(t8, ea, ctx->mem_idx, DEF_MEMOP(MO_64));
> +        tcg_gen_extr_i64_i32(cpu_gpr[rt_lo], cpu_gpr[rt], t8);
> +    }
> +    if (update) {
> +        tcg_gen_mov_tl(cpu_gpr[ra], ea);
> +    }
> +    return true;
> +}
> +#endif
> +
> +static bool do_ppe_ldst_D(DisasContext *ctx, arg_D *a, bool update, bool store)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    /* Some PowerPC CPUs have a different meaning for the STVD instruction */
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    return do_ppe_ldst(ctx, a->rt, a->ra, tcg_constant_tl(a->si), update,
> +                       store);
> +#endif
> +}
> +
> +static bool do_ppe_ldst_X(DisasContext *ctx, arg_X *a, bool store)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    CHECK_PPE_GPR(ctx, a->rb);
> +    return do_ppe_ldst(ctx, a->rt, a->ra, cpu_gpr[a->rb], false, store);
> +#endif
> +}
> +
> +TRANS(LVD,   do_ppe_ldst_D, false, false)
> +TRANS(LVDU,  do_ppe_ldst_D, true,  false)
> +TRANS(STVD,  do_ppe_ldst_D, false, true)
> +TRANS(STVDU, do_ppe_ldst_D, true,  true)
> +TRANS(LVDX,  do_ppe_ldst_X, false)
> +TRANS(STVDX, do_ppe_ldst_X, true)
> +
> +
> +#if !defined(TARGET_PPC64)
> +static bool do_fcb(DisasContext *ctx, TCGv ra_val, TCGv rb_val, int bix,
> +                          int32_t bdx, bool s, bool px, bool lk)
> +{
> +    TCGCond cond;
> +    uint32_t mask;
> +    TCGLabel *no_branch;
> +    target_ulong dest;
> +
> +    /* Update CR0 */
> +    gen_op_cmp32(ra_val, rb_val, s, 0);
> +
> +    if (lk) {
> +        gen_setlr(ctx, ctx->base.pc_next);
> +    }
> +
> +
> +    mask = PPC_BIT32(28 + bix);
> +    cond = (px) ? TCG_COND_TSTEQ : TCG_COND_TSTNE;
> +    no_branch = gen_new_label();
> +    dest = ctx->cia + bdx;
> +
> +    /* Do the branch if CR0[bix] == PX */
> +    tcg_gen_brcondi_i32(cond, cpu_crf[0], mask, no_branch);
> +    gen_goto_tb(ctx, 0, dest);
> +    gen_set_label(no_branch);
> +    gen_goto_tb(ctx, 1, ctx->base.pc_next);
> +    ctx->base.is_jmp = DISAS_NORETURN;
> +    return true;
> +}
> +#endif
> +
> +static bool do_cmp_branch(DisasContext *ctx, arg_FCB_bix *a, bool s,
> +                          bool rb_is_gpr)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    TCGv old_ra;
> +    TCGv rb_val;
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    CHECK_PPE_GPR(ctx, a->ra);
> +    if (rb_is_gpr) {
> +        CHECK_PPE_GPR(ctx, a->rb);
> +        rb_val = cpu_gpr[a->rb];
> +    } else {
> +        rb_val = tcg_constant_tl(a->rb);
> +    }
> +    if (a->bix == 3) {
> +        old_ra = tcg_temp_new();
> +        tcg_gen_mov_tl(old_ra, cpu_gpr[a->ra]);
> +        tcg_gen_sub_tl(cpu_gpr[a->ra], cpu_gpr[a->ra], rb_val);
> +        return do_fcb(ctx, old_ra, rb_val, 2,
> +                      a->bdx, s, a->px, a->lk);
> +    } else {
> +        return do_fcb(ctx, cpu_gpr[a->ra], rb_val, a->bix,
> +                      a->bdx, s, a->px, a->lk);
> +    }
> +#endif
> +}
> +
> +TRANS(CMPWBC, do_cmp_branch, true, true)
> +TRANS(CMPLWBC, do_cmp_branch, false, true)
> +TRANS(CMPWIBC, do_cmp_branch, true, false)
> +
> +static bool do_mask_branch(DisasContext *ctx, arg_FCB * a, bool invert,
> +                           bool update, bool rb_is_gpr)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    TCGv r;
> +    TCGv mask, shift;
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    CHECK_PPE_GPR(ctx, a->ra);
> +    if (rb_is_gpr) {
> +        CHECK_PPE_GPR(ctx, a->rb);
> +        mask = tcg_temp_new();
> +        shift = tcg_temp_new();
> +        tcg_gen_andi_tl(shift, cpu_gpr[a->rb], 0x1f);
> +        tcg_gen_shr_tl(mask, tcg_constant_tl(0x80000000), shift);
> +    } else {
> +        mask = tcg_constant_tl(PPC_BIT32(a->rb));
> +    }
> +    if (invert) {
> +        tcg_gen_not_tl(mask, mask);
> +    }
> +
> +    /* apply mask to ra */
> +    r = tcg_temp_new();
> +    tcg_gen_and_tl(r, cpu_gpr[a->ra], mask);
> +    if (update) {
> +        tcg_gen_mov_tl(cpu_gpr[a->ra], r);
> +    }
> +    return do_fcb(ctx, r, tcg_constant_tl(0), 2,
> +                  a->bdx, false, a->px, a->lk);
> +#endif
> +}
> +
> +TRANS(BNBWI,    do_mask_branch, false, false, false)
> +TRANS(BNBW,     do_mask_branch, false, false, true)
> +TRANS(CLRBWIBC, do_mask_branch, true,  true,  false)
> +TRANS(CLRBWBC,  do_mask_branch, true,  true,  true)
> +
> +#if !defined(TARGET_PPC64)
> +static void gen_set_Rc0_i64(DisasContext *ctx, TCGv_i64 reg)
> +{
> +    TCGv_i64 t0 = tcg_temp_new_i64();
> +    TCGv_i64 t1 = tcg_temp_new_i64();
> +    TCGv_i32 t = tcg_temp_new_i32();
> +
> +    tcg_gen_movi_i64(t0, CRF_EQ);
> +    tcg_gen_movi_i64(t1, CRF_LT);
> +    tcg_gen_movcond_i64(TCG_COND_LT, t0, reg, tcg_constant_i64(0), t1, t0);
> +    tcg_gen_movi_i64(t1, CRF_GT);
> +    tcg_gen_movcond_i64(TCG_COND_GT, t0, reg, tcg_constant_i64(0), t1, t0);
> +    tcg_gen_extrl_i64_i32(t, t0);
> +    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> +    tcg_gen_or_i32(cpu_crf[0], cpu_crf[0], t);
> +}
> +#endif
> +
> +static bool do_shift64(DisasContext *ctx, arg_X_rc *a, bool left)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    int rt_lo, ra_lo;
> +    TCGv_i64 t0, t8;
> +
> +    /* Check for PPE since opcode overlaps with CNTTZDM instruction */
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_VDR(ctx, a->rt);
> +    CHECK_VDR(ctx, a->ra);
> +    CHECK_PPE_GPR(ctx, a->rb);
> +    rt_lo = VDR_PAIR_REG(a->rt);
> +    ra_lo = VDR_PAIR_REG(a->ra);
> +    t8 = tcg_temp_new_i64();
> +
> +    /* AND rt with a mask that is 0 when rb >= 0x40 */
> +    t0 = tcg_temp_new_i64();
> +    tcg_gen_extu_tl_i64(t0, cpu_gpr[a->rb]);
> +    tcg_gen_shli_i64(t0, t0, 0x39);
> +    tcg_gen_sari_i64(t0, t0, 0x3f);
> +
> +    /* form 64bit value from two 32bit regs */
> +    tcg_gen_concat_tl_i64(t8, cpu_gpr[rt_lo], cpu_gpr[a->rt]);
> +
> +    /* apply mask */
> +    tcg_gen_andc_i64(t8, t8, t0);
> +
> +    /* do the shift */
> +    tcg_gen_extu_tl_i64(t0, cpu_gpr[a->rb]);
> +    tcg_gen_andi_i64(t0, t0, 0x3f);
> +    if (left) {
> +        tcg_gen_shl_i64(t8, t8, t0);
> +    } else {
> +        tcg_gen_shr_i64(t8, t8, t0);
> +    }
> +
> +    /* split the 64bit word back into two 32bit regs */
> +    tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t8);
> +
> +    /* update CR0 if requested */
> +    if (unlikely(a->rc != 0)) {
> +        gen_set_Rc0_i64(ctx, t8);
> +    }
> +    return true;
> +#endif
> +}
> +
> +TRANS(SRVD, do_shift64, false)
> +TRANS(SLVD, do_shift64, true)
> +
> +static bool trans_DCBQ(DisasContext *ctx, arg_DCBQ * a)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +
> +    CHECK_PPE_GPR(ctx, a->rt);
> +    CHECK_PPE_GPR(ctx, a->ra);
> +    CHECK_PPE_GPR(ctx, a->rb);
> +
> +    /* No cache exists, so just set RT to 0 */
> +    tcg_gen_movi_tl(cpu_gpr[a->rt], 0);
> +    return true;
> +#endif
> +}
> +
> +static bool trans_RLDIMI(DisasContext *ctx, arg_RLDIMI *a)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    TCGv_i64 t_rs, t_ra;
> +    int ra_lo, rs_lo;
> +    uint32_t sh = a->sh;
> +    uint32_t mb = a->mb;
> +    uint32_t me = 63 - sh;
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_VDR(ctx, a->rs);
> +    CHECK_VDR(ctx, a->ra);
> +
> +    rs_lo = VDR_PAIR_REG(a->rs);
> +    ra_lo = VDR_PAIR_REG(a->ra);
> +
> +    t_rs = tcg_temp_new_i64();
> +    t_ra = tcg_temp_new_i64();
> +
> +    tcg_gen_concat_tl_i64(t_rs, cpu_gpr[rs_lo], cpu_gpr[a->rs]);
> +    tcg_gen_concat_tl_i64(t_ra, cpu_gpr[ra_lo], cpu_gpr[a->ra]);
> +
> +    if (mb <= me) {
> +        tcg_gen_deposit_i64(t_ra, t_ra, t_rs, sh, me - mb + 1);
> +    } else {
> +        uint64_t mask = mask_u64(mb, me);
> +        TCGv_i64 t1 = tcg_temp_new_i64();
> +
> +        tcg_gen_rotli_i64(t1, t_rs, sh);
> +        tcg_gen_andi_i64(t1, t1, mask);
> +        tcg_gen_andi_i64(t_ra, t_ra, ~mask);
> +        tcg_gen_or_i64(t_ra, t_ra, t1);
> +    }
> +
> +    tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t_ra);
> +
> +    if (unlikely(a->rc != 0)) {
> +        gen_set_Rc0_i64(ctx, t_ra);
> +    }
> +    return true;
> +#endif
> +}
> +
> +
> +static bool gen_rldinm_i64(DisasContext *ctx, arg_MD *a, int mb, int me, int sh)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    int len = me - mb + 1;
> +    int rsh = (64 - sh) & 63;
> +    int ra_lo, rs_lo;
> +    TCGv_i64 t8;
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_VDR(ctx, a->rs);
> +    CHECK_VDR(ctx, a->ra);
> +
> +    rs_lo = VDR_PAIR_REG(a->rs);
> +    ra_lo = VDR_PAIR_REG(a->ra);
> +    t8 = tcg_temp_new_i64();
> +    tcg_gen_concat_tl_i64(t8, cpu_gpr[rs_lo], cpu_gpr[a->rs]);
> +    if (sh != 0 && len > 0 && me == (63 - sh)) {
> +        tcg_gen_deposit_z_i64(t8, t8, sh, len);
> +    } else if (me == 63 && rsh + len <= 64) {
> +        tcg_gen_extract_i64(t8, t8, rsh, len);
> +    } else {
> +        tcg_gen_rotli_i64(t8, t8, sh);
> +        tcg_gen_andi_i64(t8, t8, mask_u64(mb, me));
> +    }
> +    tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t8);
> +    if (unlikely(a->rc != 0)) {
> +        gen_set_Rc0_i64(ctx, t8);
> +    }
> +    return true;
> +#endif
> +}
> +
> +TRANS(RLDICL, gen_rldinm_i64, a->mb, 63, a->sh)
> +TRANS(RLDICR, gen_rldinm_i64, 0, a->mb, a->sh)
> +
Re: [PATCH v4 6/9] target/ppc: Add IBM PPE42 special instructions
Posted by Thomas Huth 1 week, 4 days ago
On 17/09/2025 06.57, Harsh Prateek Bora wrote:
> 
> 
> On 9/12/25 22:17, Glenn Miles wrote:
>> Adds the following instructions exclusively for
>> IBM PPE42 processors:
>>
>>    LSKU
>>    LCXU
>>    STSKU
>>    STCXU
>>    LVD
>>    LVDU
>>    LVDX
>>    STVD
>>    STVDU
>>    STVDX
>>    SLVD
>>    SRVD
>>    CMPWBC
>>    CMPLWBC
>>    CMPWIBC
>>    BNBWI
>>    BNBW
>>    CLRBWIBC
>>    CLRWBC
>>    DCBQ
>>    RLDICL
>>    RLDICR
>>    RLDIMI
>>
>> A PPE42 GCC compiler is available here:
>> https://github.com/open-power/ppe42-gcc
>>
>> For more information on the PPE42 processors please visit:
>> https://wiki.raptorcs.com/w/images/a/a3/PPE_42X_Core_Users_Manual.pdf
>>
>> Signed-off-by: Glenn Miles <milesg@linux.ibm.com>
>> ---
>> Changes from v3:
>>    - Removed copy of CHECK_VDR
>>    - Refactored ld/st instructions
>>
>>   target/ppc/insn32.decode            |  66 ++-
>>   target/ppc/translate.c              |  29 +-
>>   target/ppc/translate/ppe-impl.c.inc | 665 ++++++++++++++++++++++++++++
>>   3 files changed, 750 insertions(+), 10 deletions(-)
>>   create mode 100644 target/ppc/translate/ppe-impl.c.inc
>>
> 
> <snip>
> 
>> diff --git a/target/ppc/translate/ppe-impl.c.inc b/target/ppc/translate/ 
>> ppe-impl.c.inc
>> new file mode 100644
>> index 0000000000..792103d7c2
>> --- /dev/null
>> +++ b/target/ppc/translate/ppe-impl.c.inc
>> @@ -0,0 +1,665 @@
>> +/*
>> + * IBM PPE Instructions
>> + *
>> + * Copyright (c) 2025, IBM Corporation.
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +
>> +#if !defined(TARGET_PPC64)
>> +static bool vdr_is_valid(uint32_t vdr)
>> +{
>> +    const uint32_t valid_bitmap = 0xf00003ff;
>> +    return !!((1ul << (vdr & 0x1f)) & valid_bitmap);
>> +}
>> +
>> +static bool ppe_gpr_is_valid(uint32_t reg)
>> +{
>> +    const uint32_t valid_bitmap = 0xf00027ff;
>> +    return !!((1ul << (reg & 0x1f)) & valid_bitmap);
>> +}
>> +#endif
>> +
>> +#define CHECK_VDR(CTX, VDR)                             \
>> +    do {                                                \
>> +        if (unlikely(!vdr_is_valid(VDR))) {             \
>> +            gen_invalid(CTX);                           \
>> +            return true;                                \
>> +        }                                               \
>> +    } while (0)
>> +
>> +#define CHECK_PPE_GPR(CTX, REG)                         \
>> +    do {                                                \
>> +        if (unlikely(!ppe_gpr_is_valid(REG))) {         \
>> +            gen_invalid(CTX);                           \
>> +            return true;                                \
>> +        }                                               \
>> +    } while (0)
>> +
>> +#define VDR_PAIR_REG(VDR) (((VDR) + 1) & 0x1f)
>> +
>> +#define CHECK_PPE_LEVEL(CTX, LVL)                       \
>> +    do {                                                \
>> +        if (unlikely(!((CTX)->insns_flags2 & (LVL)))) { \
>> +            gen_invalid(CTX);                           \
>> +            return true;                                \
>> +        }                                               \
>> +    } while (0)
>> +
>> +static bool trans_LCXU(DisasContext *ctx, arg_LCXU *a)
>> +{
>> +#if defined(TARGET_PPC64)
>> +    return false;
>> +#else
> 
> If we are building the PPE42 instructions only for !TARGET_PPC64, does
> it still make it usable with qemu-system-ppc64?

As explained in an earlier thread already, qemu-system-ppc64 is a superset 
of qemu-system-ppc. Thus the ppe42 stuff should work in qemu-system-ppc64, too.

  Thomas


Re: [PATCH v4 6/9] target/ppc: Add IBM PPE42 special instructions
Posted by Miles Glenn 1 week, 4 days ago
On Wed, 2025-09-17 at 08:20 +0200, Thomas Huth wrote:
> On 17/09/2025 06.57, Harsh Prateek Bora wrote:
> > 
> > On 9/12/25 22:17, Glenn Miles wrote:
> > > Adds the following instructions exclusively for
> > > IBM PPE42 processors:
> > > 
> > >    LSKU
> > >    LCXU
> > >    STSKU
> > >    STCXU
> > >    LVD
> > >    LVDU
> > >    LVDX
> > >    STVD
> > >    STVDU
> > >    STVDX
> > >    SLVD
> > >    SRVD
> > >    CMPWBC
> > >    CMPLWBC
> > >    CMPWIBC
> > >    BNBWI
> > >    BNBW
> > >    CLRBWIBC
> > >    CLRWBC
> > >    DCBQ
> > >    RLDICL
> > >    RLDICR
> > >    RLDIMI
> > > 
> > > A PPE42 GCC compiler is available here:
> > > https://github.com/open-power/ppe42-gcc
> > > 
> > > For more information on the PPE42 processors please visit:
> > > https://wiki.raptorcs.com/w/images/a/a3/PPE_42X_Core_Users_Manual.pdf
> > > 
> > > Signed-off-by: Glenn Miles <milesg@linux.ibm.com>
> > > ---
> > > Changes from v3:
> > >    - Removed copy of CHECK_VDR
> > >    - Refactored ld/st instructions
> > > 
> > >   target/ppc/insn32.decode            |  66 ++-
> > >   target/ppc/translate.c              |  29 +-
> > >   target/ppc/translate/ppe-impl.c.inc | 665 ++++++++++++++++++++++++++++
> > >   3 files changed, 750 insertions(+), 10 deletions(-)
> > >   create mode 100644 target/ppc/translate/ppe-impl.c.inc
> > > 
> > 
> > <snip>
> > 
> > > diff --git a/target/ppc/translate/ppe-impl.c.inc b/target/ppc/translate/ 
> > > ppe-impl.c.inc
> > > new file mode 100644
> > > index 0000000000..792103d7c2
> > > --- /dev/null
> > > +++ b/target/ppc/translate/ppe-impl.c.inc
> > > @@ -0,0 +1,665 @@
> > > +/*
> > > + * IBM PPE Instructions
> > > + *
> > > + * Copyright (c) 2025, IBM Corporation.
> > > + *
> > > + * SPDX-License-Identifier: GPL-2.0-or-later
> > > + */
> > > +
> > > +
> > > +#if !defined(TARGET_PPC64)
> > > +static bool vdr_is_valid(uint32_t vdr)
> > > +{
> > > +    const uint32_t valid_bitmap = 0xf00003ff;
> > > +    return !!((1ul << (vdr & 0x1f)) & valid_bitmap);
> > > +}
> > > +
> > > +static bool ppe_gpr_is_valid(uint32_t reg)
> > > +{
> > > +    const uint32_t valid_bitmap = 0xf00027ff;
> > > +    return !!((1ul << (reg & 0x1f)) & valid_bitmap);
> > > +}
> > > +#endif
> > > +
> > > +#define CHECK_VDR(CTX, VDR)                             \
> > > +    do {                                                \
> > > +        if (unlikely(!vdr_is_valid(VDR))) {             \
> > > +            gen_invalid(CTX);                           \
> > > +            return true;                                \
> > > +        }                                               \
> > > +    } while (0)
> > > +
> > > +#define CHECK_PPE_GPR(CTX, REG)                         \
> > > +    do {                                                \
> > > +        if (unlikely(!ppe_gpr_is_valid(REG))) {         \
> > > +            gen_invalid(CTX);                           \
> > > +            return true;                                \
> > > +        }                                               \
> > > +    } while (0)
> > > +
> > > +#define VDR_PAIR_REG(VDR) (((VDR) + 1) & 0x1f)
> > > +
> > > +#define CHECK_PPE_LEVEL(CTX, LVL)                       \
> > > +    do {                                                \
> > > +        if (unlikely(!((CTX)->insns_flags2 & (LVL)))) { \
> > > +            gen_invalid(CTX);                           \
> > > +            return true;                                \
> > > +        }                                               \
> > > +    } while (0)
> > > +
> > > +static bool trans_LCXU(DisasContext *ctx, arg_LCXU *a)
> > > +{
> > > +#if defined(TARGET_PPC64)
> > > +    return false;
> > > +#else
> > 
> > If we are building the PPE42 instructions only for !TARGET_PPC64, does
> > it still make it usable with qemu-system-ppc64?
> 
> As explained in an earlier thread already, qemu-system-ppc64 is a superset 
> of qemu-system-ppc. Thus the ppe42 stuff should work in qemu-system-ppc64, too.
> 
>   Thomas
> 

Ah, yes, I don't think I fully understood the ramifications of Thomas's
statements earlier.  Looks like I'll need to scrub the code to ensure
that PPE42 can run even if TARGET_PPC64 is defined.

Cedric, this requires me to change my response to your request to add
the check for TARGET_PPC64 inside the is_ppe() function.  I will need
to leave that function as-is if we want PPE42 to be supported in both
targets.  Will you be ok with that?

Thanks,

Glenn
Re: [PATCH v4 6/9] target/ppc: Add IBM PPE42 special instructions
Posted by Cédric Le Goater 1 week, 4 days ago
On 9/17/25 16:38, Miles Glenn wrote:
> On Wed, 2025-09-17 at 08:20 +0200, Thomas Huth wrote:
>> On 17/09/2025 06.57, Harsh Prateek Bora wrote:
>>>
>>> On 9/12/25 22:17, Glenn Miles wrote:
>>>> Adds the following instructions exclusively for
>>>> IBM PPE42 processors:
>>>>
>>>>     LSKU
>>>>     LCXU
>>>>     STSKU
>>>>     STCXU
>>>>     LVD
>>>>     LVDU
>>>>     LVDX
>>>>     STVD
>>>>     STVDU
>>>>     STVDX
>>>>     SLVD
>>>>     SRVD
>>>>     CMPWBC
>>>>     CMPLWBC
>>>>     CMPWIBC
>>>>     BNBWI
>>>>     BNBW
>>>>     CLRBWIBC
>>>>     CLRWBC
>>>>     DCBQ
>>>>     RLDICL
>>>>     RLDICR
>>>>     RLDIMI
>>>>
>>>> A PPE42 GCC compiler is available here:
>>>> https://github.com/open-power/ppe42-gcc
>>>>
>>>> For more information on the PPE42 processors please visit:
>>>> https://wiki.raptorcs.com/w/images/a/a3/PPE_42X_Core_Users_Manual.pdf
>>>>
>>>> Signed-off-by: Glenn Miles <milesg@linux.ibm.com>
>>>> ---
>>>> Changes from v3:
>>>>     - Removed copy of CHECK_VDR
>>>>     - Refactored ld/st instructions
>>>>
>>>>    target/ppc/insn32.decode            |  66 ++-
>>>>    target/ppc/translate.c              |  29 +-
>>>>    target/ppc/translate/ppe-impl.c.inc | 665 ++++++++++++++++++++++++++++
>>>>    3 files changed, 750 insertions(+), 10 deletions(-)
>>>>    create mode 100644 target/ppc/translate/ppe-impl.c.inc
>>>>
>>>
>>> <snip>
>>>
>>>> diff --git a/target/ppc/translate/ppe-impl.c.inc b/target/ppc/translate/
>>>> ppe-impl.c.inc
>>>> new file mode 100644
>>>> index 0000000000..792103d7c2
>>>> --- /dev/null
>>>> +++ b/target/ppc/translate/ppe-impl.c.inc
>>>> @@ -0,0 +1,665 @@
>>>> +/*
>>>> + * IBM PPE Instructions
>>>> + *
>>>> + * Copyright (c) 2025, IBM Corporation.
>>>> + *
>>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>>> + */
>>>> +
>>>> +
>>>> +#if !defined(TARGET_PPC64)
>>>> +static bool vdr_is_valid(uint32_t vdr)
>>>> +{
>>>> +    const uint32_t valid_bitmap = 0xf00003ff;
>>>> +    return !!((1ul << (vdr & 0x1f)) & valid_bitmap);
>>>> +}
>>>> +
>>>> +static bool ppe_gpr_is_valid(uint32_t reg)
>>>> +{
>>>> +    const uint32_t valid_bitmap = 0xf00027ff;
>>>> +    return !!((1ul << (reg & 0x1f)) & valid_bitmap);
>>>> +}
>>>> +#endif
>>>> +
>>>> +#define CHECK_VDR(CTX, VDR)                             \
>>>> +    do {                                                \
>>>> +        if (unlikely(!vdr_is_valid(VDR))) {             \
>>>> +            gen_invalid(CTX);                           \
>>>> +            return true;                                \
>>>> +        }                                               \
>>>> +    } while (0)
>>>> +
>>>> +#define CHECK_PPE_GPR(CTX, REG)                         \
>>>> +    do {                                                \
>>>> +        if (unlikely(!ppe_gpr_is_valid(REG))) {         \
>>>> +            gen_invalid(CTX);                           \
>>>> +            return true;                                \
>>>> +        }                                               \
>>>> +    } while (0)
>>>> +
>>>> +#define VDR_PAIR_REG(VDR) (((VDR) + 1) & 0x1f)
>>>> +
>>>> +#define CHECK_PPE_LEVEL(CTX, LVL)                       \
>>>> +    do {                                                \
>>>> +        if (unlikely(!((CTX)->insns_flags2 & (LVL)))) { \
>>>> +            gen_invalid(CTX);                           \
>>>> +            return true;                                \
>>>> +        }                                               \
>>>> +    } while (0)
>>>> +
>>>> +static bool trans_LCXU(DisasContext *ctx, arg_LCXU *a)
>>>> +{
>>>> +#if defined(TARGET_PPC64)
>>>> +    return false;
>>>> +#else
>>>
>>> If we are building the PPE42 instructions only for !TARGET_PPC64, does
>>> it still make it usable with qemu-system-ppc64?
>>
>> As explained in an earlier thread already, qemu-system-ppc64 is a superset
>> of qemu-system-ppc. Thus the ppe42 stuff should work in qemu-system-ppc64, too.
>>
>>    Thomas
>>
> 
> Ah, yes, I don't think I fully understood the ramifications of Thomas's
> statements earlier.  Looks like I'll need to scrub the code to ensure
> that PPE42 can run even if TARGET_PPC64 is defined.
> 
> Cedric, this requires me to change my response to your request to add
> the check for TARGET_PPC64 inside the is_ppe() function.  I will need
> to leave that function as-is if we want PPE42 to be supported in both
> targets.  Will you be ok with that?
Sure, if it compiles. Try a clang build.

C.
Re: [PATCH v4 6/9] target/ppc: Add IBM PPE42 special instructions
Posted by BALATON Zoltan 1 week, 4 days ago
On Wed, 17 Sep 2025, Miles Glenn wrote:
> On Wed, 2025-09-17 at 08:20 +0200, Thomas Huth wrote:
>> On 17/09/2025 06.57, Harsh Prateek Bora wrote:
>>>
>>> On 9/12/25 22:17, Glenn Miles wrote:
>>>> Adds the following instructions exclusively for
>>>> IBM PPE42 processors:
>>>>
>>>>    LSKU
>>>>    LCXU
>>>>    STSKU
>>>>    STCXU
>>>>    LVD
>>>>    LVDU
>>>>    LVDX
>>>>    STVD
>>>>    STVDU
>>>>    STVDX
>>>>    SLVD
>>>>    SRVD
>>>>    CMPWBC
>>>>    CMPLWBC
>>>>    CMPWIBC
>>>>    BNBWI
>>>>    BNBW
>>>>    CLRBWIBC
>>>>    CLRWBC
>>>>    DCBQ
>>>>    RLDICL
>>>>    RLDICR
>>>>    RLDIMI
>>>>
>>>> A PPE42 GCC compiler is available here:
>>>> https://github.com/open-power/ppe42-gcc
>>>>
>>>> For more information on the PPE42 processors please visit:
>>>> https://wiki.raptorcs.com/w/images/a/a3/PPE_42X_Core_Users_Manual.pdf
>>>>
>>>> Signed-off-by: Glenn Miles <milesg@linux.ibm.com>
>>>> ---
>>>> Changes from v3:
>>>>    - Removed copy of CHECK_VDR
>>>>    - Refactored ld/st instructions
>>>>
>>>>   target/ppc/insn32.decode            |  66 ++-
>>>>   target/ppc/translate.c              |  29 +-
>>>>   target/ppc/translate/ppe-impl.c.inc | 665 ++++++++++++++++++++++++++++
>>>>   3 files changed, 750 insertions(+), 10 deletions(-)
>>>>   create mode 100644 target/ppc/translate/ppe-impl.c.inc
>>>>
>>>
>>> <snip>
>>>
>>>> diff --git a/target/ppc/translate/ppe-impl.c.inc b/target/ppc/translate/
>>>> ppe-impl.c.inc
>>>> new file mode 100644
>>>> index 0000000000..792103d7c2
>>>> --- /dev/null
>>>> +++ b/target/ppc/translate/ppe-impl.c.inc
>>>> @@ -0,0 +1,665 @@
>>>> +/*
>>>> + * IBM PPE Instructions
>>>> + *
>>>> + * Copyright (c) 2025, IBM Corporation.
>>>> + *
>>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>>> + */
>>>> +
>>>> +
>>>> +#if !defined(TARGET_PPC64)
>>>> +static bool vdr_is_valid(uint32_t vdr)
>>>> +{
>>>> +    const uint32_t valid_bitmap = 0xf00003ff;
>>>> +    return !!((1ul << (vdr & 0x1f)) & valid_bitmap);
>>>> +}
>>>> +
>>>> +static bool ppe_gpr_is_valid(uint32_t reg)
>>>> +{
>>>> +    const uint32_t valid_bitmap = 0xf00027ff;
>>>> +    return !!((1ul << (reg & 0x1f)) & valid_bitmap);
>>>> +}
>>>> +#endif
>>>> +
>>>> +#define CHECK_VDR(CTX, VDR)                             \
>>>> +    do {                                                \
>>>> +        if (unlikely(!vdr_is_valid(VDR))) {             \
>>>> +            gen_invalid(CTX);                           \
>>>> +            return true;                                \
>>>> +        }                                               \
>>>> +    } while (0)
>>>> +
>>>> +#define CHECK_PPE_GPR(CTX, REG)                         \
>>>> +    do {                                                \
>>>> +        if (unlikely(!ppe_gpr_is_valid(REG))) {         \
>>>> +            gen_invalid(CTX);                           \
>>>> +            return true;                                \
>>>> +        }                                               \
>>>> +    } while (0)
>>>> +
>>>> +#define VDR_PAIR_REG(VDR) (((VDR) + 1) & 0x1f)
>>>> +
>>>> +#define CHECK_PPE_LEVEL(CTX, LVL)                       \
>>>> +    do {                                                \
>>>> +        if (unlikely(!((CTX)->insns_flags2 & (LVL)))) { \
>>>> +            gen_invalid(CTX);                           \
>>>> +            return true;                                \
>>>> +        }                                               \
>>>> +    } while (0)
>>>> +
>>>> +static bool trans_LCXU(DisasContext *ctx, arg_LCXU *a)
>>>> +{
>>>> +#if defined(TARGET_PPC64)
>>>> +    return false;
>>>> +#else
>>>
>>> If we are building the PPE42 instructions only for !TARGET_PPC64, does
>>> it still make it usable with qemu-system-ppc64?
>>
>> As explained in an earlier thread already, qemu-system-ppc64 is a superset
>> of qemu-system-ppc. Thus the ppe42 stuff should work in qemu-system-ppc64, too.
>>
>>   Thomas
>>
>
> Ah, yes, I don't think I fully understood the ramifications of Thomas's
> statements earlier.  Looks like I'll need to scrub the code to ensure
> that PPE42 can run even if TARGET_PPC64 is defined.
>
> Cedric, this requires me to change my response to your request to add
> the check for TARGET_PPC64 inside the is_ppe() function.  I will need
> to leave that function as-is if we want PPE42 to be supported in both
> targets.  Will you be ok with that?

Does it make sense to support it with !TARGET_PPC64 if its only use is in 
qemu-system-ppc64? Even if the CPU is 32-bit it has some 64-bit 
instructions IIUC so does that make it TARGET_PPC64 only?

Regards,
BALATON Zoltan
Re: [PATCH v4 6/9] target/ppc: Add IBM PPE42 special instructions
Posted by Thomas Huth 1 week, 3 days ago
On 17/09/2025 18.07, BALATON Zoltan wrote:
> On Wed, 17 Sep 2025, Miles Glenn wrote:
>> On Wed, 2025-09-17 at 08:20 +0200, Thomas Huth wrote:
>>> On 17/09/2025 06.57, Harsh Prateek Bora wrote:
>>>>
>>>> On 9/12/25 22:17, Glenn Miles wrote:
>>>>> Adds the following instructions exclusively for
>>>>> IBM PPE42 processors:
>>>>>
>>>>>    LSKU
>>>>>    LCXU
>>>>>    STSKU
>>>>>    STCXU
>>>>>    LVD
>>>>>    LVDU
>>>>>    LVDX
>>>>>    STVD
>>>>>    STVDU
>>>>>    STVDX
>>>>>    SLVD
>>>>>    SRVD
>>>>>    CMPWBC
>>>>>    CMPLWBC
>>>>>    CMPWIBC
>>>>>    BNBWI
>>>>>    BNBW
>>>>>    CLRBWIBC
>>>>>    CLRWBC
>>>>>    DCBQ
>>>>>    RLDICL
>>>>>    RLDICR
>>>>>    RLDIMI
>>>>>
>>>>> A PPE42 GCC compiler is available here:
>>>>> https://github.com/open-power/ppe42-gcc
>>>>>
>>>>> For more information on the PPE42 processors please visit:
>>>>> https://wiki.raptorcs.com/w/images/a/a3/PPE_42X_Core_Users_Manual.pdf
>>>>>
>>>>> Signed-off-by: Glenn Miles <milesg@linux.ibm.com>
>>>>> ---
>>>>> Changes from v3:
>>>>>    - Removed copy of CHECK_VDR
>>>>>    - Refactored ld/st instructions
>>>>>
>>>>>   target/ppc/insn32.decode            |  66 ++-
>>>>>   target/ppc/translate.c              |  29 +-
>>>>>   target/ppc/translate/ppe-impl.c.inc | 665 ++++++++++++++++++++++++++++
>>>>>   3 files changed, 750 insertions(+), 10 deletions(-)
>>>>>   create mode 100644 target/ppc/translate/ppe-impl.c.inc
>>>>>
>>>>
>>>> <snip>
>>>>
>>>>> diff --git a/target/ppc/translate/ppe-impl.c.inc b/target/ppc/translate/
>>>>> ppe-impl.c.inc
>>>>> new file mode 100644
>>>>> index 0000000000..792103d7c2
>>>>> --- /dev/null
>>>>> +++ b/target/ppc/translate/ppe-impl.c.inc
>>>>> @@ -0,0 +1,665 @@
>>>>> +/*
>>>>> + * IBM PPE Instructions
>>>>> + *
>>>>> + * Copyright (c) 2025, IBM Corporation.
>>>>> + *
>>>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>>>> + */
>>>>> +
>>>>> +
>>>>> +#if !defined(TARGET_PPC64)
>>>>> +static bool vdr_is_valid(uint32_t vdr)
>>>>> +{
>>>>> +    const uint32_t valid_bitmap = 0xf00003ff;
>>>>> +    return !!((1ul << (vdr & 0x1f)) & valid_bitmap);
>>>>> +}
>>>>> +
>>>>> +static bool ppe_gpr_is_valid(uint32_t reg)
>>>>> +{
>>>>> +    const uint32_t valid_bitmap = 0xf00027ff;
>>>>> +    return !!((1ul << (reg & 0x1f)) & valid_bitmap);
>>>>> +}
>>>>> +#endif
>>>>> +
>>>>> +#define CHECK_VDR(CTX, VDR)                             \
>>>>> +    do {                                                \
>>>>> +        if (unlikely(!vdr_is_valid(VDR))) {             \
>>>>> +            gen_invalid(CTX);                           \
>>>>> +            return true;                                \
>>>>> +        }                                               \
>>>>> +    } while (0)
>>>>> +
>>>>> +#define CHECK_PPE_GPR(CTX, REG)                         \
>>>>> +    do {                                                \
>>>>> +        if (unlikely(!ppe_gpr_is_valid(REG))) {         \
>>>>> +            gen_invalid(CTX);                           \
>>>>> +            return true;                                \
>>>>> +        }                                               \
>>>>> +    } while (0)
>>>>> +
>>>>> +#define VDR_PAIR_REG(VDR) (((VDR) + 1) & 0x1f)
>>>>> +
>>>>> +#define CHECK_PPE_LEVEL(CTX, LVL)                       \
>>>>> +    do {                                                \
>>>>> +        if (unlikely(!((CTX)->insns_flags2 & (LVL)))) { \
>>>>> +            gen_invalid(CTX);                           \
>>>>> +            return true;                                \
>>>>> +        }                                               \
>>>>> +    } while (0)
>>>>> +
>>>>> +static bool trans_LCXU(DisasContext *ctx, arg_LCXU *a)
>>>>> +{
>>>>> +#if defined(TARGET_PPC64)
>>>>> +    return false;
>>>>> +#else
>>>>
>>>> If we are building the PPE42 instructions only for !TARGET_PPC64, does
>>>> it still make it usable with qemu-system-ppc64?
>>>
>>> As explained in an earlier thread already, qemu-system-ppc64 is a superset
>>> of qemu-system-ppc. Thus the ppe42 stuff should work in qemu-system- 
>>> ppc64, too.
>>>
>>>   Thomas
>>>
>>
>> Ah, yes, I don't think I fully understood the ramifications of Thomas's
>> statements earlier.  Looks like I'll need to scrub the code to ensure
>> that PPE42 can run even if TARGET_PPC64 is defined.
>>
>> Cedric, this requires me to change my response to your request to add
>> the check for TARGET_PPC64 inside the is_ppe() function.  I will need
>> to leave that function as-is if we want PPE42 to be supported in both
>> targets.  Will you be ok with that?
> 
> Does it make sense to support it with !TARGET_PPC64 if its only use is in 
> qemu-system-ppc64? Even if the CPU is 32-bit it has some 64-bit instructions 
> IIUC so does that make it TARGET_PPC64 only?

As long as the GPRs are 32 bit only (which is the case here, I assume), it's 
a 32-bit PPC CPU, isn't it? So that certainly should not go into the ppc64 
binary only.

  Thomas


Re: [PATCH v4 6/9] target/ppc: Add IBM PPE42 special instructions
Posted by Miles Glenn 1 week, 3 days ago
On Wed, 2025-09-17 at 18:43 +0200, Thomas Huth wrote:
> On 17/09/2025 18.07, BALATON Zoltan wrote:
> > On Wed, 17 Sep 2025, Miles Glenn wrote:
> > > On Wed, 2025-09-17 at 08:20 +0200, Thomas Huth wrote:
> > > > On 17/09/2025 06.57, Harsh Prateek Bora wrote:
> > > > > On 9/12/25 22:17, Glenn Miles wrote:
> > > > > > Adds the following instructions exclusively for
> > > > > > IBM PPE42 processors:
> > > > > > 
> > > > > >    LSKU
> > > > > >    LCXU
> > > > > >    STSKU
> > > > > >    STCXU
> > > > > >    LVD
> > > > > >    LVDU
> > > > > >    LVDX
> > > > > >    STVD
> > > > > >    STVDU
> > > > > >    STVDX
> > > > > >    SLVD
> > > > > >    SRVD
> > > > > >    CMPWBC
> > > > > >    CMPLWBC
> > > > > >    CMPWIBC
> > > > > >    BNBWI
> > > > > >    BNBW
> > > > > >    CLRBWIBC
> > > > > >    CLRWBC
> > > > > >    DCBQ
> > > > > >    RLDICL
> > > > > >    RLDICR
> > > > > >    RLDIMI
> > > > > > 
> > > > > > A PPE42 GCC compiler is available here:
> > > > > > https://github.com/open-power/ppe42-gcc
> > > > > > 
> > > > > > For more information on the PPE42 processors please visit:
> > > > > > https://wiki.raptorcs.com/w/images/a/a3/PPE_42X_Core_Users_Manual.pdf
> > > > > > 
> > > > > > Signed-off-by: Glenn Miles <milesg@linux.ibm.com>
> > > > > > ---
> > > > > > Changes from v3:
> > > > > >    - Removed copy of CHECK_VDR
> > > > > >    - Refactored ld/st instructions
> > > > > > 
> > > > > >   target/ppc/insn32.decode            |  66 ++-
> > > > > >   target/ppc/translate.c              |  29 +-
> > > > > >   target/ppc/translate/ppe-impl.c.inc | 665 ++++++++++++++++++++++++++++
> > > > > >   3 files changed, 750 insertions(+), 10 deletions(-)
> > > > > >   create mode 100644 target/ppc/translate/ppe-impl.c.inc
> > > > > > 
> > > > > 
> > > > > <snip>
> > > > > 
> > > > > > diff --git a/target/ppc/translate/ppe-impl.c.inc b/target/ppc/translate/
> > > > > > ppe-impl.c.inc
> > > > > > new file mode 100644
> > > > > > index 0000000000..792103d7c2
> > > > > > --- /dev/null
> > > > > > +++ b/target/ppc/translate/ppe-impl.c.inc
> > > > > > @@ -0,0 +1,665 @@
> > > > > > +/*
> > > > > > + * IBM PPE Instructions
> > > > > > + *
> > > > > > + * Copyright (c) 2025, IBM Corporation.
> > > > > > + *
> > > > > > + * SPDX-License-Identifier: GPL-2.0-or-later
> > > > > > + */
> > > > > > +
> > > > > > +
> > > > > > +#if !defined(TARGET_PPC64)
> > > > > > +static bool vdr_is_valid(uint32_t vdr)
> > > > > > +{
> > > > > > +    const uint32_t valid_bitmap = 0xf00003ff;
> > > > > > +    return !!((1ul << (vdr & 0x1f)) & valid_bitmap);
> > > > > > +}
> > > > > > +
> > > > > > +static bool ppe_gpr_is_valid(uint32_t reg)
> > > > > > +{
> > > > > > +    const uint32_t valid_bitmap = 0xf00027ff;
> > > > > > +    return !!((1ul << (reg & 0x1f)) & valid_bitmap);
> > > > > > +}
> > > > > > +#endif
> > > > > > +
> > > > > > +#define CHECK_VDR(CTX, VDR)                             \
> > > > > > +    do {                                                \
> > > > > > +        if (unlikely(!vdr_is_valid(VDR))) {             \
> > > > > > +            gen_invalid(CTX);                           \
> > > > > > +            return true;                                \
> > > > > > +        }                                               \
> > > > > > +    } while (0)
> > > > > > +
> > > > > > +#define CHECK_PPE_GPR(CTX, REG)                         \
> > > > > > +    do {                                                \
> > > > > > +        if (unlikely(!ppe_gpr_is_valid(REG))) {         \
> > > > > > +            gen_invalid(CTX);                           \
> > > > > > +            return true;                                \
> > > > > > +        }                                               \
> > > > > > +    } while (0)
> > > > > > +
> > > > > > +#define VDR_PAIR_REG(VDR) (((VDR) + 1) & 0x1f)
> > > > > > +
> > > > > > +#define CHECK_PPE_LEVEL(CTX, LVL)                       \
> > > > > > +    do {                                                \
> > > > > > +        if (unlikely(!((CTX)->insns_flags2 & (LVL)))) { \
> > > > > > +            gen_invalid(CTX);                           \
> > > > > > +            return true;                                \
> > > > > > +        }                                               \
> > > > > > +    } while (0)
> > > > > > +
> > > > > > +static bool trans_LCXU(DisasContext *ctx, arg_LCXU *a)
> > > > > > +{
> > > > > > +#if defined(TARGET_PPC64)
> > > > > > +    return false;
> > > > > > +#else
> > > > > 
> > > > > If we are building the PPE42 instructions only for !TARGET_PPC64, does
> > > > > it still make it usable with qemu-system-ppc64?
> > > > 
> > > > As explained in an earlier thread already, qemu-system-ppc64 is a superset
> > > > of qemu-system-ppc. Thus the ppe42 stuff should work in qemu-system- 
> > > > ppc64, too.
> > > > 
> > > >   Thomas
> > > > 
> > > 
> > > Ah, yes, I don't think I fully understood the ramifications of Thomas's
> > > statements earlier.  Looks like I'll need to scrub the code to ensure
> > > that PPE42 can run even if TARGET_PPC64 is defined.
> > > 
> > > Cedric, this requires me to change my response to your request to add
> > > the check for TARGET_PPC64 inside the is_ppe() function.  I will need
> > > to leave that function as-is if we want PPE42 to be supported in both
> > > targets.  Will you be ok with that?
> > 
> > Does it make sense to support it with !TARGET_PPC64 if its only use is in 
> > qemu-system-ppc64? Even if the CPU is 32-bit it has some 64-bit instructions 
> > IIUC so does that make it TARGET_PPC64 only?
> 
> As long as the GPRs are 32 bit only (which is the case here, I assume), it's 
> a 32-bit PPC CPU, isn't it? So that certainly should not go into the ppc64 
> binary only.
> 
>   Thomas
> 

I agree.  While the PPE42 does support some 64-bit operations, it
actually uses pairs of 32-bit registers to accomplish the 64-bit
operations.  Also, the address space is a 32-bit space.

Glenn
Re: [PATCH v4 6/9] target/ppc: Add IBM PPE42 special instructions
Posted by Cédric Le Goater 1 week, 5 days ago
On 9/12/25 18:47, Glenn Miles wrote:
> Adds the following instructions exclusively for
> IBM PPE42 processors:
> 
>    LSKU
>    LCXU
>    STSKU
>    STCXU
>    LVD
>    LVDU
>    LVDX
>    STVD
>    STVDU
>    STVDX
>    SLVD
>    SRVD
>    CMPWBC
>    CMPLWBC
>    CMPWIBC
>    BNBWI
>    BNBW
>    CLRBWIBC
>    CLRWBC
>    DCBQ
>    RLDICL
>    RLDICR
>    RLDIMI
> 
> A PPE42 GCC compiler is available here:
> https://github.com/open-power/ppe42-gcc
> 
> For more information on the PPE42 processors please visit:
> https://wiki.raptorcs.com/w/images/a/a3/PPE_42X_Core_Users_Manual.pdf
> 
> Signed-off-by: Glenn Miles <milesg@linux.ibm.com>
> ---
> Changes from v3:
>    - Removed copy of CHECK_VDR
>    - Refactored ld/st instructions
> 
>   target/ppc/insn32.decode            |  66 ++-
>   target/ppc/translate.c              |  29 +-
>   target/ppc/translate/ppe-impl.c.inc | 665 ++++++++++++++++++++++++++++
>   3 files changed, 750 insertions(+), 10 deletions(-)
>   create mode 100644 target/ppc/translate/ppe-impl.c.inc
> 
> diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
> index e53fd2840d..8beb588a2a 100644
> --- a/target/ppc/insn32.decode
> +++ b/target/ppc/insn32.decode
> @@ -58,6 +58,10 @@
>   %ds_rtp         22:4   !function=times_2
>   @DS_rtp         ...... ....0 ra:5 .............. ..             &D rt=%ds_rtp si=%ds_si
>   
> +%dd_si          3:s13
> +&DD             rt ra si:int64_t
> +@DD             ...... rt:5 ra:5 ............. . ..             &DD si=%dd_si
> +
>   &DX_b           vrt b
>   %dx_b           6:10 16:5 0:1
>   @DX_b           ...... vrt:5  ..... .......... ..... .          &DX_b b=%dx_b
> @@ -66,6 +70,11 @@
>   %dx_d           6:s10 16:5 0:1
>   @DX             ...... rt:5  ..... .......... ..... .           &DX d=%dx_d
>   
> +%md_sh          1:1 11:5
> +%md_mb          5:1 6:5
> +&MD             rs ra sh mb rc
> +@MD             ...... rs:5 ra:5 ..... ...... ... . rc:1        &MD sh=%md_sh mb=%md_mb
> +
>   &VA             vrt vra vrb rc
>   @VA             ...... vrt:5 vra:5 vrb:5 rc:5 ......            &VA
>   
> @@ -322,6 +331,13 @@ LDUX            011111 ..... ..... ..... 0000110101 -   @X
>   
>   LQ              111000 ..... ..... ............ ----    @DQ_rtp
>   
> +LVD             000101 ..... ..... ................     @D
> +LVDU            001001 ..... ..... ................     @D
> +LVDX            011111 ..... ..... ..... 0000010001 -   @X
> +LSKU            111010 ..... ..... ............. 0 11   @DD
> +LCXU            111010 ..... ..... ............. 1 11   @DD
> +
> +
>   ### Fixed-Point Store Instructions
>   
>   STB             100110 ..... ..... ................     @D
> @@ -346,6 +362,11 @@ STDUX           011111 ..... ..... ..... 0010110101 -   @X
>   
>   STQ             111110 ..... ..... ..............10     @DS_rtp
>   
> +STVDU           010110 ..... ..... ................     @D
> +STVDX           011111 ..... ..... ..... 0010010001 -   @X
> +STSKU           111110 ..... ..... ............. 0 11   @DD
> +STCXU           111110 ..... ..... ............. 1 11   @DD
> +
>   ### Fixed-Point Compare Instructions
>   
>   CMP             011111 ... - . ..... ..... 0000000000 - @X_bfl
> @@ -461,8 +482,14 @@ PRTYD           011111 ..... ..... ----- 0010111010 -   @X_sa
>   
>   BPERMD          011111 ..... ..... ..... 0011111100 -   @X
>   CFUGED          011111 ..... ..... ..... 0011011100 -   @X
> -CNTLZDM         011111 ..... ..... ..... 0000111011 -   @X
> -CNTTZDM         011111 ..... ..... ..... 1000111011 -   @X
> +{
> +  SLVD            011111 ..... ..... ..... 0000111011 .   @X_rc
> +  CNTLZDM         011111 ..... ..... ..... 0000111011 -   @X
> +}
> +{
> +  SRVD            011111 ..... ..... ..... 1000111011 .   @X_rc
> +  CNTTZDM         011111 ..... ..... ..... 1000111011 -   @X
> +}
>   PDEPD           011111 ..... ..... ..... 0010011100 -   @X
>   PEXTD           011111 ..... ..... ..... 0010111100 -   @X
>   
> @@ -981,8 +1008,16 @@ LXSSP           111001 ..... ..... .............. 11    @DS
>   STXSSP          111101 ..... ..... .............. 11    @DS
>   LXV             111101 ..... ..... ............ . 001   @DQ_TSX
>   STXV            111101 ..... ..... ............ . 101   @DQ_TSX
> -LXVP            000110 ..... ..... ............ 0000    @DQ_TSXP
> -STXVP           000110 ..... ..... ............ 0001    @DQ_TSXP
> +
> +# STVD PPE instruction overlaps with the LXVP and STXVP instructions
> +{
> +  STVD          000110 ..... ..... ................     @D
> +  [
> +    LXVP        000110 ..... ..... ............ 0000    @DQ_TSXP
> +    STXVP       000110 ..... ..... ............ 0001    @DQ_TSXP
> +  ]
> +}
> +
>   LXVX            011111 ..... ..... ..... 0100 - 01100 . @X_TSX
>   STXVX           011111 ..... ..... ..... 0110001100 .   @X_TSX
>   LXVPX           011111 ..... ..... ..... 0101001101 -   @X_TSXP
> @@ -1300,3 +1335,26 @@ CLRBHRB         011111 ----- ----- ----- 0110101110 -
>   ## Misc POWER instructions
>   
>   ATTN            000000 00000 00000 00000 0100000000 0
> +
> +# Fused compare-branch instructions for PPE only
> +%fcb_bdx        1:s10  !function=times_4
> +&FCB            px:bool ra rb:uint64_t bdx lk:bool
> +@FCB            ...... .. px:1 .. ra:5 rb:5 .......... lk:1       &FCB bdx=%fcb_bdx
> +&FCB_bix        px:bool bix ra rb:uint64_t bdx lk:bool
> +@FCB_bix        ...... .. px:1 bix:2 ra:5 rb:5 .......... lk:1    &FCB_bix bdx=%fcb_bdx
> +
> +CMPWBC          000001 00 . .. ..... ..... .......... .     @FCB_bix
> +CMPLWBC         000001 01 . .. ..... ..... .......... .     @FCB_bix
> +CMPWIBC         000001 10 . .. ..... ..... .......... .     @FCB_bix
> +BNBWI           000001 11 . 00 ..... ..... .......... .     @FCB
> +BNBW            000001 11 . 01 ..... ..... .......... .     @FCB
> +CLRBWIBC        000001 11 . 10 ..... ..... .......... .     @FCB
> +CLRBWBC         000001 11 . 11 ..... ..... .......... .     @FCB
> +
> +# Data Cache Block Query for PPE only
> +DCBQ            011111 ..... ..... ..... 0110010110 -       @X
> +
> +# Rotate Doubleword Instructions for PPE only (if TARGET_PPC64 not defined)
> +RLDICL          011110 ..... ..... ..... ...... 000 . .     @MD
> +RLDICR          011110 ..... ..... ..... ...... 001 . .     @MD
> +RLDIMI          011110 ..... ..... ..... ...... 011 . .     @MD
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index fc817dab54..d422789a1d 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -209,6 +209,11 @@ struct DisasContext {
>   #define DISAS_CHAIN        DISAS_TARGET_2  /* lookup next tb, pc updated */
>   #define DISAS_CHAIN_UPDATE DISAS_TARGET_3  /* lookup next tb, pc stale */
>   
> +static inline bool is_ppe(const DisasContext *ctx)

is_ppe() needs to be conditionally compiled if !TARGET_PPC64

Thanks,

C.




> +{
> +    return !!(ctx->flags & POWERPC_FLAG_PPE42);
> +}
> +
>   /* Return true iff byteswap is needed in a scalar memop */
>   static inline bool need_byteswap(const DisasContext *ctx)
>   {
> @@ -556,11 +561,8 @@ void spr_access_nop(DisasContext *ctx, int sprn, int gprn)
>   
>   #endif
>   
> -/* SPR common to all PowerPC */
> -/* XER */
> -void spr_read_xer(DisasContext *ctx, int gprn, int sprn)
> +static void gen_get_xer(DisasContext *ctx, TCGv dst)
>   {
> -    TCGv dst = cpu_gpr[gprn];
>       TCGv t0 = tcg_temp_new();
>       TCGv t1 = tcg_temp_new();
>       TCGv t2 = tcg_temp_new();
> @@ -579,9 +581,16 @@ void spr_read_xer(DisasContext *ctx, int gprn, int sprn)
>       }
>   }
>   
> -void spr_write_xer(DisasContext *ctx, int sprn, int gprn)
> +/* SPR common to all PowerPC */
> +/* XER */
> +void spr_read_xer(DisasContext *ctx, int gprn, int sprn)
> +{
> +    TCGv dst = cpu_gpr[gprn];
> +    gen_get_xer(ctx, dst);
> +}
> +
> +static void gen_set_xer(DisasContext *ctx, TCGv src)
>   {
> -    TCGv src = cpu_gpr[gprn];
>       /* Write all flags, while reading back check for isa300 */
>       tcg_gen_andi_tl(cpu_xer, src,
>                       ~((1u << XER_SO) |
> @@ -594,6 +603,12 @@ void spr_write_xer(DisasContext *ctx, int sprn, int gprn)
>       tcg_gen_extract_tl(cpu_ca, src, XER_CA, 1);
>   }
>   
> +void spr_write_xer(DisasContext *ctx, int sprn, int gprn)
> +{
> +    TCGv src = cpu_gpr[gprn];
> +    gen_set_xer(ctx, src);
> +}
> +
>   /* LR */
>   void spr_read_lr(DisasContext *ctx, int gprn, int sprn)
>   {
> @@ -5755,6 +5770,8 @@ static bool resolve_PLS_D(DisasContext *ctx, arg_D *d, arg_PLS_D *a)
>   
>   #include "translate/bhrb-impl.c.inc"
>   
> +#include "translate/ppe-impl.c.inc"
> +
>   /* Handles lfdp */
>   static void gen_dform39(DisasContext *ctx)
>   {
> diff --git a/target/ppc/translate/ppe-impl.c.inc b/target/ppc/translate/ppe-impl.c.inc
> new file mode 100644
> index 0000000000..792103d7c2
> --- /dev/null
> +++ b/target/ppc/translate/ppe-impl.c.inc
> @@ -0,0 +1,665 @@
> +/*
> + * IBM PPE Instructions
> + *
> + * Copyright (c) 2025, IBM Corporation.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +
> +#if !defined(TARGET_PPC64)
> +static bool vdr_is_valid(uint32_t vdr)
> +{
> +    const uint32_t valid_bitmap = 0xf00003ff;
> +    return !!((1ul << (vdr & 0x1f)) & valid_bitmap);
> +}
> +
> +static bool ppe_gpr_is_valid(uint32_t reg)
> +{
> +    const uint32_t valid_bitmap = 0xf00027ff;
> +    return !!((1ul << (reg & 0x1f)) & valid_bitmap);
> +}
> +#endif
> +
> +#define CHECK_VDR(CTX, VDR)                             \
> +    do {                                                \
> +        if (unlikely(!vdr_is_valid(VDR))) {             \
> +            gen_invalid(CTX);                           \
> +            return true;                                \
> +        }                                               \
> +    } while (0)
> +
> +#define CHECK_PPE_GPR(CTX, REG)                         \
> +    do {                                                \
> +        if (unlikely(!ppe_gpr_is_valid(REG))) {         \
> +            gen_invalid(CTX);                           \
> +            return true;                                \
> +        }                                               \
> +    } while (0)
> +
> +#define VDR_PAIR_REG(VDR) (((VDR) + 1) & 0x1f)
> +
> +#define CHECK_PPE_LEVEL(CTX, LVL)                       \
> +    do {                                                \
> +        if (unlikely(!((CTX)->insns_flags2 & (LVL)))) { \
> +            gen_invalid(CTX);                           \
> +            return true;                                \
> +        }                                               \
> +    } while (0)
> +
> +static bool trans_LCXU(DisasContext *ctx, arg_LCXU *a)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    int i;
> +    TCGv base, EA;
> +    TCGv lo, hi;
> +    TCGv_i64 t8;
> +    const uint8_t vd_list[] = {9, 7, 5, 3, 0};
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_PPE_GPR(ctx, a->rt);
> +
> +    if (unlikely((a->rt != a->ra) || (a->ra == 0) || (a->si < 0xB))) {
> +        gen_invalid(ctx);
> +        return true;
> +    }
> +
> +    EA = tcg_temp_new();
> +    base = tcg_temp_new();
> +
> +    tcg_gen_addi_tl(base, cpu_gpr[a->ra], a->si * 8);
> +    gen_store_spr(SPR_PPE42_EDR, base);
> +
> +    t8 = tcg_temp_new_i64();
> +
> +    tcg_gen_addi_tl(EA, base, -8);
> +    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    tcg_gen_extr_i64_tl(cpu_gpr[31], cpu_gpr[30], t8);
> +
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    tcg_gen_extr_i64_tl(cpu_gpr[29], cpu_gpr[28], t8);
> +
> +    lo = tcg_temp_new();
> +    hi = tcg_temp_new();
> +
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    tcg_gen_extr_i64_tl(lo, hi, t8);
> +    gen_store_spr(SPR_SRR0, hi);
> +    gen_store_spr(SPR_SRR1, lo);
> +
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    tcg_gen_extr_i64_tl(lo, hi, t8);
> +    gen_set_xer(ctx, hi);
> +    tcg_gen_mov_tl(cpu_ctr, lo);
> +
> +    for (i = 0; i < sizeof(vd_list); i++) {
> +        int vd = vd_list[i];
> +        tcg_gen_addi_tl(EA, EA, -8);
> +        tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +        tcg_gen_extr_i64_tl(cpu_gpr[VDR_PAIR_REG(vd)], cpu_gpr[vd], t8);
> +    }
> +
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    tcg_gen_extr_i64_tl(lo, hi, t8);
> +    tcg_gen_shri_tl(hi, hi, 28);
> +    tcg_gen_trunc_tl_i32(cpu_crf[0], hi);
> +    gen_store_spr(SPR_SPRG0, lo);
> +
> +    tcg_gen_addi_tl(EA, base, 4);
> +    tcg_gen_qemu_ld_tl(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN);
> +    tcg_gen_mov_tl(cpu_gpr[a->ra], base);
> +    return true;
> +#endif
> +}
> +
> +static bool trans_LSKU(DisasContext *ctx, arg_LSKU *a)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    int64_t n;
> +    TCGv base, EA;
> +    TCGv_i32 lo, hi;
> +    TCGv_i64 t8;
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_PPE_GPR(ctx, a->rt);
> +
> +    if (unlikely((a->rt != a->ra) || (a->ra == 0) ||
> +                 (a->si & PPC_BIT(0)) || (a->si == 0))) {
> +        gen_invalid(ctx);
> +        return true;
> +    }
> +
> +    EA = tcg_temp_new();
> +    base = tcg_temp_new();
> +    gen_addr_register(ctx, base);
> +
> +
> +    tcg_gen_addi_tl(base, base, a->si * 8);
> +    gen_store_spr(SPR_PPE42_EDR, base);
> +
> +    n = a->si - 1;
> +    t8 = tcg_temp_new_i64();
> +    if (n > 0) {
> +        tcg_gen_addi_tl(EA, base, -8);
> +        tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +        hi = cpu_gpr[30];
> +        lo = cpu_gpr[31];
> +        tcg_gen_extr_i64_i32(lo, hi, t8);
> +    }
> +    if (n > 1) {
> +        tcg_gen_addi_tl(EA, base, -16);
> +        tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +        hi = cpu_gpr[28];
> +        lo = cpu_gpr[29];
> +        tcg_gen_extr_i64_i32(lo, hi, t8);
> +    }
> +    tcg_gen_addi_tl(EA, base, 4);
> +    tcg_gen_qemu_ld_i32(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN);
> +    tcg_gen_mov_tl(cpu_gpr[a->ra], base);
> +    return true;
> +#endif
> +}
> +
> +static bool trans_STCXU(DisasContext *ctx, arg_STCXU *a)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    TCGv EA;
> +    TCGv lo, hi;
> +    TCGv_i64 t8;
> +    int i;
> +    const uint8_t vd_list[] = {9, 7, 5, 3, 0};
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_PPE_GPR(ctx, a->rt);
> +
> +    if (unlikely((a->rt != a->ra) || (a->ra == 0) || !(a->si & PPC_BIT(0)))) {
> +        gen_invalid(ctx);
> +        return true;
> +    }
> +
> +    EA = tcg_temp_new();
> +    tcg_gen_addi_tl(EA, cpu_gpr[a->ra], 4);
> +    tcg_gen_qemu_st_i32(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN);
> +
> +    gen_store_spr(SPR_PPE42_EDR, cpu_gpr[a->ra]);
> +
> +    t8 = tcg_temp_new_i64();
> +
> +    tcg_gen_concat_tl_i64(t8, cpu_gpr[31], cpu_gpr[30]);
> +    tcg_gen_addi_tl(EA, cpu_gpr[a->ra], -8);
> +    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +
> +    tcg_gen_concat_tl_i64(t8, cpu_gpr[29], cpu_gpr[28]);
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +
> +    lo = tcg_temp_new();
> +    hi = tcg_temp_new();
> +
> +    gen_load_spr(hi, SPR_SRR0);
> +    gen_load_spr(lo, SPR_SRR1);
> +    tcg_gen_concat_tl_i64(t8, lo, hi);
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +
> +    gen_get_xer(ctx, hi);
> +    tcg_gen_mov_tl(lo, cpu_ctr);
> +    tcg_gen_concat_tl_i64(t8, lo, hi);
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +
> +    for (i = 0; i < sizeof(vd_list); i++) {
> +        int vd = vd_list[i];
> +        tcg_gen_concat_tl_i64(t8, cpu_gpr[VDR_PAIR_REG(vd)], cpu_gpr[vd]);
> +        tcg_gen_addi_tl(EA, EA, -8);
> +        tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +    }
> +
> +    gen_load_spr(lo, SPR_SPRG0);
> +    tcg_gen_extu_i32_tl(hi, cpu_crf[0]);
> +    tcg_gen_shli_tl(hi, hi, 28);
> +    tcg_gen_concat_tl_i64(t8, lo, hi);
> +    tcg_gen_addi_tl(EA, EA, -8);
> +    tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN);
> +
> +    tcg_gen_addi_tl(EA, cpu_gpr[a->ra], a->si * 8);
> +    tcg_gen_qemu_st_i32(cpu_gpr[a->rt], EA, ctx->mem_idx, DEF_MEMOP(MO_32) |
> +                                                          MO_ALIGN);
> +    tcg_gen_mov_tl(cpu_gpr[a->ra], EA);
> +    return true;
> +#endif
> +}
> +
> +/*
> + * STSKU: store LR at (base + 4), record the base in SPR_PPE42_EDR,
> + * optionally spill the r30/r31 and r28/r29 pairs just below the base,
> + * then store RT at (base + si * 8) and write that address back to RA.
> + *
> + * Returns false when the instruction is not handled here (64-bit
> + * builds, non-PPE cores); true once code, or an invalid-instruction
> + * exception, has been generated.
> + */
> +static bool trans_STSKU(DisasContext *ctx, arg_STSKU *a)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    TCGv frame, addr;
> +    TCGv_i64 pair;
> +    int64_t nsave;
> +    bool bad_form;
> +    int k;
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_PPE_GPR(ctx, a->rt);
> +
> +    /*
> +     * RT and RA must name the same non-zero register and the PPC_BIT(0)
> +     * bit of the displacement must be set (presumably the sign bit, i.e.
> +     * a negative displacement -- confirm against PPC_BIT's definition).
> +     */
> +    bad_form = a->rt != a->ra || a->ra == 0 || !(a->si & PPC_BIT(0));
> +    if (unlikely(bad_form)) {
> +        gen_invalid(ctx);
> +        return true;
> +    }
> +
> +    addr = tcg_temp_new();
> +    frame = tcg_temp_new();
> +    gen_addr_register(ctx, frame);
> +
> +    /* LR goes one word above the current base */
> +    tcg_gen_addi_tl(addr, frame, 4);
> +    tcg_gen_qemu_st_i32(cpu_lr, addr, ctx->mem_idx,
> +                        DEF_MEMOP(MO_32) | MO_ALIGN);
> +
> +    gen_store_spr(SPR_PPE42_EDR, frame);
> +
> +    nsave = ~(a->si);
> +
> +    /*
> +     * Spill up to two register pairs as aligned doublewords:
> +     * k == 0 -> r30/r31 at base - 8, k == 1 -> r28/r29 at base - 16.
> +     */
> +    pair = tcg_temp_new_i64();
> +    for (k = 0; k < 2; k++) {
> +        if (nsave > k) {
> +            tcg_gen_concat_i32_i64(pair, cpu_gpr[31 - 2 * k],
> +                                   cpu_gpr[30 - 2 * k]);
> +            tcg_gen_addi_tl(addr, frame, -8 * (k + 1));
> +            tcg_gen_qemu_st_i64(pair, addr, ctx->mem_idx,
> +                                DEF_MEMOP(MO_64) | MO_ALIGN);
> +        }
> +    }
> +
> +    /* Store RT at the new base and update RA with it */
> +    tcg_gen_addi_tl(addr, frame, a->si * 8);
> +    tcg_gen_qemu_st_i32(cpu_gpr[a->rt], addr, ctx->mem_idx,
> +                        DEF_MEMOP(MO_32) | MO_ALIGN);
> +    tcg_gen_mov_tl(cpu_gpr[a->ra], addr);
> +    return true;
> +#endif
> +}
> +
> +#if !defined(TARGET_PPC64)
> +/*
> + * Common body for the 64-bit "virtual doubleword" loads and stores.
> + *
> + * @rt:     first register of the pair; its partner is VDR_PAIR_REG(rt)
> + * @ra:     base register
> + * @disp:   displacement or index value handed to do_ea_calc()
> + * @update: write the effective address back to @ra
> + * @store:  true for a store, false for a load
> + *
> + * Always returns true: either the access or an invalid-instruction
> + * exception has been generated (the CHECK_* macros may return early).
> + */
> +static bool do_ppe_ldst(DisasContext *ctx, int rt, int ra, TCGv disp,
> +                        bool update, bool store)
> +{
> +    TCGv addr;
> +    TCGv_i64 dword;
> +    int rt_pair;
> +
> +    CHECK_VDR(ctx, rt);
> +    CHECK_PPE_GPR(ctx, ra);
> +    rt_pair = VDR_PAIR_REG(rt);
> +
> +    if (update) {
> +        /* A load with update may not target its own base register */
> +        bool clobbers_base = !store && (ra == rt || ra == rt_pair);
> +
> +        if (ra == 0 || clobbers_base) {
> +            gen_invalid(ctx);
> +            return true;
> +        }
> +    }
> +    gen_set_access_type(ctx, ACCESS_INT);
> +
> +    addr = do_ea_calc(ctx, ra, disp);
> +    dword = tcg_temp_new_i64();
> +    if (!store) {
> +        /* low half of the doubleword lands in the pair register */
> +        tcg_gen_qemu_ld_i64(dword, addr, ctx->mem_idx, DEF_MEMOP(MO_64));
> +        tcg_gen_extr_i64_i32(cpu_gpr[rt_pair], cpu_gpr[rt], dword);
> +    } else {
> +        /* rt supplies the high half, its pair register the low half */
> +        tcg_gen_concat_i32_i64(dword, cpu_gpr[rt_pair], cpu_gpr[rt]);
> +        tcg_gen_qemu_st_i64(dword, addr, ctx->mem_idx, DEF_MEMOP(MO_64));
> +    }
> +    if (update) {
> +        tcg_gen_mov_tl(cpu_gpr[ra], addr);
> +    }
> +    return true;
> +}
> +#endif
> +
> +/*
> + * D-form wrapper around do_ppe_ldst(): the displacement is the
> + * immediate a->si.  Returns false on 64-bit builds and on non-PPE
> + * cores so the encoding is not claimed here.
> + */
> +static bool do_ppe_ldst_D(DisasContext *ctx, arg_D *a, bool update, bool store)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    TCGv disp;
> +
> +    /* Other PowerPC CPUs assign a different meaning to the STVD encoding */
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    disp = tcg_constant_tl(a->si);
> +    return do_ppe_ldst(ctx, a->rt, a->ra, disp, update, store);
> +#endif
> +}
> +
> +/*
> + * X-form wrapper around do_ppe_ldst(): the index is taken from GPR rb
> + * and the base register is never updated.
> + *
> + * Returns false on 64-bit builds and on non-PPE cores, matching the
> + * D-form wrapper, so these encodings are only accepted for PPE CPUs.
> + */
> +static bool do_ppe_ldst_X(DisasContext *ctx, arg_X *a, bool store)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    /* LVDX/STVDX are PPE-only; reject them for every other 32-bit core */
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    CHECK_PPE_GPR(ctx, a->rb);
> +    return do_ppe_ldst(ctx, a->rt, a->ra, cpu_gpr[a->rb], false, store);
> +#endif
> +}
> +
> +/*
> + * Doubleword load/store bindings.  The D-form helper takes
> + * (update, store); the X-form helper takes (store) only and never
> + * updates the base register.
> + */
> +TRANS(LVD,   do_ppe_ldst_D, false, false)
> +TRANS(LVDU,  do_ppe_ldst_D, true,  false)
> +TRANS(STVD,  do_ppe_ldst_D, false, true)
> +TRANS(STVDU, do_ppe_ldst_D, true,  true)
> +TRANS(LVDX,  do_ppe_ldst_X, false)
> +TRANS(STVDX, do_ppe_ldst_X, true)
> +
> +
> +#if !defined(TARGET_PPC64)
> +/*
> + * Fused compare-and-branch tail shared by the PPE branch instructions.
> + *
> + * Compares @ra_val with @rb_val into CR0 via gen_op_cmp32() (@s is
> + * forwarded to it), optionally sets LR to the next instruction when
> + * @lk is set, then branches to cia + @bdx when CR0 bit @bix equals
> + * @px.  The translation block always ends here.
> + */
> +static bool do_fcb(DisasContext *ctx, TCGv ra_val, TCGv rb_val, int bix,
> +                   int32_t bdx, bool s, bool px, bool lk)
> +{
> +    const target_ulong target = ctx->cia + bdx;
> +    const uint32_t cr_bit = PPC_BIT32(28 + bix);
> +    TCGLabel *not_taken;
> +    TCGCond skip_cond;
> +
> +    /* CR0 is refreshed first; the branch decision below reads it */
> +    gen_op_cmp32(ra_val, rb_val, s, 0);
> +
> +    if (lk) {
> +        gen_setlr(ctx, ctx->base.pc_next);
> +    }
> +
> +    /*
> +     * The conditional jump goes to the not-taken path, so its condition
> +     * is the opposite of "CR0[bix] == PX".
> +     */
> +    if (px) {
> +        skip_cond = TCG_COND_TSTEQ;
> +    } else {
> +        skip_cond = TCG_COND_TSTNE;
> +    }
> +    not_taken = gen_new_label();
> +
> +    tcg_gen_brcondi_i32(skip_cond, cpu_crf[0], cr_bit, not_taken);
> +    gen_goto_tb(ctx, 0, target);
> +    gen_set_label(not_taken);
> +    gen_goto_tb(ctx, 1, ctx->base.pc_next);
> +    ctx->base.is_jmp = DISAS_NORETURN;
> +    return true;
> +}
> +#endif
> +
> +/*
> + * Compare-and-branch translator shared by CMPWBC, CMPLWBC and CMPWIBC.
> + *
> + * @s:         forwarded to do_fcb() as its compare flag
> + * @rb_is_gpr: true when a->rb names a GPR, false when it is an immediate
> + *
> + * For bix == 3, RA is additionally replaced by RA - RB and the branch
> + * tests CR0 bit 2 of the comparison between the original operands.
> + *
> + * Returns false on 64-bit builds and on non-PPE cores.
> + */
> +static bool do_cmp_branch(DisasContext *ctx, arg_FCB_bix *a, bool s,
> +                          bool rb_is_gpr)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    TCGv old_ra;
> +    TCGv rb_val;
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    CHECK_PPE_GPR(ctx, a->ra);
> +    if (rb_is_gpr) {
> +        CHECK_PPE_GPR(ctx, a->rb);
> +        rb_val = cpu_gpr[a->rb];
> +    } else {
> +        rb_val = tcg_constant_tl(a->rb);
> +    }
> +    if (a->bix == 3) {
> +        old_ra = tcg_temp_new();
> +        tcg_gen_mov_tl(old_ra, cpu_gpr[a->ra]);
> +        if (rb_is_gpr && a->rb == a->ra) {
> +            /*
> +             * RB is the same register as RA, so the subtraction below
> +             * would overwrite it before the compare; use the saved copy
> +             * so the compare still sees the original operand.
> +             */
> +            rb_val = old_ra;
> +        }
> +        tcg_gen_sub_tl(cpu_gpr[a->ra], cpu_gpr[a->ra], rb_val);
> +        return do_fcb(ctx, old_ra, rb_val, 2,
> +                      a->bdx, s, a->px, a->lk);
> +    } else {
> +        return do_fcb(ctx, cpu_gpr[a->ra], rb_val, a->bix,
> +                      a->bdx, s, a->px, a->lk);
> +    }
> +#endif
> +}
> +
> +/* Extra arguments are (s, rb_is_gpr) as taken by do_cmp_branch() */
> +TRANS(CMPWBC, do_cmp_branch, true, true)
> +TRANS(CMPLWBC, do_cmp_branch, false, true)
> +TRANS(CMPWIBC, do_cmp_branch, true, false)
> +
> +/*
> + * Bit-test / bit-clear and branch translator shared by BNBWI, BNBW,
> + * CLRBWIBC and CLRBWBC.
> + *
> + * A single-bit mask is selected either by the immediate a->rb or by
> + * the low five bits of GPR a->rb.  RA is ANDed with that mask, or with
> + * its complement when @invert is set; the result is written back to RA
> + * when @update is set and is then compared against zero by do_fcb(),
> + * which branches on CR0 bit 2.
> + *
> + * Returns false on 64-bit builds and on non-PPE cores.
> + */
> +static bool do_mask_branch(DisasContext *ctx, arg_FCB *a, bool invert,
> +                           bool update, bool rb_is_gpr)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    TCGv r;
> +    TCGv mask, shift;
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    CHECK_PPE_GPR(ctx, a->ra);
> +    if (rb_is_gpr) {
> +        CHECK_PPE_GPR(ctx, a->rb);
> +        mask = tcg_temp_new();
> +        shift = tcg_temp_new();
> +        tcg_gen_andi_tl(shift, cpu_gpr[a->rb], 0x1f);
> +        tcg_gen_shr_tl(mask, tcg_constant_tl(0x80000000), shift);
> +    } else {
> +        mask = tcg_constant_tl(PPC_BIT32(a->rb));
> +    }
> +
> +    /*
> +     * Apply the mask to ra.  In the immediate case the mask is a TCG
> +     * constant, which must never be used as a destination, so the
> +     * inversion is folded into an and-with-complement instead of
> +     * negating the mask in place.
> +     */
> +    r = tcg_temp_new();
> +    if (invert) {
> +        tcg_gen_andc_tl(r, cpu_gpr[a->ra], mask);
> +    } else {
> +        tcg_gen_and_tl(r, cpu_gpr[a->ra], mask);
> +    }
> +    if (update) {
> +        tcg_gen_mov_tl(cpu_gpr[a->ra], r);
> +    }
> +    return do_fcb(ctx, r, tcg_constant_tl(0), 2,
> +                  a->bdx, false, a->px, a->lk);
> +#endif
> +}
> +
> +/* Extra arguments are (invert, update, rb_is_gpr) as taken by do_mask_branch() */
> +TRANS(BNBWI,    do_mask_branch, false, false, false)
> +TRANS(BNBW,     do_mask_branch, false, false, true)
> +TRANS(CLRBWIBC, do_mask_branch, true,  true,  false)
> +TRANS(CLRBWBC,  do_mask_branch, true,  true,  true)
> +
> +#if !defined(TARGET_PPC64)
> +/*
> + * Set CR0 from a signed comparison of the 64-bit value @reg with zero:
> + * CRF_LT, CRF_GT or CRF_EQ, ORed with the current cpu_so value.
> + */
> +static void gen_set_Rc0_i64(DisasContext *ctx, TCGv_i64 reg)
> +{
> +    TCGv_i64 zero = tcg_constant_i64(0);
> +    TCGv_i64 flags = tcg_temp_new_i64();
> +    TCGv_i64 alt = tcg_temp_new_i64();
> +    TCGv_i32 flags32 = tcg_temp_new_i32();
> +
> +    /* Start from "equal", then override for negative or positive values */
> +    tcg_gen_movi_i64(flags, CRF_EQ);
> +
> +    tcg_gen_movi_i64(alt, CRF_LT);
> +    tcg_gen_movcond_i64(TCG_COND_LT, flags, reg, zero, alt, flags);
> +
> +    tcg_gen_movi_i64(alt, CRF_GT);
> +    tcg_gen_movcond_i64(TCG_COND_GT, flags, reg, zero, alt, flags);
> +
> +    /* Narrow to 32 bits and merge with the summary-overflow value */
> +    tcg_gen_extrl_i64_i32(flags32, flags);
> +    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> +    tcg_gen_or_i32(cpu_crf[0], cpu_crf[0], flags32);
> +}
> +#endif
> +
> +/*
> + * 64-bit shift on a register pair (SLVD when @left, SRVD otherwise).
> + *
> + * The source is the pair formed by a->rt (high word) and its pair
> + * register (low word); the result goes to a->ra and its pair register.
> + * The shift count is the low six bits of GPR a->rb; when bit 0x40 of
> + * rb is set the source is cleared first, so the result is zero.
> + * CR0 is updated from the 64-bit result when a->rc is set.
> + *
> + * Returns false on 64-bit builds and on non-PPE cores.
> + */
> +static bool do_shift64(DisasContext *ctx, arg_X_rc *a, bool left)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    TCGv_i64 val, aux;
> +    int src_lo, dst_lo;
> +
> +    /* The opcode overlaps with CNTTZDM, so only accept it on PPE */
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_VDR(ctx, a->rt);
> +    CHECK_VDR(ctx, a->ra);
> +    CHECK_PPE_GPR(ctx, a->rb);
> +    src_lo = VDR_PAIR_REG(a->rt);
> +    dst_lo = VDR_PAIR_REG(a->ra);
> +    val = tcg_temp_new_i64();
> +
> +    /*
> +     * Move bit 0x40 of rb up to bit 63 and smear it down with an
> +     * arithmetic shift: aux is all ones when that bit is set, else 0.
> +     */
> +    aux = tcg_temp_new_i64();
> +    tcg_gen_extu_tl_i64(aux, cpu_gpr[a->rb]);
> +    tcg_gen_shli_i64(aux, aux, 57);
> +    tcg_gen_sari_i64(aux, aux, 63);
> +
> +    /* Build the 64-bit source and clear it where aux is set */
> +    tcg_gen_concat_tl_i64(val, cpu_gpr[src_lo], cpu_gpr[a->rt]);
> +    tcg_gen_andc_i64(val, val, aux);
> +
> +    /* Shift by the low six bits of rb */
> +    tcg_gen_extu_tl_i64(aux, cpu_gpr[a->rb]);
> +    tcg_gen_andi_i64(aux, aux, 63);
> +    if (!left) {
> +        tcg_gen_shr_i64(val, val, aux);
> +    } else {
> +        tcg_gen_shl_i64(val, val, aux);
> +    }
> +
> +    /* Write the result back as two 32-bit halves */
> +    tcg_gen_extr_i64_tl(cpu_gpr[dst_lo], cpu_gpr[a->ra], val);
> +
> +    if (unlikely(a->rc != 0)) {
> +        gen_set_Rc0_i64(ctx, val);
> +    }
> +    return true;
> +#endif
> +}
> +
> +/* The extra argument is do_shift64()'s "left" flag */
> +TRANS(SRVD, do_shift64, false)
> +TRANS(SLVD, do_shift64, true)
> +
> +/*
> + * DCBQ: after validating the three register operands, RT is simply
> + * set to zero.  Returns false on 64-bit builds and on non-PPE cores.
> + */
> +static bool trans_DCBQ(DisasContext *ctx, arg_DCBQ *a)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    TCGv dst;
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +
> +    CHECK_PPE_GPR(ctx, a->rt);
> +    CHECK_PPE_GPR(ctx, a->ra);
> +    CHECK_PPE_GPR(ctx, a->rb);
> +
> +    /* There is no cache to query, so the answer is always 0 */
> +    dst = cpu_gpr[a->rt];
> +    tcg_gen_movi_tl(dst, 0);
> +    return true;
> +#endif
> +}
> +
> +/*
> + * RLDIMI on 32-bit register pairs: rotate the 64-bit value held in the
> + * a->rs pair left by a->sh and insert it into the a->ra pair under the
> + * mask running from bit a->mb to bit (63 - a->sh).  CR0 is updated
> + * from the 64-bit result when a->rc is set.
> + *
> + * Returns false on 64-bit builds and on non-PPE cores.
> + */
> +static bool trans_RLDIMI(DisasContext *ctx, arg_RLDIMI *a)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    TCGv_i64 t_rs, t_ra;
> +    int ra_lo, rs_lo;
> +    uint32_t sh = a->sh;
> +    uint32_t mb = a->mb;
> +    /* the mask end is fixed by the shift amount */
> +    uint32_t me = 63 - sh;
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_VDR(ctx, a->rs);
> +    CHECK_VDR(ctx, a->ra);
> +
> +    rs_lo = VDR_PAIR_REG(a->rs);
> +    ra_lo = VDR_PAIR_REG(a->ra);
> +
> +    t_rs = tcg_temp_new_i64();
> +    t_ra = tcg_temp_new_i64();
> +
> +    /* assemble both 64-bit operands; the named register is the high word */
> +    tcg_gen_concat_tl_i64(t_rs, cpu_gpr[rs_lo], cpu_gpr[a->rs]);
> +    tcg_gen_concat_tl_i64(t_ra, cpu_gpr[ra_lo], cpu_gpr[a->ra]);
> +
> +    if (mb <= me) {
> +        /* contiguous mask: a single deposit of (me - mb + 1) bits at sh */
> +        tcg_gen_deposit_i64(t_ra, t_ra, t_rs, sh, me - mb + 1);
> +    } else {
> +        /* mb > me: rotate, then merge under an explicit mask */
> +        uint64_t mask = mask_u64(mb, me);
> +        TCGv_i64 t1 = tcg_temp_new_i64();
> +
> +        tcg_gen_rotli_i64(t1, t_rs, sh);
> +        tcg_gen_andi_i64(t1, t1, mask);
> +        tcg_gen_andi_i64(t_ra, t_ra, ~mask);
> +        tcg_gen_or_i64(t_ra, t_ra, t1);
> +    }
> +
> +    /* split the result back into the destination pair */
> +    tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t_ra);
> +
> +    if (unlikely(a->rc != 0)) {
> +        gen_set_Rc0_i64(ctx, t_ra);
> +    }
> +    return true;
> +#endif
> +}
> +
> +
> +/*
> + * Rotate-left-then-mask on 32-bit register pairs: rotate the 64-bit
> + * value held in the a->rs pair left by @sh, keep bits @mb..@me, and
> + * write the result to the a->ra pair.  CR0 is updated from the 64-bit
> + * result when a->rc is set.
> + *
> + * Returns false on 64-bit builds and on non-PPE cores.
> + */
> +static bool gen_rldinm_i64(DisasContext *ctx, arg_MD *a, int mb, int me, int sh)
> +{
> +#if defined(TARGET_PPC64)
> +    return false;
> +#else
> +    int len = me - mb + 1;
> +    /* right-shift amount equivalent to rotating left by sh */
> +    int rsh = (64 - sh) & 63;
> +    int ra_lo, rs_lo;
> +    TCGv_i64 t8;
> +
> +    if (unlikely(!is_ppe(ctx))) {
> +        return false;
> +    }
> +    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
> +    CHECK_VDR(ctx, a->rs);
> +    CHECK_VDR(ctx, a->ra);
> +
> +    rs_lo = VDR_PAIR_REG(a->rs);
> +    ra_lo = VDR_PAIR_REG(a->ra);
> +    t8 = tcg_temp_new_i64();
> +    tcg_gen_concat_tl_i64(t8, cpu_gpr[rs_lo], cpu_gpr[a->rs]);
> +    if (sh != 0 && len > 0 && me == (63 - sh)) {
> +        /* mask ends exactly where the shifted-in zeros begin */
> +        tcg_gen_deposit_z_i64(t8, t8, sh, len);
> +    } else if (me == 63 && rsh + len <= 64) {
> +        /* mask reaches the last bit: a plain bit-field extract */
> +        tcg_gen_extract_i64(t8, t8, rsh, len);
> +    } else {
> +        /* general case: rotate, then apply the mask */
> +        tcg_gen_rotli_i64(t8, t8, sh);
> +        tcg_gen_andi_i64(t8, t8, mask_u64(mb, me));
> +    }
> +    tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t8);
> +    if (unlikely(a->rc != 0)) {
> +        gen_set_Rc0_i64(ctx, t8);
> +    }
> +    return true;
> +#endif
> +}
> +
> +/*
> + * Extra arguments are (mb, me, sh): RLDICL masks from a->mb to bit 63,
> + * RLDICR masks from bit 0 to a->mb.
> + */
> +TRANS(RLDICL, gen_rldinm_i64, a->mb, 63, a->sh)
> +TRANS(RLDICR, gen_rldinm_i64, 0, a->mb, a->sh)
> +