The 'trans_vupkpx' function implements emulation of both the vupkhpx and
vupklpx instructions, while its argument 'high' determines which instruction
is processed. The instructions are implemented in two 'for' loops. The outer
'for' loop repeats the unpacking two times, since both doubleword elements of
the destination register are formed the same way. It also stores the result of
every iteration in a temporary variable, which is later transferred to the
destination register. The inner 'for' loop unpacks the pixels and builds the
resulting doubleword one 32-bit word at a time.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
---
target/ppc/helper.h | 2 -
target/ppc/int_helper.c | 20 --------
target/ppc/translate/vmx-impl.inc.c | 91 ++++++++++++++++++++++++++++++++++++-
3 files changed, 89 insertions(+), 24 deletions(-)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index b489b38..fd06b56 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -233,8 +233,6 @@ DEF_HELPER_2(vextsh2d, void, avr, avr)
DEF_HELPER_2(vextsw2d, void, avr, avr)
DEF_HELPER_2(vnegw, void, avr, avr)
DEF_HELPER_2(vnegd, void, avr, avr)
-DEF_HELPER_2(vupkhpx, void, avr, avr)
-DEF_HELPER_2(vupklpx, void, avr, avr)
DEF_HELPER_2(vupkhsb, void, avr, avr)
DEF_HELPER_2(vupkhsh, void, avr, avr)
DEF_HELPER_2(vupkhsw, void, avr, avr)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index f910c11..9ee667d 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1737,26 +1737,6 @@ void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
#define UPKHI 0
#define UPKLO 1
#endif
-#define VUPKPX(suffix, hi) \
- void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
- { \
- int i; \
- ppc_avr_t result; \
- \
- for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
- uint16_t e = b->u16[hi ? i : i + 4]; \
- uint8_t a = (e >> 15) ? 0xff : 0; \
- uint8_t r = (e >> 10) & 0x1f; \
- uint8_t g = (e >> 5) & 0x1f; \
- uint8_t b = e & 0x1f; \
- \
- result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
- } \
- *r = result; \
- }
-VUPKPX(lpx, UPKLO)
-VUPKPX(hpx, UPKHI)
-#undef VUPKPX
#define VUPK(suffix, unpacked, packee, hi) \
void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index 3550ffa..09d80d6 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -1031,6 +1031,95 @@ static void trans_vclzd(DisasContext *ctx)
tcg_temp_free_i64(avr);
}
+/*
+ * vupkhpx VRT,VRB - Vector Unpack High Pixel
+ * vupklpx VRT,VRB - Vector Unpack Low Pixel
+ *
+ * Unpacks 4 pixels coded in 1-5-5-5 pattern from high/low doubleword element
+ * of source register into contiguous array of bits in the destination register.
+ * Argument 'high' determines if high or low doubleword element of source
+ * register is processed.
+ */
+static void trans_vupkpx(DisasContext *ctx, int high)
+{
+ int VT = rD(ctx->opcode);
+ int VB = rB(ctx->opcode);
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ TCGv_i64 avr = tcg_temp_new_i64();
+ TCGv_i64 result = tcg_temp_new_i64();
+ TCGv_i64 result1 = tcg_temp_new_i64();
+ TCGv_i64 result2 = tcg_temp_new_i64();
+ int64_t mask1 = 0x1fULL;
+ int64_t mask2 = 0x1fULL << 8;
+ int64_t mask3 = 0x1fULL << 16;
+ int64_t mask4 = 0xffULL << 56;
+ int i, j;
+
+ if (high == 1) {
+ get_avr64(avr, VB, true);
+ } else {
+ get_avr64(avr, VB, false);
+ }
+
+ tcg_gen_movi_i64(result, 0x0ULL);
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 2; j++) {
+ tcg_gen_shli_i64(tmp, avr, (j * 16));
+ tcg_gen_andi_i64(tmp, tmp, mask1 << (j * 32));
+ tcg_gen_or_i64(result, result, tmp);
+
+ tcg_gen_shli_i64(tmp, avr, 3 + (j * 16));
+ tcg_gen_andi_i64(tmp, tmp, mask2 << (j * 32));
+ tcg_gen_or_i64(result, result, tmp);
+
+ tcg_gen_shli_i64(tmp, avr, 6 + (j * 16));
+ tcg_gen_andi_i64(tmp, tmp, mask3 << (j * 32));
+ tcg_gen_or_i64(result, result, tmp);
+
+ tcg_gen_shri_i64(tmp, avr, (j * 16));
+ tcg_gen_ext16s_i64(tmp, tmp);
+ tcg_gen_andi_i64(tmp, tmp, mask4);
+ tcg_gen_shri_i64(tmp, tmp, (32 * (1 - j)));
+ tcg_gen_or_i64(result, result, tmp);
+ }
+ if (i == 0) {
+ tcg_gen_mov_i64(result1, result);
+ tcg_gen_movi_i64(result, 0x0ULL);
+ tcg_gen_shri_i64(avr, avr, 32);
+ }
+ if (i == 1) {
+ tcg_gen_mov_i64(result2, result);
+ }
+ }
+
+ set_avr64(VT, result1, false);
+ set_avr64(VT, result2, true);
+
+ tcg_temp_free_i64(tmp);
+ tcg_temp_free_i64(avr);
+ tcg_temp_free_i64(result);
+ tcg_temp_free_i64(result1);
+ tcg_temp_free_i64(result2);
+}
+
+static void gen_vupkhpx(DisasContext *ctx)
+{
+ if (unlikely(!ctx->altivec_enabled)) {
+ gen_exception(ctx, POWERPC_EXCP_VPU);
+ return;
+ }
+ trans_vupkpx(ctx, 1);
+}
+
+static void gen_vupklpx(DisasContext *ctx)
+{
+ if (unlikely(!ctx->altivec_enabled)) {
+ gen_exception(ctx, POWERPC_EXCP_VPU);
+ return;
+ }
+ trans_vupkpx(ctx, 0);
+}
+
GEN_VXFORM(vmuloub, 4, 0);
GEN_VXFORM(vmulouh, 4, 1);
GEN_VXFORM(vmulouw, 4, 2);
@@ -1348,8 +1437,6 @@ GEN_VXFORM_NOA(vupkhsw, 7, 25);
GEN_VXFORM_NOA(vupklsb, 7, 10);
GEN_VXFORM_NOA(vupklsh, 7, 11);
GEN_VXFORM_NOA(vupklsw, 7, 27);
-GEN_VXFORM_NOA(vupkhpx, 7, 13);
-GEN_VXFORM_NOA(vupklpx, 7, 15);
GEN_VXFORM_NOA_ENV(vrefp, 5, 4);
GEN_VXFORM_NOA_ENV(vrsqrtefp, 5, 5);
GEN_VXFORM_NOA_ENV(vexptefp, 5, 6);
--
2.7.4
On Thursday, October 17, 2019, Stefan Brankovic <stefan.brankovic@rt-rk.com>
wrote:
> 'trans_vupkpx' function implements both vupkhpx and vupklpx instructions
> with
implements both -> implements emulation of both
with -> , while its
> argument 'high' determine which
determine -> determines
> instruction is processed. Instructions are
> implemented in two 'for' loops. Outer 'for' loop repeats unpacking two
> times,
> since both doubleword elements of destination register are formed the same
> way.
> It also stores result of every iteration in temporary register, that is
> later
> transferred to destination register. Inner 'for' loop does unpacking of
> pixels
> and forms resulting doubleword 32 by 32 bits.
temporary register -> a temporary variable
destination register -> the destination register
'forms resulting doubleword 32 by 32 bits' is unclear, reword.
> Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
> ---
> target/ppc/helper.h | 2 -
> target/ppc/int_helper.c | 20 --------
> target/ppc/translate/vmx-impl.inc.c | 91 ++++++++++++++++++++++++++++++
> ++++++-
> 3 files changed, 89 insertions(+), 24 deletions(-)
>
> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> index b489b38..fd06b56 100644
> --- a/target/ppc/helper.h
> +++ b/target/ppc/helper.h
> @@ -233,8 +233,6 @@ DEF_HELPER_2(vextsh2d, void, avr, avr)
> DEF_HELPER_2(vextsw2d, void, avr, avr)
> DEF_HELPER_2(vnegw, void, avr, avr)
> DEF_HELPER_2(vnegd, void, avr, avr)
> -DEF_HELPER_2(vupkhpx, void, avr, avr)
> -DEF_HELPER_2(vupklpx, void, avr, avr)
> DEF_HELPER_2(vupkhsb, void, avr, avr)
> DEF_HELPER_2(vupkhsh, void, avr, avr)
> DEF_HELPER_2(vupkhsw, void, avr, avr)
> diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
> index f910c11..9ee667d 100644
> --- a/target/ppc/int_helper.c
> +++ b/target/ppc/int_helper.c
> @@ -1737,26 +1737,6 @@ void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t
> *r, ppc_avr_t *a, ppc_avr_t *b)
> #define UPKHI 0
> #define UPKLO 1
> #endif
> -#define VUPKPX(suffix, hi) \
> - void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
> - { \
> - int i; \
> - ppc_avr_t result; \
> - \
> - for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
> - uint16_t e = b->u16[hi ? i : i + 4]; \
> - uint8_t a = (e >> 15) ? 0xff : 0; \
> - uint8_t r = (e >> 10) & 0x1f; \
> - uint8_t g = (e >> 5) & 0x1f; \
> - uint8_t b = e & 0x1f; \
> - \
> - result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
> - } \
> - *r = result; \
> - }
> -VUPKPX(lpx, UPKLO)
> -VUPKPX(hpx, UPKHI)
> -#undef VUPKPX
>
> #define VUPK(suffix, unpacked, packee, hi) \
> void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
> diff --git a/target/ppc/translate/vmx-impl.inc.c
> b/target/ppc/translate/vmx-impl.inc.c
> index 3550ffa..09d80d6 100644
> --- a/target/ppc/translate/vmx-impl.inc.c
> +++ b/target/ppc/translate/vmx-impl.inc.c
> @@ -1031,6 +1031,95 @@ static void trans_vclzd(DisasContext *ctx)
> tcg_temp_free_i64(avr);
> }
>
> +/*
> + * vupkhpx VRT,VRB - Vector Unpack High Pixel
> + * vupklpx VRT,VRB - Vector Unpack Low Pixel
> + *
> + * Unpacks 4 pixels coded in 1-5-5-5 pattern from high/low doubleword
> element
> + * of source register into contigous array of bits in the destination
> register.
> + * Argument 'high' determines if high or low doubleword element of source
> + * register is processed.
> + */
> +static void trans_vupkpx(DisasContext *ctx, int high)
> +{
> + int VT = rD(ctx->opcode);
> + int VB = rB(ctx->opcode);
> + TCGv_i64 tmp = tcg_temp_new_i64();
> + TCGv_i64 avr = tcg_temp_new_i64();
> + TCGv_i64 result = tcg_temp_new_i64();
> + TCGv_i64 result1 = tcg_temp_new_i64();
> + TCGv_i64 result2 = tcg_temp_new_i64();
> + int64_t mask1 = 0x1fULL;
> + int64_t mask2 = 0x1fULL << 8;
> + int64_t mask3 = 0x1fULL << 16;
> + int64_t mask4 = 0xffULL << 56;
> + int i, j;
> +
> + if (high == 1) {
> + get_avr64(avr, VB, true);
before this line, insert comment: /* vupkhpx */
> + } else {
> + get_avr64(avr, VB, false);
before this line, insert comment: /* vupklpx */
> + }
> +
> + tcg_gen_movi_i64(result, 0x0ULL);
> + for (i = 0; i < 2; i++) {
> + for (j = 0; j < 2; j++) {
> + tcg_gen_shli_i64(tmp, avr, (j * 16));
> + tcg_gen_andi_i64(tmp, tmp, mask1 << (j * 32));
> + tcg_gen_or_i64(result, result, tmp);
> +
> + tcg_gen_shli_i64(tmp, avr, 3 + (j * 16));
> + tcg_gen_andi_i64(tmp, tmp, mask2 << (j * 32));
> + tcg_gen_or_i64(result, result, tmp);
> +
> + tcg_gen_shli_i64(tmp, avr, 6 + (j * 16));
> + tcg_gen_andi_i64(tmp, tmp, mask3 << (j * 32));
> + tcg_gen_or_i64(result, result, tmp);
> +
> + tcg_gen_shri_i64(tmp, avr, (j * 16));
> + tcg_gen_ext16s_i64(tmp, tmp);
> + tcg_gen_andi_i64(tmp, tmp, mask4);
> + tcg_gen_shri_i64(tmp, tmp, (32 * (1 - j)));
> + tcg_gen_or_i64(result, result, tmp);
> + }
> + if (i == 0) {
> + tcg_gen_mov_i64(result1, result);
> + tcg_gen_movi_i64(result, 0x0ULL);
> + tcg_gen_shri_i64(avr, avr, 32);
> + }
> + if (i == 1) {
> + tcg_gen_mov_i64(result2, result);
> + }
> + }
> +
> + set_avr64(VT, result1, false);
> + set_avr64(VT, result2, true);
> +
> + tcg_temp_free_i64(tmp);
> + tcg_temp_free_i64(avr);
> + tcg_temp_free_i64(result);
> + tcg_temp_free_i64(result1);
> + tcg_temp_free_i64(result2);
> +}
> +
> +static void gen_vupkhpx(DisasContext *ctx)
> +{
> + if (unlikely(!ctx->altivec_enabled)) {
> + gen_exception(ctx, POWERPC_EXCP_VPU);
> + return;
> + }
> + trans_vupkpx(ctx, 1);
> +}
> +
> +static void gen_vupklpx(DisasContext *ctx)
> +{
> + if (unlikely(!ctx->altivec_enabled)) {
> + gen_exception(ctx, POWERPC_EXCP_VPU);
> + return;
> + }
> + trans_vupkpx(ctx, 0);
> +}
> +
> GEN_VXFORM(vmuloub, 4, 0);
> GEN_VXFORM(vmulouh, 4, 1);
> GEN_VXFORM(vmulouw, 4, 2);
> @@ -1348,8 +1437,6 @@ GEN_VXFORM_NOA(vupkhsw, 7, 25);
> GEN_VXFORM_NOA(vupklsb, 7, 10);
> GEN_VXFORM_NOA(vupklsh, 7, 11);
> GEN_VXFORM_NOA(vupklsw, 7, 27);
> -GEN_VXFORM_NOA(vupkhpx, 7, 13);
> -GEN_VXFORM_NOA(vupklpx, 7, 15);
> GEN_VXFORM_NOA_ENV(vrefp, 5, 4);
> GEN_VXFORM_NOA_ENV(vrsqrtefp, 5, 5);
> GEN_VXFORM_NOA_ENV(vexptefp, 5, 6);
> --
> 2.7.4
>
>
>
On Thursday, October 17, 2019, Stefan Brankovic <stefan.brankovic@rt-rk.com>
wrote:
> 'trans_vupkpx' function implements both vupkhpx and vupklpx instructions
> with
> argument 'high' determine which instruction is processed. Instructions are
> implemented in two 'for' loops. Outer 'for' loop repeats unpacking two
> times,
> since both doubleword elements of destination register are formed the same
> way.
> It also stores result of every iteration in temporary register, that is
> later
> transferred to destination register. Inner 'for' loop does unpacking of
> pixels
> and forms resulting doubleword 32 by 32 bits.
>
> Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
> ---
> target/ppc/helper.h | 2 -
> target/ppc/int_helper.c | 20 --------
> target/ppc/translate/vmx-impl.inc.c | 91 ++++++++++++++++++++++++++++++
> ++++++-
> 3 files changed, 89 insertions(+), 24 deletions(-)
>
> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> index b489b38..fd06b56 100644
> --- a/target/ppc/helper.h
> +++ b/target/ppc/helper.h
> @@ -233,8 +233,6 @@ DEF_HELPER_2(vextsh2d, void, avr, avr)
> DEF_HELPER_2(vextsw2d, void, avr, avr)
> DEF_HELPER_2(vnegw, void, avr, avr)
> DEF_HELPER_2(vnegd, void, avr, avr)
> -DEF_HELPER_2(vupkhpx, void, avr, avr)
> -DEF_HELPER_2(vupklpx, void, avr, avr)
> DEF_HELPER_2(vupkhsb, void, avr, avr)
> DEF_HELPER_2(vupkhsh, void, avr, avr)
> DEF_HELPER_2(vupkhsw, void, avr, avr)
> diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
> index f910c11..9ee667d 100644
> --- a/target/ppc/int_helper.c
> +++ b/target/ppc/int_helper.c
> @@ -1737,26 +1737,6 @@ void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t
> *r, ppc_avr_t *a, ppc_avr_t *b)
> #define UPKHI 0
> #define UPKLO 1
> #endif
> -#define VUPKPX(suffix, hi) \
> - void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
> - { \
> - int i; \
> - ppc_avr_t result; \
> - \
> - for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
> - uint16_t e = b->u16[hi ? i : i + 4]; \
> - uint8_t a = (e >> 15) ? 0xff : 0; \
> - uint8_t r = (e >> 10) & 0x1f; \
> - uint8_t g = (e >> 5) & 0x1f; \
> - uint8_t b = e & 0x1f; \
> - \
> - result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
> - } \
> - *r = result; \
> - }
> -VUPKPX(lpx, UPKLO)
> -VUPKPX(hpx, UPKHI)
> -#undef VUPKPX
>
> #define VUPK(suffix, unpacked, packee, hi) \
> void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
> diff --git a/target/ppc/translate/vmx-impl.inc.c
> b/target/ppc/translate/vmx-impl.inc.c
> index 3550ffa..09d80d6 100644
> --- a/target/ppc/translate/vmx-impl.inc.c
> +++ b/target/ppc/translate/vmx-impl.inc.c
> @@ -1031,6 +1031,95 @@ static void trans_vclzd(DisasContext *ctx)
> tcg_temp_free_i64(avr);
> }
>
> +/*
> + * vupkhpx VRT,VRB - Vector Unpack High Pixel
> + * vupklpx VRT,VRB - Vector Unpack Low Pixel
> + *
> + * Unpacks 4 pixels coded in 1-5-5-5 pattern from high/low doubleword
> element
> + * of source register into contigous array of bits in the destination
> register.
> + * Argument 'high' determines if high or low doubleword element of source
> + * register is processed.
> + */
> +static void trans_vupkpx(DisasContext *ctx, int high)
> +{
> + int VT = rD(ctx->opcode);
> + int VB = rB(ctx->opcode);
> + TCGv_i64 tmp = tcg_temp_new_i64();
> + TCGv_i64 avr = tcg_temp_new_i64();
> + TCGv_i64 result = tcg_temp_new_i64();
> + TCGv_i64 result1 = tcg_temp_new_i64();
> + TCGv_i64 result2 = tcg_temp_new_i64();
> + int64_t mask1 = 0x1fULL;
> + int64_t mask2 = 0x1fULL << 8;
> + int64_t mask3 = 0x1fULL << 16;
> + int64_t mask4 = 0xffULL << 56;
> + int i, j;
> +
> + if (high == 1) {
> + get_avr64(avr, VB, true);
> + } else {
> + get_avr64(avr, VB, false);
> + }
> +
> + tcg_gen_movi_i64(result, 0x0ULL);
> + for (i = 0; i < 2; i++) {
> + for (j = 0; j < 2; j++) {
> + tcg_gen_shli_i64(tmp, avr, (j * 16));
> + tcg_gen_andi_i64(tmp, tmp, mask1 << (j * 32));
> + tcg_gen_or_i64(result, result, tmp);
> +
> + tcg_gen_shli_i64(tmp, avr, 3 + (j * 16));
> + tcg_gen_andi_i64(tmp, tmp, mask2 << (j * 32));
> + tcg_gen_or_i64(result, result, tmp);
> +
> + tcg_gen_shli_i64(tmp, avr, 6 + (j * 16));
> + tcg_gen_andi_i64(tmp, tmp, mask3 << (j * 32));
> + tcg_gen_or_i64(result, result, tmp);
> +
> + tcg_gen_shri_i64(tmp, avr, (j * 16));
> + tcg_gen_ext16s_i64(tmp, tmp);
> + tcg_gen_andi_i64(tmp, tmp, mask4);
> + tcg_gen_shri_i64(tmp, tmp, (32 * (1 - j)));
> + tcg_gen_or_i64(result, result, tmp);
> + }
> + if (i == 0) {
> + tcg_gen_mov_i64(result1, result);
> + tcg_gen_movi_i64(result, 0x0ULL);
> + tcg_gen_shri_i64(avr, avr, 32);
> + }
> + if (i == 1) {
> + tcg_gen_mov_i64(result2, result);
> + }
> + }
> +
> + set_avr64(VT, result1, false);
> + set_avr64(VT, result2, true);
> +
> + tcg_temp_free_i64(tmp);
> + tcg_temp_free_i64(avr);
> + tcg_temp_free_i64(result);
> + tcg_temp_free_i64(result1);
> + tcg_temp_free_i64(result2);
> +}
> +
> +static void gen_vupkhpx(DisasContext *ctx)
> +{
> + if (unlikely(!ctx->altivec_enabled)) {
> + gen_exception(ctx, POWERPC_EXCP_VPU);
> + return;
> + }
> + trans_vupkpx(ctx, 1);
> +}
> +
> +static void gen_vupklpx(DisasContext *ctx)
> +{
> + if (unlikely(!ctx->altivec_enabled)) {
> + gen_exception(ctx, POWERPC_EXCP_VPU);
> + return;
> + }
> + trans_vupkpx(ctx, 0);
> +}
> +
> GEN_VXFORM(vmuloub, 4, 0);
> GEN_VXFORM(vmulouh, 4, 1);
> GEN_VXFORM(vmulouw, 4, 2);
> @@ -1348,8 +1437,6 @@ GEN_VXFORM_NOA(vupkhsw, 7, 25);
> GEN_VXFORM_NOA(vupklsb, 7, 10);
> GEN_VXFORM_NOA(vupklsh, 7, 11);
> GEN_VXFORM_NOA(vupklsw, 7, 27);
> -GEN_VXFORM_NOA(vupkhpx, 7, 13);
> -GEN_VXFORM_NOA(vupklpx, 7, 15);
There is an inconsistency here compared to your previous patches. There should
be lines:
GEN_VXFORM_TRANS(vupkhpx, 7, 13);
GEN_VXFORM_TRANS(vupklpx, 7, 15);
and there should be two new functions trans_vupkhpx() and trans_vupklpx()
defined as thin wrappers around trans_vupkpx(). gen_vupkhpx() and
gen_vupklpx() should be deleted.
> GEN_VXFORM_NOA_ENV(vrefp, 5, 4);
> GEN_VXFORM_NOA_ENV(vrsqrtefp, 5, 5);
> GEN_VXFORM_NOA_ENV(vexptefp, 5, 6);
> --
> 2.7.4
>
>
>
On Thursday, October 17, 2019, Stefan Brankovic <stefan.brankovic@rt-rk.com>
wrote:
> 'trans_vupkpx' function implements both vupkhpx and vupklpx instructions
> with
> argument 'high' determine which instruction is processed. Instructions are
> implemented in two 'for' loops. Outer 'for' loop repeats unpacking two
> times,
> since both doubleword elements of destination register are formed the same
> way.
> It also stores result of every iteration in temporary register, that is
> later
> transferred to destination register. Inner 'for' loop does unpacking of
> pixels
> and forms resulting doubleword 32 by 32 bits.
>
> Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
> ---
> target/ppc/helper.h | 2 -
> target/ppc/int_helper.c | 20 --------
> target/ppc/translate/vmx-impl.inc.c | 91 ++++++++++++++++++++++++++++++
> ++++++-
> 3 files changed, 89 insertions(+), 24 deletions(-)
>
> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> index b489b38..fd06b56 100644
> --- a/target/ppc/helper.h
> +++ b/target/ppc/helper.h
> @@ -233,8 +233,6 @@ DEF_HELPER_2(vextsh2d, void, avr, avr)
> DEF_HELPER_2(vextsw2d, void, avr, avr)
> DEF_HELPER_2(vnegw, void, avr, avr)
> DEF_HELPER_2(vnegd, void, avr, avr)
> -DEF_HELPER_2(vupkhpx, void, avr, avr)
> -DEF_HELPER_2(vupklpx, void, avr, avr)
> DEF_HELPER_2(vupkhsb, void, avr, avr)
> DEF_HELPER_2(vupkhsh, void, avr, avr)
> DEF_HELPER_2(vupkhsw, void, avr, avr)
> diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
> index f910c11..9ee667d 100644
> --- a/target/ppc/int_helper.c
> +++ b/target/ppc/int_helper.c
> @@ -1737,26 +1737,6 @@ void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t
> *r, ppc_avr_t *a, ppc_avr_t *b)
> #define UPKHI 0
> #define UPKLO 1
> #endif
> -#define VUPKPX(suffix, hi) \
> - void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
> - { \
> - int i; \
> - ppc_avr_t result; \
> - \
> - for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
> - uint16_t e = b->u16[hi ? i : i + 4]; \
> - uint8_t a = (e >> 15) ? 0xff : 0; \
> - uint8_t r = (e >> 10) & 0x1f; \
> - uint8_t g = (e >> 5) & 0x1f; \
> - uint8_t b = e & 0x1f; \
> - \
> - result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
> - } \
> - *r = result; \
> - }
> -VUPKPX(lpx, UPKLO)
> -VUPKPX(hpx, UPKHI)
> -#undef VUPKPX
>
> #define VUPK(suffix, unpacked, packee, hi) \
> void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
> diff --git a/target/ppc/translate/vmx-impl.inc.c
> b/target/ppc/translate/vmx-impl.inc.c
> index 3550ffa..09d80d6 100644
> --- a/target/ppc/translate/vmx-impl.inc.c
> +++ b/target/ppc/translate/vmx-impl.inc.c
> @@ -1031,6 +1031,95 @@ static void trans_vclzd(DisasContext *ctx)
> tcg_temp_free_i64(avr);
> }
>
> +/*
> + * vupkhpx VRT,VRB - Vector Unpack High Pixel
> + * vupklpx VRT,VRB - Vector Unpack Low Pixel
> + *
> + * Unpacks 4 pixels coded in 1-5-5-5 pattern from high/low doubleword
> element
> + * of source register into contigous array of bits in the destination
> register.
> + * Argument 'high' determines if high or low doubleword element of source
> + * register is processed.
> + */
> +static void trans_vupkpx(DisasContext *ctx, int high)
The last argument should be boolean.
> +{
> + int VT = rD(ctx->opcode);
> + int VB = rB(ctx->opcode);
> + TCGv_i64 tmp = tcg_temp_new_i64();
> + TCGv_i64 avr = tcg_temp_new_i64();
> + TCGv_i64 result = tcg_temp_new_i64();
> + TCGv_i64 result1 = tcg_temp_new_i64();
> + TCGv_i64 result2 = tcg_temp_new_i64();
> + int64_t mask1 = 0x1fULL;
> + int64_t mask2 = 0x1fULL << 8;
> + int64_t mask3 = 0x1fULL << 16;
> + int64_t mask4 = 0xffULL << 56;
> + int i, j;
> +
> + if (high == 1) {
> + get_avr64(avr, VB, true);
> + } else {
> + get_avr64(avr, VB, false);
> + }
> +
> + tcg_gen_movi_i64(result, 0x0ULL);
> + for (i = 0; i < 2; i++) {
> + for (j = 0; j < 2; j++) {
> + tcg_gen_shli_i64(tmp, avr, (j * 16));
> + tcg_gen_andi_i64(tmp, tmp, mask1 << (j * 32));
> + tcg_gen_or_i64(result, result, tmp);
> +
> + tcg_gen_shli_i64(tmp, avr, 3 + (j * 16));
> + tcg_gen_andi_i64(tmp, tmp, mask2 << (j * 32));
> + tcg_gen_or_i64(result, result, tmp);
> +
> + tcg_gen_shli_i64(tmp, avr, 6 + (j * 16));
> + tcg_gen_andi_i64(tmp, tmp, mask3 << (j * 32));
> + tcg_gen_or_i64(result, result, tmp);
> +
> + tcg_gen_shri_i64(tmp, avr, (j * 16));
> + tcg_gen_ext16s_i64(tmp, tmp);
> + tcg_gen_andi_i64(tmp, tmp, mask4);
> + tcg_gen_shri_i64(tmp, tmp, (32 * (1 - j)));
> + tcg_gen_or_i64(result, result, tmp);
> + }
> + if (i == 0) {
> + tcg_gen_mov_i64(result1, result);
> + tcg_gen_movi_i64(result, 0x0ULL);
> + tcg_gen_shri_i64(avr, avr, 32);
> + }
> + if (i == 1) {
> + tcg_gen_mov_i64(result2, result);
> + }
> + }
> +
> + set_avr64(VT, result1, false);
> + set_avr64(VT, result2, true);
> +
> + tcg_temp_free_i64(tmp);
> + tcg_temp_free_i64(avr);
> + tcg_temp_free_i64(result);
> + tcg_temp_free_i64(result1);
> + tcg_temp_free_i64(result2);
> +}
> +
> +static void gen_vupkhpx(DisasContext *ctx)
> +{
> + if (unlikely(!ctx->altivec_enabled)) {
> + gen_exception(ctx, POWERPC_EXCP_VPU);
> + return;
> + }
> + trans_vupkpx(ctx, 1);
> +}
> +
> +static void gen_vupklpx(DisasContext *ctx)
> +{
> + if (unlikely(!ctx->altivec_enabled)) {
> + gen_exception(ctx, POWERPC_EXCP_VPU);
> + return;
> + }
> + trans_vupkpx(ctx, 0);
> +}
> +
> GEN_VXFORM(vmuloub, 4, 0);
> GEN_VXFORM(vmulouh, 4, 1);
> GEN_VXFORM(vmulouw, 4, 2);
> @@ -1348,8 +1437,6 @@ GEN_VXFORM_NOA(vupkhsw, 7, 25);
> GEN_VXFORM_NOA(vupklsb, 7, 10);
> GEN_VXFORM_NOA(vupklsh, 7, 11);
> GEN_VXFORM_NOA(vupklsw, 7, 27);
> -GEN_VXFORM_NOA(vupkhpx, 7, 13);
> -GEN_VXFORM_NOA(vupklpx, 7, 15);
> GEN_VXFORM_NOA_ENV(vrefp, 5, 4);
> GEN_VXFORM_NOA_ENV(vrsqrtefp, 5, 5);
> GEN_VXFORM_NOA_ENV(vexptefp, 5, 6);
> --
> 2.7.4
>
>
>
Hello Aleksandar,
Thank you for taking a look at this patch. I will start working on
version 8 of the patch, where I will address all your suggestions.
Kind Regards,
Stefan
On 19.10.19. 22:40, Aleksandar Markovic wrote:
>
>
> On Thursday, October 17, 2019, Stefan Brankovic
> <stefan.brankovic@rt-rk.com <mailto:stefan.brankovic@rt-rk.com>> wrote:
>
> 'trans_vupkpx' function implements both vupkhpx and vupklpx
> instructions with
> argument 'high' determine which instruction is processed.
> Instructions are
> implemented in two 'for' loops. Outer 'for' loop repeats unpacking
> two times,
> since both doubleword elements of destination register are formed
> the same way.
> It also stores result of every iteration in temporary register,
> that is later
> transferred to destination register. Inner 'for' loop does
> unpacking of pixels
> and forms resulting doubleword 32 by 32 bits.
>
> Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com
> <mailto:stefan.brankovic@rt-rk.com>>
> ---
> target/ppc/helper.h | 2 -
> target/ppc/int_helper.c | 20 --------
> target/ppc/translate/vmx-impl.inc.c | 91
> ++++++++++++++++++++++++++++++++++++-
> 3 files changed, 89 insertions(+), 24 deletions(-)
>
> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> index b489b38..fd06b56 100644
> --- a/target/ppc/helper.h
> +++ b/target/ppc/helper.h
> @@ -233,8 +233,6 @@ DEF_HELPER_2(vextsh2d, void, avr, avr)
> DEF_HELPER_2(vextsw2d, void, avr, avr)
> DEF_HELPER_2(vnegw, void, avr, avr)
> DEF_HELPER_2(vnegd, void, avr, avr)
> -DEF_HELPER_2(vupkhpx, void, avr, avr)
> -DEF_HELPER_2(vupklpx, void, avr, avr)
> DEF_HELPER_2(vupkhsb, void, avr, avr)
> DEF_HELPER_2(vupkhsh, void, avr, avr)
> DEF_HELPER_2(vupkhsw, void, avr, avr)
> diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
> index f910c11..9ee667d 100644
> --- a/target/ppc/int_helper.c
> +++ b/target/ppc/int_helper.c
> @@ -1737,26 +1737,6 @@ void helper_vsum4ubs(CPUPPCState *env,
> ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
> #define UPKHI 0
> #define UPKLO 1
> #endif
> -#define VUPKPX(suffix, hi) \
> - void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
> - { \
> - int i; \
> - ppc_avr_t result; \
> - \
> - for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
> - uint16_t e = b->u16[hi ? i : i + 4]; \
> - uint8_t a = (e >> 15) ? 0xff : 0; \
> - uint8_t r = (e >> 10) & 0x1f; \
> - uint8_t g = (e >> 5) & 0x1f; \
> - uint8_t b = e & 0x1f; \
> - \
> - result.u32[i] = (a << 24) | (r << 16) | (g << 8) |
> b; \
> - } \
> - *r = result; \
> - }
> -VUPKPX(lpx, UPKLO)
> -VUPKPX(hpx, UPKHI)
> -#undef VUPKPX
>
> #define VUPK(suffix, unpacked, packee, hi) \
> void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
> diff --git a/target/ppc/translate/vmx-impl.inc.c
> b/target/ppc/translate/vmx-impl.inc.c
> index 3550ffa..09d80d6 100644
> --- a/target/ppc/translate/vmx-impl.inc.c
> +++ b/target/ppc/translate/vmx-impl.inc.c
> @@ -1031,6 +1031,95 @@ static void trans_vclzd(DisasContext *ctx)
> tcg_temp_free_i64(avr);
> }
>
> +/*
> + * vupkhpx VRT,VRB - Vector Unpack High Pixel
> + * vupklpx VRT,VRB - Vector Unpack Low Pixel
> + *
> + * Unpacks 4 pixels coded in 1-5-5-5 pattern from high/low
> doubleword element
> + * of source register into contigous array of bits in the
> destination register.
> + * Argument 'high' determines if high or low doubleword element
> of source
> + * register is processed.
> + */
> +static void trans_vupkpx(DisasContext *ctx, int high)
>
>
> The last argument should be boolean.
>
> +{
> + int VT = rD(ctx->opcode);
> + int VB = rB(ctx->opcode);
> + TCGv_i64 tmp = tcg_temp_new_i64();
> + TCGv_i64 avr = tcg_temp_new_i64();
> + TCGv_i64 result = tcg_temp_new_i64();
> + TCGv_i64 result1 = tcg_temp_new_i64();
> + TCGv_i64 result2 = tcg_temp_new_i64();
> + int64_t mask1 = 0x1fULL;
> + int64_t mask2 = 0x1fULL << 8;
> + int64_t mask3 = 0x1fULL << 16;
> + int64_t mask4 = 0xffULL << 56;
> + int i, j;
> +
> + if (high == 1) {
> + get_avr64(avr, VB, true);
> + } else {
> + get_avr64(avr, VB, false);
> + }
> +
> + tcg_gen_movi_i64(result, 0x0ULL);
> + for (i = 0; i < 2; i++) {
> + for (j = 0; j < 2; j++) {
> + tcg_gen_shli_i64(tmp, avr, (j * 16));
> + tcg_gen_andi_i64(tmp, tmp, mask1 << (j * 32));
> + tcg_gen_or_i64(result, result, tmp);
> +
> + tcg_gen_shli_i64(tmp, avr, 3 + (j * 16));
> + tcg_gen_andi_i64(tmp, tmp, mask2 << (j * 32));
> + tcg_gen_or_i64(result, result, tmp);
> +
> + tcg_gen_shli_i64(tmp, avr, 6 + (j * 16));
> + tcg_gen_andi_i64(tmp, tmp, mask3 << (j * 32));
> + tcg_gen_or_i64(result, result, tmp);
> +
> + tcg_gen_shri_i64(tmp, avr, (j * 16));
> + tcg_gen_ext16s_i64(tmp, tmp);
> + tcg_gen_andi_i64(tmp, tmp, mask4);
> + tcg_gen_shri_i64(tmp, tmp, (32 * (1 - j)));
> + tcg_gen_or_i64(result, result, tmp);
> + }
> + if (i == 0) {
> + tcg_gen_mov_i64(result1, result);
> + tcg_gen_movi_i64(result, 0x0ULL);
> + tcg_gen_shri_i64(avr, avr, 32);
> + }
> + if (i == 1) {
> + tcg_gen_mov_i64(result2, result);
> + }
> + }
> +
> + set_avr64(VT, result1, false);
> + set_avr64(VT, result2, true);
> +
> + tcg_temp_free_i64(tmp);
> + tcg_temp_free_i64(avr);
> + tcg_temp_free_i64(result);
> + tcg_temp_free_i64(result1);
> + tcg_temp_free_i64(result2);
> +}
> +
> +static void gen_vupkhpx(DisasContext *ctx)
> +{
> + if (unlikely(!ctx->altivec_enabled)) {
> + gen_exception(ctx, POWERPC_EXCP_VPU);
> + return;
> + }
> + trans_vupkpx(ctx, 1);
> +}
> +
> +static void gen_vupklpx(DisasContext *ctx)
> +{
> + if (unlikely(!ctx->altivec_enabled)) {
> + gen_exception(ctx, POWERPC_EXCP_VPU);
> + return;
> + }
> + trans_vupkpx(ctx, 0);
> +}
> +
> GEN_VXFORM(vmuloub, 4, 0);
> GEN_VXFORM(vmulouh, 4, 1);
> GEN_VXFORM(vmulouw, 4, 2);
> @@ -1348,8 +1437,6 @@ GEN_VXFORM_NOA(vupkhsw, 7, 25);
> GEN_VXFORM_NOA(vupklsb, 7, 10);
> GEN_VXFORM_NOA(vupklsh, 7, 11);
> GEN_VXFORM_NOA(vupklsw, 7, 27);
> -GEN_VXFORM_NOA(vupkhpx, 7, 13);
> -GEN_VXFORM_NOA(vupklpx, 7, 15);
> GEN_VXFORM_NOA_ENV(vrefp, 5, 4);
> GEN_VXFORM_NOA_ENV(vrsqrtefp, 5, 5);
> GEN_VXFORM_NOA_ENV(vexptefp, 5, 6);
> --
> 2.7.4
>
>
© 2016 - 2026 Red Hat, Inc.