[Qemu-devel] [PATCH 1/4] target/mips: Optimize support for MSA instructions ILVEV.<B|H|W|D>

Mateja Marjanovic posted 4 patches 6 years, 8 months ago
Maintainers: Aleksandar Markovic <amarkovic@wavecomp.com>, Aleksandar Rikalo <arikalo@wavecomp.com>, Aurelien Jarno <aurelien@aurel32.net>
There is a newer version of this series
[Qemu-devel] [PATCH 1/4] target/mips: Optimize support for MSA instructions ILVEV.<B|H|W|D>
Posted by Mateja Marjanovic 6 years, 8 months ago
From: Mateja Marjanovic <Mateja.Marjanovic@rt-rk.com>

Optimize support for MSA instructions ILVEV.B, ILVEV.H, ILVEV.W, and
ILVEV.D.

Optimization is done by eliminating loops and explicitly assigning
desired values to individual data elements. Performance measurement
is done by executing the instructions a large number of times on a
computer with an Intel Core i7-3770 CPU @ 3.40GHz×8.

Measured time before optimization:
  ILVEV.B:  119.02 ms
  ILVEV.H:   94.16 ms
  ILVEV.W:  120.97 ms
  ILVEV.D:   42.99 ms

Measured time after optimization:
  ILVEV.B:   61.81 ms
  ILVEV.H:   42.78 ms
  ILVEV.W:   39.47 ms
  ILVEV.D:   39.11 ms

Signed-off-by: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
---
 target/mips/msa_helper.c | 60 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 52 insertions(+), 8 deletions(-)

diff --git a/target/mips/msa_helper.c b/target/mips/msa_helper.c
index c74e3cd..4e6584e 100644
--- a/target/mips/msa_helper.c
+++ b/target/mips/msa_helper.c
@@ -1198,14 +1198,6 @@ MSA_FN_DF(ilvl_df)
 MSA_FN_DF(ilvr_df)
 #undef MSA_DO
 
-#define MSA_DO(DF)                      \
-    do {                                \
-        pwx->DF[2*i]   = pwt->DF[2*i];  \
-        pwx->DF[2*i+1] = pws->DF[2*i];  \
-    } while (0)
-MSA_FN_DF(ilvev_df)
-#undef MSA_DO
-
 #define MSA_DO(DF)                          \
     do {                                    \
         pwx->DF[2*i]   = pwt->DF[2*i+1];    \
@@ -1230,6 +1222,58 @@ MSA_FN_DF(vshf_df)
 #undef MSA_LOOP_COND
 #undef MSA_FN_DF
 
+
+void helper_msa_ilvev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                         uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[0]  = pwt->b[0];
+        pwd->b[1]  = pws->b[0];
+        pwd->b[2]  = pwt->b[2];
+        pwd->b[3]  = pws->b[2];
+        pwd->b[4]  = pwt->b[4];
+        pwd->b[5]  = pws->b[4];
+        pwd->b[6]  = pwt->b[6];
+        pwd->b[7]  = pws->b[6];
+        pwd->b[8]  = pwt->b[8];
+        pwd->b[9]  = pws->b[8];
+        pwd->b[10] = pwt->b[10];
+        pwd->b[11] = pws->b[10];
+        pwd->b[12] = pwt->b[12];
+        pwd->b[13] = pws->b[12];
+        pwd->b[14] = pwt->b[14];
+        pwd->b[15] = pws->b[14];
+        break;
+    case DF_HALF:
+        pwd->h[0] = pwt->h[0];
+        pwd->h[1] = pws->h[0];
+        pwd->h[2] = pwt->h[2];
+        pwd->h[3] = pws->h[2];
+        pwd->h[4] = pwt->h[4];
+        pwd->h[5] = pws->h[4];
+        pwd->h[6] = pwt->h[6];
+        pwd->h[7] = pws->h[6];
+        break;
+    case DF_WORD:
+        pwd->w[0] = pwt->w[0];
+        pwd->w[1] = pws->w[0];
+        pwd->w[2] = pwt->w[2];
+        pwd->w[3] = pws->w[2];
+        break;
+    case DF_DOUBLE:
+        pwd->d[0] = pwt->d[0];
+        pwd->d[1] = pws->d[0];
+        break;
+    default:
+        assert(0);
+    }
+}
+
 void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t n)
 {
-- 
2.7.4


Re: [Qemu-devel] [PATCH 1/4] target/mips: Optimize support for MSA instructions ILVEV.<B|H|W|D>
Posted by Richard Henderson 6 years, 8 months ago
On 2/27/19 9:40 AM, Mateja Marjanovic wrote:
> +void helper_msa_ilvev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
> +                         uint32_t ws, uint32_t wt)
> +{
> +    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
> +    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
> +    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
> +
> +    switch (df) {

I'll bet you can do even better by splitting this into 4 functions such that
you do not need to pass the "df" parameter at all -- just choose the correct
helper function during translate.


r~