[Qemu-devel] [PATCH v2] target-s390x: Implement stfl and stfle

Michal Marek posted 1 patch 7 years, 1 month ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20170225231653.14942-1-mmarek@suse.com
Test checkpatch passed
Test docker passed
Test s390x passed
There is a newer version of this series
target/s390x/cpu_features.c |  6 ++++--
target/s390x/cpu_features.h |  2 +-
target/s390x/helper.h       |  2 ++
target/s390x/insn-data.def  |  2 ++
target/s390x/misc_helper.c  | 35 +++++++++++++++++++++++++++++++++++
target/s390x/translate.c    | 17 +++++++++--------
6 files changed, 53 insertions(+), 11 deletions(-)
[Qemu-devel] [PATCH v2] target-s390x: Implement stfl and stfle
Posted by Michal Marek 7 years, 1 month ago
The implementation is partially cargo cult based, but it works for the
linux kernel use case.

Signed-off-by: Michal Marek <mmarek@suse.com>
---
v2:
 - STFLE is not a privileged instruction, go through the MMU to store the
   result
 - annotate the stfl helper with TCG_CALL_NO_RWG
 - Use a large enough buffer to hold the feature bitmap
 - Fix coding style of the stfle helper
---
 target/s390x/cpu_features.c |  6 ++++--
 target/s390x/cpu_features.h |  2 +-
 target/s390x/helper.h       |  2 ++
 target/s390x/insn-data.def  |  2 ++
 target/s390x/misc_helper.c  | 35 +++++++++++++++++++++++++++++++++++
 target/s390x/translate.c    | 17 +++++++++--------
 6 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
index 42fd9d792bc8..d77c560380c4 100644
--- a/target/s390x/cpu_features.c
+++ b/target/s390x/cpu_features.c
@@ -286,11 +286,11 @@ void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap)
     }
 }
 
-void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
+int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
                           uint8_t *data)
 {
     S390Feat feat;
-    int bit_nr;
+    int bit_nr, res = 0;
 
     if (type == S390_FEAT_TYPE_STFL && test_bit(S390_FEAT_ZARCH, features)) {
         /* z/Architecture is always active if around */
@@ -303,9 +303,11 @@ void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
             bit_nr = s390_features[feat].bit;
             /* big endian on uint8_t array */
             data[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
+            res = MAX(res, bit_nr / 8 + 1);
         }
         feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
     }
+    return res;
 }
 
 void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
index d66912178680..e3c41be08060 100644
--- a/target/s390x/cpu_features.h
+++ b/target/s390x/cpu_features.h
@@ -56,7 +56,7 @@ typedef uint64_t S390FeatInit[S390_FEAT_MAX / 64 + 1];
 const S390FeatDef *s390_feat_def(S390Feat feat);
 S390Feat s390_feat_by_type_and_bit(S390FeatType type, int bit);
 void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap);
-void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
+int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
                           uint8_t *data);
 void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
                           uint8_t *data);
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 9102071d0aa4..fa4a617cd1ab 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -95,6 +95,8 @@ DEF_HELPER_FLAGS_1(stckc, TCG_CALL_NO_RWG, i64, env)
 DEF_HELPER_FLAGS_2(spt, TCG_CALL_NO_RWG, void, env, i64)
 DEF_HELPER_FLAGS_1(stpt, TCG_CALL_NO_RWG, i64, env)
 DEF_HELPER_4(stsi, i32, env, i64, i64, i64)
+DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_3(stfle, i64, env, i64, i64)
 DEF_HELPER_FLAGS_4(lctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
 DEF_HELPER_FLAGS_4(lctlg, TCG_CALL_NO_WG, void, env, i32, i64, i32)
 DEF_HELPER_FLAGS_4(stctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 075ff597c3de..be830a42ed8d 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -899,6 +899,8 @@
     C(0xb202, STIDP,   S,     Z,   la2, 0, new, m1_64, stidp, 0)
 /* STORE CPU TIMER */
     C(0xb209, STPT,    S,     Z,   la2, 0, new, m1_64, stpt, 0)
+/* STORE FACILITY LIST EXTENDED */
+    C(0xb2b0, STFLE,   S,  SFLE,   0, a2, 0, 0, stfle, 0)
 /* STORE FACILITY LIST */
     C(0xb2b1, STFL,    S,     Z,   0, 0, 0, 0, stfl, 0)
 /* STORE PREFIX */
diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index c9604ea9c728..3baee27d40ce 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -500,6 +500,41 @@ uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0,
     return cc;
 }
 
+static int do_stfle(CPUS390XState *env, uint64_t addr, int len)
+{
+    S390CPU *cpu = s390_env_get_cpu(env);
+    uint8_t data[256 * 8]; /* 256 doublewords as per STFLE documentation */
+    int i, res;
+
+    memset(data, 0, sizeof(data));
+    res = s390_fill_feat_block(cpu->model->features, S390_FEAT_TYPE_STFL, data);
+    for (i = 0; i < MIN(res, len); i++) {
+        cpu_stb_data(env, addr + i, data[i]);
+    }
+
+    return res;
+}
+
+uint64_t HELPER(stfle)(CPUS390XState *env, uint64_t a0, uint64_t r0)
+{
+    int need, len = r0 & 0xff;
+
+    need = do_stfle(env, a0, len * 8);
+    need = DIV_ROUND_UP(need, 8);
+    if (need <= len) {
+        env->cc_op = 0;
+    } else {
+        env->cc_op = 3;
+    }
+
+    return (r0 & ~0xffLL) | (need - 1);
+}
+
+void HELPER(stfl)(CPUS390XState *env)
+{
+    do_stfle(env, 200, 4);
+}
+
 uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1,
                       uint64_t cpu_addr)
 {
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 01c62176bf70..3a569d3cc0ad 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -3628,15 +3628,16 @@ static ExitStatus op_spt(DisasContext *s, DisasOps *o)
 
 static ExitStatus op_stfl(DisasContext *s, DisasOps *o)
 {
-    TCGv_i64 f, a;
-    /* We really ought to have more complete indication of facilities
-       that we implement.  Address this when STFLE is implemented.  */
     check_privileged(s);
-    f = tcg_const_i64(0xc0000000);
-    a = tcg_const_i64(200);
-    tcg_gen_qemu_st32(f, a, get_mem_index(s));
-    tcg_temp_free_i64(f);
-    tcg_temp_free_i64(a);
+    gen_helper_stfl(cpu_env);
+    return NO_EXIT;
+}
+
+static ExitStatus op_stfle(DisasContext *s, DisasOps *o)
+{
+    potential_page_fault(s);
+    gen_helper_stfle(regs[0], cpu_env, o->in2, regs[0]);
+    set_cc_static(s);
     return NO_EXIT;
 }
 
-- 
2.10.2


[Qemu-devel] [PATCH v3] target-s390x: Implement stfl and stfle
Posted by Michal Marek 7 years, 1 month ago
The implementation is partially cargo cult based, but it works for the
linux kernel use case.

Signed-off-by: Michal Marek <mmarek@suse.com>
---
v3:
 - Initialize the buffer in do_stfle()
v2:
 - STFLE is not a privileged instruction, go through the MMU to store the
   result
 - annotate the stfl helper with TCG_CALL_NO_RWG
 - Use a large enough buffer to hold the feature bitmap
 - Fix coding style of the stfle helper
---
 target/s390x/cpu_features.c |  6 ++++--
 target/s390x/cpu_features.h |  2 +-
 target/s390x/helper.h       |  2 ++
 target/s390x/insn-data.def  |  2 ++
 target/s390x/misc_helper.c  | 36 ++++++++++++++++++++++++++++++++++++
 target/s390x/translate.c    | 17 +++++++++--------
 6 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
index 42fd9d792bc8..d77c560380c4 100644
--- a/target/s390x/cpu_features.c
+++ b/target/s390x/cpu_features.c
@@ -286,11 +286,11 @@ void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap)
     }
 }
 
-void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
+int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
                           uint8_t *data)
 {
     S390Feat feat;
-    int bit_nr;
+    int bit_nr, res = 0;
 
     if (type == S390_FEAT_TYPE_STFL && test_bit(S390_FEAT_ZARCH, features)) {
         /* z/Architecture is always active if around */
@@ -303,9 +303,11 @@ void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
             bit_nr = s390_features[feat].bit;
             /* big endian on uint8_t array */
             data[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
+            res = MAX(res, bit_nr / 8 + 1);
         }
         feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
     }
+    return res;
 }
 
 void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
index d66912178680..e3c41be08060 100644
--- a/target/s390x/cpu_features.h
+++ b/target/s390x/cpu_features.h
@@ -56,7 +56,7 @@ typedef uint64_t S390FeatInit[S390_FEAT_MAX / 64 + 1];
 const S390FeatDef *s390_feat_def(S390Feat feat);
 S390Feat s390_feat_by_type_and_bit(S390FeatType type, int bit);
 void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap);
-void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
+int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
                           uint8_t *data);
 void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
                           uint8_t *data);
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 9102071d0aa4..fa4a617cd1ab 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -95,6 +95,8 @@ DEF_HELPER_FLAGS_1(stckc, TCG_CALL_NO_RWG, i64, env)
 DEF_HELPER_FLAGS_2(spt, TCG_CALL_NO_RWG, void, env, i64)
 DEF_HELPER_FLAGS_1(stpt, TCG_CALL_NO_RWG, i64, env)
 DEF_HELPER_4(stsi, i32, env, i64, i64, i64)
+DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_3(stfle, i64, env, i64, i64)
 DEF_HELPER_FLAGS_4(lctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
 DEF_HELPER_FLAGS_4(lctlg, TCG_CALL_NO_WG, void, env, i32, i64, i32)
 DEF_HELPER_FLAGS_4(stctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 075ff597c3de..be830a42ed8d 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -899,6 +899,8 @@
     C(0xb202, STIDP,   S,     Z,   la2, 0, new, m1_64, stidp, 0)
 /* STORE CPU TIMER */
     C(0xb209, STPT,    S,     Z,   la2, 0, new, m1_64, stpt, 0)
+/* STORE FACILITY LIST EXTENDED */
+    C(0xb2b0, STFLE,   S,  SFLE,   0, a2, 0, 0, stfle, 0)
 /* STORE FACILITY LIST */
     C(0xb2b1, STFL,    S,     Z,   0, 0, 0, 0, stfl, 0)
 /* STORE PREFIX */
diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index c9604ea9c728..b51454ea7861 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -500,6 +500,42 @@ uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0,
     return cc;
 }
 
+static int do_stfle(CPUS390XState *env, uint64_t addr, int len)
+{
+    S390CPU *cpu = s390_env_get_cpu(env);
+    /* 256 doublewords as per STFLE documentation */
+    uint8_t data[256 * 8] = { 0 };
+    int i, res;
+
+    memset(data, 0, sizeof(data));
+    res = s390_fill_feat_block(cpu->model->features, S390_FEAT_TYPE_STFL, data);
+    for (i = 0; i < MIN(res, len); i++) {
+        cpu_stb_data(env, addr + i, data[i]);
+    }
+
+    return res;
+}
+
+uint64_t HELPER(stfle)(CPUS390XState *env, uint64_t a0, uint64_t r0)
+{
+    int need, len = r0 & 0xff;
+
+    need = do_stfle(env, a0, len * 8);
+    need = DIV_ROUND_UP(need, 8);
+    if (need <= len) {
+        env->cc_op = 0;
+    } else {
+        env->cc_op = 3;
+    }
+
+    return (r0 & ~0xffLL) | (need - 1);
+}
+
+void HELPER(stfl)(CPUS390XState *env)
+{
+    do_stfle(env, 200, 4);
+}
+
 uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1,
                       uint64_t cpu_addr)
 {
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 01c62176bf70..3a569d3cc0ad 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -3628,15 +3628,16 @@ static ExitStatus op_spt(DisasContext *s, DisasOps *o)
 
 static ExitStatus op_stfl(DisasContext *s, DisasOps *o)
 {
-    TCGv_i64 f, a;
-    /* We really ought to have more complete indication of facilities
-       that we implement.  Address this when STFLE is implemented.  */
     check_privileged(s);
-    f = tcg_const_i64(0xc0000000);
-    a = tcg_const_i64(200);
-    tcg_gen_qemu_st32(f, a, get_mem_index(s));
-    tcg_temp_free_i64(f);
-    tcg_temp_free_i64(a);
+    gen_helper_stfl(cpu_env);
+    return NO_EXIT;
+}
+
+static ExitStatus op_stfle(DisasContext *s, DisasOps *o)
+{
+    potential_page_fault(s);
+    gen_helper_stfle(regs[0], cpu_env, o->in2, regs[0]);
+    set_cc_static(s);
     return NO_EXIT;
 }
 
-- 
2.10.2


Re: [Qemu-devel] [PATCH v3] target-s390x: Implement stfl and stfle
Posted by Thomas Huth 7 years, 1 month ago
On 26.02.2017 00:38, Michal Marek wrote:
> The implementation is partially cargo cult based, but it works for the
> linux kernel use case.
> 
> Signed-off-by: Michal Marek <mmarek@suse.com>
> ---
> v3:
>  - Initialize the buffer in do_stfle()
> v2:
>  - STFLE is not a privileged instruction, go through the MMU to store the
>    result
>  - annotate the stfl helper with TCG_CALL_NO_RWG
>  - Use a large enough buffer to hold the feature bitmap
>  - Fix coding style of the stfle helper
> ---
>  target/s390x/cpu_features.c |  6 ++++--
>  target/s390x/cpu_features.h |  2 +-
>  target/s390x/helper.h       |  2 ++
>  target/s390x/insn-data.def  |  2 ++
>  target/s390x/misc_helper.c  | 36 ++++++++++++++++++++++++++++++++++++
>  target/s390x/translate.c    | 17 +++++++++--------
>  6 files changed, 54 insertions(+), 11 deletions(-)
> 
> diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
> index 42fd9d792bc8..d77c560380c4 100644
> --- a/target/s390x/cpu_features.c
> +++ b/target/s390x/cpu_features.c
> @@ -286,11 +286,11 @@ void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap)
>      }
>  }
>  
> -void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
> +int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
>                            uint8_t *data)
>  {
>      S390Feat feat;
> -    int bit_nr;
> +    int bit_nr, res = 0;
>  
>      if (type == S390_FEAT_TYPE_STFL && test_bit(S390_FEAT_ZARCH, features)) {
>          /* z/Architecture is always active if around */
> @@ -303,9 +303,11 @@ void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
>              bit_nr = s390_features[feat].bit;
>              /* big endian on uint8_t array */
>              data[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
> +            res = MAX(res, bit_nr / 8 + 1);

Not sure whether the assumption is valid, but it seems like the bit
numbers are stored in ascending order in the s390_features array, so you
could theoretically also get along without the res variable and the MAX
calculation and just return the last bit_nr / 8 + 1 at the end.

>          }
>          feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
>      }
> +    return res;
>  }
[...]
> diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
> index c9604ea9c728..b51454ea7861 100644
> --- a/target/s390x/misc_helper.c
> +++ b/target/s390x/misc_helper.c
> @@ -500,6 +500,42 @@ uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0,
>      return cc;
>  }
>  
> +static int do_stfle(CPUS390XState *env, uint64_t addr, int len)
> +{
> +    S390CPU *cpu = s390_env_get_cpu(env);
> +    /* 256 doublewords as per STFLE documentation */
> +    uint8_t data[256 * 8] = { 0 };
> +    int i, res;
> +
> +    memset(data, 0, sizeof(data));

You've alread set data to 0 with the "= { 0 }" initializer, so the
memset() is redundant here (or you can remove the "= { 0 }" initializer
instead.

> +    res = s390_fill_feat_block(cpu->model->features, S390_FEAT_TYPE_STFL, data);
> +    for (i = 0; i < MIN(res, len); i++) {
> +        cpu_stb_data(env, addr + i, data[i]);

Since we know that we're using 64-bit values here, why not using
cpu_stq_data instead? That should be faster.
I think you could even use s390_cpu_virt_mem_write() here to avoid the
loop completely.

> +    }
> +
> +    return res;
> +}
> +
> +uint64_t HELPER(stfle)(CPUS390XState *env, uint64_t a0, uint64_t r0)
> +{
> +    int need, len = r0 & 0xff;

According to the POP spec, the address "must be designated on a
doubleword boundary; otherwise, a specification exception is recognized."
Could you please add this check here (or in translate.c)?

> +    need = do_stfle(env, a0, len * 8);
> +    need = DIV_ROUND_UP(need, 8);
> +    if (need <= len) {
> +        env->cc_op = 0;
> +    } else {
> +        env->cc_op = 3;
> +    }
> +
> +    return (r0 & ~0xffLL) | (need - 1);
> +}
> +
> +void HELPER(stfl)(CPUS390XState *env)
> +{
> +    do_stfle(env, 200, 4);

It's maybe nicer to use offsetof(LowCore, stfl_fac_list) instead of the
magic value 200 here.

> +}
> +
>  uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1,
>                        uint64_t cpu_addr)
>  {
> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
> index 01c62176bf70..3a569d3cc0ad 100644
> --- a/target/s390x/translate.c
> +++ b/target/s390x/translate.c
> @@ -3628,15 +3628,16 @@ static ExitStatus op_spt(DisasContext *s, DisasOps *o)
>  
>  static ExitStatus op_stfl(DisasContext *s, DisasOps *o)
>  {
> -    TCGv_i64 f, a;
> -    /* We really ought to have more complete indication of facilities
> -       that we implement.  Address this when STFLE is implemented.  */
>      check_privileged(s);
> -    f = tcg_const_i64(0xc0000000);
> -    a = tcg_const_i64(200);
> -    tcg_gen_qemu_st32(f, a, get_mem_index(s));
> -    tcg_temp_free_i64(f);
> -    tcg_temp_free_i64(a);
> +    gen_helper_stfl(cpu_env);
> +    return NO_EXIT;
> +}
> +
> +static ExitStatus op_stfle(DisasContext *s, DisasOps *o)
> +{
> +    potential_page_fault(s);
> +    gen_helper_stfle(regs[0], cpu_env, o->in2, regs[0]);
> +    set_cc_static(s);
>      return NO_EXIT;
>  }

 Thomas


Re: [Qemu-devel] [PATCH v3] target-s390x: Implement stfl and stfle
Posted by Michal Marek 7 years ago
Dne 26.2.2017 v 12:22 Thomas Huth napsal(a):
> On 26.02.2017 00:38, Michal Marek wrote:
>> The implementation is partially cargo cult based, but it works for the
>> linux kernel use case.
>>
>> Signed-off-by: Michal Marek <mmarek@suse.com>
>> ---
>> v3:
>>  - Initialize the buffer in do_stfle()
>> v2:
>>  - STFLE is not a privileged instruction, go through the MMU to store the
>>    result
>>  - annotate the stfl helper with TCG_CALL_NO_RWG
>>  - Use a large enough buffer to hold the feature bitmap
>>  - Fix coding style of the stfle helper
>> ---
>>  target/s390x/cpu_features.c |  6 ++++--
>>  target/s390x/cpu_features.h |  2 +-
>>  target/s390x/helper.h       |  2 ++
>>  target/s390x/insn-data.def  |  2 ++
>>  target/s390x/misc_helper.c  | 36 ++++++++++++++++++++++++++++++++++++
>>  target/s390x/translate.c    | 17 +++++++++--------
>>  6 files changed, 54 insertions(+), 11 deletions(-)
>>
>> diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
>> index 42fd9d792bc8..d77c560380c4 100644
>> --- a/target/s390x/cpu_features.c
>> +++ b/target/s390x/cpu_features.c
>> @@ -286,11 +286,11 @@ void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap)
>>      }
>>  }
>>  
>> -void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
>> +int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
>>                            uint8_t *data)
>>  {
>>      S390Feat feat;
>> -    int bit_nr;
>> +    int bit_nr, res = 0;
>>  
>>      if (type == S390_FEAT_TYPE_STFL && test_bit(S390_FEAT_ZARCH, features)) {
>>          /* z/Architecture is always active if around */
>> @@ -303,9 +303,11 @@ void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
>>              bit_nr = s390_features[feat].bit;
>>              /* big endian on uint8_t array */
>>              data[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
>> +            res = MAX(res, bit_nr / 8 + 1);
> 
> Not sure whether the assumption is valid, but it seems like the bit
> numbers are stored in ascending order in the s390_features array, so you
> could theoretically also get along without the res variable and the MAX
> calculation and just return the last bit_nr / 8 + 1 at the end.

Maybe. If we are doing this, a comment in the s390_features definition
should be added.


>> +static int do_stfle(CPUS390XState *env, uint64_t addr, int len)
>> +{
>> +    S390CPU *cpu = s390_env_get_cpu(env);
>> +    /* 256 doublewords as per STFLE documentation */
>> +    uint8_t data[256 * 8] = { 0 };
>> +    int i, res;
>> +
>> +    memset(data, 0, sizeof(data));
> 
> You've alread set data to 0 with the "= { 0 }" initializer, so the
> memset() is redundant here (or you can remove the "= { 0 }" initializer
> instead.

Oops. It was quite late in my timezone when I sent the v3. v2 was correct.


>> +    res = s390_fill_feat_block(cpu->model->features, S390_FEAT_TYPE_STFL, data);
>> +    for (i = 0; i < MIN(res, len); i++) {
>> +        cpu_stb_data(env, addr + i, data[i]);
> 
> Since we know that we're using 64-bit values here, why not using
> cpu_stq_data instead? That should be faster.

STFL is storing a 32bit number. But this needs fixing anyway, res needs
to be rounded up to a multiple of 8.


> I think you could even use s390_cpu_virt_mem_write() here to avoid the
> loop completely.

_That_ is the function I was looking for.


>> +uint64_t HELPER(stfle)(CPUS390XState *env, uint64_t a0, uint64_t r0)
>> +{
>> +    int need, len = r0 & 0xff;
> 
> According to the POP spec, the address "must be designated on a
> doubleword boundary; otherwise, a specification exception is recognized."
> Could you please add this check here (or in translate.c)?

Dumb question, but how do I signal a specification exception?
s390_cpu_do_interrupt() does not seem to be prepared for it.


>> +void HELPER(stfl)(CPUS390XState *env)
>> +{
>> +    do_stfle(env, 200, 4);
> 
> It's maybe nicer to use offsetof(LowCore, stfl_fac_list) instead of the
> magic value 200 here.

OK.

Michal

Re: [Qemu-devel] [PATCH v3] target-s390x: Implement stfl and stfle
Posted by Thomas Huth 7 years ago
On 26.02.2017 19:57, Michal Marek wrote:
> Dne 26.2.2017 v 12:22 Thomas Huth napsal(a):
>> On 26.02.2017 00:38, Michal Marek wrote:
>>> The implementation is partially cargo cult based, but it works for the
>>> linux kernel use case.
>>>
>>> Signed-off-by: Michal Marek <mmarek@suse.com>
>>> ---
>>> v3:
>>>  - Initialize the buffer in do_stfle()
>>> v2:
>>>  - STFLE is not a privileged instruction, go through the MMU to store the
>>>    result
>>>  - annotate the stfl helper with TCG_CALL_NO_RWG
>>>  - Use a large enough buffer to hold the feature bitmap
>>>  - Fix coding style of the stfle helper
>>> ---
>>>  target/s390x/cpu_features.c |  6 ++++--
>>>  target/s390x/cpu_features.h |  2 +-
>>>  target/s390x/helper.h       |  2 ++
>>>  target/s390x/insn-data.def  |  2 ++
>>>  target/s390x/misc_helper.c  | 36 ++++++++++++++++++++++++++++++++++++
>>>  target/s390x/translate.c    | 17 +++++++++--------
>>>  6 files changed, 54 insertions(+), 11 deletions(-)
[...]
>>> +uint64_t HELPER(stfle)(CPUS390XState *env, uint64_t a0, uint64_t r0)
>>> +{
>>> +    int need, len = r0 & 0xff;
>>
>> According to the POP spec, the address "must be designated on a
>> doubleword boundary; otherwise, a specification exception is recognized."
>> Could you please add this check here (or in translate.c)?
> 
> Dumb question, but how do I signal a specification exception?
> s390_cpu_do_interrupt() does not seem to be prepared for it.

Not sure, but I think something like

 program_interrupt(env, PGM_SPECIFICATION, 4);

should do the job here.

 Thomas


[Qemu-devel] [PATCH v4] target-s390x: Implement stfl and stfle
Posted by Michal Marek 7 years ago
Indicate the actual features in the STFL implementation and implement
STFLE.

Signed-off-by: Michal Marek <mmarek@suse.com>
---
v4:
 - Remove redundant buffer clearing in do_stfle()
 - Always store whole doublewords in STFLE
 - Use s390_cpu_virt_mem_write() to store the result
 - Raise a specification exception if the STFLE address is not aligned
 - Use the LowCore offset instead of hardcoding the STFL store address
v3:
 - Initialize the buffer in do_stfle()
v2:
 - STFLE is not a privileged instruction, go through the MMU to store the
   result
 - annotate the stfl helper with TCG_CALL_NO_RWG
 - Use a large enough buffer to hold the feature bitmap
 - Fix coding style of the stfle helper
---
 target/s390x/cpu_features.c |  6 ++++--
 target/s390x/cpu_features.h |  2 +-
 target/s390x/helper.h       |  2 ++
 target/s390x/insn-data.def  |  2 ++
 target/s390x/misc_helper.c  | 38 ++++++++++++++++++++++++++++++++++++++
 target/s390x/translate.c    | 18 ++++++++++--------
 6 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
index 42fd9d792bc8..d77c560380c4 100644
--- a/target/s390x/cpu_features.c
+++ b/target/s390x/cpu_features.c
@@ -286,11 +286,11 @@ void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap)
     }
 }
 
-void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
+int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
                           uint8_t *data)
 {
     S390Feat feat;
-    int bit_nr;
+    int bit_nr, res = 0;
 
     if (type == S390_FEAT_TYPE_STFL && test_bit(S390_FEAT_ZARCH, features)) {
         /* z/Architecture is always active if around */
@@ -303,9 +303,11 @@ void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
             bit_nr = s390_features[feat].bit;
             /* big endian on uint8_t array */
             data[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
+            res = MAX(res, bit_nr / 8 + 1);
         }
         feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
     }
+    return res;
 }
 
 void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
index d66912178680..e3c41be08060 100644
--- a/target/s390x/cpu_features.h
+++ b/target/s390x/cpu_features.h
@@ -56,7 +56,7 @@ typedef uint64_t S390FeatInit[S390_FEAT_MAX / 64 + 1];
 const S390FeatDef *s390_feat_def(S390Feat feat);
 S390Feat s390_feat_by_type_and_bit(S390FeatType type, int bit);
 void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap);
-void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
+int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
                           uint8_t *data);
 void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
                           uint8_t *data);
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 9102071d0aa4..f24b50ea48ab 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -95,6 +95,8 @@ DEF_HELPER_FLAGS_1(stckc, TCG_CALL_NO_RWG, i64, env)
 DEF_HELPER_FLAGS_2(spt, TCG_CALL_NO_RWG, void, env, i64)
 DEF_HELPER_FLAGS_1(stpt, TCG_CALL_NO_RWG, i64, env)
 DEF_HELPER_4(stsi, i32, env, i64, i64, i64)
+DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_4(stfle, i64, env, i64, i32, i64)
 DEF_HELPER_FLAGS_4(lctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
 DEF_HELPER_FLAGS_4(lctlg, TCG_CALL_NO_WG, void, env, i32, i64, i32)
 DEF_HELPER_FLAGS_4(stctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 075ff597c3de..be830a42ed8d 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -899,6 +899,8 @@
     C(0xb202, STIDP,   S,     Z,   la2, 0, new, m1_64, stidp, 0)
 /* STORE CPU TIMER */
     C(0xb209, STPT,    S,     Z,   la2, 0, new, m1_64, stpt, 0)
+/* STORE FACILITY LIST EXTENDED */
+    C(0xb2b0, STFLE,   S,  SFLE,   0, a2, 0, 0, stfle, 0)
 /* STORE FACILITY LIST */
     C(0xb2b1, STFL,    S,     Z,   0, 0, 0, 0, stfl, 0)
 /* STORE PREFIX */
diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index c9604ea9c728..2645ff8b1840 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -500,6 +500,44 @@ uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0,
     return cc;
 }
 
+static int do_stfle(CPUS390XState *env, uint64_t addr, uint32_t ar, int len)
+{
+    S390CPU *cpu = s390_env_get_cpu(env);
+    /* 256 doublewords as per STFLE documentation */
+    uint8_t data[256 * 8] = { 0 };
+    int res;
+
+    res = s390_fill_feat_block(cpu->model->features, S390_FEAT_TYPE_STFL, data);
+    res = ROUND_UP(res, 8);
+    s390_cpu_virt_mem_write(cpu, addr, ar, data, MIN(res, len));
+
+    return res;
+}
+
+uint64_t HELPER(stfle)(CPUS390XState *env, uint64_t addr, uint32_t ar,
+                       uint64_t r0)
+{
+    int need, len = r0 & 0xff;
+
+    if (addr % 8) {
+        program_interrupt(env, PGM_SPECIFICATION, 4);
+        return r0;
+    }
+    need = do_stfle(env, addr, ar, len * 8) / 8;
+    if (need <= len) {
+        env->cc_op = 0;
+    } else {
+        env->cc_op = 3;
+    }
+
+    return (r0 & ~0xffLL) | (need - 1);
+}
+
+void HELPER(stfl)(CPUS390XState *env)
+{
+    do_stfle(env, offsetof(LowCore, stfl_fac_list), 0, 4);
+}
+
 uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1,
                       uint64_t cpu_addr)
 {
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 01c62176bf70..d18a2ffb447c 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -3628,15 +3628,17 @@ static ExitStatus op_spt(DisasContext *s, DisasOps *o)
 
 static ExitStatus op_stfl(DisasContext *s, DisasOps *o)
 {
-    TCGv_i64 f, a;
-    /* We really ought to have more complete indication of facilities
-       that we implement.  Address this when STFLE is implemented.  */
     check_privileged(s);
-    f = tcg_const_i64(0xc0000000);
-    a = tcg_const_i64(200);
-    tcg_gen_qemu_st32(f, a, get_mem_index(s));
-    tcg_temp_free_i64(f);
-    tcg_temp_free_i64(a);
+    gen_helper_stfl(cpu_env);
+    return NO_EXIT;
+}
+
+static ExitStatus op_stfle(DisasContext *s, DisasOps *o)
+{
+    TCGv_i32 b2 = tcg_const_i32(get_field(s->fields, b2));
+    potential_page_fault(s);
+    gen_helper_stfle(regs[0], cpu_env, o->in2, b2, regs[0]);
+    set_cc_static(s);
     return NO_EXIT;
 }
 
-- 
2.10.2


Re: [Qemu-devel] [PATCH v4] target-s390x: Implement stfl and stfle
Posted by Richard Henderson 7 years ago
On 02/27/2017 09:18 PM, Michal Marek wrote:
> Indicate the actual features in the STFL implementation and implement
> STFLE.
>
> Signed-off-by: Michal Marek <mmarek@suse.com>
> ---
> v4:
>  - Remove redundant buffer clearing in do_stfle()
>  - Always store whole doublewords in STFLE
>  - Use s390_cpu_virt_mem_write() to store the result
>  - Raise a specification exception if the STFLE address is not aligned
>  - Use the LowCore offset instead of hardcoding the STFL store address
> v3:
>  - Initialize the buffer in do_stfle()
> v2:
>  - STFLE is not a privileged instruction, go through the MMU to store the
>    result
>  - annotate the stfl helper with TCG_CALL_NO_RWG
>  - Use a large enough buffer to hold the feature bitmap
>  - Fix coding style of the stfle helper
> ---
>  target/s390x/cpu_features.c |  6 ++++--
>  target/s390x/cpu_features.h |  2 +-
>  target/s390x/helper.h       |  2 ++
>  target/s390x/insn-data.def  |  2 ++
>  target/s390x/misc_helper.c  | 38 ++++++++++++++++++++++++++++++++++++++
>  target/s390x/translate.c    | 18 ++++++++++--------
>  6 files changed, 57 insertions(+), 11 deletions(-)
>
> diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
> index 42fd9d792bc8..d77c560380c4 100644
> --- a/target/s390x/cpu_features.c
> +++ b/target/s390x/cpu_features.c
> @@ -286,11 +286,11 @@ void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap)
>      }
>  }
>
> -void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
> +int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
>                            uint8_t *data)
>  {
>      S390Feat feat;
> -    int bit_nr;
> +    int bit_nr, res = 0;
>
>      if (type == S390_FEAT_TYPE_STFL && test_bit(S390_FEAT_ZARCH, features)) {
>          /* z/Architecture is always active if around */
> @@ -303,9 +303,11 @@ void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
>              bit_nr = s390_features[feat].bit;
>              /* big endian on uint8_t array */
>              data[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
> +            res = MAX(res, bit_nr / 8 + 1);
>          }
>          feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
>      }
> +    return res;
>  }
>
>  void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
> diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
> index d66912178680..e3c41be08060 100644
> --- a/target/s390x/cpu_features.h
> +++ b/target/s390x/cpu_features.h
> @@ -56,7 +56,7 @@ typedef uint64_t S390FeatInit[S390_FEAT_MAX / 64 + 1];
>  const S390FeatDef *s390_feat_def(S390Feat feat);
>  S390Feat s390_feat_by_type_and_bit(S390FeatType type, int bit);
>  void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap);
> -void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
> +int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
>                            uint8_t *data);
>  void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
>                            uint8_t *data);
> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
> index 9102071d0aa4..f24b50ea48ab 100644
> --- a/target/s390x/helper.h
> +++ b/target/s390x/helper.h
> @@ -95,6 +95,8 @@ DEF_HELPER_FLAGS_1(stckc, TCG_CALL_NO_RWG, i64, env)
>  DEF_HELPER_FLAGS_2(spt, TCG_CALL_NO_RWG, void, env, i64)
>  DEF_HELPER_FLAGS_1(stpt, TCG_CALL_NO_RWG, i64, env)
>  DEF_HELPER_4(stsi, i32, env, i64, i64, i64)
> +DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env)
> +DEF_HELPER_4(stfle, i64, env, i64, i32, i64)
>  DEF_HELPER_FLAGS_4(lctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
>  DEF_HELPER_FLAGS_4(lctlg, TCG_CALL_NO_WG, void, env, i32, i64, i32)
>  DEF_HELPER_FLAGS_4(stctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
> index 075ff597c3de..be830a42ed8d 100644
> --- a/target/s390x/insn-data.def
> +++ b/target/s390x/insn-data.def
> @@ -899,6 +899,8 @@
>      C(0xb202, STIDP,   S,     Z,   la2, 0, new, m1_64, stidp, 0)
>  /* STORE CPU TIMER */
>      C(0xb209, STPT,    S,     Z,   la2, 0, new, m1_64, stpt, 0)
> +/* STORE FACILITY LIST EXTENDED */
> +    C(0xb2b0, STFLE,   S,  SFLE,   0, a2, 0, 0, stfle, 0)
>  /* STORE FACILITY LIST */
>      C(0xb2b1, STFL,    S,     Z,   0, 0, 0, 0, stfl, 0)
>  /* STORE PREFIX */
> diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
> index c9604ea9c728..2645ff8b1840 100644
> --- a/target/s390x/misc_helper.c
> +++ b/target/s390x/misc_helper.c
> @@ -500,6 +500,44 @@ uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0,
>      return cc;
>  }
>
> +static int do_stfle(CPUS390XState *env, uint64_t addr, uint32_t ar, int len)
> +{
> +    S390CPU *cpu = s390_env_get_cpu(env);
> +    /* 256 doublewords as per STFLE documentation */
> +    uint8_t data[256 * 8] = { 0 };
> +    int res;
> +
> +    res = s390_fill_feat_block(cpu->model->features, S390_FEAT_TYPE_STFL, data);
> +    res = ROUND_UP(res, 8);
> +    s390_cpu_virt_mem_write(cpu, addr, ar, data, MIN(res, len));

This does not do what you think it does.  Or indeed, I suspect what the 
original author thinks it does.  I suspect it works for KVM only, and no one 
has actually tried the non-KVM code path.

Primarily, it does not raise an exception for error.  Secondarily, I have no 
idea what the "AR" argument is supposed to be, or how that's supposed to 
interact with the rest of the virtual memory system.

Your writes need to look a lot more like fast_memmove in mem_helper.c, except 
that of course only the destination needs translation.

Of course, in practice we could reduce this to just one cpu_stl_data for STFL 
and one or two cpu_stq_data for STFLE.  So a full fast_memmove loop is overkill.

As it happens, this reminds me that I wrote a version of this several years ago 
but never submitted it upstream.  I'll dig it out.

> +    need = do_stfle(env, addr, ar, len * 8) / 8;

I'm not keen on the scattered divide by 8.  Can we not standardize on bit 
number please, and divide by 64 or 32 when we need?

> +    if (need <= len) {
> +        env->cc_op = 0;
> +    } else {
> +        env->cc_op = 3;
> +    }

Better as env->cc_op = (need <= len ? 0 : 3);


r~

Re: [Qemu-devel] [PATCH v4] target-s390x: Implement stfl and stfle
Posted by Thomas Huth 7 years ago
On 28.02.2017 23:11, Richard Henderson wrote:
> On 02/27/2017 09:18 PM, Michal Marek wrote:
>> Indicate the actual features in the STFL implementation and implement
>> STFLE.
>>
>> Signed-off-by: Michal Marek <mmarek@suse.com>
>> ---
>> v4:
>>  - Remove redundant buffer clearing in do_stfle()
>>  - Always store whole doublewords in STFLE
>>  - Use s390_cpu_virt_mem_write() to store the result
>>  - Raise a specification exception if the STFLE address is not aligned
>>  - Use the LowCore offset instead of hardcoding the STFL store address
>> v3:
>>  - Initialize the buffer in do_stfle()
>> v2:
>>  - STFLE is not a privileged instruction, go through the MMU to store the
>>    result
>>  - annotate the stfl helper with TCG_CALL_NO_RWG
>>  - Use a large enough buffer to hold the feature bitmap
>>  - Fix coding style of the stfle helper
>> ---
>>  target/s390x/cpu_features.c |  6 ++++--
>>  target/s390x/cpu_features.h |  2 +-
>>  target/s390x/helper.h       |  2 ++
>>  target/s390x/insn-data.def  |  2 ++
>>  target/s390x/misc_helper.c  | 38 ++++++++++++++++++++++++++++++++++++++
>>  target/s390x/translate.c    | 18 ++++++++++--------
>>  6 files changed, 57 insertions(+), 11 deletions(-)
>>
>> diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
>> index 42fd9d792bc8..d77c560380c4 100644
>> --- a/target/s390x/cpu_features.c
>> +++ b/target/s390x/cpu_features.c
>> @@ -286,11 +286,11 @@ void s390_init_feat_bitmap(const S390FeatInit
>> init, S390FeatBitmap bitmap)
>>      }
>>  }
>>
>> -void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType
>> type,
>> +int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType
>> type,
>>                            uint8_t *data)
>>  {
>>      S390Feat feat;
>> -    int bit_nr;
>> +    int bit_nr, res = 0;
>>
>>      if (type == S390_FEAT_TYPE_STFL && test_bit(S390_FEAT_ZARCH,
>> features)) {
>>          /* z/Architecture is always active if around */
>> @@ -303,9 +303,11 @@ void s390_fill_feat_block(const S390FeatBitmap
>> features, S390FeatType type,
>>              bit_nr = s390_features[feat].bit;
>>              /* big endian on uint8_t array */
>>              data[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
>> +            res = MAX(res, bit_nr / 8 + 1);
>>          }
>>          feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
>>      }
>> +    return res;
>>  }
>>
>>  void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType
>> type,
>> diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
>> index d66912178680..e3c41be08060 100644
>> --- a/target/s390x/cpu_features.h
>> +++ b/target/s390x/cpu_features.h
>> @@ -56,7 +56,7 @@ typedef uint64_t S390FeatInit[S390_FEAT_MAX / 64 + 1];
>>  const S390FeatDef *s390_feat_def(S390Feat feat);
>>  S390Feat s390_feat_by_type_and_bit(S390FeatType type, int bit);
>>  void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap
>> bitmap);
>> -void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType
>> type,
>> +int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType
>> type,
>>                            uint8_t *data);
>>  void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType
>> type,
>>                            uint8_t *data);
>> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
>> index 9102071d0aa4..f24b50ea48ab 100644
>> --- a/target/s390x/helper.h
>> +++ b/target/s390x/helper.h
>> @@ -95,6 +95,8 @@ DEF_HELPER_FLAGS_1(stckc, TCG_CALL_NO_RWG, i64, env)
>>  DEF_HELPER_FLAGS_2(spt, TCG_CALL_NO_RWG, void, env, i64)
>>  DEF_HELPER_FLAGS_1(stpt, TCG_CALL_NO_RWG, i64, env)
>>  DEF_HELPER_4(stsi, i32, env, i64, i64, i64)
>> +DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env)
>> +DEF_HELPER_4(stfle, i64, env, i64, i32, i64)
>>  DEF_HELPER_FLAGS_4(lctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
>>  DEF_HELPER_FLAGS_4(lctlg, TCG_CALL_NO_WG, void, env, i32, i64, i32)
>>  DEF_HELPER_FLAGS_4(stctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
>> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
>> index 075ff597c3de..be830a42ed8d 100644
>> --- a/target/s390x/insn-data.def
>> +++ b/target/s390x/insn-data.def
>> @@ -899,6 +899,8 @@
>>      C(0xb202, STIDP,   S,     Z,   la2, 0, new, m1_64, stidp, 0)
>>  /* STORE CPU TIMER */
>>      C(0xb209, STPT,    S,     Z,   la2, 0, new, m1_64, stpt, 0)
>> +/* STORE FACILITY LIST EXTENDED */
>> +    C(0xb2b0, STFLE,   S,  SFLE,   0, a2, 0, 0, stfle, 0)
>>  /* STORE FACILITY LIST */
>>      C(0xb2b1, STFL,    S,     Z,   0, 0, 0, 0, stfl, 0)
>>  /* STORE PREFIX */
>> diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
>> index c9604ea9c728..2645ff8b1840 100644
>> --- a/target/s390x/misc_helper.c
>> +++ b/target/s390x/misc_helper.c
>> @@ -500,6 +500,44 @@ uint32_t HELPER(stsi)(CPUS390XState *env,
>> uint64_t a0,
>>      return cc;
>>  }
>>
>> +static int do_stfle(CPUS390XState *env, uint64_t addr, uint32_t ar,
>> int len)
>> +{
>> +    S390CPU *cpu = s390_env_get_cpu(env);
>> +    /* 256 doublewords as per STFLE documentation */
>> +    uint8_t data[256 * 8] = { 0 };
>> +    int res;
>> +
>> +    res = s390_fill_feat_block(cpu->model->features,
>> S390_FEAT_TYPE_STFL, data);
>> +    res = ROUND_UP(res, 8);
>> +    s390_cpu_virt_mem_write(cpu, addr, ar, data, MIN(res, len));
> 
> This does not do what you think it does.  Or indeed, I suspect what the
> original author thinks it does.  I suspect it works for KVM only, and no
> one has actually tried the non-KVM code path.

It should work fine TCG, too. At least it used to work fine with TCG
when I put that stuff together two years ago. The function only uses the
KVM_S390_MEM_OP if KVM is enabled, and uses mmu_translate() internally
to walk the MMU pages otherwise.

> Primarily, it does not raise an exception for error.

Protection and page faults should be generated properly via
trigger_access_exception() there ... or did I miss something?

> Secondarily, I
> have no idea what the "AR" argument is supposed to be, or how that's
> supposed to interact with the rest of the virtual memory system.

AR = Access Register ... they are needed when the CPU runs in access
register mode (i.e. the AS PSW bits are 01). AFAIK we don't emulate this
in QEMU yet, but the KVM_S390_MEM_OP ioctl already supports it.

> Your writes need to look a lot more like fast_memmove in mem_helper.c,
> except that of course only the destination needs translation.

I still think that s390_cpu_virt_mem_write() should be fine here, too.

> Of course, in practice we could reduce this to just one cpu_stl_data for
> STFL and one or two cpu_stq_data for STFLE.

I think STFLE can store more than two 64-bit words, can't it?

 Thomas


Re: [Qemu-devel] [PATCH v4] target-s390x: Implement stfl and stfle
Posted by Richard Henderson 7 years ago
On 03/01/2017 07:00 PM, Thomas Huth wrote:
>> Primarily, it does not raise an exception for error.
>
> Protection and page faults should be generated properly via
> trigger_access_exception() there ... or did I miss something?

So why does s390_cpu_virt_mem_rw document that it returns non-zero if there was 
an error?

I see you do raise an exception on the TCG path (via translate_pages), but not 
the KVM path.  That is very confusing.

> AR = Access Register ... they are needed when the CPU runs in access
> register mode (i.e. the AS PSW bits are 01). AFAIK we don't emulate this
> in QEMU yet, but the KVM_S390_MEM_OP ioctl already supports it.

Hmm.  I guess I don't know how to read that then.

>> Your writes need to look a lot more like fast_memmove in mem_helper.c,
>> except that of course only the destination needs translation.
>
> I still think that s390_cpu_virt_mem_write() should be fine here, too.

Ideally you'd interact with the TCG TLB in some way so that you didn't have to 
manage your own translation and your own exceptions.  That's what fast_memmove 
does.

>
>> Of course, in practice we could reduce this to just one cpu_stl_data for
>> STFL and one or two cpu_stq_data for STFLE.
>
> I think STFLE can store more than two 64-bit words, can't it?

Technically, yes.  But there are less than 128 bits defined.  Certainly much 
less than the 4k bits that Michal prepares for.


r~

Re: [Qemu-devel] [PATCH v4] target-s390x: Implement stfl and stfle
Posted by Thomas Huth 7 years ago
On 01.03.2017 20:30, Richard Henderson wrote:
> On 03/01/2017 07:00 PM, Thomas Huth wrote:
>>> Primarily, it does not raise an exception for error.
>>
>> Protection and page faults should be generated properly via
>> trigger_access_exception() there ... or did I miss something?
> 
> So why does s390_cpu_virt_mem_rw document that it returns non-zero if
> there was an error?

The non-zero return code is needed for the callers to decide whether to
continue or not (this is used in non-TCG code, too, so we need something
that is independent from TCG magic here). See the usage of
s390_cpu_virt_mem_read() / ..._write() in target/s390x/ioinst.c for example.

> I see you do raise an exception on the TCG path (via translate_pages),
> but not the KVM path.  That is very confusing.

The KVM_S390_MEM_OP ioctl can raise an exception, too. I konw, it's
confusing, but that ioctl was necessary since you need to hold a special
lock in KVM while walking the page tables.

>> AR = Access Register ... they are needed when the CPU runs in access
>> register mode (i.e. the AS PSW bits are 01). AFAIK we don't emulate this
>> in QEMU yet, but the KVM_S390_MEM_OP ioctl already supports it.
> 
> Hmm.  I guess I don't know how to read that then.

The access-register mode is described in the "Access-Register
Translation" sub-chapter in Chapter 5 ("Program Execution") of the POP.
Make sure to have some good painkillers ready first, though ... that's
IMHO really one of the ugliest parts of the architecture ;-)

>>> Your writes need to look a lot more like fast_memmove in mem_helper.c,
>>> except that of course only the destination needs translation.
>>
>> I still think that s390_cpu_virt_mem_write() should be fine here, too.
> 
> Ideally you'd interact with the TCG TLB in some way so that you didn't
> have to manage your own translation and your own exceptions.  That's
> what fast_memmove does.

Well, all the code from s390_cpu_virt_mem_rw() can also run with KVM -
in case the KVM_S390_MEM_OP is not available (which also has just been
introduced two years ago). So all the code that can be called by
s390_cpu_virt_mem_rw() has to work also without TCG.

But I agree, since the original problem here (TCG emulation of stfle) is
not related to KVM, it's maybe better to use a function a la
fast_memmove there instead.

>>> Of course, in practice we could reduce this to just one cpu_stl_data for
>>> STFL and one or two cpu_stq_data for STFLE.
>>
>> I think STFLE can store more than two 64-bit words, can't it?
> 
> Technically, yes.  But there are less than 128 bits defined.  Certainly
> much less than the 4k bits that Michal prepares for.

True. But maybe we should then also have an assert() or something
similar here in case the bits exceed the 128 limit one day...?

 Thomas


Re: [Qemu-devel] [PATCH v4] target-s390x: Implement stfl and stfle
Posted by David Hildenbrand 7 years ago
>>> Of course, in practice we could reduce this to just one cpu_stl_data for
>>> STFL and one or two cpu_stq_data for STFLE.
>>
>> I think STFLE can store more than two 64-bit words, can't it?
> 
> Technically, yes.  But there are less than 128 bits defined.  Certainly much 
> less than the 4k bits that Michal prepares for.
> 

The architectural limit is 2k bytes (yes I said bytes).

Check out "struct kvm_s390_vm_cpu_machine - fac_list" /
KVM_S390_VM_CPU_MACHINE in
arch/s390/include/uapi/asm/kvm.h. Here we prepared for that.

Also note preparations for new stfl bits for future HW:

cd1836f583d7 (KVM: s390: instruction-execution-protection support)
-> bit 130
a679c547d19d (KVM: s390: gaccess: add ESOP2 handling)
-> bit 131

However, for now 128 bit should be more then enough, as TCG still misses
loads of features. CPU model still isn't properly wired up for TCG yet.

-- 
Thanks,

David

Re: [Qemu-devel] [PATCH v4] target-s390x: Implement stfl and stfle
Posted by Michal Marek 7 years ago
Dne 28.2.2017 v 23:11 Richard Henderson napsal(a):
> On 02/27/2017 09:18 PM, Michal Marek wrote:
>> +static int do_stfle(CPUS390XState *env, uint64_t addr, uint32_t ar,
>> int len)
>> +{
>> +    S390CPU *cpu = s390_env_get_cpu(env);
>> +    /* 256 doublewords as per STFLE documentation */
>> +    uint8_t data[256 * 8] = { 0 };
>> +    int res;
>> +
>> +    res = s390_fill_feat_block(cpu->model->features,
>> S390_FEAT_TYPE_STFL, data);
>> +    res = ROUND_UP(res, 8);
>> +    s390_cpu_virt_mem_write(cpu, addr, ar, data, MIN(res, len));
> 
> This does not do what you think it does.  Or indeed, I suspect what the
> original author thinks it does.  I suspect it works for KVM only, and no
> one has actually tried the non-KVM code path.
> 
> Primarily, it does not raise an exception for error.  Secondarily, I
> have no idea what the "AR" argument is supposed to be, or how that's
> supposed to interact with the rest of the virtual memory system.
> 
> Your writes need to look a lot more like fast_memmove in mem_helper.c,
> except that of course only the destination needs translation.
> 
> Of course, in practice we could reduce this to just one cpu_stl_data for
> STFL and one or two cpu_stq_data for STFLE.  So a full fast_memmove loop
> is overkill.
> 
> As it happens, this reminds me that I wrote a version of this several
> years ago but never submitted it upstream.  I'll dig it out.

So, does it make sense for me to post a v5 again not using
s390_cpu_virt_mem_write(), or are you going to refresh your stfle
implementation?

Thanks,
Michal

Re: [Qemu-devel] [PATCH v4] target-s390x: Implement stfl and stfle
Posted by David Hildenbrand 7 years ago
>> As it happens, this reminds me that I wrote a version of this several
>> years ago but never submitted it upstream.  I'll dig it out.
> 
> So, does it make sense for me to post a v5 again not using
> s390_cpu_virt_mem_write(), or are you going to refresh your stfle
> implementation?
> 
> Thanks,
> Michal
> 

A new implementation should definitely take care of the CPU model.

-- 
Thanks,

David