target/s390x/helper.h | 1 + target/s390x/insn-data.def | 2 + target/s390x/mem_helper.c | 189 +++++++++++++++++++++++++++++++++++++++++++++ target/s390x/translate.c | 13 +++- 4 files changed, 204 insertions(+), 1 deletion(-)
There are no uses in a Linux system with which to test,
but it Looks Right by my reading of the PoO.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
target/s390x/helper.h | 1 +
target/s390x/insn-data.def | 2 +
target/s390x/mem_helper.c | 189 +++++++++++++++++++++++++++++++++++++++++++++
target/s390x/translate.c | 13 +++-
4 files changed, 204 insertions(+), 1 deletion(-)
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index b268367..456aaa9 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -33,6 +33,7 @@ DEF_HELPER_3(celgb, i64, env, i64, i32)
DEF_HELPER_3(cdlgb, i64, env, i64, i32)
DEF_HELPER_3(cxlgb, i64, env, i64, i32)
DEF_HELPER_4(cdsg, void, env, i64, i32, i32)
+DEF_HELPER_4(csst, i32, env, i32, i64, i64)
DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64)
DEF_HELPER_FLAGS_3(adb, TCG_CALL_NO_WG, i64, env, i64, i64)
DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index aa4c5b2..ef02a8e 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -256,6 +256,8 @@
D(0xbb00, CDS, RS_a, Z, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ)
D(0xeb31, CDSY, RSY_a, LD, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ)
C(0xeb3e, CDSG, RSY_a, Z, 0, 0, 0, 0, cdsg, 0)
+/* COMPARE AND SWAP AND STORE */
+ C(0xc802, CSST, SSF, CASS, la1, a2, 0, 0, csst, 0)
/* COMPARE AND TRAP */
D(0xb972, CRT, RRF_c, GIE, r1_32s, r2_32s, 0, 0, ct, 0, 0)
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 6125725..4a7d770 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -1344,6 +1344,195 @@ void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
env->regs[r1 + 1] = int128_getlo(oldv);
}
+uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
+{
+#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128)
+ uint32_t mem_idx = cpu_mmu_index(env, false);
+#endif
+ uintptr_t ra = GETPC();
+ uint32_t fc = extract32(env->regs[0], 0, 8);
+ uint32_t sc = extract32(env->regs[0], 8, 8);
+ uint64_t pl = get_address(env, 1) & -16;
+ uint64_t svh, svl;
+ uint32_t cc;
+
+ /* Sanity check the function code and storage characteristic. */
+ if (fc > 1 || sc > 3) {
+ if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
+ goto spec_exception;
+ }
+ if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
+ goto spec_exception;
+ }
+ }
+
+ /* Sanity check the alignments. */
+ if (extract32(a1, 0, 4 << fc) || extract32(a2, 0, 1 << sc)) {
+ goto spec_exception;
+ }
+
+ /* Sanity check writability of the store address. */
+#ifndef CONFIG_USER_ONLY
+ probe_write(env, a2, mem_idx, ra);
+#endif
+
+ /* Note that the compare-and-swap is atomic, and the store is atomic, but
+ the complete operation is not. Therefore we do not need to assert serial
+ context in order to implement this. That said, restart early if we can't
+ support either operation that is supposed to be atomic. */
+ if (parallel_cpus) {
+ int mask = 0;
+#if !defined(CONFIG_ATOMIC64)
+ mask = -8;
+#elif !defined(CONFIG_ATOMIC128)
+ mask = -16;
+#endif
+ if (((4 << fc) | (1 << sc)) & mask) {
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+ }
+ }
+
+ /* All loads happen before all stores. For simplicity, load the entire
+ store value area from the parameter list. */
+ svh = cpu_ldq_data_ra(env, pl + 16, ra);
+ svl = cpu_ldq_data_ra(env, pl + 24, ra);
+
+ switch (fc) {
+ case 0:
+ {
+ uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
+ uint32_t cv = env->regs[r3];
+ uint32_t ov;
+
+ if (parallel_cpus) {
+#ifdef CONFIG_USER_ONLY
+ uint32_t *haddr = g2h(a1);
+ ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
+#else
+ TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
+ ov = helper_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
+#endif
+ } else {
+ ov = cpu_ldl_data_ra(env, a1, ra);
+ cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
+ }
+ cc = (ov != cv);
+ env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
+ }
+ break;
+
+ case 1:
+ {
+ uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
+ uint64_t cv = env->regs[r3];
+ uint64_t ov;
+
+ if (parallel_cpus) {
+#ifdef CONFIG_USER_ONLY
+# ifdef CONFIG_ATOMIC64
+ uint64_t *haddr = g2h(a1);
+ ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
+# else
+ /* Note that we asserted !parallel_cpus above. */
+ g_assert_not_reached();
+# endif
+#else
+ TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
+ ov = helper_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
+#endif
+ } else {
+ ov = cpu_ldq_data_ra(env, a1, ra);
+ cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
+ }
+ cc = (ov != cv);
+ env->regs[r3] = ov;
+ }
+ break;
+
+ case 2:
+ {
+ uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
+ uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
+ Int128 nv = int128_make128(nvl, nvh);
+ Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
+ Int128 ov;
+
+ if (parallel_cpus) {
+#ifdef CONFIG_ATOMIC128
+ TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+ ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
+ cc = !int128_eq(ov, cv);
+#else
+ /* Note that we asserted !parallel_cpus above. */
+ g_assert_not_reached();
+#endif
+ } else {
+ uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
+ uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
+
+ ov = int128_make128(ol, oh);
+ cc = !int128_eq(ov, cv);
+ if (cc) {
+ nv = ov;
+ }
+
+ cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
+ cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
+ }
+
+ env->regs[r3 + 0] = int128_gethi(ov);
+ env->regs[r3 + 1] = int128_getlo(ov);
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ /* Store only if the comparison succeeded. Note that above we use a pair
+ of 64-bit big-endian loads, so for sc < 3 we must extract the value
+ from the most-significant bits of svh. */
+ if (cc == 0) {
+ switch (sc) {
+ case 0:
+ cpu_stb_data_ra(env, a2, svh >> 56, ra);
+ break;
+ case 1:
+ cpu_stw_data_ra(env, a2, svh >> 48, ra);
+ break;
+ case 2:
+ cpu_stl_data_ra(env, a2, svh >> 32, ra);
+ break;
+ case 3:
+ cpu_stq_data_ra(env, a2, svh, ra);
+ break;
+ case 4:
+ if (parallel_cpus) {
+#ifdef CONFIG_ATOMIC128
+ TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+ Int128 sv = int128_make128(svl, svh);
+ helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
+#else
+ /* Note that we asserted !parallel_cpus above. */
+ g_assert_not_reached();
+#endif
+ } else {
+ cpu_stq_data_ra(env, a2 + 0, svh, ra);
+ cpu_stq_data_ra(env, a2 + 8, svl, ra);
+ }
+ default:
+ g_assert_not_reached();
+ }
+ }
+
+ return cc;
+
+ spec_exception:
+ cpu_restore_state(ENV_GET_CPU(env), ra);
+ program_interrupt(env, PGM_SPECIFICATION, 6);
+ g_assert_not_reached();
+}
+
#if !defined(CONFIG_USER_ONLY)
void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index a546119..1269b26 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -2032,6 +2032,18 @@ static ExitStatus op_cdsg(DisasContext *s, DisasOps *o)
return NO_EXIT;
}
+static ExitStatus op_csst(DisasContext *s, DisasOps *o)
+{
+ int r3 = get_field(s->fields, r3);
+ TCGv_i32 t_r3 = tcg_const_i32(r3);
+
+ gen_helper_csst(cc_op, cpu_env, t_r3, o->in1, o->in2);
+ tcg_temp_free_i32(t_r3);
+
+ set_cc_static(s);
+ return NO_EXIT;
+}
+
#ifndef CONFIG_USER_ONLY
static ExitStatus op_csp(DisasContext *s, DisasOps *o)
{
@@ -5396,7 +5408,6 @@ enum DisasInsnEnum {
/* Give smaller names to the various facilities. */
#define FAC_Z S390_FEAT_ZARCH
#define FAC_CASS S390_FEAT_COMPARE_AND_SWAP_AND_STORE
-#define FAC_CASS2 S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2
#define FAC_DFP S390_FEAT_DFP
#define FAC_DFPR S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* DFP-rounding */
#define FAC_DO S390_FEAT_STFLE_45 /* distinct-operands */
--
2.9.4
On 15.06.2017 22:37, Richard Henderson wrote:
> There are no uses in a Linux system with which to test,
> but it Looks Right by my reading of the PoO.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> target/s390x/helper.h | 1 +
> target/s390x/insn-data.def | 2 +
> target/s390x/mem_helper.c | 189 +++++++++++++++++++++++++++++++++++++++++++++
> target/s390x/translate.c | 13 +++-
> 4 files changed, 204 insertions(+), 1 deletion(-)
>
> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
> index b268367..456aaa9 100644
> --- a/target/s390x/helper.h
> +++ b/target/s390x/helper.h
> @@ -33,6 +33,7 @@ DEF_HELPER_3(celgb, i64, env, i64, i32)
> DEF_HELPER_3(cdlgb, i64, env, i64, i32)
> DEF_HELPER_3(cxlgb, i64, env, i64, i32)
> DEF_HELPER_4(cdsg, void, env, i64, i32, i32)
> +DEF_HELPER_4(csst, i32, env, i32, i64, i64)
> DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64)
> DEF_HELPER_FLAGS_3(adb, TCG_CALL_NO_WG, i64, env, i64, i64)
> DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64)
> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
> index aa4c5b2..ef02a8e 100644
> --- a/target/s390x/insn-data.def
> +++ b/target/s390x/insn-data.def
> @@ -256,6 +256,8 @@
> D(0xbb00, CDS, RS_a, Z, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ)
> D(0xeb31, CDSY, RSY_a, LD, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ)
> C(0xeb3e, CDSG, RSY_a, Z, 0, 0, 0, 0, cdsg, 0)
> +/* COMPARE AND SWAP AND STORE */
> + C(0xc802, CSST, SSF, CASS, la1, a2, 0, 0, csst, 0)
>
> /* COMPARE AND TRAP */
> D(0xb972, CRT, RRF_c, GIE, r1_32s, r2_32s, 0, 0, ct, 0, 0)
> diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
> index 6125725..4a7d770 100644
> --- a/target/s390x/mem_helper.c
> +++ b/target/s390x/mem_helper.c
> @@ -1344,6 +1344,195 @@ void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
> env->regs[r1 + 1] = int128_getlo(oldv);
> }
>
> +uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
> +{
> +#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128)
> + uint32_t mem_idx = cpu_mmu_index(env, false);
> +#endif
> + uintptr_t ra = GETPC();
> + uint32_t fc = extract32(env->regs[0], 0, 8);
> + uint32_t sc = extract32(env->regs[0], 8, 8);
> + uint64_t pl = get_address(env, 1) & -16;
> + uint64_t svh, svl;
> + uint32_t cc;
> +
> + /* Sanity check the function code and storage characteristic. */
> + if (fc > 1 || sc > 3) {
> + if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
> + goto spec_exception;
> + }
> + if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
I think you could omit the "fc == 2" here. fc has to be bigger than 1
due to the outer if-statement, and if it is not 2, the first "fc > 1"
has already triggered. So "fc" has to be 2 here and the "fc == 2" is a
redundant check.
Thomas
On 06/19/2017 01:08 AM, Thomas Huth wrote:
>> + /* Sanity check the function code and storage characteristic. */
>> + if (fc > 1 || sc > 3) {
>> + if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
>> + goto spec_exception;
>> + }
>> + if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
>
> I think you could omit the "fc == 2" here. fc has to be bigger than 1
> due to the outer if-statement, and if it is not 2, the first "fc > 1"
> has already triggered. So "fc" has to be 2 here and the "fc == 2" is a
> redundant check.
Not so. We can also get here with fc == 0 && sc == 4.
r~
On 20.06.2017 01:44, Richard Henderson wrote:
> On 06/19/2017 01:08 AM, Thomas Huth wrote:
>>> + /* Sanity check the function code and storage characteristic. */
>>> + if (fc > 1 || sc > 3) {
>>> + if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
>>> + goto spec_exception;
>>> + }
>>> + if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
>>
>> I think you could omit the "fc == 2" here. fc has to be bigger than 1
>> due to the outer if-statement, and if it is not 2, the first "fc > 1"
>> has already triggered. So "fc" has to be 2 here and the "fc == 2" is a
>> redundant check.
>
> Not so. We can also get here with fc == 0 && sc == 4.
Uh, right, sorry for the confusion!
Thomas
On 15.06.2017 22:37, Richard Henderson wrote:
> There are no uses in a Linux system with which to test,
> but it Looks Right by my reading of the PoO.
I am using next.git/master with this patch applied:
https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=8aa8680aa383bf6e2ac
I am using QEMU with the mvcos patch and your patch applied (and a patch
that allows enabling csst/csst2).
I am using the following qemu command line:
#!/bin/bash
/home/dhildenb/git/qemu/s390x-softmmu/qemu-system-s390x \
-nographic -nodefaults -machine s390-ccw-virtio,accel=tcg \
-cpu qemu,mvcos=on,stfle=on,ldisp=on,ldisphp=on,\
eimm=on,stckf=on,csst=on,csst2=on,ginste=on,exrl=on\
-m 256M -smp 1 -chardev stdio,id=con0 \
-device sclpconsole,chardev=con0 \
-kernel vmlinux -initrd /home/dhildenb/initrd.debian
Right now, I can start a z9 compiled kernel.
When trying to start a z10 compiled kernel (which generates many csst),
qemu simply crashes / the guests exits (have to debug) without any
command line output.
So either something in csst is broken or in the other instructions for z10.
--
Thanks,
David
On 19.06.2017 12:03, David Hildenbrand wrote:
> On 15.06.2017 22:37, Richard Henderson wrote:
>> There are no uses in a Linux system with which to test,
>> but it Looks Right by my reading of the PoO.
>
> I am using next.git/master with this patch applied:
> https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=8aa8680aa383bf6e2ac
>
> I am using QEMU with the mvcos patch and your patch applied (and a patch
> that allows enabling csst/csst2).
>
> I am using the following qemu command line:
>
> #!/bin/bash
> /home/dhildenb/git/qemu/s390x-softmmu/qemu-system-s390x \
> -nographic -nodefaults -machine s390-ccw-virtio,accel=tcg \
> -cpu qemu,mvcos=on,stfle=on,ldisp=on,ldisphp=on,\
> eimm=on,stckf=on,csst=on,csst2=on,ginste=on,exrl=on\
> -m 256M -smp 1 -chardev stdio,id=con0 \
> -device sclpconsole,chardev=con0 \
> -kernel vmlinux -initrd /home/dhildenb/initrd.debian
>
> Right now, I can start a z9 compiled kernel.
>
> When trying to start a z10 compiled kernel (which generates many csst),
> qemu simply crashes / the guests exits (have to debug) without any
> command line output.
>
> So either something in csst is broken or in the other instructions for z10.
>
With CONFIG_DEBUG_LOCKDEP I get a pgm exception in check_no_collision()
and the kernel can't find an exception table for it, resulting in a panic().
#0 0x00000000001cf584 in check_no_collision (chain=<optimized out>,
hlock=<optimized out>, curr=<optimized out>)
at kernel/locking/lockdep.c:2111
#1 lookup_chain_cache (chain_key=<optimized out>, hlock=<optimized
out>, curr=<optimized out>)
at kernel/locking/lockdep.c:2158
#2 validate_chain (curr=0xdad300 <init_task>, hlock=0x18398c8
<lock_classes>, chain_head=<optimized out>,
chain_key=10957502631322533163, lock=<optimized out>) at
kernel/locking/lockdep.c:2252
#3 0x00000000001d089a in __lock_acquire (lock=<optimized out>,
subclass=<optimized out>, trylock=0, read=0,
check=1, hardirqs_off=<optimized out>, nest_lock=0x0, ip=15545474,
references=<optimized out>,
pin_count=<optimized out>) at kernel/locking/lockdep.c:3367
#4 0x00000000001d15a6 in lock_acquire (lock=<optimized out>,
subclass=<optimized out>, trylock=<optimized out>,
read=<optimized out>, check=1, nest_lock=0x0, ip=15545474) at
kernel/locking/lockdep.c:3855
#5 0x00000000009b76cc in __mutex_lock_common (use_ww_ctx=<optimized
out>, ww_ctx=<optimized out>,
ip=<optimized out>, nest_lock=<optimized out>, subclass=<optimized
out>, state=<optimized out>,
lock=<optimized out>) at kernel/locking/mutex.c:756
#6 __mutex_lock (lock=0xded840 <cgroup_mutex>, state=2, subclass=0,
nest_lock=0x0, ip=<optimized out>)
at kernel/locking/mutex.c:893
#7 0x00000000009b801a in mutex_lock_nested (lock=<optimized out>,
subclass=<optimized out>)
at kernel/locking/mutex.c:908
#8 0x0000000000ed3482 in cgroup_init_subsys (ss=0xdd2340
<cpu_cgrp_subsys>, early=true)
at kernel/cgroup/cgroup.c:4403
#9 0x0000000000ed3752 in cgroup_init_early () at
kernel/cgroup/cgroup.c:4481
#10 0x0000000000ebd79a in start_kernel () at init/main.c:502
#11 0x0000000000100020 in _stext () at arch/s390/kernel/head64.S:100
1cf552: e3 10 c0 30 00 04 lg %r1,48(%r12)
1cf558: eb 11 00 33 00 0c srlg %r1,%r1,51
1cf55e: eb c1 00 05 00 0d sllg %r12,%r1,5
1cf564: b9 09 00 c1 sgr %r12,%r1
1cf568: eb cc 00 04 00 0d sllg %r12,%r12,4
1cf56e: e3 c0 d0 28 00 08 ag %r12,40(%r13)
1cf574: a7 f4 ff 6f j 1cf452
<validate_chain.isra.23+0xba2>
if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i
- 1))) {
1cf578: c0 30 00 4c cc e3 larl %r3,b68f3e
<kallsyms_token_index+0x10626>
1cf57e: c0 20 00 4c 95 37 larl %r2,b61fec
<kallsyms_token_index+0x96d4>
1cf584: c0 e5 00 07 f1 2e brasl %r14,2cd7e0 <printk>
1cf58a: a7 f4 00 01 j 1cf58c
<validate_chain.isra.23+0xcdc>
1cf58e: a7 f4 fc d9 j 1cef40
<validate_chain.isra.23+0x690>
Without CONFIG_DEBUG_LOCKDEP:
I get a similar crash in static void __init mm_init(void).
Not sure yet what the real root cause is. Maybe something not related to
CSST.
--
Thanks,
David
© 2016 - 2025 Red Hat, Inc.