Series comparison

-[PULL 00/51] target-arm queue
+[PULL 00/30] target-arm queue
-Probably the last arm pullreq before softfreeze...
+Hi; here's this week's arm pullreq. Mostly this is my
 work on FEAT_MOPS and FEAT_HBC, but there are some
 other bits and pieces in there too, including a recent
 set of elf2dmp patches.
-The following changes since commit 58560ad254fbda71d4daa6622d71683190070ee2:
+thanks
 -- PMM
-  Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-4.2-20191024' into staging (2019-10-24 16:22:58 +0100)
+The following changes since commit 55394dcbec8f0c29c30e792c102a0edd50a52bf4:
   Merge tag 'pull-loongarch-20230920' of https://gitlab.com/gaosong/qemu into staging (2023-09-20 13:56:18 -0400)
 are available in the Git repository at:
-  https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20191024
+  https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20230921
-for you to fetch changes up to a01a4a3e85ae8f6fe21adbedc80f7013faabdcf4:
+for you to fetch changes up to 231f6a7d66254a58bedbee458591b780e0a507b1:
-  hw/arm/highbank: Use AddressSpace when using write_secondary_boot() (2019-10-24 17:16:30 +0100)
+  elf2dmp: rework PDB_STREAM_INDEXES::segments obtaining (2023-09-21 16:13:54 +0100)
 ----------------------------------------------------------------
 target-arm queue:
- * raspi boards: some cleanup
+ * target/m68k: Add URL to semihosting spec
- * raspi: implement the bcm2835 system timer device
+ * docs/devel/loads-stores: Fix git grep regexes
- * raspi: implement a dummy thermal sensor
+ * hw/arm/boot: Set SCR_EL3.FGTEn when booting kernel
- * KVM: support providing SVE to the guest
+ * linux-user: Correct SME feature names reported in cpuinfo
- * misc devices: switch to ptimer transaction API
+ * linux-user: Add missing arm32 hwcaps
- * cache TB flag state to improve performance of cpu_get_tb_cpu_state
+ * Don't skip MTE checks for LDRT/STRT at EL0
- * aspeed: Add an AST2600 eval board
+ * Implement FEAT_HBC
  * Implement FEAT_MOPS
  * audio/jackaudio: Avoid dynamic stack allocation
  * sbsa-ref: add non-secure EL2 virtual timer
  * elf2dmp: improve Win2022, Win11 and large dumps
 ----------------------------------------------------------------
-Andrew Jones (9):
+Fabian Vogt (1):
-      target/arm/monitor: Introduce qmp_query_cpu_model_expansion
+      hw/arm/boot: Set SCR_EL3.FGTEn when booting kernel
       tests: arm: Introduce cpu feature tests
       target/arm: Allow SVE to be disabled via a CPU property
       target/arm/cpu64: max cpu: Introduce sve<N> properties
       target/arm/kvm64: Add kvm_arch_get/put_sve
       target/arm/kvm64: max cpu: Enable SVE when available
       target/arm/kvm: scratch vcpu: Preserve input kvm_vcpu_init features
       target/arm/cpu64: max cpu: Support sve properties with KVM
       target/arm/kvm: host cpu: Add support for sve<N> properties
-Cédric Le Goater (2):
+Marcin Juszkiewicz (1):
-      hw/gpio: Fix property accessors of the AST2600 GPIO 1.8V model
+      sbsa-ref: add non-secure EL2 virtual timer
       aspeed: Add an AST2600 eval board
-Peter Maydell (8):
+Peter Maydell (23):
-      hw/net/fsl_etsec/etsec.c: Switch to transaction-based ptimer API
+      target/m68k: Add URL to semihosting spec
-      hw/timer/xilinx_timer.c: Switch to transaction-based ptimer API
+      docs/devel/loads-stores: Fix git grep regexes
-      hw/dma/xilinx_axidma.c: Switch to transaction-based ptimer API
+      linux-user/elfload.c: Correct SME feature names reported in cpuinfo
-      hw/timer/slavio_timer: Remove useless check for NULL t->timer
+      linux-user/elfload.c: Add missing arm and arm64 hwcap values
-      hw/timer/slavio_timer.c: Switch to transaction-based ptimer API
+      linux-user/elfload.c: Report previously missing arm32 hwcaps
-      hw/timer/grlib_gptimer.c: Switch to transaction-based ptimer API
+      target/arm: Update AArch64 ID register field definitions
-      hw/m68k/mcf5206.c: Switch to transaction-based ptimer API
+      target/arm: Update user-mode ID reg mask values
-      hw/watchdog/milkymist-sysctl.c: Switch to transaction-based ptimer API
+      target/arm: Implement FEAT_HBC
       target/arm: Remove unused allocation_tag_mem() argument
       target/arm: Don't skip MTE checks for LDRT/STRT at EL0
       target/arm: Implement FEAT_MOPS enable bits
       target/arm: Pass unpriv bool to get_a64_user_mem_index()
       target/arm: Define syndrome function for MOPS exceptions
       target/arm: New function allocation_tag_mem_probe()
       target/arm: Implement MTE tag-checking functions for FEAT_MOPS
       target/arm: Implement the SET* instructions
       target/arm: Define new TB flag for ATA0
       target/arm: Implement the SETG* instructions
       target/arm: Implement MTE tag-checking functions for FEAT_MOPS copies
       target/arm: Implement the CPY* instructions
       target/arm: Enable FEAT_MOPS for CPU 'max'
       audio/jackaudio: Avoid dynamic stack allocation in qjack_client_init
       audio/jackaudio: Avoid dynamic stack allocation in qjack_process()
-Philippe Mathieu-Daudé (8):
+Viktor Prutyanov (5):
-      hw/misc/bcm2835_thermal: Add a dummy BCM2835 thermal sensor
+      elf2dmp: replace PE export name check with PDB name check
-      hw/arm/bcm2835_peripherals: Use the thermal sensor block
+      elf2dmp: introduce physical block alignment
-      hw/timer/bcm2835: Add the BCM2835 SYS_timer
+      elf2dmp: introduce merging of physical memory runs
-      hw/arm/bcm2835_peripherals: Use the SYS_timer
+      elf2dmp: use Linux mmap with MAP_NORESERVE when possible
-      hw/arm/bcm2836: Make the SoC code modular
+      elf2dmp: rework PDB_STREAM_INDEXES::segments obtaining
       hw/arm/bcm2836: Rename cpus[] as cpu[].core
       hw/arm/raspi: Use AddressSpace when using arm_boot::write_secondary_boot
       hw/arm/highbank: Use AddressSpace when using write_secondary_boot()
-Richard Henderson (24):
+ docs/devel/loads-stores.rst    |  40 +-
-      target/arm: Split out rebuild_hflags_common
+ docs/system/arm/emulation.rst  |   2 +
-      target/arm: Split out rebuild_hflags_a64
+ contrib/elf2dmp/addrspace.h    |   1 +
-      target/arm: Split out rebuild_hflags_common_32
+ contrib/elf2dmp/pdb.h          |   2 +-
-      target/arm: Split arm_cpu_data_is_big_endian
+ contrib/elf2dmp/qemu_elf.h     |   2 +
-      target/arm: Split out rebuild_hflags_m32
+ target/arm/cpu.h               |  35 ++
-      target/arm: Reduce tests vs M-profile in cpu_get_tb_cpu_state
+ target/arm/internals.h         |  55 +++
-      target/arm: Split out rebuild_hflags_a32
+ target/arm/syndrome.h          |  12 +
-      target/arm: Split out rebuild_hflags_aprofile
+ target/arm/tcg/helper-a64.h    |  14 +
-      target/arm: Hoist XSCALE_CPAR, VECLEN, VECSTRIDE in cpu_get_tb_cpu_state
+ target/arm/tcg/translate.h     |   4 +-
-      target/arm: Simplify set of PSTATE_SS in cpu_get_tb_cpu_state
+ target/arm/tcg/a64.decode      |  38 +-
-      target/arm: Hoist computation of TBFLAG_A32.VFPEN
+ audio/jackaudio.c              |  21 +-
-      target/arm: Add arm_rebuild_hflags
+ contrib/elf2dmp/addrspace.c    |  31 +-
-      target/arm: Split out arm_mmu_idx_el
+ contrib/elf2dmp/main.c         | 154 ++++----
-      target/arm: Hoist store to cs_base in cpu_get_tb_cpu_state
+ contrib/elf2dmp/pdb.c          |  15 +-
-      target/arm: Add HELPER(rebuild_hflags_{a32, a64, m32})
+ contrib/elf2dmp/qemu_elf.c     |  68 +++-
-      target/arm: Rebuild hflags at EL changes
+ hw/arm/boot.c                  |   4 +
-      target/arm: Rebuild hflags at MSR writes
+ hw/arm/sbsa-ref.c              |   2 +
-      target/arm: Rebuild hflags at CPSR writes
+ linux-user/elfload.c           |  72 +++-
-      target/arm: Rebuild hflags at Xscale SCTLR writes
+ target/arm/helper.c            |  39 +-
-      target/arm: Rebuild hflags for M-profile
+ target/arm/tcg/cpu64.c         |   5 +
-      target/arm: Rebuild hflags for M-profile NVIC
+ target/arm/tcg/helper-a64.c    | 878 +++++++++++++++++++++++++++++++++++++++++
-      linux-user/aarch64: Rebuild hflags for TARGET_WORDS_BIGENDIAN
+ target/arm/tcg/hflags.c        |  21 +
-      linux-user/arm: Rebuild hflags for TARGET_WORDS_BIGENDIAN
+ target/arm/tcg/mte_helper.c    | 281 +++++++++++--
-      target/arm: Rely on hflags correct in cpu_get_tb_cpu_state
+ target/arm/tcg/translate-a64.c | 164 +++++++-
+ target/m68k/m68k-semi.c        |   4 +
- hw/misc/Makefile.objs                |   1 +
+ tests/tcg/aarch64/sysregs.c    |   4 +-
- hw/timer/Makefile.objs               |   1 +
+files changed, 1768 insertions(+), 200 deletions(-)
  tests/Makefile.include               |   5 +-
  qapi/machine-target.json             |   6 +-
  hw/net/fsl_etsec/etsec.h             |   1 -
  include/hw/arm/aspeed.h              |   1 +
  include/hw/arm/bcm2835_peripherals.h |   5 +-
  include/hw/arm/bcm2836.h             |   4 +-
  include/hw/arm/raspi_platform.h      |   1 +
  include/hw/misc/bcm2835_thermal.h    |  27 ++
  include/hw/timer/bcm2835_systmr.h    |  33 +++
  include/qemu/bitops.h                |   1 +
  target/arm/cpu.h                     | 105 +++++--
  target/arm/helper.h                  |   4 +
  target/arm/internals.h               |   9 +
  target/arm/kvm_arm.h                 |  39 +++
  hw/arm/aspeed.c                      |  23 ++
  hw/arm/bcm2835_peripherals.c         |  30 +-
  hw/arm/bcm2836.c                     |  44 +--
  hw/arm/highbank.c                    |   3 +-
  hw/arm/raspi.c                       |  14 +-
  hw/dma/xilinx_axidma.c               |   9 +-
  hw/gpio/aspeed_gpio.c                |   8 +-
  hw/intc/armv7m_nvic.c                |  22 +-
  hw/m68k/mcf5206.c                    |  15 +-
  hw/misc/bcm2835_thermal.c            | 135 +++++++++
  hw/net/fsl_etsec/etsec.c             |   9 +-
  hw/timer/bcm2835_systmr.c            | 163 +++++++++++
  hw/timer/grlib_gptimer.c             |  28 +-
  hw/timer/milkymist-sysctl.c          |  25 +-
  hw/timer/slavio_timer.c              |  32 ++-
  hw/timer/xilinx_timer.c              |  13 +-
  linux-user/aarch64/cpu_loop.c        |   1 +
  linux-user/arm/cpu_loop.c            |   1 +
  linux-user/syscall.c                 |   1 +
  target/arm/cpu.c                     |  26 +-
  target/arm/cpu64.c                   | 364 +++++++++++++++++++++--
  target/arm/helper-a64.c              |   3 +
  target/arm/helper.c                  | 403 +++++++++++++++++---------
  target/arm/kvm.c                     |  25 +-
  target/arm/kvm32.c                   |   6 +-
  target/arm/kvm64.c                   | 325 ++++++++++++++++++---
  target/arm/m_helper.c                |   6 +
  target/arm/machine.c                 |   1 +
  target/arm/monitor.c                 | 158 ++++++++++
  target/arm/op_helper.c               |   4 +
  target/arm/translate-a64.c           |  13 +-
  target/arm/translate.c               |  33 ++-
  tests/arm-cpu-features.c             | 540 +++++++++++++++++++++++++++++++++++
  docs/arm-cpu-features.rst            | 317 ++++++++++++++++++++
  hw/timer/trace-events                |   5 +
 files changed, 2725 insertions(+), 323 deletions(-)
  create mode 100644 include/hw/misc/bcm2835_thermal.h
  create mode 100644 include/hw/timer/bcm2835_systmr.h
  create mode 100644 hw/misc/bcm2835_thermal.c
  create mode 100644 hw/timer/bcm2835_systmr.c
  create mode 100644 tests/arm-cpu-features.c
  create mode 100644 docs/arm-cpu-features.rst

-[PULL 01/51] hw/gpio: Fix property accessors of the AST2600 GPIO 1.8V model
+Deleted patch
-From: Cédric Le Goater <clg@kaod.org>
-The property names of AST2600 GPIO 1.8V model are one character bigger
-than the names of the other ASPEED GPIO model. Increase the string
-buffer size by one and be more strict on the expected pattern of the
-property name.
-This fixes the QOM test of the ast2600-evb machine under :
-  Apple LLVM version 10.0.0 (clang-1000.10.44.4)
-  Target: x86_64-apple-darwin17.7.0
-  Thread model: posix
-  InstalledDir: /Library/Developer/CommandLineTools/usr/bin
-Cc: Rashmica Gupta <rashmica.g@gmail.com>
-Fixes: 36d737ee82b2 ("hw/gpio: Add in AST2600 specific implementation")
-Signed-off-by: Cédric Le Goater <clg@kaod.org>
-Message-id: 20191023130455.1347-2-clg@kaod.org
-Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- hw/gpio/aspeed_gpio.c | 8 ++++----
-file changed, 4 insertions(+), 4 deletions(-)
-diff --git a/hw/gpio/aspeed_gpio.c b/hw/gpio/aspeed_gpio.c
-index XXXXXXX..XXXXXXX 100644
---- a/hw/gpio/aspeed_gpio.c
-+++ b/hw/gpio/aspeed_gpio.c
-@@ -XXX,XX +XXX,XX @@ static void aspeed_gpio_get_pin(Object *obj, Visitor *v, const char *name,
- {
-     int pin = 0xfff;
-     bool level = true;
--    char group[3];
-+    char group[4];
-     AspeedGPIOState *s = ASPEED_GPIO(obj);
-     int set_idx, group_idx = 0;
-     if (sscanf(name, "gpio%2[A-Z]%1d", group, &pin) != 2) {
-         /* 1.8V gpio */
--        if (sscanf(name, "gpio%3s%1d", group, &pin) != 2) {
-+        if (sscanf(name, "gpio%3[18A-E]%1d", group, &pin) != 2) {
-             error_setg(errp, "%s: error reading %s", __func__, name);
-             return;
-         }
-@@ -XXX,XX +XXX,XX @@ static void aspeed_gpio_set_pin(Object *obj, Visitor *v, const char *name,
-     Error *local_err = NULL;
-     bool level;
-     int pin = 0xfff;
--    char group[3];
-+    char group[4];
-     AspeedGPIOState *s = ASPEED_GPIO(obj);
-     int set_idx, group_idx = 0;
-@@ -XXX,XX +XXX,XX @@ static void aspeed_gpio_set_pin(Object *obj, Visitor *v, const char *name,
-     }
-     if (sscanf(name, "gpio%2[A-Z]%1d", group, &pin) != 2) {
-         /* 1.8V gpio */
--        if (sscanf(name, "gpio%3s%1d", group, &pin) != 2) {
-+        if (sscanf(name, "gpio%3[18A-E]%1d", group, &pin) != 2) {
-             error_setg(errp, "%s: error reading %s", __func__, name);
-             return;
-         }
---
-.20.1

-[PULL 33/51] hw/m68k/mcf5206.c: Switch to transaction-based ptimer API
+[PULL 01/30] target/m68k: Add URL to semihosting spec
-Switch the mcf5206 code away from bottom-half based ptimers to
+The spec for m68k semihosting is documented in the libgloss
-the new transaction-based ptimer API.  This just requires adding
+sources. Add a comment with the URL for it, as we already
-begin/commit calls around the various places that modify the ptimer
+have for nios2 semihosting.
 state, and using the new ptimer_init() function to create the timer.
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-Reviewed-by: Thomas Huth <thuth@redhat.com>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191021140600.10725-1-peter.maydell@linaro.org
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
 Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
 Message-id: 20230801154451.3505492-1-peter.maydell@linaro.org
 ---
- hw/m68k/mcf5206.c | 15 +++++++++------
+ target/m68k/m68k-semi.c | 4 ++++
-file changed, 9 insertions(+), 6 deletions(-)
+file changed, 4 insertions(+)
-diff --git a/hw/m68k/mcf5206.c b/hw/m68k/mcf5206.c
+diff --git a/target/m68k/m68k-semi.c b/target/m68k/m68k-semi.c
 index XXXXXXX..XXXXXXX 100644
---- a/hw/m68k/mcf5206.c
+--- a/target/m68k/m68k-semi.c
-+++ b/hw/m68k/mcf5206.c
++++ b/target/m68k/m68k-semi.c
 @@ -XXX,XX +XXX,XX @@
+  *
+  *  You should have received a copy of the GNU General Public License
+  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
++ *
++ *  The semihosting protocol implemented here is described in the
++ *  libgloss sources:
++ *  https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=libgloss/m68k/m68k-semi.txt;hb=HEAD
+  */
  #include "qemu/osdep.h"
- #include "qemu/error-report.h"
--#include "qemu/main-loop.h"
- #include "cpu.h"
- #include "hw/hw.h"
- #include "hw/irq.h"
-@@ -XXX,XX +XXX,XX @@ static void m5206_timer_recalibrate(m5206_timer_state *s)
-     int prescale;
-     int mode;
-+    ptimer_transaction_begin(s->timer);
-     ptimer_stop(s->timer);
--    if ((s->tmr & TMR_RST) == 0)
--        return;
-+    if ((s->tmr & TMR_RST) == 0) {
-+        goto exit;
-+    }
-     prescale = (s->tmr >> 8) + 1;
-     mode = (s->tmr >> 1) & 3;
-@@ -XXX,XX +XXX,XX @@ static void m5206_timer_recalibrate(m5206_timer_state *s)
-     ptimer_set_limit(s->timer, s->trr, 0);
-     ptimer_run(s->timer, 0);
-+exit:
-+    ptimer_transaction_commit(s->timer);
- }
- static void m5206_timer_trigger(void *opaque)
-@@ -XXX,XX +XXX,XX @@ static void m5206_timer_write(m5206_timer_state *s, uint32_t addr, uint32_t val)
-         s->tcr = val;
-         break;
-     case 0xc:
-+        ptimer_transaction_begin(s->timer);
-         ptimer_set_count(s->timer, val);
-+        ptimer_transaction_commit(s->timer);
-         break;
-     case 0x11:
-         s->ter &= ~val;
-@@ -XXX,XX +XXX,XX @@ static void m5206_timer_write(m5206_timer_state *s, uint32_t addr, uint32_t val)
- static m5206_timer_state *m5206_timer_init(qemu_irq irq)
- {
-     m5206_timer_state *s;
--    QEMUBH *bh;
-     s = g_new0(m5206_timer_state, 1);
--    bh = qemu_bh_new(m5206_timer_trigger, s);
--    s->timer = ptimer_init_with_bh(bh, PTIMER_POLICY_DEFAULT);
-+    s->timer = ptimer_init(m5206_timer_trigger, s, PTIMER_POLICY_DEFAULT);
-     s->irq = irq;
-     m5206_timer_reset(s);
-     return s;
 --
-.20.1
+.34.1

-[PULL 30/51] hw/timer/slavio_timer: Remove useless check for NULL t->timer
+[PULL 02/30] docs/devel/loads-stores: Fix git grep regexes
-In the slavio timer device, the ptimer TimerContext::timer is
+The loads-and-stores documentation includes git grep regexes to find
-always created by slavio_timer_init(), so there's no need to
+occurrences of the various functions.  Some of these regexes have
-check it for NULL; remove the single unneeded NULL check.
+errors, typically failing to escape the '?', '(' and ')' when they
 should be metacharacters (since these are POSIX basic REs). We also
 weren't consistent about whether to have a ':' on the end of the
 line introducing the list of regexes in each section.
-This will be useful to avoid compiler/Coverity errors when
+Fix the errors.
-a subsequent change adds a use of t->timer before the location
-we currently do the NULL check.
+The following shell rune will complain about any REs in the
 file which don't have any matches in the codebase:
  for re in $(sed -ne 's/ - ``\(\\<.*\)``/\1/p' docs/devel/loads-stores.rst); do git grep -q "$re" || echo "no matches for re $re"; done
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
-Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+Message-id: 20230904161703.3996734-1-peter.maydell@linaro.org
 Message-id: 20191021134357.14266-2-peter.maydell@linaro.org
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- hw/timer/slavio_timer.c | 12 +++++-------
+ docs/devel/loads-stores.rst | 40 ++++++++++++++++++-------------------
-file changed, 5 insertions(+), 7 deletions(-)
+file changed, 20 insertions(+), 20 deletions(-)
-diff --git a/hw/timer/slavio_timer.c b/hw/timer/slavio_timer.c
+diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst
 index XXXXXXX..XXXXXXX 100644
---- a/hw/timer/slavio_timer.c
+--- a/docs/devel/loads-stores.rst
-+++ b/hw/timer/slavio_timer.c
++++ b/docs/devel/loads-stores.rst
-@@ -XXX,XX +XXX,XX @@ static void slavio_timer_mem_writel(void *opaque, hwaddr addr,
+@@ -XXX,XX +XXX,XX @@ which stores ``val`` to ``ptr`` as an ``{endian}`` order value
-             // set limit, reset counter
+ of size ``sz`` bytes.
-             qemu_irq_lower(t->irq);
-             t->limit = val & TIMER_MAX_COUNT32;
--            if (t->timer) {
+-Regexes for git grep
--                if (t->limit == 0) { /* free-run */
++Regexes for git grep:
--                    ptimer_set_limit(t->timer,
+  - ``\<ld[us]\?[bwlq]\(_[hbl]e\)\?_p\>``
--                                     LIMIT_TO_PERIODS(TIMER_MAX_COUNT32), 1);
+  - ``\<st[bwlq]\(_[hbl]e\)\?_p\>``
--                } else {
+  - ``\<st24\(_[hbl]e\)\?_p\>``
--                    ptimer_set_limit(t->timer, LIMIT_TO_PERIODS(t->limit), 1);
+- - ``\<ldn_\([hbl]e\)?_p\>``
--                }
+- - ``\<stn_\([hbl]e\)?_p\>``
-+            if (t->limit == 0) { /* free-run */
++ - ``\<ldn_\([hbl]e\)\?_p\>``
-+                ptimer_set_limit(t->timer,
++ - ``\<stn_\([hbl]e\)\?_p\>``
-+                                 LIMIT_TO_PERIODS(TIMER_MAX_COUNT32), 1);
-+            } else {
+ ``cpu_{ld,st}*_mmu``
-+                ptimer_set_limit(t->timer, LIMIT_TO_PERIODS(t->limit), 1);
+ ~~~~~~~~~~~~~~~~~~~~
-             }
+@@ -XXX,XX +XXX,XX @@ store: ``cpu_st{size}{end}_mmu(env, ptr, val, oi, retaddr)``
-         }
+  - ``_le`` : little endian
-         break;
  Regexes for git grep:
 - - ``\<cpu_ld[bwlq](_[bl]e)\?_mmu\>``
 - - ``\<cpu_st[bwlq](_[bl]e)\?_mmu\>``
 + - ``\<cpu_ld[bwlq]\(_[bl]e\)\?_mmu\>``
 + - ``\<cpu_st[bwlq]\(_[bl]e\)\?_mmu\>``
  ``cpu_{ld,st}*_mmuidx_ra``
@@ -XXX,XX +XXX,XX @@ store: ``cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmuidx, retaddr)``
   - ``_le`` : little endian
  Regexes for git grep:
 - - ``\<cpu_ld[us]\?[bwlq](_[bl]e)\?_mmuidx_ra\>``
 - - ``\<cpu_st[bwlq](_[bl]e)\?_mmuidx_ra\>``
 + - ``\<cpu_ld[us]\?[bwlq]\(_[bl]e\)\?_mmuidx_ra\>``
 + - ``\<cpu_st[bwlq]\(_[bl]e\)\?_mmuidx_ra\>``
  ``cpu_{ld,st}*_data_ra``
  ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -XXX,XX +XXX,XX @@ store: ``cpu_st{size}{end}_data_ra(env, ptr, val, ra)``
   - ``_le`` : little endian
  Regexes for git grep:
 - - ``\<cpu_ld[us]\?[bwlq](_[bl]e)\?_data_ra\>``
 - - ``\<cpu_st[bwlq](_[bl]e)\?_data_ra\>``
 + - ``\<cpu_ld[us]\?[bwlq]\(_[bl]e\)\?_data_ra\>``
 + - ``\<cpu_st[bwlq]\(_[bl]e\)\?_data_ra\>``
  ``cpu_{ld,st}*_data``
  ~~~~~~~~~~~~~~~~~~~~~
@@ -XXX,XX +XXX,XX @@ store: ``cpu_st{size}{end}_data(env, ptr, val)``
   - ``_be`` : big endian
   - ``_le`` : little endian
 -Regexes for git grep
 - - ``\<cpu_ld[us]\?[bwlq](_[bl]e)\?_data\>``
 - - ``\<cpu_st[bwlq](_[bl]e)\?_data\+\>``
 +Regexes for git grep:
 + - ``\<cpu_ld[us]\?[bwlq]\(_[bl]e\)\?_data\>``
 + - ``\<cpu_st[bwlq]\(_[bl]e\)\?_data\+\>``
  ``cpu_ld*_code``
  ~~~~~~~~~~~~~~~~
@@ -XXX,XX +XXX,XX @@ swap: ``translator_ld{sign}{size}_swap(env, ptr, swap)``
   - ``l`` : 32 bits
   - ``q`` : 64 bits
 -Regexes for git grep
 +Regexes for git grep:
   - ``\<translator_ld[us]\?[bwlq]\(_swap\)\?\>``
  ``helper_{ld,st}*_mmu``
@@ -XXX,XX +XXX,XX @@ store: ``helper_{size}_mmu(env, addr, val, opindex, retaddr)``
   - ``l`` : 32 bits
   - ``q`` : 64 bits
 -Regexes for git grep
 +Regexes for git grep:
   - ``\<helper_ld[us]\?[bwlq]_mmu\>``
   - ``\<helper_st[bwlq]_mmu\>``
@@ -XXX,XX +XXX,XX @@ succeeded using a MemTxResult return code.
  The ``_{endian}`` suffix is omitted for byte accesses.
 -Regexes for git grep
 +Regexes for git grep:
   - ``\<address_space_\(read\|write\|rw\)\>``
   - ``\<address_space_ldu\?[bwql]\(_[lb]e\)\?\>``
   - ``\<address_space_st[bwql]\(_[lb]e\)\?\>``
@@ -XXX,XX +XXX,XX @@ Note that portions of the write which attempt to write data to a
  device will be silently ignored -- only real RAM and ROM will
  be written to.
 -Regexes for git grep
 +Regexes for git grep:
   - ``address_space_write_rom``
  ``{ld,st}*_phys``
@@ -XXX,XX +XXX,XX @@ device doing the access has no way to report such an error.
  The ``_{endian}_`` infix is omitted for byte accesses.
 -Regexes for git grep
 +Regexes for git grep:
   - ``\<ldu\?[bwlq]\(_[bl]e\)\?_phys\>``
   - ``\<st[bwlq]\(_[bl]e\)\?_phys\>``
@@ -XXX,XX +XXX,XX @@ For new code they are better avoided:
  ``cpu_physical_memory_rw``
 -Regexes for git grep
 +Regexes for git grep:
   - ``\<cpu_physical_memory_\(read\|write\|rw\)\>``
  ``cpu_memory_rw_debug``
@@ -XXX,XX +XXX,XX @@ make sure our existing code is doing things correctly.
  ``dma_memory_rw``
 -Regexes for git grep
 +Regexes for git grep:
   - ``\<dma_memory_\(read\|write\|rw\)\>``
   - ``\<ldu\?[bwlq]\(_[bl]e\)\?_dma\>``
   - ``\<st[bwlq]\(_[bl]e\)\?_dma\>``
@@ -XXX,XX +XXX,XX @@ correct address space for that device.
  The ``_{endian}_`` infix is omitted for byte accesses.
 -Regexes for git grep
 +Regexes for git grep:
   - ``\<pci_dma_\(read\|write\|rw\)\>``
   - ``\<ldu\?[bwlq]\(_[bl]e\)\?_pci_dma\>``
   - ``\<st[bwlq]\(_[bl]e\)\?_pci_dma\>``
 --
-.20.1
+.34.1

-[PULL 25/51] linux-user/arm: Rebuild hflags for TARGET_WORDS_BIGENDIAN
+[PULL 03/30] hw/arm/boot: Set SCR_EL3.FGTEn when booting kernel
-From: Richard Henderson <richard.henderson@linaro.org>
+From: Fabian Vogt <fvogt@suse.de>
-Continue setting, but not relying upon, env->hflags.
+Just like d7ef5e16a17c sets SCR_EL3.HXEn for FEAT_HCX, this commit
 handles SCR_EL3.FGTEn for FEAT_FGT:
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+When we direct boot a kernel on a CPU which emulates EL3, we need to
-Message-id: 20191023150057.25731-24-richard.henderson@linaro.org
+set up the EL3 system registers as the Linux kernel documentation
 specifies:
     https://www.kernel.org/doc/Documentation/arm64/booting.rst
 > For CPUs with the Fine Grained Traps (FEAT_FGT) extension present:
 > - If EL3 is present and the kernel is entered at EL2:
 >   - SCR_EL3.FGTEn (bit 27) must be initialised to 0b1.
 Cc: qemu-stable@nongnu.org
 Signed-off-by: Fabian Vogt <fvogt@suse.de>
 Message-id: 4831384.GXAFRqVoOG@linux-e202.suse.de
 Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- linux-user/arm/cpu_loop.c | 1 +
+ hw/arm/boot.c | 4 ++++
-file changed, 1 insertion(+)
+file changed, 4 insertions(+)
-diff --git a/linux-user/arm/cpu_loop.c b/linux-user/arm/cpu_loop.c
+diff --git a/hw/arm/boot.c b/hw/arm/boot.c
 index XXXXXXX..XXXXXXX 100644
---- a/linux-user/arm/cpu_loop.c
+--- a/hw/arm/boot.c
-+++ b/linux-user/arm/cpu_loop.c
++++ b/hw/arm/boot.c
-@@ -XXX,XX +XXX,XX @@ void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
+@@ -XXX,XX +XXX,XX @@ static void do_cpu_reset(void *opaque)
-     } else {
+                     if (cpu_isar_feature(aa64_hcx, cpu)) {
-         env->cp15.sctlr_el[1] |= SCTLR_B;
+                         env->cp15.scr_el3 |= SCR_HXEN;
-     }
+                     }
-+    arm_rebuild_hflags(env);
++                    if (cpu_isar_feature(aa64_fgt, cpu)) {
- #endif
++                        env->cp15.scr_el3 |= SCR_FGTEN;
++                    }
-     ts->stack_base = info->start_stack;
++
                      /* AArch64 kernels never boot in secure mode */
                      assert(!info->secure_boot);
                      /* This hook is only supported for AArch32 currently:
 --
-.20.1
+.34.1

-[PULL 50/51] hw/arm/raspi: Use AddressSpace when using arm_boot::write_secondary_boot
+[PULL 04/30] linux-user/elfload.c: Correct SME feature names reported in cpuinfo
-From: Philippe Mathieu-Daudé <f4bug@amsat.org>
+Some of the names we use for CPU features in linux-user's dummy
 /proc/cpuinfo don't match the strings in the real kernel in
 arch/arm64/kernel/cpuinfo.c. Specifically, the SME related
 features have an underscore in the HWCAP_FOO define name,
 but (like the SVE ones) they do not have an underscore in the
 string in cpuinfo. Correct the errors.
-write_secondary_boot() is used in SMP configurations where the
+Fixes: a55b9e7226708 ("linux-user: Emulate /proc/cpuinfo on aarch64 and arm")
-CPU address space might not be the main System Bus.
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-The rom_add_blob_fixed_as() function allows us to specify an
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
-address space. Use it to write each boot blob in the corresponding
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-CPU address space.
+---
  linux-user/elfload.c | 14 +++++++-------
 file changed, 7 insertions(+), 7 deletions(-)
-Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
+diff --git a/linux-user/elfload.c b/linux-user/elfload.c
 Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
 Message-id: 20191019234715.25750-11-f4bug@amsat.org
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
  hw/arm/raspi.c | 14 ++++++++------
 file changed, 8 insertions(+), 6 deletions(-)
 diff --git a/hw/arm/raspi.c b/hw/arm/raspi.c
 index XXXXXXX..XXXXXXX 100644
---- a/hw/arm/raspi.c
+--- a/linux-user/elfload.c
-+++ b/hw/arm/raspi.c
++++ b/linux-user/elfload.c
-@@ -XXX,XX +XXX,XX @@ static void write_smpboot(ARMCPU *cpu, const struct arm_boot_info *info)
+@@ -XXX,XX +XXX,XX @@ const char *elf_hwcap2_str(uint32_t bit)
-     QEMU_BUILD_BUG_ON((BOARDSETUP_ADDR & 0xf) != 0
+     [__builtin_ctz(ARM_HWCAP2_A64_RPRES        )] = "rpres",
-                       || (BOARDSETUP_ADDR >> 4) >= 0x100);
+     [__builtin_ctz(ARM_HWCAP2_A64_MTE3         )] = "mte3",
+     [__builtin_ctz(ARM_HWCAP2_A64_SME          )] = "sme",
--    rom_add_blob_fixed("raspi_smpboot", smpboot, sizeof(smpboot),
+-    [__builtin_ctz(ARM_HWCAP2_A64_SME_I16I64   )] = "sme_i16i64",
--                       info->smp_loader_start);
+-    [__builtin_ctz(ARM_HWCAP2_A64_SME_F64F64   )] = "sme_f64f64",
-+    rom_add_blob_fixed_as("raspi_smpboot", smpboot, sizeof(smpboot),
+-    [__builtin_ctz(ARM_HWCAP2_A64_SME_I8I32    )] = "sme_i8i32",
-+                          info->smp_loader_start,
+-    [__builtin_ctz(ARM_HWCAP2_A64_SME_F16F32   )] = "sme_f16f32",
-+                          arm_boot_address_space(cpu, info));
+-    [__builtin_ctz(ARM_HWCAP2_A64_SME_B16F32   )] = "sme_b16f32",
- }
+-    [__builtin_ctz(ARM_HWCAP2_A64_SME_F32F32   )] = "sme_f32f32",
+-    [__builtin_ctz(ARM_HWCAP2_A64_SME_FA64     )] = "sme_fa64",
- static void write_smpboot64(ARMCPU *cpu, const struct arm_boot_info *info)
++    [__builtin_ctz(ARM_HWCAP2_A64_SME_I16I64   )] = "smei16i64",
- {
++    [__builtin_ctz(ARM_HWCAP2_A64_SME_F64F64   )] = "smef64f64",
-+    AddressSpace *as = arm_boot_address_space(cpu, info);
++    [__builtin_ctz(ARM_HWCAP2_A64_SME_I8I32    )] = "smei8i32",
-     /* Unlike the AArch32 version we don't need to call the board setup hook.
++    [__builtin_ctz(ARM_HWCAP2_A64_SME_F16F32   )] = "smef16f32",
-      * The mechanism for doing the spin-table is also entirely different.
++    [__builtin_ctz(ARM_HWCAP2_A64_SME_B16F32   )] = "smeb16f32",
-      * We must have four 64-bit fields at absolute addresses
++    [__builtin_ctz(ARM_HWCAP2_A64_SME_F32F32   )] = "smef32f32",
-@@ -XXX,XX +XXX,XX @@ static void write_smpboot64(ARMCPU *cpu, const struct arm_boot_info *info)
++    [__builtin_ctz(ARM_HWCAP2_A64_SME_FA64     )] = "smefa64",
 , 0, 0, 0
      };
--    rom_add_blob_fixed("raspi_smpboot", smpboot, sizeof(smpboot),
+     return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL;
 -                       info->smp_loader_start);
 -    rom_add_blob_fixed("raspi_spintables", spintables, sizeof(spintables),
 -                       SPINTABLE_ADDR);
 +    rom_add_blob_fixed_as("raspi_smpboot", smpboot, sizeof(smpboot),
 +                          info->smp_loader_start, as);
 +    rom_add_blob_fixed_as("raspi_spintables", spintables, sizeof(spintables),
 +                          SPINTABLE_ADDR, as);
  }
  static void write_board_setup(ARMCPU *cpu, const struct arm_boot_info *info)
 --
-.20.1
+.34.1

-[PULL 34/51] hw/watchdog/milkymist-sysctl.c: Switch to transaction-based ptimer API
+[PULL 05/30] linux-user/elfload.c: Add missing arm and arm64 hwcap values
-Switch the milkymist-sysctl code away from bottom-half based
+Our lists of Arm 32 and 64 bit hwcap values have lagged behind
-ptimers to the new transaction-based ptimer API.  This just requires
+the Linux kernel. Update them to include all the bits defined
-adding begin/commit calls around the various places that modify the
+as of upstream Linux git commit a48fa7efaf1161c1 (in the middle
-ptimer state, and using the new ptimer_init() function to create the
+of the kernel 6.6 dev cycle).
-timer.
 For 64-bit, we don't yet implement any of the features reported via
 these hwcap bits.  For 32-bit we do in fact already implement them
 all; we'll add the code to set them in a subsequent commit.
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
-Message-id: 20191021141040.11007-1-peter.maydell@linaro.org
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- hw/timer/milkymist-sysctl.c | 25 ++++++++++++++++++-------
+ linux-user/elfload.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
-file changed, 18 insertions(+), 7 deletions(-)
+file changed, 44 insertions(+)
-diff --git a/hw/timer/milkymist-sysctl.c b/hw/timer/milkymist-sysctl.c
+diff --git a/linux-user/elfload.c b/linux-user/elfload.c
 index XXXXXXX..XXXXXXX 100644
---- a/hw/timer/milkymist-sysctl.c
+--- a/linux-user/elfload.c
-+++ b/hw/timer/milkymist-sysctl.c
++++ b/linux-user/elfload.c
-@@ -XXX,XX +XXX,XX @@
+@@ -XXX,XX +XXX,XX @@ enum
- #include "hw/ptimer.h"
+     ARM_HWCAP_ARM_VFPD32    = 1 << 19,
- #include "hw/qdev-properties.h"
+     ARM_HWCAP_ARM_LPAE      = 1 << 20,
- #include "qemu/error-report.h"
+     ARM_HWCAP_ARM_EVTSTRM   = 1 << 21,
--#include "qemu/main-loop.h"
++    ARM_HWCAP_ARM_FPHP      = 1 << 22,
- #include "qemu/module.h"
++    ARM_HWCAP_ARM_ASIMDHP   = 1 << 23,
 +    ARM_HWCAP_ARM_ASIMDDP   = 1 << 24,
 +    ARM_HWCAP_ARM_ASIMDFHM  = 1 << 25,
 +    ARM_HWCAP_ARM_ASIMDBF16 = 1 << 26,
 +    ARM_HWCAP_ARM_I8MM      = 1 << 27,
  };
  enum {
-@@ -XXX,XX +XXX,XX @@ struct MilkymistSysctlState {
+@@ -XXX,XX +XXX,XX @@ enum {
+     ARM_HWCAP2_ARM_SHA1     = 1 << 2,
-     MemoryRegion regs_region;
+     ARM_HWCAP2_ARM_SHA2     = 1 << 3,
+     ARM_HWCAP2_ARM_CRC32    = 1 << 4,
--    QEMUBH *bh0;
++    ARM_HWCAP2_ARM_SB       = 1 << 5,
--    QEMUBH *bh1;
++    ARM_HWCAP2_ARM_SSBS     = 1 << 6,
-     ptimer_state *ptimer0;
+ };
-     ptimer_state *ptimer1;
+ /* The commpage only exists for 32 bit kernels */
-@@ -XXX,XX +XXX,XX @@ static void sysctl_write(void *opaque, hwaddr addr, uint64_t value,
+@@ -XXX,XX +XXX,XX @@ const char *elf_hwcap_str(uint32_t bit)
-         s->regs[addr] = value;
+     [__builtin_ctz(ARM_HWCAP_ARM_VFPD32   )] = "vfpd32",
-         break;
+     [__builtin_ctz(ARM_HWCAP_ARM_LPAE     )] = "lpae",
-     case R_TIMER0_COMPARE:
+     [__builtin_ctz(ARM_HWCAP_ARM_EVTSTRM  )] = "evtstrm",
-+        ptimer_transaction_begin(s->ptimer0);
++    [__builtin_ctz(ARM_HWCAP_ARM_FPHP     )] = "fphp",
-         ptimer_set_limit(s->ptimer0, value, 0);
++    [__builtin_ctz(ARM_HWCAP_ARM_ASIMDHP  )] = "asimdhp",
-         s->regs[addr] = value;
++    [__builtin_ctz(ARM_HWCAP_ARM_ASIMDDP  )] = "asimddp",
-+        ptimer_transaction_commit(s->ptimer0);
++    [__builtin_ctz(ARM_HWCAP_ARM_ASIMDFHM )] = "asimdfhm",
-         break;
++    [__builtin_ctz(ARM_HWCAP_ARM_ASIMDBF16)] = "asimdbf16",
-     case R_TIMER1_COMPARE:
++    [__builtin_ctz(ARM_HWCAP_ARM_I8MM     )] = "i8mm",
-+        ptimer_transaction_begin(s->ptimer1);
+     };
-         ptimer_set_limit(s->ptimer1, value, 0);
-         s->regs[addr] = value;
+     return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL;
-+        ptimer_transaction_commit(s->ptimer1);
+@@ -XXX,XX +XXX,XX @@ const char *elf_hwcap2_str(uint32_t bit)
-         break;
+     [__builtin_ctz(ARM_HWCAP2_ARM_SHA1 )] = "sha1",
-     case R_TIMER0_CONTROL:
+     [__builtin_ctz(ARM_HWCAP2_ARM_SHA2 )] = "sha2",
-+        ptimer_transaction_begin(s->ptimer0);
+     [__builtin_ctz(ARM_HWCAP2_ARM_CRC32)] = "crc32",
-         s->regs[addr] = value;
++    [__builtin_ctz(ARM_HWCAP2_ARM_SB   )] = "sb",
-         if (s->regs[R_TIMER0_CONTROL] & CTRL_ENABLE) {
++    [__builtin_ctz(ARM_HWCAP2_ARM_SSBS )] = "ssbs",
-             trace_milkymist_sysctl_start_timer0();
+     };
-@@ -XXX,XX +XXX,XX @@ static void sysctl_write(void *opaque, hwaddr addr, uint64_t value,
-             trace_milkymist_sysctl_stop_timer0();
+     return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL;
-             ptimer_stop(s->ptimer0);
+@@ -XXX,XX +XXX,XX @@ enum {
-         }
+     ARM_HWCAP2_A64_SME_B16F32   = 1 << 28,
-+        ptimer_transaction_commit(s->ptimer0);
+     ARM_HWCAP2_A64_SME_F32F32   = 1 << 29,
-         break;
+     ARM_HWCAP2_A64_SME_FA64     = 1 << 30,
-     case R_TIMER1_CONTROL:
++    ARM_HWCAP2_A64_WFXT         = 1ULL << 31,
-+        ptimer_transaction_begin(s->ptimer1);
++    ARM_HWCAP2_A64_EBF16        = 1ULL << 32,
-         s->regs[addr] = value;
++    ARM_HWCAP2_A64_SVE_EBF16    = 1ULL << 33,
-         if (s->regs[R_TIMER1_CONTROL] & CTRL_ENABLE) {
++    ARM_HWCAP2_A64_CSSC         = 1ULL << 34,
-             trace_milkymist_sysctl_start_timer1();
++    ARM_HWCAP2_A64_RPRFM        = 1ULL << 35,
-@@ -XXX,XX +XXX,XX @@ static void sysctl_write(void *opaque, hwaddr addr, uint64_t value,
++    ARM_HWCAP2_A64_SVE2P1       = 1ULL << 36,
-             trace_milkymist_sysctl_stop_timer1();
++    ARM_HWCAP2_A64_SME2         = 1ULL << 37,
-             ptimer_stop(s->ptimer1);
++    ARM_HWCAP2_A64_SME2P1       = 1ULL << 38,
-         }
++    ARM_HWCAP2_A64_SME_I16I32   = 1ULL << 39,
-+        ptimer_transaction_commit(s->ptimer1);
++    ARM_HWCAP2_A64_SME_BI32I32  = 1ULL << 40,
-         break;
++    ARM_HWCAP2_A64_SME_B16B16   = 1ULL << 41,
-     case R_ICAP:
++    ARM_HWCAP2_A64_SME_F16F16   = 1ULL << 42,
-         sysctl_icap_write(s, value);
++    ARM_HWCAP2_A64_MOPS         = 1ULL << 43,
-@@ -XXX,XX +XXX,XX @@ static void milkymist_sysctl_reset(DeviceState *d)
++    ARM_HWCAP2_A64_HBC          = 1ULL << 44,
-         s->regs[i] = 0;
+ };
-     }
+ #define ELF_HWCAP   get_elf_hwcap()
-+    ptimer_transaction_begin(s->ptimer0);
+@@ -XXX,XX +XXX,XX @@ const char *elf_hwcap2_str(uint32_t bit)
-     ptimer_stop(s->ptimer0);
+     [__builtin_ctz(ARM_HWCAP2_A64_SME_B16F32   )] = "smeb16f32",
-+    ptimer_transaction_commit(s->ptimer0);
+     [__builtin_ctz(ARM_HWCAP2_A64_SME_F32F32   )] = "smef32f32",
-+    ptimer_transaction_begin(s->ptimer1);
+     [__builtin_ctz(ARM_HWCAP2_A64_SME_FA64     )] = "smefa64",
-     ptimer_stop(s->ptimer1);
++    [__builtin_ctz(ARM_HWCAP2_A64_WFXT         )] = "wfxt",
-+    ptimer_transaction_commit(s->ptimer1);
++    [__builtin_ctzll(ARM_HWCAP2_A64_EBF16      )] = "ebf16",
++    [__builtin_ctzll(ARM_HWCAP2_A64_SVE_EBF16  )] = "sveebf16",
-     /* defaults */
++    [__builtin_ctzll(ARM_HWCAP2_A64_CSSC       )] = "cssc",
-     s->regs[R_ICAP] = ICAP_READY;
++    [__builtin_ctzll(ARM_HWCAP2_A64_RPRFM      )] = "rprfm",
-@@ -XXX,XX +XXX,XX @@ static void milkymist_sysctl_realize(DeviceState *dev, Error **errp)
++    [__builtin_ctzll(ARM_HWCAP2_A64_SVE2P1     )] = "sve2p1",
- {
++    [__builtin_ctzll(ARM_HWCAP2_A64_SME2       )] = "sme2",
-     MilkymistSysctlState *s = MILKYMIST_SYSCTL(dev);
++    [__builtin_ctzll(ARM_HWCAP2_A64_SME2P1     )] = "sme2p1",
++    [__builtin_ctzll(ARM_HWCAP2_A64_SME_I16I32 )] = "smei16i32",
--    s->bh0 = qemu_bh_new(timer0_hit, s);
++    [__builtin_ctzll(ARM_HWCAP2_A64_SME_BI32I32)] = "smebi32i32",
--    s->bh1 = qemu_bh_new(timer1_hit, s);
++    [__builtin_ctzll(ARM_HWCAP2_A64_SME_B16B16 )] = "smeb16b16",
--    s->ptimer0 = ptimer_init_with_bh(s->bh0, PTIMER_POLICY_DEFAULT);
++    [__builtin_ctzll(ARM_HWCAP2_A64_SME_F16F16 )] = "smef16f16",
--    s->ptimer1 = ptimer_init_with_bh(s->bh1, PTIMER_POLICY_DEFAULT);
++    [__builtin_ctzll(ARM_HWCAP2_A64_MOPS       )] = "mops",
-+    s->ptimer0 = ptimer_init(timer0_hit, s, PTIMER_POLICY_DEFAULT);
++    [__builtin_ctzll(ARM_HWCAP2_A64_HBC        )] = "hbc",
-+    s->ptimer1 = ptimer_init(timer1_hit, s, PTIMER_POLICY_DEFAULT);
+     };
-+    ptimer_transaction_begin(s->ptimer0);
+     return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL;
      ptimer_set_freq(s->ptimer0, s->freq_hz);
 +    ptimer_transaction_commit(s->ptimer0);
 +    ptimer_transaction_begin(s->ptimer1);
      ptimer_set_freq(s->ptimer1, s->freq_hz);
 +    ptimer_transaction_commit(s->ptimer1);
  }
  static const VMStateDescription vmstate_milkymist_sysctl = {
 --
-.20.1
+.34.1

-[PULL 32/51] hw/timer/grlib_gptimer.c: Switch to transaction-based ptimer API
+[PULL 06/30] linux-user/elfload.c: Report previously missing arm32 hwcaps
-Switch the grlib_gptimer code away from bottom-half based ptimers to
+Add the code to report the arm32 hwcaps we were previously missing:
-the new transaction-based ptimer API.  This just requires adding
+ sb, ssbs, fphp, asimdhp, asimddp, asimdfhm, asimdbf16, i8mm
 begin/commit calls around the various places that modify the ptimer
 state, and using the new ptimer_init() function to create the timer.
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Message-id: 20191021134357.14266-3-peter.maydell@linaro.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- hw/timer/grlib_gptimer.c | 28 ++++++++++++++++++++++++----
+ linux-user/elfload.c | 12 ++++++++++++
-file changed, 24 insertions(+), 4 deletions(-)
+file changed, 12 insertions(+)
-diff --git a/hw/timer/grlib_gptimer.c b/hw/timer/grlib_gptimer.c
+diff --git a/linux-user/elfload.c b/linux-user/elfload.c
 index XXXXXXX..XXXXXXX 100644
---- a/hw/timer/grlib_gptimer.c
+--- a/linux-user/elfload.c
-+++ b/hw/timer/grlib_gptimer.c
++++ b/linux-user/elfload.c
-@@ -XXX,XX +XXX,XX @@
+@@ -XXX,XX +XXX,XX @@ uint32_t get_elf_hwcap(void)
- #include "hw/irq.h"
+         }
- #include "hw/ptimer.h"
+     }
- #include "hw/qdev-properties.h"
+     GET_FEATURE_ID(aa32_simdfmac, ARM_HWCAP_ARM_VFPv4);
--#include "qemu/main-loop.h"
++    /*
- #include "qemu/module.h"
++     * MVFR1.FPHP and .SIMDHP must be in sync, and QEMU uses the same
++     * isar_feature function for both. The kernel reports them as two hwcaps.
- #include "trace.h"
++     */
-@@ -XXX,XX +XXX,XX @@ typedef struct GPTimer     GPTimer;
++    GET_FEATURE_ID(aa32_fp16_arith, ARM_HWCAP_ARM_FPHP);
- typedef struct GPTimerUnit GPTimerUnit;
++    GET_FEATURE_ID(aa32_fp16_arith, ARM_HWCAP_ARM_ASIMDHP);
++    GET_FEATURE_ID(aa32_dp, ARM_HWCAP_ARM_ASIMDDP);
- struct GPTimer {
++    GET_FEATURE_ID(aa32_fhm, ARM_HWCAP_ARM_ASIMDFHM);
--    QEMUBH *bh;
++    GET_FEATURE_ID(aa32_bf16, ARM_HWCAP_ARM_ASIMDBF16);
-     struct ptimer_state *ptimer;
++    GET_FEATURE_ID(aa32_i8mm, ARM_HWCAP_ARM_I8MM);
-     qemu_irq     irq;
+     return hwcaps;
@@ -XXX,XX +XXX,XX @@ struct GPTimerUnit {
      uint32_t config;
  };
 +static void grlib_gptimer_tx_begin(GPTimer *timer)
 +{
 +    ptimer_transaction_begin(timer->ptimer);
 +}
 +
 +static void grlib_gptimer_tx_commit(GPTimer *timer)
 +{
 +    ptimer_transaction_commit(timer->ptimer);
 +}
 +
 +/* Must be called within grlib_gptimer_tx_begin/commit block */
  static void grlib_gptimer_enable(GPTimer *timer)
  {
      assert(timer != NULL);
@@ -XXX,XX +XXX,XX @@ static void grlib_gptimer_enable(GPTimer *timer)
      ptimer_run(timer->ptimer, 1);
  }
+@@ -XXX,XX +XXX,XX @@ uint32_t get_elf_hwcap2(void)
-+/* Must be called within grlib_gptimer_tx_begin/commit block */
+     GET_FEATURE_ID(aa32_sha1, ARM_HWCAP2_ARM_SHA1);
- static void grlib_gptimer_restart(GPTimer *timer)
+     GET_FEATURE_ID(aa32_sha2, ARM_HWCAP2_ARM_SHA2);
- {
+     GET_FEATURE_ID(aa32_crc32, ARM_HWCAP2_ARM_CRC32);
-     assert(timer != NULL);
++    GET_FEATURE_ID(aa32_sb, ARM_HWCAP2_ARM_SB);
-@@ -XXX,XX +XXX,XX @@ static void grlib_gptimer_set_scaler(GPTimerUnit *unit, uint32_t scaler)
++    GET_FEATURE_ID(aa32_ssbs, ARM_HWCAP2_ARM_SSBS);
-     trace_grlib_gptimer_set_scaler(scaler, value);
+     return hwcaps;
      for (i = 0; i < unit->nr_timers; i++) {
 +        ptimer_transaction_begin(unit->timers[i].ptimer);
          ptimer_set_freq(unit->timers[i].ptimer, value);
 +        ptimer_transaction_commit(unit->timers[i].ptimer);
      }
  }
-@@ -XXX,XX +XXX,XX @@ static void grlib_gptimer_write(void *opaque, hwaddr addr,
-         switch (timer_addr) {
-         case COUNTER_OFFSET:
-             trace_grlib_gptimer_writel(id, addr, value);
-+            grlib_gptimer_tx_begin(&unit->timers[id]);
-             unit->timers[id].counter = value;
-             grlib_gptimer_enable(&unit->timers[id]);
-+            grlib_gptimer_tx_commit(&unit->timers[id]);
-             return;
-         case COUNTER_RELOAD_OFFSET:
-@@ -XXX,XX +XXX,XX @@ static void grlib_gptimer_write(void *opaque, hwaddr addr,
-             /* gptimer_restart calls gptimer_enable, so if "enable" and "load"
-                bits are present, we just have to call restart. */
-+            grlib_gptimer_tx_begin(&unit->timers[id]);
-             if (value & GPTIMER_LOAD) {
-                 grlib_gptimer_restart(&unit->timers[id]);
-             } else if (value & GPTIMER_ENABLE) {
-@@ -XXX,XX +XXX,XX @@ static void grlib_gptimer_write(void *opaque, hwaddr addr,
-             value &= ~(GPTIMER_LOAD & GPTIMER_DEBUG_HALT);
-             unit->timers[id].config = value;
-+            grlib_gptimer_tx_commit(&unit->timers[id]);
-             return;
-         default:
-@@ -XXX,XX +XXX,XX @@ static void grlib_gptimer_reset(DeviceState *d)
-         timer->counter = 0;
-         timer->reload = 0;
-         timer->config = 0;
-+        ptimer_transaction_begin(timer->ptimer);
-         ptimer_stop(timer->ptimer);
-         ptimer_set_count(timer->ptimer, 0);
-         ptimer_set_freq(timer->ptimer, unit->freq_hz);
-+        ptimer_transaction_commit(timer->ptimer);
-     }
- }
-@@ -XXX,XX +XXX,XX @@ static void grlib_gptimer_realize(DeviceState *dev, Error **errp)
-         GPTimer *timer = &unit->timers[i];
-         timer->unit   = unit;
--        timer->bh     = qemu_bh_new(grlib_gptimer_hit, timer);
--        timer->ptimer = ptimer_init_with_bh(timer->bh, PTIMER_POLICY_DEFAULT);
-+        timer->ptimer = ptimer_init(grlib_gptimer_hit, timer,
-+                                    PTIMER_POLICY_DEFAULT);
-         timer->id     = i;
-         /* One IRQ line for each timer */
-         sysbus_init_irq(sbd, &timer->irq);
-+        ptimer_transaction_begin(timer->ptimer);
-         ptimer_set_freq(timer->ptimer, unit->freq_hz);
-+        ptimer_transaction_commit(timer->ptimer);
-     }
-     memory_region_init_io(&unit->iomem, OBJECT(unit), &grlib_gptimer_ops,
 --
-.20.1
+.34.1

-[PULL 13/51] target/arm: Hoist computation of TBFLAG_A32.VFPEN
+[PULL 07/30] target/arm: Update AArch64 ID register field definitions
-From: Richard Henderson <richard.henderson@linaro.org>
+Update our AArch64 ID register field definitions from the 2023-06
 system register XML release:
  https://developer.arm.com/documentation/ddi0601/2023-06/
-There are 3 conditions that each enable this flag.  M-profile always
-enables; A-profile with EL1 as AA64 always enables.  Both of these
-conditions can easily be cached.  The final condition relies on the
-FPEXC register which we are not prepared to cache.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-12-richard.henderson@linaro.org
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- target/arm/cpu.h    |  2 +-
+ target/arm/cpu.h | 23 +++++++++++++++++++++++
- target/arm/helper.c | 14 ++++++++++----
+file changed, 23 insertions(+)
 files changed, 11 insertions(+), 5 deletions(-)
 diff --git a/target/arm/cpu.h b/target/arm/cpu.h
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/cpu.h
 +++ b/target/arm/cpu.h
-@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, XSCALE_CPAR, 4, 2)
+@@ -XXX,XX +XXX,XX @@ FIELD(ID_AA64ISAR0, SHA1, 8, 4)
-  * the same thing as the current security state of the processor!
+ FIELD(ID_AA64ISAR0, SHA2, 12, 4)
-  */
+ FIELD(ID_AA64ISAR0, CRC32, 16, 4)
- FIELD(TBFLAG_A32, NS, 6, 1)
+ FIELD(ID_AA64ISAR0, ATOMIC, 20, 4)
--FIELD(TBFLAG_A32, VFPEN, 7, 1)          /* Not cached. */
++FIELD(ID_AA64ISAR0, TME, 24, 4)
-+FIELD(TBFLAG_A32, VFPEN, 7, 1)          /* Partially cached, minus FPEXC. */
+ FIELD(ID_AA64ISAR0, RDM, 28, 4)
- FIELD(TBFLAG_A32, CONDEXEC, 8, 8)       /* Not cached. */
+ FIELD(ID_AA64ISAR0, SHA3, 32, 4)
- FIELD(TBFLAG_A32, SCTLR_B, 16, 1)
+ FIELD(ID_AA64ISAR0, SM3, 36, 4)
- /* For M profile only, set if FPCCR.LSPACT is set */
+@@ -XXX,XX +XXX,XX @@ FIELD(ID_AA64ISAR2, APA3, 12, 4)
-diff --git a/target/arm/helper.c b/target/arm/helper.c
+ FIELD(ID_AA64ISAR2, MOPS, 16, 4)
-index XXXXXXX..XXXXXXX 100644
+ FIELD(ID_AA64ISAR2, BC, 20, 4)
---- a/target/arm/helper.c
+ FIELD(ID_AA64ISAR2, PAC_FRAC, 24, 4)
-+++ b/target/arm/helper.c
++FIELD(ID_AA64ISAR2, CLRBHB, 28, 4)
-@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el,
++FIELD(ID_AA64ISAR2, SYSREG_128, 32, 4)
- {
++FIELD(ID_AA64ISAR2, SYSINSTR_128, 36, 4)
-     uint32_t flags = 0;
++FIELD(ID_AA64ISAR2, PRFMSLC, 40, 4)
++FIELD(ID_AA64ISAR2, RPRFM, 48, 4)
-+    /* v8M always enables the fpu.  */
++FIELD(ID_AA64ISAR2, CSSC, 52, 4)
-+    flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
++FIELD(ID_AA64ISAR2, ATS1A, 60, 4)
-+
-     if (arm_v7m_is_handler_mode(env)) {
+ FIELD(ID_AA64PFR0, EL0, 0, 4)
-         flags = FIELD_DP32(flags, TBFLAG_A32, HANDLER, 1);
+ FIELD(ID_AA64PFR0, EL1, 4, 4)
-     }
+@@ -XXX,XX +XXX,XX @@ FIELD(ID_AA64PFR1, SME, 24, 4)
-@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_a32(CPUARMState *env, int fp_el,
+ FIELD(ID_AA64PFR1, RNDR_TRAP, 28, 4)
-                                    ARMMMUIdx mmu_idx)
+ FIELD(ID_AA64PFR1, CSV2_FRAC, 32, 4)
- {
+ FIELD(ID_AA64PFR1, NMI, 36, 4)
-     uint32_t flags = rebuild_hflags_aprofile(env);
++FIELD(ID_AA64PFR1, MTE_FRAC, 40, 4)
-+
++FIELD(ID_AA64PFR1, GCS, 44, 4)
-+    if (arm_el_is_aa64(env, 1)) {
++FIELD(ID_AA64PFR1, THE, 48, 4)
-+        flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
++FIELD(ID_AA64PFR1, MTEX, 52, 4)
-+    }
++FIELD(ID_AA64PFR1, DF2, 56, 4)
-     return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
++FIELD(ID_AA64PFR1, PFAR, 60, 4)
- }
+ FIELD(ID_AA64MMFR0, PARANGE, 0, 4)
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
+ FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4)
-                 flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE,
+@@ -XXX,XX +XXX,XX @@ FIELD(ID_AA64MMFR1, AFP, 44, 4)
-                                    env->vfp.vec_stride);
+ FIELD(ID_AA64MMFR1, NTLBPA, 48, 4)
-             }
+ FIELD(ID_AA64MMFR1, TIDCP1, 52, 4)
-+            if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
+ FIELD(ID_AA64MMFR1, CMOW, 56, 4)
-+                flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
++FIELD(ID_AA64MMFR1, ECBHB, 60, 4)
-+            }
-         }
+ FIELD(ID_AA64MMFR2, CNP, 0, 4)
+ FIELD(ID_AA64MMFR2, UAO, 4, 4)
-         flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
+@@ -XXX,XX +XXX,XX @@ FIELD(ID_AA64DFR0, DEBUGVER, 0, 4)
-         flags = FIELD_DP32(flags, TBFLAG_A32, CONDEXEC, env->condexec_bits);
+ FIELD(ID_AA64DFR0, TRACEVER, 4, 4)
--        if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
+ FIELD(ID_AA64DFR0, PMUVER, 8, 4)
--            || arm_el_is_aa64(env, 1) || arm_feature(env, ARM_FEATURE_M)) {
+ FIELD(ID_AA64DFR0, BRPS, 12, 4)
--            flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
++FIELD(ID_AA64DFR0, PMSS, 16, 4)
--        }
+ FIELD(ID_AA64DFR0, WRPS, 20, 4)
-         pstate_for_ss = env->uncached_cpsr;
++FIELD(ID_AA64DFR0, SEBEP, 24, 4)
-     }
+ FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4)
+ FIELD(ID_AA64DFR0, PMSVER, 32, 4)
  FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4)
@@ -XXX,XX +XXX,XX @@ FIELD(ID_AA64DFR0, TRACEFILT, 40, 4)
  FIELD(ID_AA64DFR0, TRACEBUFFER, 44, 4)
  FIELD(ID_AA64DFR0, MTPMU, 48, 4)
  FIELD(ID_AA64DFR0, BRBE, 52, 4)
 +FIELD(ID_AA64DFR0, EXTTRCBUFF, 56, 4)
  FIELD(ID_AA64DFR0, HPMN0, 60, 4)
  FIELD(ID_AA64ZFR0, SVEVER, 0, 4)
  FIELD(ID_AA64ZFR0, AES, 4, 4)
  FIELD(ID_AA64ZFR0, BITPERM, 16, 4)
  FIELD(ID_AA64ZFR0, BFLOAT16, 20, 4)
 +FIELD(ID_AA64ZFR0, B16B16, 24, 4)
  FIELD(ID_AA64ZFR0, SHA3, 32, 4)
  FIELD(ID_AA64ZFR0, SM4, 40, 4)
  FIELD(ID_AA64ZFR0, I8MM, 44, 4)
@@ -XXX,XX +XXX,XX @@ FIELD(ID_AA64ZFR0, F32MM, 52, 4)
  FIELD(ID_AA64ZFR0, F64MM, 56, 4)
  FIELD(ID_AA64SMFR0, F32F32, 32, 1)
 +FIELD(ID_AA64SMFR0, BI32I32, 33, 1)
  FIELD(ID_AA64SMFR0, B16F32, 34, 1)
  FIELD(ID_AA64SMFR0, F16F32, 35, 1)
  FIELD(ID_AA64SMFR0, I8I32, 36, 4)
 +FIELD(ID_AA64SMFR0, F16F16, 42, 1)
 +FIELD(ID_AA64SMFR0, B16B16, 43, 1)
 +FIELD(ID_AA64SMFR0, I16I32, 44, 4)
  FIELD(ID_AA64SMFR0, F64F64, 48, 1)
  FIELD(ID_AA64SMFR0, I16I64, 52, 4)
  FIELD(ID_AA64SMFR0, SMEVER, 56, 4)
 --
-.20.1
+.34.1

-[PULL 26/51] target/arm: Rely on hflags correct in cpu_get_tb_cpu_state
+[PULL 08/30] target/arm: Update user-mode ID reg mask values
-From: Richard Henderson <richard.henderson@linaro.org>
+For user-only mode we reveal a subset of the AArch64 ID registers
 to the guest, to emulate the kernel's trap-and-emulate-ID-regs
 handling. Update the feature bit masks to match upstream kernel
 commit a48fa7efaf1161c1c.
-This is the payoff.
+None of these features are yet implemented by QEMU, so this
 doesn't yet have a behavioural change, but implementation of
 FEAT_MOPS and FEAT_HBC is imminent.
-From perf record -g data of ubuntu 18 boot and shutdown:
-BEFORE:
--   23.02%     2.82%  qemu-system-aar  [.] helper_lookup_tb_ptr
-   - 20.22% helper_lookup_tb_ptr
-      + 10.05% tb_htable_lookup
-      - 9.13% cpu_get_tb_cpu_state
-.20% aa64_va_parameters_both
-.55% fp_exception_el
--   11.66%     4.74%  qemu-system-aar  [.] cpu_get_tb_cpu_state
-   - 6.96% cpu_get_tb_cpu_state
-.63% aa64_va_parameters_both
-.60% fp_exception_el
-.53% sve_exception_el
-AFTER:
--   16.40%     3.40%  qemu-system-aar  [.] helper_lookup_tb_ptr
-   - 13.03% helper_lookup_tb_ptr
-      + 11.19% tb_htable_lookup
-.55% cpu_get_tb_cpu_state
-.98%     0.71%  qemu-system-aar  [.] cpu_get_tb_cpu_state
-.87%     0.24%  qemu-system-aar  [.] rebuild_hflags_a64
-Before, helper_lookup_tb_ptr is the second hottest function in the
-application, consuming almost a quarter of the runtime.  Within the
-entire execution, cpu_get_tb_cpu_state consumes about 12%.
-After, helper_lookup_tb_ptr has dropped to the fourth hottest function,
-with consumption dropping to a sixth of the runtime.  Within the
-entire execution, cpu_get_tb_cpu_state has dropped below 1%, and the
-supporting function to rebuild hflags also consumes about 1%.
-Assertions are retained for --enable-debug-tcg.
-Tested-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-25-richard.henderson@linaro.org
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- target/arm/helper.c | 9 ++++++---
+ target/arm/helper.c         | 11 ++++++++++-
-file changed, 6 insertions(+), 3 deletions(-)
+ tests/tcg/aarch64/sysregs.c |  4 ++--
 files changed, 12 insertions(+), 3 deletions(-)
 diff --git a/target/arm/helper.c b/target/arm/helper.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/helper.c
 +++ b/target/arm/helper.c
-@@ -XXX,XX +XXX,XX @@ void HELPER(rebuild_hflags_a64)(CPUARMState *env, int el)
+@@ -XXX,XX +XXX,XX @@ void register_cp_regs_for_features(ARMCPU *cpu)
- void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
+                                R_ID_AA64ZFR0_F64MM_MASK },
-                           target_ulong *cs_base, uint32_t *pflags)
+             { .name = "ID_AA64SMFR0_EL1",
- {
+               .exported_bits = R_ID_AA64SMFR0_F32F32_MASK |
--    uint32_t flags, pstate_for_ss;
++                               R_ID_AA64SMFR0_BI32I32_MASK |
-+    uint32_t flags = env->hflags;
+                                R_ID_AA64SMFR0_B16F32_MASK |
-+    uint32_t pstate_for_ss;
+                                R_ID_AA64SMFR0_F16F32_MASK |
+                                R_ID_AA64SMFR0_I8I32_MASK |
-     *cs_base = 0;
++                               R_ID_AA64SMFR0_F16F16_MASK |
--    flags = rebuild_hflags_internal(env);
++                               R_ID_AA64SMFR0_B16B16_MASK |
-+#ifdef CONFIG_DEBUG_TCG
++                               R_ID_AA64SMFR0_I16I32_MASK |
-+    assert(flags == rebuild_hflags_internal(env));
+                                R_ID_AA64SMFR0_F64F64_MASK |
-+#endif
+                                R_ID_AA64SMFR0_I16I64_MASK |
++                               R_ID_AA64SMFR0_SMEVER_MASK |
--    if (is_a64(env)) {
+                                R_ID_AA64SMFR0_FA64_MASK },
-+    if (FIELD_EX32(flags, TBFLAG_ANY, AARCH64_STATE)) {
+             { .name = "ID_AA64MMFR0_EL1",
-         *pc = env->pc;
+               .exported_bits = R_ID_AA64MMFR0_ECV_MASK,
-         if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
+@@ -XXX,XX +XXX,XX @@ void register_cp_regs_for_features(ARMCPU *cpu)
-             flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
+               .exported_bits = R_ID_AA64ISAR2_WFXT_MASK |
                                 R_ID_AA64ISAR2_RPRES_MASK |
                                 R_ID_AA64ISAR2_GPA3_MASK |
 -                               R_ID_AA64ISAR2_APA3_MASK },
 +                               R_ID_AA64ISAR2_APA3_MASK |
 +                               R_ID_AA64ISAR2_MOPS_MASK |
 +                               R_ID_AA64ISAR2_BC_MASK |
 +                               R_ID_AA64ISAR2_RPRFM_MASK |
 +                               R_ID_AA64ISAR2_CSSC_MASK },
              { .name = "ID_AA64ISAR*_EL1_RESERVED",
                .is_glob = true },
          };
 diff --git a/tests/tcg/aarch64/sysregs.c b/tests/tcg/aarch64/sysregs.c
 index XXXXXXX..XXXXXXX 100644
 --- a/tests/tcg/aarch64/sysregs.c
 +++ b/tests/tcg/aarch64/sysregs.c
@@ -XXX,XX +XXX,XX @@ int main(void)
       */
      get_cpu_reg_check_mask(id_aa64isar0_el1, _m(f0ff,ffff,f0ff,fff0));
      get_cpu_reg_check_mask(id_aa64isar1_el1, _m(00ff,f0ff,ffff,ffff));
 -    get_cpu_reg_check_mask(SYS_ID_AA64ISAR2_EL1, _m(0000,0000,0000,ffff));
 +    get_cpu_reg_check_mask(SYS_ID_AA64ISAR2_EL1, _m(00ff,0000,00ff,ffff));
      /* TGran4 & TGran64 as pegged to -1 */
      get_cpu_reg_check_mask(id_aa64mmfr0_el1, _m(f000,0000,ff00,0000));
      get_cpu_reg_check_mask(id_aa64mmfr1_el1, _m(0000,f000,0000,0000));
@@ -XXX,XX +XXX,XX @@ int main(void)
      get_cpu_reg_check_mask(id_aa64dfr0_el1,  _m(0000,0000,0000,0006));
      get_cpu_reg_check_zero(id_aa64dfr1_el1);
      get_cpu_reg_check_mask(SYS_ID_AA64ZFR0_EL1,  _m(0ff0,ff0f,00ff,00ff));
 -    get_cpu_reg_check_mask(SYS_ID_AA64SMFR0_EL1, _m(80f1,00fd,0000,0000));
 +    get_cpu_reg_check_mask(SYS_ID_AA64SMFR0_EL1, _m(8ff1,fcff,0000,0000));
      get_cpu_reg_check_zero(id_aa64afr0_el1);
      get_cpu_reg_check_zero(id_aa64afr1_el1);
 --
-.20.1
+.34.1

-[PULL 14/51] target/arm: Add arm_rebuild_hflags
+[PULL 09/30] target/arm: Implement FEAT_HBC
-From: Richard Henderson <richard.henderson@linaro.org>
+FEAT_HBC (Hinted conditional branches) provides a new instruction
 BC.cond, which behaves exactly like the existing B.cond except
 that it provides a hint to the branch predictor about the
 likely behaviour of the branch.
-This function assumes nothing about the current state of the cpu,
+Since QEMU does not implement branch prediction, we can treat
-and writes the computed value to env->hflags.
+this identically to B.cond.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-13-richard.henderson@linaro.org
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- target/arm/cpu.h    |  6 ++++++
+ docs/system/arm/emulation.rst  | 1 +
- target/arm/helper.c | 30 ++++++++++++++++++++++--------
+ target/arm/cpu.h               | 5 +++++
-files changed, 28 insertions(+), 8 deletions(-)
+ target/arm/tcg/a64.decode      | 3 ++-
  linux-user/elfload.c           | 1 +
  target/arm/tcg/cpu64.c         | 4 ++++
  target/arm/tcg/translate-a64.c | 4 ++++
 files changed, 17 insertions(+), 1 deletion(-)
+diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
+index XXXXXXX..XXXXXXX 100644
+--- a/docs/system/arm/emulation.rst
++++ b/docs/system/arm/emulation.rst
+@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
+ - FEAT_FlagM2 (Enhancements to flag manipulation instructions)
+ - FEAT_GTG (Guest translation granule size)
+ - FEAT_HAFDBS (Hardware management of the access flag and dirty bit state)
++- FEAT_HBC (Hinted conditional branches)
+ - FEAT_HCX (Support for the HCRX_EL2 register)
+ - FEAT_HPDS (Hierarchical permission disables)
+ - FEAT_HPDS2 (Translation table page-based hardware attributes)
 diff --git a/target/arm/cpu.h b/target/arm/cpu.h
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/cpu.h
 +++ b/target/arm/cpu.h
-@@ -XXX,XX +XXX,XX @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
+@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id)
- void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void
+     return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0;
          *opaque);
 +/**
 + * arm_rebuild_hflags:
 + * Rebuild the cached TBFLAGS for arbitrary changed processor state.
 + */
 +void arm_rebuild_hflags(CPUARMState *env);
 +
  /**
   * aa32_vfp_dreg:
   * Return a pointer to the Dn register within env in 32-bit mode.
 diff --git a/target/arm/helper.c b/target/arm/helper.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/helper.c
 +++ b/target/arm/helper.c
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
      return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
  }
-+static uint32_t rebuild_hflags_internal(CPUARMState *env)
++static inline bool isar_feature_aa64_hbc(const ARMISARegisters *id)
 +{
-+    int el = arm_current_el(env);
++    return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, BC) != 0;
 +    int fp_el = fp_exception_el(env, el);
 +    ARMMMUIdx mmu_idx = arm_mmu_idx(env);
 +
 +    if (is_a64(env)) {
 +        return rebuild_hflags_a64(env, el, fp_el, mmu_idx);
 +    } else if (arm_feature(env, ARM_FEATURE_M)) {
 +        return rebuild_hflags_m32(env, fp_el, mmu_idx);
 +    } else {
 +        return rebuild_hflags_a32(env, fp_el, mmu_idx);
 +    }
 +}
 +
-+void arm_rebuild_hflags(CPUARMState *env)
+ static inline bool isar_feature_aa64_tgran4_lpa2(const ARMISARegisters *id)
-+{
+ {
-+    env->hflags = rebuild_hflags_internal(env);
+     return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 1;
-+}
+diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/tcg/a64.decode
 +++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ CBZ             sf:1 011010 nz:1 ................... rt:5 &cbz imm=%imm19
  TBZ             . 011011 nz:1 ..... .............. rt:5 &tbz  imm=%imm14 bitpos=%imm31_19
 -B_cond          0101010 0 ................... 0 cond:4 imm=%imm19
 +# B.cond and BC.cond
 +B_cond          0101010 0 ................... c:1 cond:4 imm=%imm19
  BR              1101011 0000 11111 000000 rn:5 00000 &r
  BLR             1101011 0001 11111 000000 rn:5 00000 &r
 diff --git a/linux-user/elfload.c b/linux-user/elfload.c
 index XXXXXXX..XXXXXXX 100644
 --- a/linux-user/elfload.c
 +++ b/linux-user/elfload.c
@@ -XXX,XX +XXX,XX @@ uint32_t get_elf_hwcap2(void)
      GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64);
      GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64);
      GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64);
 +    GET_FEATURE_ID(aa64_hbc, ARM_HWCAP2_A64_HBC);
      return hwcaps;
  }
 diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/tcg/cpu64.c
 +++ b/target/arm/tcg/cpu64.c
@@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj)
      t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 1);     /* FEAT_I8MM */
      cpu->isar.id_aa64isar1 = t;
 +    t = cpu->isar.id_aa64isar2;
 +    t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1);      /* FEAT_HBC */
 +    cpu->isar.id_aa64isar2 = t;
 +
- void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
+     t = cpu->isar.id_aa64pfr0;
-                           target_ulong *cs_base, uint32_t *pflags)
+     t = FIELD_DP64(t, ID_AA64PFR0, FP, 1);        /* FEAT_FP16 */
      t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1);   /* FEAT_FP16 */
 diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/tcg/translate-a64.c
 +++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static bool trans_TBZ(DisasContext *s, arg_tbz *a)
  static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
  {
--    ARMMMUIdx mmu_idx = arm_mmu_idx(env);
++    /* BC.cond is only present with FEAT_HBC */
--    int current_el = arm_current_el(env);
++    if (a->c && !dc_isar_feature(aa64_hbc, s)) {
--    int fp_el = fp_exception_el(env, current_el);
++        return false;
-     uint32_t flags, pstate_for_ss;
++    }
+     reset_btype(s);
-+    flags = rebuild_hflags_internal(env);
+     if (a->cond < 0x0e) {
-+
+         /* genuinely conditional branches */
      if (is_a64(env)) {
          *pc = env->pc;
 -        flags = rebuild_hflags_a64(env, current_el, fp_el, mmu_idx);
          if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
              flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
          }
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
          *pc = env->regs[15];
          if (arm_feature(env, ARM_FEATURE_M)) {
 -            flags = rebuild_hflags_m32(env, fp_el, mmu_idx);
 -
              if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
                  FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S)
                  != env->v7m.secure) {
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                  flags = FIELD_DP32(flags, TBFLAG_A32, LSPACT, 1);
              }
          } else {
 -            flags = rebuild_hflags_a32(env, fp_el, mmu_idx);
 -
              /*
               * Note that XSCALE_CPAR shares bits with VECSTRIDE.
               * Note that VECLEN+VECSTRIDE are RES0 for M-profile.
 --
-.20.1
+.34.1

-[PULL 27/51] hw/net/fsl_etsec/etsec.c: Switch to transaction-based ptimer API
+[PULL 10/30] target/arm: Remove unused allocation_tag_mem() argument
-Switch the fsl_etsec code away from bottom-half based ptimers to
+The allocation_tag_mem() function takes an argument tag_size,
-the new transaction-based ptimer API.  This just requires adding
+but it never uses it. Remove the argument. In mte_probe_int()
-begin/commit calls around the various places that modify the ptimer
+in particular this also lets us delete the code computing
-state, and using the new ptimer_init() function to create the timer.
+the value we were passing in.
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
 Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
 Message-id: 20191017132122.4402-2-peter.maydell@linaro.org
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- hw/net/fsl_etsec/etsec.h | 1 -
+ target/arm/tcg/mte_helper.c | 42 +++++++++++++------------------------
- hw/net/fsl_etsec/etsec.c | 9 +++++----
+file changed, 14 insertions(+), 28 deletions(-)
 files changed, 5 insertions(+), 5 deletions(-)
-diff --git a/hw/net/fsl_etsec/etsec.h b/hw/net/fsl_etsec/etsec.h
+diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
 index XXXXXXX..XXXXXXX 100644
---- a/hw/net/fsl_etsec/etsec.h
+--- a/target/arm/tcg/mte_helper.c
-+++ b/hw/net/fsl_etsec/etsec.h
++++ b/target/arm/tcg/mte_helper.c
-@@ -XXX,XX +XXX,XX @@ typedef struct eTSEC {
+@@ -XXX,XX +XXX,XX @@ static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
-     uint16_t phy_control;
+  * @ptr_access: the access to use for the virtual address
+  * @ptr_size: the number of bytes in the normal memory access
-     /* Polling */
+  * @tag_access: the access to use for the tag memory
--    QEMUBH *bh;
+- * @tag_size: the number of bytes in the tag memory access
-     struct ptimer_state *ptimer;
+  * @ra: the return address for exception handling
+  *
-     /* Whether we should flush the rx queue when buffer becomes available. */
+  * Our tag memory is formatted as a sequence of little-endian nibbles.
-diff --git a/hw/net/fsl_etsec/etsec.c b/hw/net/fsl_etsec/etsec.c
+@@ -XXX,XX +XXX,XX @@ static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
-index XXXXXXX..XXXXXXX 100644
+  * a pointer to the corresponding tag byte.  Exit with exception if the
---- a/hw/net/fsl_etsec/etsec.c
+  * virtual address is not accessible for @ptr_access.
-+++ b/hw/net/fsl_etsec/etsec.c
+  *
-@@ -XXX,XX +XXX,XX @@
+- * The @ptr_size and @tag_size values may not have an obvious relation
- #include "etsec.h"
+- * due to the alignment of @ptr, and the number of tag checks required.
- #include "registers.h"
+- *
- #include "qemu/log.h"
+  * If there is no tag storage corresponding to @ptr, return NULL.
--#include "qemu/main-loop.h"
+  */
- #include "qemu/module.h"
+ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
+                                    uint64_t ptr, MMUAccessType ptr_access,
- /* #define HEX_DUMP */
+                                    int ptr_size, MMUAccessType tag_access,
-@@ -XXX,XX +XXX,XX @@ static void write_dmactrl(eTSEC          *etsec,
+-                                   int tag_size, uintptr_t ra)
++                                   uintptr_t ra)
-     if (!(value & DMACTRL_WOP)) {
+ {
-         /* Start polling */
+ #ifdef CONFIG_USER_ONLY
-+        ptimer_transaction_begin(etsec->ptimer);
+     uint64_t clean_ptr = useronly_clean_ptr(ptr);
-         ptimer_stop(etsec->ptimer);
+@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
-         ptimer_set_count(etsec->ptimer, 1);
-         ptimer_run(etsec->ptimer, 1);
+     /* Trap if accessing an invalid page.  */
-+        ptimer_transaction_commit(etsec->ptimer);
+     mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, 1,
 -                             MMU_DATA_LOAD, 1, GETPC());
 +                             MMU_DATA_LOAD, GETPC());
      /* Load if page supports tags. */
      if (mem) {
@@ -XXX,XX +XXX,XX @@ static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt,
      /* Trap if accessing an invalid page.  */
      mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, TAG_GRANULE,
 -                             MMU_DATA_STORE, 1, ra);
 +                             MMU_DATA_STORE, ra);
      /* Store if page supports tags. */
      if (mem) {
@@ -XXX,XX +XXX,XX @@ static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt,
      if (ptr & TAG_GRANULE) {
          /* Two stores unaligned mod TAG_GRANULE*2 -- modify two bytes. */
          mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
 -                                  TAG_GRANULE, MMU_DATA_STORE, 1, ra);
 +                                  TAG_GRANULE, MMU_DATA_STORE, ra);
          mem2 = allocation_tag_mem(env, mmu_idx, ptr + TAG_GRANULE,
                                    MMU_DATA_STORE, TAG_GRANULE,
 -                                  MMU_DATA_STORE, 1, ra);
 +                                  MMU_DATA_STORE, ra);
          /* Store if page(s) support tags. */
          if (mem1) {
@@ -XXX,XX +XXX,XX @@ static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt,
      } else {
          /* Two stores aligned mod TAG_GRANULE*2 -- modify one byte. */
          mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
 -                                  2 * TAG_GRANULE, MMU_DATA_STORE, 1, ra);
 +                                  2 * TAG_GRANULE, MMU_DATA_STORE, ra);
          if (mem1) {
              tag |= tag << 4;
              qatomic_set(mem1, tag);
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr)
      /* Trap if accessing an invalid page.  */
      tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD,
 -                                 gm_bs_bytes, MMU_DATA_LOAD,
 -                                 gm_bs_bytes / (2 * TAG_GRANULE), ra);
 +                                 gm_bs_bytes, MMU_DATA_LOAD, ra);
      /* The tag is squashed to zero if the page does not support tags.  */
      if (!tag_mem) {
@@ -XXX,XX +XXX,XX @@ void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
      /* Trap if accessing an invalid page.  */
      tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
 -                                 gm_bs_bytes, MMU_DATA_LOAD,
 -                                 gm_bs_bytes / (2 * TAG_GRANULE), ra);
 +                                 gm_bs_bytes, MMU_DATA_LOAD, ra);
      /*
       * Tag store only happens if the page support tags,
@@ -XXX,XX +XXX,XX @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
      ptr &= -dcz_bytes;
      mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes,
 -                             MMU_DATA_STORE, tag_bytes, ra);
 +                             MMU_DATA_STORE, ra);
      if (mem) {
          int tag_pair = (val & 0xf) * 0x11;
          memset(mem, tag_pair, tag_bytes);
@@ -XXX,XX +XXX,XX @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr,
      int mmu_idx, ptr_tag, bit55;
      uint64_t ptr_last, prev_page, next_page;
      uint64_t tag_first, tag_last;
 -    uint64_t tag_byte_first, tag_byte_last;
 -    uint32_t sizem1, tag_count, tag_size, n, c;
 +    uint32_t sizem1, tag_count, n, c;
      uint8_t *mem1, *mem2;
      MMUAccessType type;
@@ -XXX,XX +XXX,XX @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr,
      tag_last = QEMU_ALIGN_DOWN(ptr_last, TAG_GRANULE);
      tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;
 -    /* Round the bounds to twice the tag granule, and compute the bytes. */
 -    tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE);
 -    tag_byte_last = QEMU_ALIGN_DOWN(ptr_last, 2 * TAG_GRANULE);
 -
      /* Locate the page boundaries. */
      prev_page = ptr & TARGET_PAGE_MASK;
      next_page = prev_page + TARGET_PAGE_SIZE;
      if (likely(tag_last - prev_page < TARGET_PAGE_SIZE)) {
          /* Memory access stays on one page. */
 -        tag_size = ((tag_byte_last - tag_byte_first) / (2 * TAG_GRANULE)) + 1;
          mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, sizem1 + 1,
 -                                  MMU_DATA_LOAD, tag_size, ra);
 +                                  MMU_DATA_LOAD, ra);
          if (!mem1) {
              return 1;
          }
@@ -XXX,XX +XXX,XX @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr,
          n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count);
      } else {
          /* Memory access crosses to next page. */
 -        tag_size = (next_page - tag_byte_first) / (2 * TAG_GRANULE);
          mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr,
 -                                  MMU_DATA_LOAD, tag_size, ra);
 +                                  MMU_DATA_LOAD, ra);
 -        tag_size = ((tag_byte_last - next_page) / (2 * TAG_GRANULE)) + 1;
          mem2 = allocation_tag_mem(env, mmu_idx, next_page, type,
                                    ptr_last - next_page + 1,
 -                                  MMU_DATA_LOAD, tag_size, ra);
 +                                  MMU_DATA_LOAD, ra);
          /*
           * Perform all of the comparisons.
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr)
      mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
      (void) probe_write(env, ptr, 1, mmu_idx, ra);
      mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE,
 -                             dcz_bytes, MMU_DATA_LOAD, tag_bytes, ra);
 +                             dcz_bytes, MMU_DATA_LOAD, ra);
      if (!mem) {
          goto done;
      }
- }
-@@ -XXX,XX +XXX,XX @@ static void etsec_realize(DeviceState *dev, Error **errp)
-                               object_get_typename(OBJECT(dev)), dev->id, etsec);
-     qemu_format_nic_info_str(qemu_get_queue(etsec->nic), etsec->conf.macaddr.a);
--
--    etsec->bh     = qemu_bh_new(etsec_timer_hit, etsec);
--    etsec->ptimer = ptimer_init_with_bh(etsec->bh, PTIMER_POLICY_DEFAULT);
-+    etsec->ptimer = ptimer_init(etsec_timer_hit, etsec, PTIMER_POLICY_DEFAULT);
-+    ptimer_transaction_begin(etsec->ptimer);
-     ptimer_set_freq(etsec->ptimer, 100);
-+    ptimer_transaction_commit(etsec->ptimer);
- }
- static void etsec_instance_init(Object *obj)
 --
-.20.1
+.34.1

-[PULL 23/51] target/arm: Rebuild hflags for M-profile NVIC
+[PULL 11/30] target/arm: Don't skip MTE checks for LDRT/STRT at EL0
-From: Richard Henderson <richard.henderson@linaro.org>
+The LDRT/STRT "unprivileged load/store" instructions behave like
 normal ones if executed at EL0. We handle this correctly for
 the load/store semantics, but get the MTE checking wrong.
-Continue setting, but not relying upon, env->hflags.
+We always look at s->mte_active[is_unpriv] to see whether we should
 be doing MTE checks, but in hflags.c when we set the TB flags that
 will be used to fill the mte_active[] array we only set the
 MTE0_ACTIVE bit if UNPRIV is true (i.e. we are not at EL0).
-Suggested-by: Peter Maydell <peter.maydell@linaro.org>
+This means that a LDRT at EL0 will see s->mte_active[1] as 0,
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+and will not do MTE checks even when MTE is enabled.
-Message-id: 20191023150057.25731-22-richard.henderson@linaro.org
 To avoid the translate-time code having to do an explicit check on
 s->unpriv to see if it is OK to index into the mte_active[] array,
 duplicate MTE_ACTIVE into MTE0_ACTIVE when UNPRIV is false.
 (This isn't a very serious bug because generally nobody executes
 LDRT/STRT at EL0, because they have no use there.)
 Cc: qemu-stable@nongnu.org
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+Message-id: 20230912140434.1333369-2-peter.maydell@linaro.org
 ---
- hw/intc/armv7m_nvic.c | 22 +++++++++++++---------
+ target/arm/tcg/hflags.c | 9 +++++++++
-file changed, 13 insertions(+), 9 deletions(-)
+file changed, 9 insertions(+)
-diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
+diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
 index XXXXXXX..XXXXXXX 100644
---- a/hw/intc/armv7m_nvic.c
+--- a/target/arm/tcg/hflags.c
-+++ b/hw/intc/armv7m_nvic.c
++++ b/target/arm/tcg/hflags.c
-@@ -XXX,XX +XXX,XX @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
+@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
                  && !(env->pstate & PSTATE_TCO)
                  && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) {
                  DP_TBFLAG_A64(flags, MTE_ACTIVE, 1);
 +                if (!EX_TBFLAG_A64(flags, UNPRIV)) {
 +                    /*
 +                     * In non-unpriv contexts (eg EL0), unpriv load/stores
 +                     * act like normal ones; duplicate the MTE info to
 +                     * avoid translate-a64.c having to check UNPRIV to see
 +                     * whether it is OK to index into MTE_ACTIVE[].
 +                     */
 +                    DP_TBFLAG_A64(flags, MTE0_ACTIVE, 1);
 +                }
              }
          }
-         nvic_irq_update(s);
+         /* And again for unprivileged accesses, if required.  */
 -        return MEMTX_OK;
 +        goto exit_ok;
      case 0x200 ... 0x23f: /* NVIC Set pend */
          /* the special logic in armv7m_nvic_set_pending()
           * is not needed since IRQs are never escalated
@@ -XXX,XX +XXX,XX @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
              }
          }
          nvic_irq_update(s);
 -        return MEMTX_OK;
 +        goto exit_ok;
      case 0x300 ... 0x33f: /* NVIC Active */
 -        return MEMTX_OK; /* R/O */
 +        goto exit_ok; /* R/O */
      case 0x400 ... 0x5ef: /* NVIC Priority */
          startvec = (offset - 0x400) + NVIC_FIRST_IRQ; /* vector # */
@@ -XXX,XX +XXX,XX @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
              }
          }
          nvic_irq_update(s);
 -        return MEMTX_OK;
 +        goto exit_ok;
      case 0xd18 ... 0xd1b: /* System Handler Priority (SHPR1) */
          if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) {
 -            return MEMTX_OK;
 +            goto exit_ok;
          }
          /* fall through */
      case 0xd1c ... 0xd23: /* System Handler Priority (SHPR2, SHPR3) */
@@ -XXX,XX +XXX,XX @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
              set_prio(s, hdlidx, sbank, newprio);
          }
          nvic_irq_update(s);
 -        return MEMTX_OK;
 +        goto exit_ok;
      case 0xd28 ... 0xd2b: /* Configurable Fault Status (CFSR) */
          if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) {
 -            return MEMTX_OK;
 +            goto exit_ok;
          }
          /* All bits are W1C, so construct 32 bit value with 0s in
           * the parts not written by the access size
@@ -XXX,XX +XXX,XX @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
               */
              s->cpu->env.v7m.cfsr[M_REG_NS] &= ~(value & R_V7M_CFSR_BFSR_MASK);
          }
 -        return MEMTX_OK;
 +        goto exit_ok;
      }
      if (size == 4) {
          nvic_writel(s, offset, value, attrs);
 -        return MEMTX_OK;
 +        goto exit_ok;
      }
      qemu_log_mask(LOG_GUEST_ERROR,
                    "NVIC: Bad write of size %d at offset 0x%x\n", size, offset);
      /* This is UNPREDICTABLE; treat as RAZ/WI */
 +
 + exit_ok:
 +    /* Ensure any changes made are reflected in the cached hflags.  */
 +    arm_rebuild_hflags(&s->cpu->env);
      return MEMTX_OK;
  }
 --
-.20.1
+.34.1

-[PULL 06/51] target/arm: Split arm_cpu_data_is_big_endian
+[PULL 12/30] target/arm: Implement FEAT_MOPS enable bits
-From: Richard Henderson <richard.henderson@linaro.org>
+FEAT_MOPS defines a handful of new enable bits:
  * HCRX_EL2.MSCEn, SCTLR_EL1.MSCEn, SCTLR_EL2.MSCen:
    define whether the new insns should UNDEF or not
  * HCRX_EL2.MCE2: defines whether memops exceptions from
    EL1 should be taken to EL1 or EL2
-Set TBFLAG_ANY.BE_DATA in rebuild_hflags_common_32 and
+Since we don't sanitise what bits can be written for the SCTLR
-rebuild_hflags_a64 instead of rebuild_hflags_common, where we do
+registers, we only need to handle the new bits in HCRX_EL2, and
-not need to re-test is_a64() nor re-compute the various inputs.
+define SCTLR_MSCEN for the new SCTLR bit value.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+The precedence of "HCRX bits acts as 0 if SCR_EL3.HXEn is 0" versus
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+"bit acts as 1 if EL2 disabled" is not clear from the register
-Message-id: 20191023150057.25731-5-richard.henderson@linaro.org
+definition text, but it is clear in the CheckMOPSEnabled()
 pseudocode, so we follow that.  We'll have to check whether other
 bits we need to implement in future follow the same logic or not.
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+Message-id: 20230912140434.1333369-3-peter.maydell@linaro.org
 ---
- target/arm/cpu.h    | 49 +++++++++++++++++++++++++++------------------
+ target/arm/cpu.h    |  6 ++++++
- target/arm/helper.c | 16 +++++++++++----
+ target/arm/helper.c | 28 +++++++++++++++++++++-------
-files changed, 42 insertions(+), 23 deletions(-)
+files changed, 27 insertions(+), 7 deletions(-)
 diff --git a/target/arm/cpu.h b/target/arm/cpu.h
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/cpu.h
 +++ b/target/arm/cpu.h
-@@ -XXX,XX +XXX,XX @@ static inline uint64_t arm_sctlr(CPUARMState *env, int el)
+@@ -XXX,XX +XXX,XX @@ void pmu_init(ARMCPU *cpu);
-     }
+ #define SCTLR_EnIB    (1U << 30) /* v8.3, AArch64 only */
  #define SCTLR_EnIA    (1U << 31) /* v8.3, AArch64 only */
  #define SCTLR_DSSBS_32 (1U << 31) /* v8.5, AArch32 only */
 +#define SCTLR_MSCEN   (1ULL << 33) /* FEAT_MOPS */
  #define SCTLR_BT0     (1ULL << 35) /* v8.5-BTI */
  #define SCTLR_BT1     (1ULL << 36) /* v8.5-BTI */
  #define SCTLR_ITFSB   (1ULL << 37) /* v8.5-MemTag */
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_doublelock(const ARMISARegisters *id)
      return FIELD_SEX64(id->id_aa64dfr0, ID_AA64DFR0, DOUBLELOCK) >= 0;
  }
-+static inline bool arm_cpu_data_is_big_endian_a32(CPUARMState *env,
++static inline bool isar_feature_aa64_mops(const ARMISARegisters *id)
 +                                                  bool sctlr_b)
 +{
-+#ifdef CONFIG_USER_ONLY
++    return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS);
 +    /*
 +     * In system mode, BE32 is modelled in line with the
 +     * architecture (as word-invariant big-endianness), where loads
 +     * and stores are done little endian but from addresses which
 +     * are adjusted by XORing with the appropriate constant. So the
 +     * endianness to use for the raw data access is not affected by
 +     * SCTLR.B.
 +     * In user mode, however, we model BE32 as byte-invariant
 +     * big-endianness (because user-only code cannot tell the
 +     * difference), and so we need to use a data access endianness
 +     * that depends on SCTLR.B.
 +     */
 +    if (sctlr_b) {
 +        return true;
 +    }
 +#endif
 +    /* In 32bit endianness is determined by looking at CPSR's E bit */
 +    return env->uncached_cpsr & CPSR_E;
 +}
 +
-+static inline bool arm_cpu_data_is_big_endian_a64(int el, uint64_t sctlr)
+ /*
-+{
+  * Feature tests for "does this exist in either 32-bit or 64-bit?"
-+    return sctlr & (el ? SCTLR_EE : SCTLR_E0E);
+  */
 +}
  /* Return true if the processor is in big-endian mode. */
  static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
  {
 -    /* In 32bit endianness is determined by looking at CPSR's E bit */
      if (!is_a64(env)) {
 -        return
 -#ifdef CONFIG_USER_ONLY
 -            /* In system mode, BE32 is modelled in line with the
 -             * architecture (as word-invariant big-endianness), where loads
 -             * and stores are done little endian but from addresses which
 -             * are adjusted by XORing with the appropriate constant. So the
 -             * endianness to use for the raw data access is not affected by
 -             * SCTLR.B.
 -             * In user mode, however, we model BE32 as byte-invariant
 -             * big-endianness (because user-only code cannot tell the
 -             * difference), and so we need to use a data access endianness
 -             * that depends on SCTLR.B.
 -             */
 -            arm_sctlr_b(env) ||
 -#endif
 -                ((env->uncached_cpsr & CPSR_E) ? 1 : 0);
 +        return arm_cpu_data_is_big_endian_a32(env, arm_sctlr_b(env));
      } else {
          int cur_el = arm_current_el(env);
          uint64_t sctlr = arm_sctlr(env, cur_el);
 -
 -        return (sctlr & (cur_el ? SCTLR_EE : SCTLR_E0E)) != 0;
 +        return arm_cpu_data_is_big_endian_a64(cur_el, sctlr);
      }
  }
 diff --git a/target/arm/helper.c b/target/arm/helper.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/helper.c
 +++ b/target/arm/helper.c
-@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el,
+@@ -XXX,XX +XXX,XX @@ static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri,
-     flags = FIELD_DP32(flags, TBFLAG_ANY, MMUIDX,
+ {
-                        arm_to_core_mmu_idx(mmu_idx));
+     uint64_t valid_mask = 0;
--    if (arm_cpu_data_is_big_endian(env)) {
+-    /* No features adding bits to HCRX are implemented. */
--        flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1);
++    /* FEAT_MOPS adds MSCEn and MCE2 */
--    }
++    if (cpu_isar_feature(aa64_mops, env_archcpu(env))) {
-     if (arm_singlestep_active(env)) {
++        valid_mask |= HCRX_MSCEN | HCRX_MCE2;
-         flags = FIELD_DP32(flags, TBFLAG_ANY, SS_ACTIVE, 1);
++    }
      /* Clear RES0 bits.  */
      env->cp15.hcrx_el2 = value & valid_mask;
@@ -XXX,XX +XXX,XX @@ uint64_t arm_hcrx_el2_eff(CPUARMState *env)
  {
      /*
       * The bits in this register behave as 0 for all purposes other than
 -     * direct reads of the register if:
 -     *   - EL2 is not enabled in the current security state,
 -     *   - SCR_EL3.HXEn is 0.
 +     * direct reads of the register if SCR_EL3.HXEn is 0.
 +     * If EL2 is not enabled in the current security state, then the
 +     * bit may behave as if 0, or as if 1, depending on the bit.
 +     * For the moment, we treat the EL2-disabled case as taking
 +     * priority over the HXEn-disabled case. This is true for the only
 +     * bit for a feature which we implement where the answer is different
 +     * for the two cases (MSCEn for FEAT_MOPS).
 +     * This may need to be revisited for future bits.
       */
 -    if (!arm_is_el2_enabled(env)
 -        || (arm_feature(env, ARM_FEATURE_EL3)
 -            && !(env->cp15.scr_el3 & SCR_HXEN))) {
 +    if (!arm_is_el2_enabled(env)) {
 +        uint64_t hcrx = 0;
 +        if (cpu_isar_feature(aa64_mops, env_archcpu(env))) {
 +            /* MSCEn behaves as 1 if EL2 is not enabled */
 +            hcrx |= HCRX_MSCEN;
 +        }
 +        return hcrx;
 +    }
 +    if (arm_feature(env, ARM_FEATURE_EL3) && !(env->cp15.scr_el3 & SCR_HXEN)) {
          return 0;
      }
-@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el,
+     return env->cp15.hcrx_el2;
  static uint32_t rebuild_hflags_common_32(CPUARMState *env, int fp_el,
                                           ARMMMUIdx mmu_idx, uint32_t flags)
  {
 -    flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR_B, arm_sctlr_b(env));
 +    bool sctlr_b = arm_sctlr_b(env);
 +
 +    if (sctlr_b) {
 +        flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR_B, 1);
 +    }
 +    if (arm_cpu_data_is_big_endian_a32(env, sctlr_b)) {
 +        flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1);
 +    }
      flags = FIELD_DP32(flags, TBFLAG_A32, NS, !access_secure_reg(env));
      return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
      sctlr = arm_sctlr(env, el);
 +    if (arm_cpu_data_is_big_endian_a64(el, sctlr)) {
 +        flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1);
 +    }
 +
      if (cpu_isar_feature(aa64_pauth, env_archcpu(env))) {
          /*
           * In order to save space in flags, we record only whether
 --
-.20.1
+.34.1

-[PULL 31/51] hw/timer/slavio_timer.c: Switch to transaction-based ptimer API
+[PULL 13/30] target/arm: Pass unpriv bool to get_a64_user_mem_index()
-Switch the slavio_timer code away from bottom-half based ptimers to
+In every place that we call the get_a64_user_mem_index() function
-the new transaction-based ptimer API.  This just requires adding
+we do it like this:
-begin/commit calls around the various places that modify the ptimer
+ memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
-state, and using the new ptimer_init() function to create the timer.
+Refactor so the caller passes in the bool that says whether they
 want the 'unpriv' or 'normal' mem_index rather than having to
 do the ?: themselves.
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
+Message-id: 20230912140434.1333369-4-peter.maydell@linaro.org
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 Message-id: 20191021134357.14266-4-peter.maydell@linaro.org
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- hw/timer/slavio_timer.c | 20 ++++++++++++++++----
+ target/arm/tcg/translate-a64.c | 20 ++++++++++++++------
-file changed, 16 insertions(+), 4 deletions(-)
+file changed, 14 insertions(+), 6 deletions(-)
-diff --git a/hw/timer/slavio_timer.c b/hw/timer/slavio_timer.c
+diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
 index XXXXXXX..XXXXXXX 100644
---- a/hw/timer/slavio_timer.c
+--- a/target/arm/tcg/translate-a64.c
-+++ b/hw/timer/slavio_timer.c
++++ b/target/arm/tcg/translate-a64.c
-@@ -XXX,XX +XXX,XX @@
+@@ -XXX,XX +XXX,XX @@ void a64_translate_init(void)
- #include "hw/sysbus.h"
+ }
  #include "migration/vmstate.h"
  #include "trace.h"
 -#include "qemu/main-loop.h"
  #include "qemu/module.h"
  /*
-@@ -XXX,XX +XXX,XX @@ static void slavio_timer_mem_writel(void *opaque, hwaddr addr,
+- * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
-     saddr = addr >> 2;
++ * Return the core mmu_idx to use for A64 load/store insns which
-     switch (saddr) {
++ * have an "unprivileged load/store" variant. Those insns access
-     case TIMER_LIMIT:
++ * EL0 if executed from an EL which has control over EL0 (usually
-+        ptimer_transaction_begin(t->timer);
++ * EL1) but behave like normal loads and stores if executed from
-         if (slavio_timer_is_user(tc)) {
++ * elsewhere (eg EL3).
-             uint64_t count;
++ *
++ * @unpriv : true for the unprivileged encoding; false for the
-@@ -XXX,XX +XXX,XX @@ static void slavio_timer_mem_writel(void *opaque, hwaddr addr,
++ *           normal encoding (in which case we will return the same
-                 ptimer_set_limit(t->timer, LIMIT_TO_PERIODS(t->limit), 1);
++ *           thing as get_mem_index()).
-             }
+  */
-         }
+-static int get_a64_user_mem_index(DisasContext *s)
-+        ptimer_transaction_commit(t->timer);
++static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
-         break;
+ {
-     case TIMER_COUNTER:
+     /*
-         if (slavio_timer_is_user(tc)) {
+      * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
-@@ -XXX,XX +XXX,XX @@ static void slavio_timer_mem_writel(void *opaque, hwaddr addr,
+@@ -XXX,XX +XXX,XX @@ static int get_a64_user_mem_index(DisasContext *s)
-             t->reached = 0;
+      */
-             count = ((uint64_t)t->counthigh) << 32 | t->count;
+     ARMMMUIdx useridx = s->mmu_idx;
-             trace_slavio_timer_mem_writel_limit(timer_index, count);
-+            ptimer_transaction_begin(t->timer);
+-    if (s->unpriv) {
-             ptimer_set_count(t->timer, LIMIT_TO_PERIODS(t->limit - count));
++    if (unpriv && s->unpriv) {
-+            ptimer_transaction_commit(t->timer);
+         /*
-         } else {
+          * We have pre-computed the condition for AccType_UNPRIV.
-             trace_slavio_timer_mem_writel_counter_invalid();
+          * Therefore we should never get here with a mmu_idx for
-         }
+@@ -XXX,XX +XXX,XX @@ static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
-@@ -XXX,XX +XXX,XX @@ static void slavio_timer_mem_writel(void *opaque, hwaddr addr,
+     if (!a->p) {
-     case TIMER_COUNTER_NORST:
+         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
          // set limit without resetting counter
          t->limit = val & TIMER_MAX_COUNT32;
 +        ptimer_transaction_begin(t->timer);
          if (t->limit == 0) { /* free-run */
              ptimer_set_limit(t->timer, LIMIT_TO_PERIODS(TIMER_MAX_COUNT32), 0);
          } else {
              ptimer_set_limit(t->timer, LIMIT_TO_PERIODS(t->limit), 0);
          }
 +        ptimer_transaction_commit(t->timer);
          break;
      case TIMER_STATUS:
 +        ptimer_transaction_begin(t->timer);
          if (slavio_timer_is_user(tc)) {
              // start/stop user counter
              if (val & 1) {
@@ -XXX,XX +XXX,XX @@ static void slavio_timer_mem_writel(void *opaque, hwaddr addr,
              }
          }
          t->run = val & 1;
 +        ptimer_transaction_commit(t->timer);
          break;
      case TIMER_MODE:
          if (timer_index == 0) {
@@ -XXX,XX +XXX,XX @@ static void slavio_timer_mem_writel(void *opaque, hwaddr addr,
                  unsigned int processor = 1 << i;
                  CPUTimerState *curr_timer = &s->cputimer[i + 1];
 +                ptimer_transaction_begin(curr_timer->timer);
                  // check for a change in timer mode for this processor
                  if ((val & processor) != (s->cputimer_mode & processor)) {
                      if (val & processor) { // counter -> user timer
@@ -XXX,XX +XXX,XX @@ static void slavio_timer_mem_writel(void *opaque, hwaddr addr,
                          trace_slavio_timer_mem_writel_mode_counter(timer_index);
                      }
                  }
 +                ptimer_transaction_commit(curr_timer->timer);
              }
          } else {
              trace_slavio_timer_mem_writel_mode_invalid();
@@ -XXX,XX +XXX,XX @@ static void slavio_timer_reset(DeviceState *d)
          curr_timer->count = 0;
          curr_timer->reached = 0;
          if (i <= s->num_cpus) {
 +            ptimer_transaction_begin(curr_timer->timer);
              ptimer_set_limit(curr_timer->timer,
                               LIMIT_TO_PERIODS(TIMER_MAX_COUNT32), 1);
              ptimer_run(curr_timer->timer, 0);
              curr_timer->run = 1;
 +            ptimer_transaction_commit(curr_timer->timer);
          }
      }
-     s->cputimer_mode = 0;
+-    memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
-@@ -XXX,XX +XXX,XX @@ static void slavio_timer_init(Object *obj)
++    memidx = get_a64_user_mem_index(s, a->unpriv);
      *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
                                          a->w || a->rn != 31,
                                          mop, a->unpriv, memidx);
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
  {
-     SLAVIO_TIMERState *s = SLAVIO_TIMER(obj);
+     bool iss_sf, iss_valid = !a->w;
-     SysBusDevice *dev = SYS_BUS_DEVICE(obj);
+     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
--    QEMUBH *bh;
+-    int memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
-     unsigned int i;
++    int memidx = get_a64_user_mem_index(s, a->unpriv);
-     TimerContext *tc;
+     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
-@@ -XXX,XX +XXX,XX @@ static void slavio_timer_init(Object *obj)
+     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
-         tc->s = s;
+@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
-         tc->timer_index = i;
+ {
+     bool iss_sf, iss_valid = !a->w;
--        bh = qemu_bh_new(slavio_timer_irq, tc);
+     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
--        s->cputimer[i].timer = ptimer_init_with_bh(bh, PTIMER_POLICY_DEFAULT);
+-    int memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
-+        s->cputimer[i].timer = ptimer_init(slavio_timer_irq, tc,
++    int memidx = get_a64_user_mem_index(s, a->unpriv);
-+                                           PTIMER_POLICY_DEFAULT);
+     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
-+        ptimer_transaction_begin(s->cputimer[i].timer);
-         ptimer_set_period(s->cputimer[i].timer, TIMER_PERIOD);
+     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
 +        ptimer_transaction_commit(s->cputimer[i].timer);
          size = i == 0 ? SYS_TIMER_SIZE : CPU_TIMER_SIZE;
          snprintf(timer_name, sizeof(timer_name), "timer-%i", i);
 --
-.20.1
+.34.1

-[PULL 28/51] hw/timer/xilinx_timer.c: Switch to transaction-based ptimer API
+[PULL 14/30] target/arm: Define syndrome function for MOPS exceptions
-Switch the xilinx_timer code away from bottom-half based ptimers to
+The FEAT_MOPS memory operations can raise a Memory Copy or Memory Set
-the new transaction-based ptimer API.  This just requires adding
+exception if a copy or set instruction is executed when the CPU
-begin/commit calls around the various places that modify the ptimer
+register state is not correct for that instruction. Define the
-state, and using the new ptimer_init() function to create the timer.
+usual syn_* function that constructs the syndrome register value
 for these exceptions.
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
+Message-id: 20230912140434.1333369-5-peter.maydell@linaro.org
 Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
 Message-id: 20191017132122.4402-3-peter.maydell@linaro.org
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- hw/timer/xilinx_timer.c | 13 ++++++++-----
+ target/arm/syndrome.h | 12 ++++++++++++
-file changed, 8 insertions(+), 5 deletions(-)
+file changed, 12 insertions(+)
-diff --git a/hw/timer/xilinx_timer.c b/hw/timer/xilinx_timer.c
+diff --git a/target/arm/syndrome.h b/target/arm/syndrome.h
 index XXXXXXX..XXXXXXX 100644
---- a/hw/timer/xilinx_timer.c
+--- a/target/arm/syndrome.h
-+++ b/hw/timer/xilinx_timer.c
++++ b/target/arm/syndrome.h
-@@ -XXX,XX +XXX,XX @@
+@@ -XXX,XX +XXX,XX @@ enum arm_exception_class {
- #include "hw/ptimer.h"
+     EC_DATAABORT              = 0x24,
- #include "hw/qdev-properties.h"
+     EC_DATAABORT_SAME_EL      = 0x25,
- #include "qemu/log.h"
+     EC_SPALIGNMENT            = 0x26,
--#include "qemu/main-loop.h"
++    EC_MOP                    = 0x27,
- #include "qemu/module.h"
+     EC_AA32_FPTRAP            = 0x28,
+     EC_AA64_FPTRAP            = 0x2c,
- #define D(x)
+     EC_SERROR                 = 0x2f,
-@@ -XXX,XX +XXX,XX @@
+@@ -XXX,XX +XXX,XX @@ static inline uint32_t syn_serror(uint32_t extra)
+     return (EC_SERROR << ARM_EL_EC_SHIFT) | ARM_EL_IL | extra;
  struct xlx_timer
  {
 -    QEMUBH *bh;
      ptimer_state *ptimer;
      void *parent;
      int nr; /* for debug.  */
@@ -XXX,XX +XXX,XX @@ timer_read(void *opaque, hwaddr addr, unsigned int size)
      return r;
  }
-+/* Must be called inside ptimer transaction block */
++static inline uint32_t syn_mop(bool is_set, bool is_setg, int options,
- static void timer_enable(struct xlx_timer *xt)
++                               bool epilogue, bool wrong_option, bool option_a,
- {
++                               int destreg, int srcreg, int sizereg)
-     uint64_t count;
++{
-@@ -XXX,XX +XXX,XX @@ timer_write(void *opaque, hwaddr addr,
++    return (EC_MOP << ARM_EL_EC_SHIFT) | ARM_EL_IL |
-                 value &= ~TCSR_TINT;
++        (is_set << 24) | (is_setg << 23) | (options << 19) |
++        (epilogue << 18) | (wrong_option << 17) | (option_a << 16) |
-             xt->regs[addr] = value & 0x7ff;
++        (destreg << 10) | (srcreg << 5) | sizereg;
--            if (value & TCSR_ENT)
++}
-+            if (value & TCSR_ENT) {
++
-+                ptimer_transaction_begin(xt->ptimer);
++
-                 timer_enable(xt);
+ #endif /* TARGET_ARM_SYNDROME_H */
 +                ptimer_transaction_commit(xt->ptimer);
 +            }
              break;
          default:
@@ -XXX,XX +XXX,XX @@ static void xilinx_timer_realize(DeviceState *dev, Error **errp)
          xt->parent = t;
          xt->nr = i;
 -        xt->bh = qemu_bh_new(timer_hit, xt);
 -        xt->ptimer = ptimer_init_with_bh(xt->bh, PTIMER_POLICY_DEFAULT);
 +        xt->ptimer = ptimer_init(timer_hit, xt, PTIMER_POLICY_DEFAULT);
 +        ptimer_transaction_begin(xt->ptimer);
          ptimer_set_freq(xt->ptimer, t->freq_hz);
 +        ptimer_transaction_commit(xt->ptimer);
      }
      memory_region_init_io(&t->mmio, OBJECT(t), &timer_ops, t, "xlnx.xps-timer",
 --
-.20.1
+.34.1

-[PULL 37/51] target/arm: Allow SVE to be disabled via a CPU property
+[PULL 15/30] target/arm: New function allocation_tag_mem_probe()
-From: Andrew Jones <drjones@redhat.com>
+For the FEAT_MOPS operations, the existing allocation_tag_mem()
 function almost does what we want, but it will take a watchpoint
 exception even for an ra == 0 probe request, and it requires that the
 caller guarantee that the memory is accessible.  For FEAT_MOPS we
 want a function that will not take any kind of exception, and will
 return NULL for the not-accessible case.
-Since 97a28b0eeac14 ("target/arm: Allow VFP and Neon to be disabled via
+Rename allocation_tag_mem() to allocation_tag_mem_probe() and add an
-a CPU property") we can disable the 'max' cpu model's VFP and neon
+extra 'probe' argument that lets us distinguish these cases;
-features, but there's no way to disable SVE. Add the 'sve=on|off'
+allocation_tag_mem() is now a wrapper that always passes 'false'.
 property to give it that flexibility. We also rename
 cpu_max_get/set_sve_vq to cpu_max_get/set_sve_max_vq in order for them
 to follow the typical *_get/set_<property-name> pattern.
-Signed-off-by: Andrew Jones <drjones@redhat.com>
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-Reviewed-by: Eric Auger <eric.auger@redhat.com>
+Message-id: 20230912140434.1333369-6-peter.maydell@linaro.org
 Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
 Reviewed-by: Beata Michalska <beata.michalska@linaro.org>
 Message-id: 20191024121808.9612-4-drjones@redhat.com
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- target/arm/cpu.c         |  3 ++-
+ target/arm/tcg/mte_helper.c | 48 ++++++++++++++++++++++++++++---------
- target/arm/cpu64.c       | 52 ++++++++++++++++++++++++++++++++++------
+file changed, 37 insertions(+), 11 deletions(-)
  target/arm/monitor.c     |  2 +-
  tests/arm-cpu-features.c |  1 +
 files changed, 49 insertions(+), 9 deletions(-)
-diff --git a/target/arm/cpu.c b/target/arm/cpu.c
+diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
 index XXXXXXX..XXXXXXX 100644
---- a/target/arm/cpu.c
+--- a/target/arm/tcg/mte_helper.c
-+++ b/target/arm/cpu.c
++++ b/target/arm/tcg/mte_helper.c
-@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(CPUState *s)
+@@ -XXX,XX +XXX,XX @@ static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
          env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 16, 2, 3);
          env->cp15.cptr_el[3] |= CPTR_EZ;
          /* with maximum vector length */
 -        env->vfp.zcr_el[1] = cpu->sve_max_vq - 1;
 +        env->vfp.zcr_el[1] = cpu_isar_feature(aa64_sve, cpu) ?
 +                             cpu->sve_max_vq - 1 : 0;
          env->vfp.zcr_el[2] = env->vfp.zcr_el[1];
          env->vfp.zcr_el[3] = env->vfp.zcr_el[1];
          /*
 diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/cpu64.c
 +++ b/target/arm/cpu64.c
@@ -XXX,XX +XXX,XX @@ static void aarch64_a72_initfn(Object *obj)
      define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
  }
--static void cpu_max_get_sve_vq(Object *obj, Visitor *v, const char *name,
+ /**
--                               void *opaque, Error **errp)
+- * allocation_tag_mem:
-+static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name,
++ * allocation_tag_mem_probe:
-+                                   void *opaque, Error **errp)
+  * @env: the cpu environment
   * @ptr_mmu_idx: the addressing regime to use for the virtual address
   * @ptr: the virtual address for which to look up tag memory
   * @ptr_access: the access to use for the virtual address
   * @ptr_size: the number of bytes in the normal memory access
   * @tag_access: the access to use for the tag memory
 + * @probe: true to merely probe, never taking an exception
   * @ra: the return address for exception handling
   *
   * Our tag memory is formatted as a sequence of little-endian nibbles.
@@ -XXX,XX +XXX,XX @@ static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
   * for the higher addr.
   *
   * Here, resolve the physical address from the virtual address, and return
 - * a pointer to the corresponding tag byte.  Exit with exception if the
 - * virtual address is not accessible for @ptr_access.
 + * a pointer to the corresponding tag byte.
   *
   * If there is no tag storage corresponding to @ptr, return NULL.
 + *
 + * If the page is inaccessible for @ptr_access, or has a watchpoint, there are
 + * three options:
 + * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not
 + *     accessible, and do not take watchpoint traps. The calling code must
 + *     handle those cases in the right priority compared to MTE traps.
 + * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees
 + *     that the page is going to be accessible. We will take watchpoint traps.
 + * (3) probe = false, ra != 0 : non-probe -- we will take both memory access
 + *     traps and watchpoint traps.
 + * (probe = true, ra != 0 is invalid and will assert.)
   */
 -static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
 -                                   uint64_t ptr, MMUAccessType ptr_access,
 -                                   int ptr_size, MMUAccessType tag_access,
 -                                   uintptr_t ra)
 +static uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
 +                                         uint64_t ptr, MMUAccessType ptr_access,
 +                                         int ptr_size, MMUAccessType tag_access,
 +                                         bool probe, uintptr_t ra)
  {
-     ARMCPU *cpu = ARM_CPU(obj);
+ #ifdef CONFIG_USER_ONLY
--    visit_type_uint32(v, name, &cpu->sve_max_vq, errp);
+     uint64_t clean_ptr = useronly_clean_ptr(ptr);
-+    uint32_t value;
+@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
      uint8_t *tags;
      uintptr_t index;
 +    assert(!(probe && ra));
 +
-+    /* All vector lengths are disabled when SVE is off. */
+     if (!(flags & (ptr_access == MMU_DATA_STORE ? PAGE_WRITE_ORG : PAGE_READ))) {
-+    if (!cpu_isar_feature(aa64_sve, cpu)) {
+         cpu_loop_exit_sigsegv(env_cpu(env), ptr, ptr_access,
-+        value = 0;
+                               !(flags & PAGE_VALID), ra);
-+    } else {
+@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
-+        value = cpu->sve_max_vq;
+      * exception for inaccessible pages, and resolves the virtual address
       * into the softmmu tlb.
       *
 -     * When RA == 0, this is for mte_probe.  The page is expected to be
 -     * valid.  Indicate to probe_access_flags no-fault, then assert that
 -     * we received a valid page.
 +     * When RA == 0, this is either a pure probe or a no-fault-expected probe.
 +     * Indicate to probe_access_flags no-fault, then either return NULL
 +     * for the pure probe, or assert that we received a valid page for the
 +     * no-fault-expected probe.
       */
      flags = probe_access_full(env, ptr, 0, ptr_access, ptr_mmu_idx,
                                ra == 0, &host, &full, ra);
 +    if (probe && (flags & TLB_INVALID_MASK)) {
 +        return NULL;
 +    }
-+    visit_type_uint32(v, name, &value, errp);
+     assert(!(flags & TLB_INVALID_MASK));
      /* If the virtual page MemAttr != Tagged, access unchecked. */
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
      }
      /* Any debug exception has priority over a tag check exception. */
 -    if (unlikely(flags & TLB_WATCHPOINT)) {
 +    if (!probe && unlikely(flags & TLB_WATCHPOINT)) {
          int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE;
          assert(ra != 0);
          cpu_check_watchpoint(env_cpu(env), ptr, ptr_size, attrs, wp, ra);
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
  #endif
  }
--static void cpu_max_set_sve_vq(Object *obj, Visitor *v, const char *name,
++static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
--                               void *opaque, Error **errp)
++                                   uint64_t ptr, MMUAccessType ptr_access,
-+static void cpu_max_set_sve_max_vq(Object *obj, Visitor *v, const char *name,
++                                   int ptr_size, MMUAccessType tag_access,
-+                                   void *opaque, Error **errp)
++                                   uintptr_t ra)
  {
      ARMCPU *cpu = ARM_CPU(obj);
      Error *err = NULL;
@@ -XXX,XX +XXX,XX @@ static void cpu_max_set_sve_vq(Object *obj, Visitor *v, const char *name,
      error_propagate(errp, err);
  }
 +static void cpu_arm_get_sve(Object *obj, Visitor *v, const char *name,
 +                            void *opaque, Error **errp)
 +{
-+    ARMCPU *cpu = ARM_CPU(obj);
++    return allocation_tag_mem_probe(env, ptr_mmu_idx, ptr, ptr_access,
-+    bool value = cpu_isar_feature(aa64_sve, cpu);
++                                    ptr_size, tag_access, false, ra);
 +
 +    visit_type_bool(v, name, &value, errp);
 +}
 +
-+static void cpu_arm_set_sve(Object *obj, Visitor *v, const char *name,
+ uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm)
-+                            void *opaque, Error **errp)
+ {
-+{
+     uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16);
 +    ARMCPU *cpu = ARM_CPU(obj);
 +    Error *err = NULL;
 +    bool value;
 +    uint64_t t;
 +
 +    visit_type_bool(v, name, &value, &err);
 +    if (err) {
 +        error_propagate(errp, err);
 +        return;
 +    }
 +
 +    t = cpu->isar.id_aa64pfr0;
 +    t = FIELD_DP64(t, ID_AA64PFR0, SVE, value);
 +    cpu->isar.id_aa64pfr0 = t;
 +}
 +
  /* -cpu max: if KVM is enabled, like -cpu host (best possible with this host);
   * otherwise, a CPU with as many features enabled as our emulation supports.
   * The version of '-cpu max' for qemu-system-arm is defined in cpu.c;
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
  #endif
          cpu->sve_max_vq = ARM_MAX_VQ;
 -        object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_vq,
 -                            cpu_max_set_sve_vq, NULL, NULL, &error_fatal);
 +        object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_max_vq,
 +                            cpu_max_set_sve_max_vq, NULL, NULL, &error_fatal);
 +        object_property_add(obj, "sve", "bool", cpu_arm_get_sve,
 +                            cpu_arm_set_sve, NULL, NULL, &error_fatal);
      }
  }
 diff --git a/target/arm/monitor.c b/target/arm/monitor.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/monitor.c
 +++ b/target/arm/monitor.c
@@ -XXX,XX +XXX,XX @@ GICCapabilityList *qmp_query_gic_capabilities(Error **errp)
   * then the order that considers those dependencies must be used.
   */
  static const char *cpu_model_advertised_features[] = {
 -    "aarch64", "pmu",
 +    "aarch64", "pmu", "sve",
      NULL
  };
 diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c
 index XXXXXXX..XXXXXXX 100644
 --- a/tests/arm-cpu-features.c
 +++ b/tests/arm-cpu-features.c
@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion(const void *data)
      if (g_str_equal(qtest_get_arch(), "aarch64")) {
          assert_has_feature(qts, "max", "aarch64");
 +        assert_has_feature(qts, "max", "sve");
          assert_has_feature(qts, "cortex-a57", "pmu");
          assert_has_feature(qts, "cortex-a57", "aarch64");
 --
-.20.1
+.34.1

-[PULL 38/51] target/arm/cpu64: max cpu: Introduce sve<N> properties
+[PULL 16/30] target/arm: Implement MTE tag-checking functions for FEAT_MOPS
-From: Andrew Jones <drjones@redhat.com>
+The FEAT_MOPS instructions need a couple of helper routines that
 check for MTE tag failures:
  * mte_mops_probe() checks whether there is going to be a tag
    error in the next up-to-a-page worth of data
  * mte_check_fail() is an existing function to record the fact
    of a tag failure, which we need to make global so we can
    call it from helper-a64.c
-Introduce cpu properties to give fine control over SVE vector lengths.
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-We introduce a property for each valid length up to the current
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-maximum supported, which is 2048-bits. The properties are named, e.g.
+Message-id: 20230912140434.1333369-7-peter.maydell@linaro.org
-sve128, sve256, sve384, sve512, ..., where the number is the number of
+---
-bits. See the updates to docs/arm-cpu-features.rst for a description
+ target/arm/internals.h      | 28 +++++++++++++++++++
-of the semantics and for example uses.
+ target/arm/tcg/mte_helper.c | 54 +++++++++++++++++++++++++++++++++++--
 files changed, 80 insertions(+), 2 deletions(-)
-Note, as sve-max-vq is still present and we'd like to be able to
+diff --git a/target/arm/internals.h b/target/arm/internals.h
 support qmp_query_cpu_model_expansion with guests launched with e.g.
 -cpu max,sve-max-vq=8 on their command lines, then we do allow
 sve-max-vq and sve<N> properties to be provided at the same time, but
 this is not recommended, and is why sve-max-vq is not mentioned in the
 document.  If sve-max-vq is provided then it enables all lengths smaller
 than and including the max and disables all lengths larger. It also has
 the side-effect that no larger lengths may be enabled and that the max
 itself cannot be disabled. Smaller non-power-of-two lengths may,
 however, be disabled, e.g. -cpu max,sve-max-vq=4,sve384=off provides a
 guest the vector lengths 128, 256, and 512 bits.
 This patch has been co-authored with Richard Henderson, who reworked
 the target/arm/cpu64.c changes in order to push all the validation and
 auto-enabling/disabling steps into the finalizer, resulting in a nice
 LOC reduction.
 Signed-off-by: Andrew Jones <drjones@redhat.com>
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 Reviewed-by: Eric Auger <eric.auger@redhat.com>
 Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
 Reviewed-by: Beata Michalska <beata.michalska@linaro.org>
 Message-id: 20191024121808.9612-5-drjones@redhat.com
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
  include/qemu/bitops.h     |   1 +
  target/arm/cpu.h          |  19 ++++
  target/arm/cpu.c          |  19 ++++
  target/arm/cpu64.c        | 192 ++++++++++++++++++++++++++++++++++++-
  target/arm/helper.c       |  10 +-
  target/arm/monitor.c      |  12 +++
  tests/arm-cpu-features.c  | 194 ++++++++++++++++++++++++++++++++++++++
  docs/arm-cpu-features.rst | 168 +++++++++++++++++++++++++++++++--
 files changed, 606 insertions(+), 9 deletions(-)
 diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
 index XXXXXXX..XXXXXXX 100644
---- a/include/qemu/bitops.h
+--- a/target/arm/internals.h
-+++ b/include/qemu/bitops.h
++++ b/target/arm/internals.h
-@@ -XXX,XX +XXX,XX @@
+@@ -XXX,XX +XXX,XX @@ FIELD(MTEDESC, SIZEM1, 12, SIMD_DATA_BITS - 12)  /* size - 1 */
- #define BITS_PER_LONG           (sizeof (unsigned long) * BITS_PER_BYTE)
+ bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr);
+ uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra);
- #define BIT(nr)                 (1UL << (nr))
-+#define BIT_ULL(nr)             (1ULL << (nr))
++/**
- #define BIT_MASK(nr)            (1UL << ((nr) % BITS_PER_LONG))
++ * mte_mops_probe: Check where the next MTE failure is for a FEAT_MOPS operation
- #define BIT_WORD(nr)            ((nr) / BITS_PER_LONG)
++ * @env: CPU env
- #define BITS_TO_LONGS(nr)       DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
++ * @ptr: start address of memory region (dirty pointer)
-diff --git a/target/arm/cpu.h b/target/arm/cpu.h
++ * @size: length of region (guaranteed not to cross a page boundary)
 + * @desc: MTEDESC descriptor word (0 means no MTE checks)
 + * Returns: the size of the region that can be copied without hitting
 + *          an MTE tag failure
 + *
 + * Note that we assume that the caller has already checked the TBI
 + * and TCMA bits with mte_checks_needed() and an MTE check is definitely
 + * required.
 + */
 +uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size,
 +                        uint32_t desc);
 +
 +/**
 + * mte_check_fail: Record an MTE tag check failure
 + * @env: CPU env
 + * @desc: MTEDESC descriptor word
 + * @dirty_ptr: Failing dirty address
 + * @ra: TCG retaddr
 + *
 + * This may never return (if the MTE tag checks are configured to fault).
 + */
 +void mte_check_fail(CPUARMState *env, uint32_t desc,
 +                    uint64_t dirty_ptr, uintptr_t ra);
 +
  static inline int allocation_tag_from_addr(uint64_t ptr)
  {
      return extract64(ptr, 56, 4);
 diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
 index XXXXXXX..XXXXXXX 100644
---- a/target/arm/cpu.h
+--- a/target/arm/tcg/mte_helper.c
-+++ b/target/arm/cpu.h
++++ b/target/arm/tcg/mte_helper.c
-@@ -XXX,XX +XXX,XX @@ typedef struct {
+@@ -XXX,XX +XXX,XX @@ static void mte_async_check_fail(CPUARMState *env, uint64_t dirty_ptr,
+ }
- #ifdef TARGET_AARCH64
- # define ARM_MAX_VQ    16
+ /* Record a tag check failure.  */
-+void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp);
+-static void mte_check_fail(CPUARMState *env, uint32_t desc,
-+uint32_t arm_cpu_vq_map_next_smaller(ARMCPU *cpu, uint32_t vq);
+-                           uint64_t dirty_ptr, uintptr_t ra)
- #else
++void mte_check_fail(CPUARMState *env, uint32_t desc,
- # define ARM_MAX_VQ    1
++                    uint64_t dirty_ptr, uintptr_t ra)
-+static inline void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) { }
+ {
-+static inline uint32_t arm_cpu_vq_map_next_smaller(ARMCPU *cpu, uint32_t vq)
+     int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
-+{ return 0; }
+     ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx);
- #endif
+@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr)
+  done:
- typedef struct ARMVectorReg {
+     return useronly_clean_ptr(ptr);
-@@ -XXX,XX +XXX,XX @@ struct ARMCPU {
+ }
      /* Used to set the maximum vector length the cpu will support.  */
      uint32_t sve_max_vq;
 +
-+    /*
++uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size,
-+     * In sve_vq_map each set bit is a supported vector length of
++                        uint32_t desc)
-+     * (bit-number + 1) * 16 bytes, i.e. each bit number + 1 is the vector
++{
-+     * length in quadwords.
++    int mmu_idx, tag_count;
-+     *
++    uint64_t ptr_tag, tag_first, tag_last;
-+     * While processing properties during initialization, corresponding
++    void *mem;
-+     * sve_vq_init bits are set for bits in sve_vq_map that have been
++    bool w = FIELD_EX32(desc, MTEDESC, WRITE);
-+     * set by properties.
++    uint32_t n;
 +     */
 +    DECLARE_BITMAP(sve_vq_map, ARM_MAX_VQ);
 +    DECLARE_BITMAP(sve_vq_init, ARM_MAX_VQ);
  };
  void arm_cpu_post_init(Object *obj);
@@ -XXX,XX +XXX,XX @@ static inline int arm_feature(CPUARMState *env, int feature)
      return (env->features & (1ULL << feature)) != 0;
  }
 +void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp);
 +
- #if !defined(CONFIG_USER_ONLY)
++    mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
- /* Return true if exception levels below EL3 are in secure state,
++    /* True probe; this will never fault */
-  * or would be following an exception return to that level.
++    mem = allocation_tag_mem_probe(env, mmu_idx, ptr,
-diff --git a/target/arm/cpu.c b/target/arm/cpu.c
++                                   w ? MMU_DATA_STORE : MMU_DATA_LOAD,
-index XXXXXXX..XXXXXXX 100644
++                                   size, MMU_DATA_LOAD, true, 0);
---- a/target/arm/cpu.c
++    if (!mem) {
-+++ b/target/arm/cpu.c
++        return size;
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_finalizefn(Object *obj)
  #endif
  }
 +void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp)
 +{
 +    Error *local_err = NULL;
 +
 +    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
 +        arm_cpu_sve_finalize(cpu, &local_err);
 +        if (local_err != NULL) {
 +            error_propagate(errp, local_err);
 +            return;
 +        }
 +    }
 +}
 +
  static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
  {
      CPUState *cs = CPU(dev);
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
          return;
      }
 +    arm_cpu_finalize_features(cpu, &local_err);
 +    if (local_err != NULL) {
 +        error_propagate(errp, local_err);
 +        return;
 +    }
 +
      if (arm_feature(env, ARM_FEATURE_AARCH64) &&
          cpu->has_vfp != cpu->has_neon) {
          /*
 diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/cpu64.c
 +++ b/target/arm/cpu64.c
@@ -XXX,XX +XXX,XX @@ static void aarch64_a72_initfn(Object *obj)
      define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
  }
 +void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
 +{
 +    /*
 +     * If any vector lengths are explicitly enabled with sve<N> properties,
 +     * then all other lengths are implicitly disabled.  If sve-max-vq is
 +     * specified then it is the same as explicitly enabling all lengths
 +     * up to and including the specified maximum, which means all larger
 +     * lengths will be implicitly disabled.  If no sve<N> properties
 +     * are enabled and sve-max-vq is not specified, then all lengths not
 +     * explicitly disabled will be enabled.  Additionally, all power-of-two
 +     * vector lengths less than the maximum enabled length will be
 +     * automatically enabled and all vector lengths larger than the largest
 +     * disabled power-of-two vector length will be automatically disabled.
 +     * Errors are generated if the user provided input that interferes with
 +     * any of the above.  Finally, if SVE is not disabled, then at least one
 +     * vector length must be enabled.
 +     */
 +    DECLARE_BITMAP(tmp, ARM_MAX_VQ);
 +    uint32_t vq, max_vq = 0;
 +
 +    /*
 +     * Process explicit sve<N> properties.
 +     * From the properties, sve_vq_map<N> implies sve_vq_init<N>.
 +     * Check first for any sve<N> enabled.
 +     */
 +    if (!bitmap_empty(cpu->sve_vq_map, ARM_MAX_VQ)) {
 +        max_vq = find_last_bit(cpu->sve_vq_map, ARM_MAX_VQ) + 1;
 +
 +        if (cpu->sve_max_vq && max_vq > cpu->sve_max_vq) {
 +            error_setg(errp, "cannot enable sve%d", max_vq * 128);
 +            error_append_hint(errp, "sve%d is larger than the maximum vector "
 +                              "length, sve-max-vq=%d (%d bits)\n",
 +                              max_vq * 128, cpu->sve_max_vq,
 +                              cpu->sve_max_vq * 128);
 +            return;
 +        }
 +
 +        /* Propagate enabled bits down through required powers-of-two. */
 +        for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) {
 +            if (!test_bit(vq - 1, cpu->sve_vq_init)) {
 +                set_bit(vq - 1, cpu->sve_vq_map);
 +            }
 +        }
 +    } else if (cpu->sve_max_vq == 0) {
 +        /*
 +         * No explicit bits enabled, and no implicit bits from sve-max-vq.
 +         */
 +        if (!cpu_isar_feature(aa64_sve, cpu)) {
 +            /* SVE is disabled and so are all vector lengths.  Good. */
 +            return;
 +        }
 +
 +        /* Disabling a power-of-two disables all larger lengths. */
 +        if (test_bit(0, cpu->sve_vq_init)) {
 +            error_setg(errp, "cannot disable sve128");
 +            error_append_hint(errp, "Disabling sve128 results in all vector "
 +                              "lengths being disabled.\n");
 +            error_append_hint(errp, "With SVE enabled, at least one vector "
 +                              "length must be enabled.\n");
 +            return;
 +        }
 +        for (vq = 2; vq <= ARM_MAX_VQ; vq <<= 1) {
 +            if (test_bit(vq - 1, cpu->sve_vq_init)) {
 +                break;
 +            }
 +        }
 +        max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ;
 +
 +        bitmap_complement(cpu->sve_vq_map, cpu->sve_vq_init, max_vq);
 +        max_vq = find_last_bit(cpu->sve_vq_map, max_vq) + 1;
 +    }
 +
 +    /*
-+     * Process the sve-max-vq property.
++     * TODO: checkN() is not designed for checks of the size we expect
-+     * Note that we know from the above that no bit above
++     * for FEAT_MOPS operations, so we should implement this differently.
-+     * sve-max-vq is currently set.
++     * Maybe we should do something like
 +     *   if (region start and size are aligned nicely) {
 +     *      do direct loads of 64 tag bits at a time;
 +     *   } else {
 +     *      call checkN()
 +     *   }
 +     */
-+    if (cpu->sve_max_vq != 0) {
++    /* Round the bounds to the tag granule, and compute the number of tags. */
-+        max_vq = cpu->sve_max_vq;
++    ptr_tag = allocation_tag_from_addr(ptr);
-+
++    tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
-+        if (!test_bit(max_vq - 1, cpu->sve_vq_map) &&
++    tag_last = QEMU_ALIGN_DOWN(ptr + size - 1, TAG_GRANULE);
-+            test_bit(max_vq - 1, cpu->sve_vq_init)) {
++    tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;
-+            error_setg(errp, "cannot disable sve%d", max_vq * 128);
++    n = checkN(mem, ptr & TAG_GRANULE, ptr_tag, tag_count);
-+            error_append_hint(errp, "The maximum vector length must be "
++    if (likely(n == tag_count)) {
-+                              "enabled, sve-max-vq=%d (%d bits)\n",
++        return size;
 +                              max_vq, max_vq * 128);
 +            return;
 +        }
 +
 +        /* Set all bits not explicitly set within sve-max-vq. */
 +        bitmap_complement(tmp, cpu->sve_vq_init, max_vq);
 +        bitmap_or(cpu->sve_vq_map, cpu->sve_vq_map, tmp, max_vq);
 +    }
 +
 +    /*
-+     * We should know what max-vq is now.  Also, as we're done
++     * Failure; for the first granule, it's at @ptr. Otherwise
-+     * manipulating sve-vq-map, we ensure any bits above max-vq
++     * it's at the first byte of the nth granule. Calculate how
-+     * are clear, just in case anybody looks.
++     * many bytes we can access without hitting that failure.
 +     */
-+    assert(max_vq != 0);
++    if (n == 0) {
-+    bitmap_clear(cpu->sve_vq_map, max_vq, ARM_MAX_VQ - max_vq);
++        return 0;
-+
++    } else {
-+    /* Ensure all required powers-of-two are enabled. */
++        return n * TAG_GRANULE - (ptr - tag_first);
 +    for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) {
 +        if (!test_bit(vq - 1, cpu->sve_vq_map)) {
 +            error_setg(errp, "cannot disable sve%d", vq * 128);
 +            error_append_hint(errp, "sve%d is required as it "
 +                              "is a power-of-two length smaller than "
 +                              "the maximum, sve%d\n",
 +                              vq * 128, max_vq * 128);
 +            return;
 +        }
 +    }
-+
-+    /*
-+     * Now that we validated all our vector lengths, the only question
-+     * left to answer is if we even want SVE at all.
-+     */
-+    if (!cpu_isar_feature(aa64_sve, cpu)) {
-+        error_setg(errp, "cannot enable sve%d", max_vq * 128);
-+        error_append_hint(errp, "SVE must be enabled to enable vector "
-+                          "lengths.\n");
-+        error_append_hint(errp, "Add sve=on to the CPU property list.\n");
-+        return;
-+    }
-+
-+    /* From now on sve_max_vq is the actual maximum supported length. */
-+    cpu->sve_max_vq = max_vq;
 +}
-+
-+uint32_t arm_cpu_vq_map_next_smaller(ARMCPU *cpu, uint32_t vq)
-+{
-+    uint32_t bitnum;
-+
-+    /*
-+     * We allow vq == ARM_MAX_VQ + 1 to be input because the caller may want
-+     * to find the maximum vq enabled, which may be ARM_MAX_VQ, but this
-+     * function always returns the next smaller than the input.
-+     */
-+    assert(vq && vq <= ARM_MAX_VQ + 1);
-+
-+    bitnum = find_last_bit(cpu->sve_vq_map, vq - 1);
-+    return bitnum == vq - 1 ? 0 : bitnum + 1;
-+}
-+
- static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name,
-                                    void *opaque, Error **errp)
- {
-@@ -XXX,XX +XXX,XX @@ static void cpu_max_set_sve_max_vq(Object *obj, Visitor *v, const char *name,
-     error_propagate(errp, err);
- }
-+static void cpu_arm_get_sve_vq(Object *obj, Visitor *v, const char *name,
-+                               void *opaque, Error **errp)
-+{
-+    ARMCPU *cpu = ARM_CPU(obj);
-+    uint32_t vq = atoi(&name[3]) / 128;
-+    bool value;
-+
-+    /* All vector lengths are disabled when SVE is off. */
-+    if (!cpu_isar_feature(aa64_sve, cpu)) {
-+        value = false;
-+    } else {
-+        value = test_bit(vq - 1, cpu->sve_vq_map);
-+    }
-+    visit_type_bool(v, name, &value, errp);
-+}
-+
-+static void cpu_arm_set_sve_vq(Object *obj, Visitor *v, const char *name,
-+                               void *opaque, Error **errp)
-+{
-+    ARMCPU *cpu = ARM_CPU(obj);
-+    uint32_t vq = atoi(&name[3]) / 128;
-+    Error *err = NULL;
-+    bool value;
-+
-+    visit_type_bool(v, name, &value, &err);
-+    if (err) {
-+        error_propagate(errp, err);
-+        return;
-+    }
-+
-+    if (value) {
-+        set_bit(vq - 1, cpu->sve_vq_map);
-+    } else {
-+        clear_bit(vq - 1, cpu->sve_vq_map);
-+    }
-+    set_bit(vq - 1, cpu->sve_vq_init);
-+}
-+
- static void cpu_arm_get_sve(Object *obj, Visitor *v, const char *name,
-                             void *opaque, Error **errp)
- {
-@@ -XXX,XX +XXX,XX @@ static void cpu_arm_set_sve(Object *obj, Visitor *v, const char *name,
- static void aarch64_max_initfn(Object *obj)
- {
-     ARMCPU *cpu = ARM_CPU(obj);
-+    uint32_t vq;
-     if (kvm_enabled()) {
-         kvm_arm_set_cpu_features_from_host(cpu);
-@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
-         cpu->dcz_blocksize = 7; /*  512 bytes */
- #endif
--        cpu->sve_max_vq = ARM_MAX_VQ;
-         object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_max_vq,
-                             cpu_max_set_sve_max_vq, NULL, NULL, &error_fatal);
-         object_property_add(obj, "sve", "bool", cpu_arm_get_sve,
-                             cpu_arm_set_sve, NULL, NULL, &error_fatal);
-+
-+        for (vq = 1; vq <= ARM_MAX_VQ; ++vq) {
-+            char name[8];
-+            sprintf(name, "sve%d", vq * 128);
-+            object_property_add(obj, name, "bool", cpu_arm_get_sve_vq,
-+                                cpu_arm_set_sve_vq, NULL, NULL, &error_fatal);
-+        }
-     }
- }
-diff --git a/target/arm/helper.c b/target/arm/helper.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
-+++ b/target/arm/helper.c
-@@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el)
-     return 0;
- }
-+static uint32_t sve_zcr_get_valid_len(ARMCPU *cpu, uint32_t start_len)
-+{
-+    uint32_t start_vq = (start_len & 0xf) + 1;
-+
-+    return arm_cpu_vq_map_next_smaller(cpu, start_vq + 1) - 1;
-+}
-+
- /*
-  * Given that SVE is enabled, return the vector length for EL.
-  */
-@@ -XXX,XX +XXX,XX @@ uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
-     if (arm_feature(env, ARM_FEATURE_EL3)) {
-         zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
-     }
--    return zcr_len;
-+
-+    return sve_zcr_get_valid_len(cpu, zcr_len);
- }
- static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-diff --git a/target/arm/monitor.c b/target/arm/monitor.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/monitor.c
-+++ b/target/arm/monitor.c
-@@ -XXX,XX +XXX,XX @@ GICCapabilityList *qmp_query_gic_capabilities(Error **errp)
-     return head;
- }
-+QEMU_BUILD_BUG_ON(ARM_MAX_VQ > 16);
-+
- /*
-  * These are cpu model features we want to advertise. The order here
-  * matters as this is the order in which qmp_query_cpu_model_expansion
-@@ -XXX,XX +XXX,XX @@ GICCapabilityList *qmp_query_gic_capabilities(Error **errp)
-  */
- static const char *cpu_model_advertised_features[] = {
-     "aarch64", "pmu", "sve",
-+    "sve128", "sve256", "sve384", "sve512",
-+    "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280",
-+    "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048",
-     NULL
- };
-@@ -XXX,XX +XXX,XX @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
-         if (!err) {
-             visit_check_struct(visitor, &err);
-         }
-+        if (!err) {
-+            arm_cpu_finalize_features(ARM_CPU(obj), &err);
-+        }
-         visit_end_struct(visitor, NULL);
-         visit_free(visitor);
-         if (err) {
-@@ -XXX,XX +XXX,XX @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
-             error_propagate(errp, err);
-             return NULL;
-         }
-+    } else {
-+        Error *err = NULL;
-+        arm_cpu_finalize_features(ARM_CPU(obj), &err);
-+        assert(err == NULL);
-     }
-     expansion_info = g_new0(CpuModelExpansionInfo, 1);
-diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c
-index XXXXXXX..XXXXXXX 100644
---- a/tests/arm-cpu-features.c
-+++ b/tests/arm-cpu-features.c
-@@ -XXX,XX +XXX,XX @@
-  * See the COPYING file in the top-level directory.
-  */
- #include "qemu/osdep.h"
-+#include "qemu/bitops.h"
- #include "libqtest.h"
- #include "qapi/qmp/qdict.h"
- #include "qapi/qmp/qjson.h"
-+/*
-+ * We expect the SVE max-vq to be 16. Also it must be <= 64
-+ * for our test code, otherwise 'vls' can't just be a uint64_t.
-+ */
-+#define SVE_MAX_VQ 16
-+
- #define MACHINE    "-machine virt,gic-version=max "
- #define QUERY_HEAD "{ 'execute': 'query-cpu-model-expansion', " \
-                      "'arguments': { 'type': 'full', "
-@@ -XXX,XX +XXX,XX @@ static void assert_bad_props(QTestState *qts, const char *cpu_type)
-     qobject_unref(resp);
- }
-+static uint64_t resp_get_sve_vls(QDict *resp)
-+{
-+    QDict *props;
-+    const QDictEntry *e;
-+    uint64_t vls = 0;
-+    int n = 0;
-+
-+    g_assert(resp);
-+    g_assert(resp_has_props(resp));
-+
-+    props = resp_get_props(resp);
-+
-+    for (e = qdict_first(props); e; e = qdict_next(props, e)) {
-+        if (strlen(e->key) > 3 && !strncmp(e->key, "sve", 3) &&
-+            g_ascii_isdigit(e->key[3])) {
-+            char *endptr;
-+            int bits;
-+
-+            bits = g_ascii_strtoll(&e->key[3], &endptr, 10);
-+            if (!bits || *endptr != '\0') {
-+                continue;
-+            }
-+
-+            if (qdict_get_bool(props, e->key)) {
-+                vls |= BIT_ULL((bits / 128) - 1);
-+            }
-+            ++n;
-+        }
-+    }
-+
-+    g_assert(n == SVE_MAX_VQ);
-+
-+    return vls;
-+}
-+
-+#define assert_sve_vls(qts, cpu_type, expected_vls, fmt, ...)          \
-+({                                                                     \
-+    QDict *_resp = do_query(qts, cpu_type, fmt, ##__VA_ARGS__);        \
-+    g_assert(_resp);                                                   \
-+    g_assert(resp_has_props(_resp));                                   \
-+    g_assert(resp_get_sve_vls(_resp) == expected_vls);                 \
-+    qobject_unref(_resp);                                              \
-+})
-+
-+static void sve_tests_default(QTestState *qts, const char *cpu_type)
-+{
-+    /*
-+     * With no sve-max-vq or sve<N> properties on the command line
-+     * the default is to have all vector lengths enabled. This also
-+     * tests that 'sve' is 'on' by default.
-+     */
-+    assert_sve_vls(qts, cpu_type, BIT_ULL(SVE_MAX_VQ) - 1, NULL);
-+
-+    /* With SVE off, all vector lengths should also be off. */
-+    assert_sve_vls(qts, cpu_type, 0, "{ 'sve': false }");
-+
-+    /* With SVE on, we must have at least one vector length enabled. */
-+    assert_error(qts, cpu_type, "cannot disable sve128", "{ 'sve128': false }");
-+
-+    /* Basic enable/disable tests. */
-+    assert_sve_vls(qts, cpu_type, 0x7, "{ 'sve384': true }");
-+    assert_sve_vls(qts, cpu_type, ((BIT_ULL(SVE_MAX_VQ) - 1) & ~BIT_ULL(2)),
-+                   "{ 'sve384': false }");
-+
-+    /*
-+     * ---------------------------------------------------------------------
-+     *               power-of-two(vq)   all-power-            can      can
-+     *                                  of-two(< vq)        enable   disable
-+     * ---------------------------------------------------------------------
-+     * vq < max_vq      no                MUST*              yes      yes
-+     * vq < max_vq      yes               MUST*              yes      no
-+     * ---------------------------------------------------------------------
-+     * vq == max_vq     n/a               MUST*              yes**    yes**
-+     * ---------------------------------------------------------------------
-+     * vq > max_vq      n/a               no                 no       yes
-+     * vq > max_vq      n/a               yes                yes      yes
-+     * ---------------------------------------------------------------------
-+     *
-+     * [*] "MUST" means this requirement must already be satisfied,
-+     *     otherwise 'max_vq' couldn't itself be enabled.
-+     *
-+     * [**] Not testable with the QMP interface, only with the command line.
-+     */
-+
-+    /* max_vq := 8 */
-+    assert_sve_vls(qts, cpu_type, 0x8b, "{ 'sve1024': true }");
-+
-+    /* max_vq := 8, vq < max_vq, !power-of-two(vq) */
-+    assert_sve_vls(qts, cpu_type, 0x8f,
-+                   "{ 'sve1024': true, 'sve384': true }");
-+    assert_sve_vls(qts, cpu_type, 0x8b,
-+                   "{ 'sve1024': true, 'sve384': false }");
-+
-+    /* max_vq := 8, vq < max_vq, power-of-two(vq) */
-+    assert_sve_vls(qts, cpu_type, 0x8b,
-+                   "{ 'sve1024': true, 'sve256': true }");
-+    assert_error(qts, cpu_type, "cannot disable sve256",
-+                 "{ 'sve1024': true, 'sve256': false }");
-+
-+    /* max_vq := 3, vq > max_vq, !all-power-of-two(< vq) */
-+    assert_error(qts, cpu_type, "cannot disable sve512",
-+                 "{ 'sve384': true, 'sve512': false, 'sve640': true }");
-+
-+    /*
-+     * We can disable power-of-two vector lengths when all larger lengths
-+     * are also disabled. We only need to disable the power-of-two length,
-+     * as all non-enabled larger lengths will then be auto-disabled.
-+     */
-+    assert_sve_vls(qts, cpu_type, 0x7, "{ 'sve512': false }");
-+
-+    /* max_vq := 3, vq > max_vq, all-power-of-two(< vq) */
-+    assert_sve_vls(qts, cpu_type, 0x1f,
-+                   "{ 'sve384': true, 'sve512': true, 'sve640': true }");
-+    assert_sve_vls(qts, cpu_type, 0xf,
-+                   "{ 'sve384': true, 'sve512': true, 'sve640': false }");
-+}
-+
-+static void sve_tests_sve_max_vq_8(const void *data)
-+{
-+    QTestState *qts;
-+
-+    qts = qtest_init(MACHINE "-cpu max,sve-max-vq=8");
-+
-+    assert_sve_vls(qts, "max", BIT_ULL(8) - 1, NULL);
-+
-+    /*
-+     * Disabling the max-vq set by sve-max-vq is not allowed, but
-+     * of course enabling it is OK.
-+     */
-+    assert_error(qts, "max", "cannot disable sve1024", "{ 'sve1024': false }");
-+    assert_sve_vls(qts, "max", 0xff, "{ 'sve1024': true }");
-+
-+    /*
-+     * Enabling anything larger than max-vq set by sve-max-vq is not
-+     * allowed, but of course disabling everything larger is OK.
-+     */
-+    assert_error(qts, "max", "cannot enable sve1152", "{ 'sve1152': true }");
-+    assert_sve_vls(qts, "max", 0xff, "{ 'sve1152': false }");
-+
-+    /*
-+     * We can enable/disable non power-of-two lengths smaller than the
-+     * max-vq set by sve-max-vq, but, while we can enable power-of-two
-+     * lengths, we can't disable them.
-+     */
-+    assert_sve_vls(qts, "max", 0xff, "{ 'sve384': true }");
-+    assert_sve_vls(qts, "max", 0xfb, "{ 'sve384': false }");
-+    assert_sve_vls(qts, "max", 0xff, "{ 'sve256': true }");
-+    assert_error(qts, "max", "cannot disable sve256", "{ 'sve256': false }");
-+
-+    qtest_quit(qts);
-+}
-+
-+static void sve_tests_sve_off(const void *data)
-+{
-+    QTestState *qts;
-+
-+    qts = qtest_init(MACHINE "-cpu max,sve=off");
-+
-+    /* SVE is off, so the map should be empty. */
-+    assert_sve_vls(qts, "max", 0, NULL);
-+
-+    /* The map stays empty even if we turn lengths off. */
-+    assert_sve_vls(qts, "max", 0, "{ 'sve128': false }");
-+
-+    /* It's an error to enable lengths when SVE is off. */
-+    assert_error(qts, "max", "cannot enable sve128", "{ 'sve128': true }");
-+
-+    /* With SVE re-enabled we should get all vector lengths enabled. */
-+    assert_sve_vls(qts, "max", BIT_ULL(SVE_MAX_VQ) - 1, "{ 'sve': true }");
-+
-+    /* Or enable SVE with just specific vector lengths. */
-+    assert_sve_vls(qts, "max", 0x3,
-+                   "{ 'sve': true, 'sve128': true, 'sve256': true }");
-+
-+    qtest_quit(qts);
-+}
-+
- static void test_query_cpu_model_expansion(const void *data)
- {
-     QTestState *qts;
-@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion(const void *data)
-     if (g_str_equal(qtest_get_arch(), "aarch64")) {
-         assert_has_feature(qts, "max", "aarch64");
-         assert_has_feature(qts, "max", "sve");
-+        assert_has_feature(qts, "max", "sve128");
-         assert_has_feature(qts, "cortex-a57", "pmu");
-         assert_has_feature(qts, "cortex-a57", "aarch64");
-+        sve_tests_default(qts, "max");
-+
-         /* Test that features that depend on KVM generate errors without. */
-         assert_error(qts, "max",
-                      "'aarch64' feature cannot be disabled "
-@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
-     qtest_add_data_func("/arm/query-cpu-model-expansion",
-                         NULL, test_query_cpu_model_expansion);
-+    if (g_str_equal(qtest_get_arch(), "aarch64")) {
-+        qtest_add_data_func("/arm/max/query-cpu-model-expansion/sve-max-vq-8",
-+                            NULL, sve_tests_sve_max_vq_8);
-+        qtest_add_data_func("/arm/max/query-cpu-model-expansion/sve-off",
-+                            NULL, sve_tests_sve_off);
-+    }
-+
-     if (kvm_available) {
-         qtest_add_data_func("/arm/kvm/query-cpu-model-expansion",
-                             NULL, test_query_cpu_model_expansion_kvm);
-diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst
-index XXXXXXX..XXXXXXX 100644
---- a/docs/arm-cpu-features.rst
-+++ b/docs/arm-cpu-features.rst
-@@ -XXX,XX +XXX,XX @@ block in the script for usage) is used to issue the QMP commands.
-       (QEMU) query-cpu-model-expansion type=full model={"name":"max"}
-       { "return": {
-         "model": { "name": "max", "props": {
--        "pmu": true, "aarch64": true
-+        "sve1664": true, "pmu": true, "sve1792": true, "sve1920": true,
-+        "sve128": true, "aarch64": true, "sve1024": true, "sve": true,
-+        "sve640": true, "sve768": true, "sve1408": true, "sve256": true,
-+        "sve1152": true, "sve512": true, "sve384": true, "sve1536": true,
-+        "sve896": true, "sve1280": true, "sve2048": true
-       }}}}
--We see that the `max` CPU type has the `pmu` and `aarch64` CPU features.
--We also see that the CPU features are enabled, as they are all `true`.
-+We see that the `max` CPU type has the `pmu`, `aarch64`, `sve`, and many
-+`sve<N>` CPU features.  We also see that all the CPU features are
-+enabled, as they are all `true`.  (The `sve<N>` CPU features are all
-+optional SVE vector lengths (see "SVE CPU Properties").  While with TCG
-+all SVE vector lengths can be supported, when KVM is in use it's more
-+likely that only a few lengths will be supported, if SVE is supported at
-+all.)
- (2) Let's try to disable the PMU::
-       (QEMU) query-cpu-model-expansion type=full model={"name":"max","props":{"pmu":false}}
-       { "return": {
-         "model": { "name": "max", "props": {
--        "pmu": false, "aarch64": true
-+        "sve1664": true, "pmu": false, "sve1792": true, "sve1920": true,
-+        "sve128": true, "aarch64": true, "sve1024": true, "sve": true,
-+        "sve640": true, "sve768": true, "sve1408": true, "sve256": true,
-+        "sve1152": true, "sve512": true, "sve384": true, "sve1536": true,
-+        "sve896": true, "sve1280": true, "sve2048": true
-       }}}}
- We see it worked, as `pmu` is now `false`.
-@@ -XXX,XX +XXX,XX @@ We see it worked, as `pmu` is now `false`.
- It looks like this feature is limited to a configuration we do not
- currently have.
--(4) Let's try probing CPU features for the Cortex-A15 CPU type::
-+(4) Let's disable `sve` and see what happens to all the optional SVE
-+    vector lengths::
-+
-+      (QEMU) query-cpu-model-expansion type=full model={"name":"max","props":{"sve":false}}
-+      { "return": {
-+        "model": { "name": "max", "props": {
-+        "sve1664": false, "pmu": true, "sve1792": false, "sve1920": false,
-+        "sve128": false, "aarch64": true, "sve1024": false, "sve": false,
-+        "sve640": false, "sve768": false, "sve1408": false, "sve256": false,
-+        "sve1152": false, "sve512": false, "sve384": false, "sve1536": false,
-+        "sve896": false, "sve1280": false, "sve2048": false
-+      }}}}
-+
-+As expected they are now all `false`.
-+
-+(5) Let's try probing CPU features for the Cortex-A15 CPU type::
-       (QEMU) query-cpu-model-expansion type=full model={"name":"cortex-a15"}
-       {"return": {"model": {"name": "cortex-a15", "props": {"pmu": true}}}}
-@@ -XXX,XX +XXX,XX @@ After determining which CPU features are available and supported for a
- given CPU type, then they may be selectively enabled or disabled on the
- QEMU command line with that CPU type::
--  $ qemu-system-aarch64 -M virt -cpu max,pmu=off
-+  $ qemu-system-aarch64 -M virt -cpu max,pmu=off,sve=on,sve128=on,sve256=on
--The example above disables the PMU for the `max` CPU type.
-+The example above disables the PMU and enables the first two SVE vector
-+lengths for the `max` CPU type.  Note, the `sve=on` isn't actually
-+necessary, because, as we observed above with our probe of the `max` CPU
-+type, `sve` is already on by default.  Also, based on our probe of
-+defaults, it would seem we need to disable many SVE vector lengths, rather
-+than only enabling the two we want.  This isn't the case, because, as
-+disabling many SVE vector lengths would be quite verbose, the `sve<N>` CPU
-+properties have special semantics (see "SVE CPU Property Parsing
-+Semantics").
-+
-+SVE CPU Properties
-+==================
-+
-+There are two types of SVE CPU properties: `sve` and `sve<N>`.  The first
-+is used to enable or disable the entire SVE feature, just as the `pmu`
-+CPU property completely enables or disables the PMU.  The second type
-+is used to enable or disable specific vector lengths, where `N` is the
-+number of bits of the length.  The `sve<N>` CPU properties have special
-+dependencies and constraints, see "SVE CPU Property Dependencies and
-+Constraints" below.  Additionally, as we want all supported vector lengths
-+to be enabled by default, then, in order to avoid overly verbose command
-+lines (command lines full of `sve<N>=off`, for all `N` not wanted), we
-+provide the parsing semantics listed in "SVE CPU Property Parsing
-+Semantics".
-+
-+SVE CPU Property Dependencies and Constraints
-+---------------------------------------------
-+
-+  1) At least one vector length must be enabled when `sve` is enabled.
-+
-+  2) If a vector length `N` is enabled, then all power-of-two vector
-+     lengths smaller than `N` must also be enabled.  E.g. if `sve512`
-+     is enabled, then the 128-bit and 256-bit vector lengths must also
-+     be enabled.
-+
-+SVE CPU Property Parsing Semantics
-+----------------------------------
-+
-+  1) If SVE is disabled (`sve=off`), then which SVE vector lengths
-+     are enabled or disabled is irrelevant to the guest, as the entire
-+     SVE feature is disabled and that disables all vector lengths for
-+     the guest.  However QEMU will still track any `sve<N>` CPU
-+     properties provided by the user.  If later an `sve=on` is provided,
-+     then the guest will get only the enabled lengths.  If no `sve=on`
-+     is provided and there are explicitly enabled vector lengths, then
-+     an error is generated.
-+
-+  2) If SVE is enabled (`sve=on`), but no `sve<N>` CPU properties are
-+     provided, then all supported vector lengths are enabled, including
-+     the non-power-of-two lengths.
-+
-+  3) If SVE is enabled, then an error is generated when attempting to
-+     disable the last enabled vector length (see constraint (1) of "SVE
-+     CPU Property Dependencies and Constraints").
-+
-+  4) If one or more vector lengths have been explicitly enabled and at
-+     at least one of the dependency lengths of the maximum enabled length
-+     has been explicitly disabled, then an error is generated (see
-+     constraint (2) of "SVE CPU Property Dependencies and Constraints").
-+
-+  5) If one or more `sve<N>` CPU properties are set `off`, but no `sve<N>`,
-+     CPU properties are set `on`, then the specified vector lengths are
-+     disabled but the default for any unspecified lengths remains enabled.
-+     Disabling a power-of-two vector length also disables all vector
-+     lengths larger than the power-of-two length (see constraint (2) of
-+     "SVE CPU Property Dependencies and Constraints").
-+
-+  6) If one or more `sve<N>` CPU properties are set to `on`, then they
-+     are enabled and all unspecified lengths default to disabled, except
-+     for the required lengths per constraint (2) of "SVE CPU Property
-+     Dependencies and Constraints", which will even be auto-enabled if
-+     they were not explicitly enabled.
-+
-+  7) If SVE was disabled (`sve=off`), allowing all vector lengths to be
-+     explicitly disabled (i.e. avoiding the error specified in (3) of
-+     "SVE CPU Property Parsing Semantics"), then if later an `sve=on` is
-+     provided an error will be generated.  To avoid this error, one must
-+     enable at least one vector length prior to enabling SVE.
-+
-+SVE CPU Property Examples
-+-------------------------
-+
-+  1) Disable SVE::
-+
-+     $ qemu-system-aarch64 -M virt -cpu max,sve=off
-+
-+  2) Implicitly enable all vector lengths for the `max` CPU type::
-+
-+     $ qemu-system-aarch64 -M virt -cpu max
-+
-+  3) Only enable the 128-bit vector length::
-+
-+     $ qemu-system-aarch64 -M virt -cpu max,sve128=on
-+
-+  4) Disable the 512-bit vector length and all larger vector lengths,
-+     since 512 is a power-of-two.  This results in all the smaller,
-+     uninitialized lengths (128, 256, and 384) defaulting to enabled::
-+
-+     $ qemu-system-aarch64 -M virt -cpu max,sve512=off
-+
-+  5) Enable the 128-bit, 256-bit, and 512-bit vector lengths::
-+
-+     $ qemu-system-aarch64 -M virt -cpu max,sve128=on,sve256=on,sve512=on
-+
-+  6) The same as (5), but since the 128-bit and 256-bit vector
-+     lengths are required for the 512-bit vector length to be enabled,
-+     then allow them to be auto-enabled::
-+
-+     $ qemu-system-aarch64 -M virt -cpu max,sve512=on
-+
-+  7) Do the same as (6), but by first disabling SVE and then re-enabling it::
-+
-+     $ qemu-system-aarch64 -M virt -cpu max,sve=off,sve512=on,sve=on
-+
-+  8) Force errors regarding the last vector length::
-+
-+     $ qemu-system-aarch64 -M virt -cpu max,sve128=off
-+     $ qemu-system-aarch64 -M virt -cpu max,sve=off,sve128=off,sve=on
-+
-+SVE CPU Property Recommendations
-+--------------------------------
-+
-+The examples in "SVE CPU Property Examples" exhibit many ways to select
-+vector lengths which developers may find useful in order to avoid overly
-+verbose command lines.  However, the recommended way to select vector
-+lengths is to explicitly enable each desired length.  Therefore only
-+example's (1), (3), and (5) exhibit recommended uses of the properties.
 --
-.20.1
+.34.1

-[PULL 35/51] target/arm/monitor: Introduce qmp_query_cpu_model_expansion
+[PULL 17/30] target/arm: Implement the SET* instructions
-From: Andrew Jones <drjones@redhat.com>
+Implement the SET* instructions which collectively implement a
 "memset" operation.  These come in a set of three, eg SETP
 (prologue), SETM (main), SETE (epilogue), and each of those has
 different flavours to indicate whether memory accesses should be
 unpriv or non-temporal.
-Add support for the query-cpu-model-expansion QMP command to Arm. We
+This commit does not include the "memset with tag setting"
-do this selectively, only exposing CPU properties which represent
+SETG* instructions.
 optional CPU features which the user may want to enable/disable.
 Additionally we restrict the list of queryable cpu models to 'max',
 'host', or the current type when KVM is in use. And, finally, we only
 implement expansion type 'full', as Arm does not yet have a "base"
 CPU type. More details and example queries are described in a new
 document (docs/arm-cpu-features.rst).
-Note, certainly more features may be added to the list of advertised
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-features, e.g. 'vfp' and 'neon'. The only requirement is that we can
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-detect invalid configurations and emit failures at QMP query time.
+Message-id: 20230912140434.1333369-8-peter.maydell@linaro.org
-For 'vfp' and 'neon' this will require some refactoring to share a
+---
-validation function between the QMP query and the CPU realize
+ target/arm/tcg/helper-a64.h    |   4 +
-functions.
+ target/arm/tcg/a64.decode      |  16 ++
  target/arm/tcg/helper-a64.c    | 344 +++++++++++++++++++++++++++++++++
  target/arm/tcg/translate-a64.c |  49 +++++
 files changed, 413 insertions(+)
-Signed-off-by: Andrew Jones <drjones@redhat.com>
+diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 Reviewed-by: Eric Auger <eric.auger@redhat.com>
 Reviewed-by: Beata Michalska <beata.michalska@linaro.org>
 Message-id: 20191024121808.9612-2-drjones@redhat.com
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
  qapi/machine-target.json  |   6 +-
  target/arm/monitor.c      | 146 ++++++++++++++++++++++++++++++++++++++
  docs/arm-cpu-features.rst | 137 +++++++++++++++++++++++++++++++++++
 files changed, 286 insertions(+), 3 deletions(-)
  create mode 100644 docs/arm-cpu-features.rst
 diff --git a/qapi/machine-target.json b/qapi/machine-target.json
 index XXXXXXX..XXXXXXX 100644
---- a/qapi/machine-target.json
+--- a/target/arm/tcg/helper-a64.h
-+++ b/qapi/machine-target.json
++++ b/target/arm/tcg/helper-a64.h
-@@ -XXX,XX +XXX,XX @@
+@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64)
- ##
- { 'struct': 'CpuModelExpansionInfo',
+ DEF_HELPER_FLAGS_4(unaligned_access, TCG_CALL_NO_WG,
-   'data': { 'model': 'CpuModelInfo' },
+                    noreturn, env, i64, i32, i32)
--  'if': 'defined(TARGET_S390X) || defined(TARGET_I386)' }
++
-+  'if': 'defined(TARGET_S390X) || defined(TARGET_I386) || defined(TARGET_ARM)' }
++DEF_HELPER_3(setp, void, env, i32, i32)
++DEF_HELPER_3(setm, void, env, i32, i32)
- ##
++DEF_HELPER_3(sete, void, env, i32, i32)
- # @query-cpu-model-expansion:
+diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@
  #   query-cpu-model-expansion while using these is not advised.
  #
  # Some architectures may not support all expansion types. s390x supports
 -# "full" and "static".
 +# "full" and "static". Arm only supports "full".
  #
  # Returns: a CpuModelExpansionInfo. Returns an error if expanding CPU models is
  #          not supported, if the model cannot be expanded, if the model contains
@@ -XXX,XX +XXX,XX @@
    'data': { 'type': 'CpuModelExpansionType',
              'model': 'CpuModelInfo' },
    'returns': 'CpuModelExpansionInfo',
 -  'if': 'defined(TARGET_S390X) || defined(TARGET_I386)' }
 +  'if': 'defined(TARGET_S390X) || defined(TARGET_I386) || defined(TARGET_ARM)' }
  ##
  # @CpuDefinitionInfo:
 diff --git a/target/arm/monitor.c b/target/arm/monitor.c
 index XXXXXXX..XXXXXXX 100644
---- a/target/arm/monitor.c
+--- a/target/arm/tcg/a64.decode
-+++ b/target/arm/monitor.c
++++ b/target/arm/tcg/a64.decode
-@@ -XXX,XX +XXX,XX @@
+@@ -XXX,XX +XXX,XX @@ LDGM            11011001 11 1 ......... 00 ..... ..... @ldst_tag_mult p=0 w=0
-  */
+ STZ2G           11011001 11 1 ......... 01 ..... ..... @ldst_tag p=1 w=1
+ STZ2G           11011001 11 1 ......... 10 ..... ..... @ldst_tag p=0 w=0
- #include "qemu/osdep.h"
+ STZ2G           11011001 11 1 ......... 11 ..... ..... @ldst_tag p=0 w=1
-+#include "hw/boards.h"
++
- #include "kvm_arm.h"
++# Memory operations (memset, memcpy, memmove)
-+#include "qapi/error.h"
++# Each of these comes in a set of three, eg SETP (prologue), SETM (main),
-+#include "qapi/visitor.h"
++# SETE (epilogue), and each of those has different flavours to
-+#include "qapi/qobject-input-visitor.h"
++# indicate whether memory accesses should be unpriv or non-temporal.
-+#include "qapi/qapi-commands-machine-target.h"
++# We don't distinguish temporal and non-temporal accesses, but we
- #include "qapi/qapi-commands-misc-target.h"
++# do need to report it in syndrome register values.
-+#include "qapi/qmp/qerror.h"
++
-+#include "qapi/qmp/qdict.h"
++# Memset
-+#include "qom/qom-qobject.h"
++&set rs rn rd unpriv nontemp
++# op2 bit 1 is nontemporal bit
- static GICCapability *gic_cap_new(int version)
++@set         .. ......... rs:5 .. nontemp:1 unpriv:1 .. rn:5 rd:5 &set
- {
++
-@@ -XXX,XX +XXX,XX @@ GICCapabilityList *qmp_query_gic_capabilities(Error **errp)
++SETP            00 011001110 ..... 00 . . 01 ..... ..... @set
++SETM            00 011001110 ..... 01 . . 01 ..... ..... @set
-     return head;
++SETE            00 011001110 ..... 10 . . 01 ..... ..... @set
 diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/tcg/helper-a64.c
 +++ b/target/arm/tcg/helper-a64.c
@@ -XXX,XX +XXX,XX @@ void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
      arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type,
                                  mmu_idx, GETPC());
  }
 +
-+/*
++/* Memory operations (memset, memmove, memcpy) */
-+ * These are cpu model features we want to advertise. The order here
++
-+ * matters as this is the order in which qmp_query_cpu_model_expansion
++/*
-+ * will attempt to set them. If there are dependencies between features,
++ * Return true if the CPY* and SET* insns can execute; compare
-+ * then the order that considers those dependencies must be used.
++ * pseudocode CheckMOPSEnabled(), though we refactor it a little.
 + */
-+static const char *cpu_model_advertised_features[] = {
++static bool mops_enabled(CPUARMState *env)
-+    "aarch64", "pmu",
++{
-+    NULL
++    int el = arm_current_el(env);
-+};
++
-+
++    if (el < 2 &&
-+CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
++        (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) &&
-+                                                     CpuModelInfo *model,
++        !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) {
-+                                                     Error **errp)
++        return false;
-+{
++    }
-+    CpuModelExpansionInfo *expansion_info;
++
-+    const QDict *qdict_in = NULL;
++    if (el == 0) {
-+    QDict *qdict_out;
++        if (!el_is_in_host(env, 0)) {
-+    ObjectClass *oc;
++            return env->cp15.sctlr_el[1] & SCTLR_MSCEN;
-+    Object *obj;
++        } else {
-+    const char *name;
++            return env->cp15.sctlr_el[2] & SCTLR_MSCEN;
 +    int i;
 +
 +    if (type != CPU_MODEL_EXPANSION_TYPE_FULL) {
 +        error_setg(errp, "The requested expansion type is not supported");
 +        return NULL;
 +    }
 +
 +    if (!kvm_enabled() && !strcmp(model->name, "host")) {
 +        error_setg(errp, "The CPU type '%s' requires KVM", model->name);
 +        return NULL;
 +    }
 +
 +    oc = cpu_class_by_name(TYPE_ARM_CPU, model->name);
 +    if (!oc) {
 +        error_setg(errp, "The CPU type '%s' is not a recognized ARM CPU type",
 +                   model->name);
 +        return NULL;
 +    }
 +
 +    if (kvm_enabled()) {
 +        const char *cpu_type = current_machine->cpu_type;
 +        int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX);
 +        bool supported = false;
 +
 +        if (!strcmp(model->name, "host") || !strcmp(model->name, "max")) {
 +            /* These are kvmarm's recommended cpu types */
 +            supported = true;
 +        } else if (strlen(model->name) == len &&
 +                   !strncmp(model->name, cpu_type, len)) {
 +            /* KVM is enabled and we're using this type, so it works. */
 +            supported = true;
 +        }
-+        if (!supported) {
++    }
-+            error_setg(errp, "We cannot guarantee the CPU type '%s' works "
++    return true;
-+                             "with KVM on this host", model->name);
++}
-+            return NULL;
++
 +static void check_mops_enabled(CPUARMState *env, uintptr_t ra)
 +{
 +    if (!mops_enabled(env)) {
 +        raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(),
 +                           exception_target_el(env), ra);
 +    }
 +}
 +
 +/*
 + * Return the target exception level for an exception due
 + * to mismatched arguments in a FEAT_MOPS copy or set.
 + * Compare pseudocode MismatchedCpySetTargetEL()
 + */
 +static int mops_mismatch_exception_target_el(CPUARMState *env)
 +{
 +    int el = arm_current_el(env);
 +
 +    if (el > 1) {
 +        return el;
 +    }
 +    if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
 +        return 2;
 +    }
 +    if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) {
 +        return 2;
 +    }
 +    return 1;
 +}
 +
 +/*
 + * Check whether an M or E instruction was executed with a CF value
 + * indicating the wrong option for this implementation.
 + * Assumes we are always Option A.
 + */
 +static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome,
 +                                    uintptr_t ra)
 +{
 +    if (env->CF != 0) {
 +        syndrome |= 1 << 17; /* Set the wrong-option bit */
 +        raise_exception_ra(env, EXCP_UDEF, syndrome,
 +                           mops_mismatch_exception_target_el(env), ra);
 +    }
 +}
 +
 +/*
 + * Return the maximum number of bytes we can transfer starting at addr
 + * without crossing a page boundary.
 + */
 +static uint64_t page_limit(uint64_t addr)
 +{
 +    return TARGET_PAGE_ALIGN(addr + 1) - addr;
 +}
 +
 +/*
 + * Perform part of a memory set on an area of guest memory starting at
 + * toaddr (a dirty address) and extending for setsize bytes.
 + *
 + * Returns the number of bytes actually set, which might be less than
 + * setsize; the caller should loop until the whole set has been done.
 + * The caller should ensure that the guest registers are correct
 + * for the possibility that the first byte of the set encounters
 + * an exception or watchpoint. We guarantee not to take any faults
 + * for bytes other than the first.
 + */
 +static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
 +                         uint64_t setsize, uint32_t data, int memidx,
 +                         uint32_t *mtedesc, uintptr_t ra)
 +{
 +    void *mem;
 +
 +    setsize = MIN(setsize, page_limit(toaddr));
 +    if (*mtedesc) {
 +        uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc);
 +        if (mtesize == 0) {
 +            /* Trap, or not. All CPU state is up to date */
 +            mte_check_fail(env, *mtedesc, toaddr, ra);
 +            /* Continue, with no further MTE checks required */
 +            *mtedesc = 0;
 +        } else {
 +            /* Advance to the end, or to the tag mismatch */
 +            setsize = MIN(setsize, mtesize);
 +        }
 +    }
 +
-+    if (model->props) {
++    toaddr = useronly_clean_ptr(toaddr);
-+        qdict_in = qobject_to(QDict, model->props);
++    /*
-+        if (!qdict_in) {
++     * Trapless lookup: returns NULL for invalid page, I/O,
-+            error_setg(errp, QERR_INVALID_PARAMETER_TYPE, "props", "dict");
++     * watchpoints, clean pages, etc.
-+            return NULL;
++     */
 +    mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx);
 +
 +#ifndef CONFIG_USER_ONLY
 +    if (unlikely(!mem)) {
 +        /*
 +         * Slow-path: just do one byte write. This will handle the
 +         * watchpoint, invalid page, etc handling correctly.
 +         * For clean code pages, the next iteration will see
 +         * the page dirty and will use the fast path.
 +         */
 +        cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra);
 +        return 1;
 +    }
 +#endif
 +    /* Easy case: just memset the host memory */
 +    memset(mem, data, setsize);
 +    return setsize;
 +}
 +
 +typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr,
 +                        uint64_t setsize, uint32_t data,
 +                        int memidx, uint32_t *mtedesc, uintptr_t ra);
 +
 +/* Extract register numbers from a MOPS exception syndrome value */
 +static int mops_destreg(uint32_t syndrome)
 +{
 +    return extract32(syndrome, 10, 5);
 +}
 +
 +static int mops_srcreg(uint32_t syndrome)
 +{
 +    return extract32(syndrome, 5, 5);
 +}
 +
 +static int mops_sizereg(uint32_t syndrome)
 +{
 +    return extract32(syndrome, 0, 5);
 +}
 +
 +/*
 + * Return true if TCMA and TBI bits mean we need to do MTE checks.
 + * We only need to do this once per MOPS insn, not for every page.
 + */
 +static bool mte_checks_needed(uint64_t ptr, uint32_t desc)
 +{
 +    int bit55 = extract64(ptr, 55, 1);
 +
 +    /*
 +     * Note that tbi_check() returns true for "access checked" but
 +     * tcma_check() returns true for "access unchecked".
 +     */
 +    if (!tbi_check(desc, bit55)) {
 +        return false;
 +    }
 +    return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr));
 +}
 +
 +/*
 + * For the Memory Set operation, our implementation chooses
 + * always to use "option A", where we update Xd to the final
 + * address in the SETP insn, and set Xn to be -(bytes remaining).
 + * On SETM and SETE insns we only need update Xn.
 + *
 + * @env: CPU
 + * @syndrome: syndrome value for mismatch exceptions
 + * (also contains the register numbers we need to use)
 + * @mtedesc: MTE descriptor word
 + * @stepfn: function which does a single part of the set operation
 + * @is_setg: true if this is the tag-setting SETG variant
 + */
 +static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
 +                    StepFn *stepfn, bool is_setg, uintptr_t ra)
 +{
 +    /* Prologue: we choose to do up to the next page boundary */
 +    int rd = mops_destreg(syndrome);
 +    int rs = mops_srcreg(syndrome);
 +    int rn = mops_sizereg(syndrome);
 +    uint8_t data = env->xregs[rs];
 +    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
 +    uint64_t toaddr = env->xregs[rd];
 +    uint64_t setsize = env->xregs[rn];
 +    uint64_t stagesetsize, step;
 +
 +    check_mops_enabled(env, ra);
 +
 +    if (setsize > INT64_MAX) {
 +        setsize = INT64_MAX;
 +    }
 +
 +    if (!mte_checks_needed(toaddr, mtedesc)) {
 +        mtedesc = 0;
 +    }
 +
 +    stagesetsize = MIN(setsize, page_limit(toaddr));
 +    while (stagesetsize) {
 +        env->xregs[rd] = toaddr;
 +        env->xregs[rn] = setsize;
 +        step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
 +        toaddr += step;
 +        setsize -= step;
 +        stagesetsize -= step;
 +    }
 +    /* Insn completed, so update registers to the Option A format */
 +    env->xregs[rd] = toaddr + setsize;
 +    env->xregs[rn] = -setsize;
 +
 +    /* Set NZCV = 0000 to indicate we are an Option A implementation */
 +    env->NF = 0;
 +    env->ZF = 1; /* our env->ZF encoding is inverted */
 +    env->CF = 0;
 +    env->VF = 0;
 +    return;
 +}
 +
 +void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
 +{
 +    do_setp(env, syndrome, mtedesc, set_step, false, GETPC());
 +}
 +
 +static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
 +                    StepFn *stepfn, bool is_setg, uintptr_t ra)
 +{
 +    /* Main: we choose to do all the full-page chunks */
 +    CPUState *cs = env_cpu(env);
 +    int rd = mops_destreg(syndrome);
 +    int rs = mops_srcreg(syndrome);
 +    int rn = mops_sizereg(syndrome);
 +    uint8_t data = env->xregs[rs];
 +    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
 +    uint64_t setsize = -env->xregs[rn];
 +    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
 +    uint64_t step, stagesetsize;
 +
 +    check_mops_enabled(env, ra);
 +
 +    /*
 +     * We're allowed to NOP out "no data to copy" before the consistency
 +     * checks; we choose to do so.
 +     */
 +    if (env->xregs[rn] == 0) {
 +        return;
 +    }
 +
 +    check_mops_wrong_option(env, syndrome, ra);
 +
 +    /*
 +     * Our implementation will work fine even if we have an unaligned
 +     * destination address, and because we update Xn every time around
 +     * the loop below and the return value from stepfn() may be less
 +     * than requested, we might find toaddr is unaligned. So we don't
 +     * have an IMPDEF check for alignment here.
 +     */
 +
 +    if (!mte_checks_needed(toaddr, mtedesc)) {
 +        mtedesc = 0;
 +    }
 +
 +    /* Do the actual memset: we leave the last partial page to SETE */
 +    stagesetsize = setsize & TARGET_PAGE_MASK;
 +    while (stagesetsize > 0) {
 +        step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
 +        toaddr += step;
 +        setsize -= step;
 +        stagesetsize -= step;
 +        env->xregs[rn] = -setsize;
 +        if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) {
 +            cpu_loop_exit_restore(cs, ra);
 +        }
 +    }
-+
++}
-+    obj = object_new(object_class_get_name(oc));
++
-+
++void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
-+    if (qdict_in) {
++{
-+        Visitor *visitor;
++    do_setm(env, syndrome, mtedesc, set_step, false, GETPC());
-+        Error *err = NULL;
++}
 +
-+        visitor = qobject_input_visitor_new(model->props);
++static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
-+        visit_start_struct(visitor, NULL, NULL, 0, &err);
++                    StepFn *stepfn, bool is_setg, uintptr_t ra)
-+        if (err) {
++{
-+            visit_free(visitor);
++    /* Epilogue: do the last partial page */
-+            object_unref(obj);
++    int rd = mops_destreg(syndrome);
-+            error_propagate(errp, err);
++    int rs = mops_srcreg(syndrome);
-+            return NULL;
++    int rn = mops_sizereg(syndrome);
-+        }
++    uint8_t data = env->xregs[rs];
-+
++    uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
-+        i = 0;
++    uint64_t setsize = -env->xregs[rn];
-+        while ((name = cpu_model_advertised_features[i++]) != NULL) {
++    uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
-+            if (qdict_get(qdict_in, name)) {
++    uint64_t step;
-+                object_property_set(obj, visitor, name, &err);
++
-+                if (err) {
++    check_mops_enabled(env, ra);
-+                    break;
++
-+                }
++    /*
-+            }
++     * We're allowed to NOP out "no data to copy" before the consistency
-+        }
++     * checks; we choose to do so.
-+
++     */
-+        if (!err) {
++    if (setsize == 0) {
-+            visit_check_struct(visitor, &err);
++        return;
-+        }
++    }
-+        visit_end_struct(visitor, NULL);
++
-+        visit_free(visitor);
++    check_mops_wrong_option(env, syndrome, ra);
-+        if (err) {
++
-+            object_unref(obj);
++    /*
-+            error_propagate(errp, err);
++     * Our implementation has no address alignment requirements, but
-+            return NULL;
++     * we do want to enforce the "less than a page" size requirement,
-+        }
++     * so we don't need to have the "check for interrupts" here.
-+    }
++     */
-+
++    if (setsize >= TARGET_PAGE_SIZE) {
-+    expansion_info = g_new0(CpuModelExpansionInfo, 1);
++        raise_exception_ra(env, EXCP_UDEF, syndrome,
-+    expansion_info->model = g_malloc0(sizeof(*expansion_info->model));
++                           mops_mismatch_exception_target_el(env), ra);
-+    expansion_info->model->name = g_strdup(model->name);
++    }
 +
-+    qdict_out = qdict_new();
++    if (!mte_checks_needed(toaddr, mtedesc)) {
-+
++        mtedesc = 0;
-+    i = 0;
++    }
-+    while ((name = cpu_model_advertised_features[i++]) != NULL) {
++
-+        ObjectProperty *prop = object_property_find(obj, name, NULL);
++    /* Do the actual memset */
-+        if (prop) {
++    while (setsize > 0) {
-+            Error *err = NULL;
++        step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
-+            QObject *value;
++        toaddr += step;
-+
++        setsize -= step;
-+            assert(prop->get);
++        env->xregs[rn] = -setsize;
-+            value = object_property_get_qobject(obj, name, &err);
++    }
-+            assert(!err);
++}
 +
-+            qdict_put_obj(qdict_out, name, value);
++void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
-+        }
++{
-+    }
++    do_sete(env, syndrome, mtedesc, set_step, false, GETPC());
-+
++}
-+    if (!qdict_size(qdict_out)) {
+diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
-+        qobject_unref(qdict_out);
+index XXXXXXX..XXXXXXX 100644
-+    } else {
+--- a/target/arm/tcg/translate-a64.c
-+        expansion_info->model->props = QOBJECT(qdict_out);
++++ b/target/arm/tcg/translate-a64.c
-+        expansion_info->model->has_props = true;
+@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
-+    }
+ TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
-+
+ TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
-+    object_unref(obj);
-+
++typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
-+    return expansion_info;
++
-+}
++static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, SetFn fn)
-diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst
++{
-new file mode 100644
++    int memidx;
-index XXXXXXX..XXXXXXX
++    uint32_t syndrome, desc = 0;
---- /dev/null
++
-+++ b/docs/arm-cpu-features.rst
++    /*
-@@ -XXX,XX +XXX,XX @@
++     * UNPREDICTABLE cases: we choose to UNDEF, which allows
-+================
++     * us to pull this check before the CheckMOPSEnabled() test
-+ARM CPU Features
++     * (which we do in the helper function)
-+================
++     */
-+
++    if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
-+Examples of probing and using ARM CPU features
++        a->rd == 31 || a->rn == 31) {
-+
++        return false;
-+Introduction
++    }
-+============
++
-+
++    memidx = get_a64_user_mem_index(s, a->unpriv);
-+CPU features are optional features that a CPU of supporting type may
++
-+choose to implement or not.  In QEMU, optional CPU features have
++    /*
-+corresponding boolean CPU properties that, when enabled, indicate
++     * We pass option_a == true, matching our implementation;
-+that the feature is implemented, and, conversely, when disabled,
++     * we pass wrong_option == false: helper function may set that bit.
-+indicate that it is not implemented. An example of an ARM CPU feature
++     */
-+is the Performance Monitoring Unit (PMU).  CPU types such as the
++    syndrome = syn_mop(true, false, (a->nontemp << 1) | a->unpriv,
-+Cortex-A15 and the Cortex-A57, which respectively implement ARM
++                       is_epilogue, false, true, a->rd, a->rs, a->rn);
-+architecture reference manuals ARMv7-A and ARMv8-A, may both optionally
++
-+implement PMUs.  For example, if a user wants to use a Cortex-A15 without
++    if (s->mte_active[a->unpriv]) {
-+a PMU, then the `-cpu` parameter should contain `pmu=off` on the QEMU
++        /* We may need to do MTE tag checking, so assemble the descriptor */
-+command line, i.e. `-cpu cortex-a15,pmu=off`.
++        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
-+
++        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
-+As not all CPU types support all optional CPU features, whether or
++        desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
-+not a CPU property exists depends on the CPU type.  For example, CPUs
++        /* SIZEM1 and ALIGN we leave 0 (byte write) */
-+that implement the ARMv8-A architecture reference manual may optionally
++    }
-+support the AArch32 CPU feature, which may be enabled by disabling the
++    /* The helper function always needs the memidx even with MTE disabled */
-+`aarch64` CPU property.  A CPU type such as the Cortex-A15, which does
++    desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
-+not implement ARMv8-A, will not have the `aarch64` CPU property.
++
-+
++    /*
-+QEMU's support may be limited for some CPU features, only partially
++     * The helper needs the register numbers, but since they're in
-+supporting the feature or only supporting the feature under certain
++     * the syndrome anyway, we let it extract them from there rather
-+configurations.  For example, the `aarch64` CPU feature, which, when
++     * than passing in an extra three integer arguments.
-+disabled, enables the optional AArch32 CPU feature, is only supported
++     */
-+when using the KVM accelerator and when running on a host CPU type that
++    fn(cpu_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
-+supports the feature.
++    return true;
-+
++}
-+CPU Feature Probing
++
-+===================
++TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, gen_helper_setp)
-+
++TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, gen_helper_setm)
-+Determining which CPU features are available and functional for a given
++TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, gen_helper_sete)
-+CPU type is possible with the `query-cpu-model-expansion` QMP command.
++
-+Below are some examples where `scripts/qmp/qmp-shell` (see the top comment
+ typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
-+block in the script for usage) is used to issue the QMP commands.
-+
+ static bool gen_rri(DisasContext *s, arg_rri_sf *a,
 +(1) Determine which CPU features are available for the `max` CPU type
 +    (Note, we started QEMU with qemu-system-aarch64, so `max` is
 +     implementing the ARMv8-A reference manual in this case)::
 +
 +      (QEMU) query-cpu-model-expansion type=full model={"name":"max"}
 +      { "return": {
 +        "model": { "name": "max", "props": {
 +        "pmu": true, "aarch64": true
 +      }}}}
 +
 +We see that the `max` CPU type has the `pmu` and `aarch64` CPU features.
 +We also see that the CPU features are enabled, as they are all `true`.
 +
 +(2) Let's try to disable the PMU::
 +
 +      (QEMU) query-cpu-model-expansion type=full model={"name":"max","props":{"pmu":false}}
 +      { "return": {
 +        "model": { "name": "max", "props": {
 +        "pmu": false, "aarch64": true
 +      }}}}
 +
 +We see it worked, as `pmu` is now `false`.
 +
 +(3) Let's try to disable `aarch64`, which enables the AArch32 CPU feature::
 +
 +      (QEMU) query-cpu-model-expansion type=full model={"name":"max","props":{"aarch64":false}}
 +      {"error": {
 +       "class": "GenericError", "desc":
 +       "'aarch64' feature cannot be disabled unless KVM is enabled and 32-bit EL1 is supported"
 +      }}
 +
 +It looks like this feature is limited to a configuration we do not
 +currently have.
 +
 +(4) Let's try probing CPU features for the Cortex-A15 CPU type::
 +
 +      (QEMU) query-cpu-model-expansion type=full model={"name":"cortex-a15"}
 +      {"return": {"model": {"name": "cortex-a15", "props": {"pmu": true}}}}
 +
 +Only the `pmu` CPU feature is available.
 +
 +A note about CPU feature dependencies
 +-------------------------------------
 +
 +It's possible for features to have dependencies on other features. I.e.
 +it may be possible to change one feature at a time without error, but
 +when attempting to change all features at once an error could occur
 +depending on the order they are processed.  It's also possible changing
 +all at once doesn't generate an error, because a feature's dependencies
 +are satisfied with other features, but the same feature cannot be changed
 +independently without error.  For these reasons callers should always
 +attempt to make their desired changes all at once in order to ensure the
 +collection is valid.
 +
 +A note about CPU models and KVM
 +-------------------------------
 +
 +Named CPU models generally do not work with KVM.  There are a few cases
 +that do work, e.g. using the named CPU model `cortex-a57` with KVM on a
 +seattle host, but mostly if KVM is enabled the `host` CPU type must be
 +used.  This means the guest is provided all the same CPU features as the
 +host CPU type has.  And, for this reason, the `host` CPU type should
 +enable all CPU features that the host has by default.  Indeed it's even
 +a bit strange to allow disabling CPU features that the host has when using
 +the `host` CPU type, but in the absence of CPU models it's the best we can
 +do if we want to launch guests without all the host's CPU features enabled.
 +
 +Enabling KVM also affects the `query-cpu-model-expansion` QMP command.  The
 +effect is not only limited to specific features, as pointed out in example
 +(3) of "CPU Feature Probing", but also to which CPU types may be expanded.
 +When KVM is enabled, only the `max`, `host`, and current CPU type may be
 +expanded.  This restriction is necessary as it's not possible to know all
 +CPU types that may work with KVM, but it does impose a small risk of users
 +experiencing unexpected errors.  For example on a seattle, as mentioned
 +above, the `cortex-a57` CPU type is also valid when KVM is enabled.
 +Therefore a user could use the `host` CPU type for the current type, but
 +then attempt to query `cortex-a57`, however that query will fail with our
 +restrictions.  This shouldn't be an issue though as management layers and
 +users have been preferring the `host` CPU type for use with KVM for quite
 +some time.  Additionally, if the KVM-enabled QEMU instance running on a
 +seattle host is using the `cortex-a57` CPU type, then querying `cortex-a57`
 +will work.
 +
 +Using CPU Features
 +==================
 +
 +After determining which CPU features are available and supported for a
 +given CPU type, they may be selectively enabled or disabled on the
 +QEMU command line with that CPU type::
 +
 +  $ qemu-system-aarch64 -M virt -cpu max,pmu=off
 +
 +The example above disables the PMU for the `max` CPU type.
 +
 --
-.20.1
+.34.1

-[PULL 03/51] target/arm: Split out rebuild_hflags_common
+[PULL 18/30] target/arm: Define new TB flag for ATA0
-From: Richard Henderson <richard.henderson@linaro.org>
+Currently the only tag-setting instructions always do so in the
 context of the current EL, and so we only need one ATA bit in the TB
 flags.  The FEAT_MOPS SETG instructions include ones which set tags
 for a non-privileged access, so we now also need the equivalent "are
 tags enabled?" information for EL0.
-Create a function to compute the values of the TBFLAG_ANY bits
+Add the new TB flag, and convert the existing 'bool ata' field in
-that will be cached.  For now, the env->hflags variable is not
+DisasContext to a 'bool ata[2]' that can be indexed by the is_unpriv
-used, and the results are fed back to cpu_get_tb_cpu_state.
+bit in an instruction, similarly to mte[2].
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-2-richard.henderson@linaro.org
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+Message-id: 20230912140434.1333369-9-peter.maydell@linaro.org
 ---
- target/arm/cpu.h    | 29 ++++++++++++++++++-----------
+ target/arm/cpu.h               |  1 +
- target/arm/helper.c | 26 +++++++++++++++++++-------
+ target/arm/tcg/translate.h     |  4 ++--
-files changed, 37 insertions(+), 18 deletions(-)
+ target/arm/tcg/hflags.c        | 12 ++++++++++++
  target/arm/tcg/translate-a64.c | 23 ++++++++++++-----------
 files changed, 27 insertions(+), 13 deletions(-)
 diff --git a/target/arm/cpu.h b/target/arm/cpu.h
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/cpu.h
 +++ b/target/arm/cpu.h
-@@ -XXX,XX +XXX,XX @@ typedef struct CPUARMState {
+@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, SVL, 24, 4)
-     uint32_t pstate;
+ FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
-     uint32_t aarch64; /* 1 if CPU is in aarch64 state; inverse of PSTATE.nRW */
+ FIELD(TBFLAG_A64, FGT_ERET, 29, 1)
+ FIELD(TBFLAG_A64, NAA, 30, 1)
-+    /* Cached TBFLAGS state.  See below for which bits are included.  */
++FIELD(TBFLAG_A64, ATA0, 31, 1)
-+    uint32_t hflags;
 +
      /* Frequently accessed CPSR bits are stored separately for efficiency.
         This contains all the other bits.  Use cpsr_{read,write} to access
         the whole CPSR.  */
@@ -XXX,XX +XXX,XX @@ typedef ARMCPU ArchCPU;
  #include "exec/cpu-all.h"
 -/* Bit usage in the TB flags field: bit 31 indicates whether we are
 +/*
 + * Bit usage in the TB flags field: bit 31 indicates whether we are
   * in 32 or 64 bit mode. The meaning of the other bits depends on that.
   * We put flags which are shared between 32 and 64 bit mode at the top
   * of the word, and flags which apply to only one mode at the bottom.
 + *
 + * Unless otherwise noted, these bits are cached in env->hflags.
   */
  FIELD(TBFLAG_ANY, AARCH64_STATE, 31, 1)
  FIELD(TBFLAG_ANY, MMUIDX, 28, 3)
  FIELD(TBFLAG_ANY, SS_ACTIVE, 27, 1)
 -FIELD(TBFLAG_ANY, PSTATE_SS, 26, 1)
 +FIELD(TBFLAG_ANY, PSTATE_SS, 26, 1)     /* Not cached. */
  /* Target EL if we take a floating-point-disabled exception */
  FIELD(TBFLAG_ANY, FPEXC_EL, 24, 2)
  FIELD(TBFLAG_ANY, BE_DATA, 23, 1)
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_ANY, BE_DATA, 23, 1)
  FIELD(TBFLAG_ANY, DEBUG_TARGET_EL, 21, 2)
  /* Bit usage when in AArch32 state: */
 -FIELD(TBFLAG_A32, THUMB, 0, 1)
 -FIELD(TBFLAG_A32, VECLEN, 1, 3)
 -FIELD(TBFLAG_A32, VECSTRIDE, 4, 2)
 +FIELD(TBFLAG_A32, THUMB, 0, 1)          /* Not cached. */
 +FIELD(TBFLAG_A32, VECLEN, 1, 3)         /* Not cached. */
 +FIELD(TBFLAG_A32, VECSTRIDE, 4, 2)      /* Not cached. */
  /*
-  * We store the bottom two bits of the CPAR as TB flags and handle
+  * Helpers for using the above.
-  * checks on the other bits at runtime. This shares the same bits as
+diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
   * VECSTRIDE, which is OK as no XScale CPU has VFP.
 + * Not cached, because VECLEN+VECSTRIDE are not cached.
   */
  FIELD(TBFLAG_A32, XSCALE_CPAR, 4, 2)
  /*
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, XSCALE_CPAR, 4, 2)
   * the same thing as the current security state of the processor!
   */
  FIELD(TBFLAG_A32, NS, 6, 1)
 -FIELD(TBFLAG_A32, VFPEN, 7, 1)
 -FIELD(TBFLAG_A32, CONDEXEC, 8, 8)
 +FIELD(TBFLAG_A32, VFPEN, 7, 1)          /* Not cached. */
 +FIELD(TBFLAG_A32, CONDEXEC, 8, 8)       /* Not cached. */
  FIELD(TBFLAG_A32, SCTLR_B, 16, 1)
  /* For M profile only, set if FPCCR.LSPACT is set */
 -FIELD(TBFLAG_A32, LSPACT, 18, 1)
 +FIELD(TBFLAG_A32, LSPACT, 18, 1)        /* Not cached. */
  /* For M profile only, set if we must create a new FP context */
 -FIELD(TBFLAG_A32, NEW_FP_CTXT_NEEDED, 19, 1)
 +FIELD(TBFLAG_A32, NEW_FP_CTXT_NEEDED, 19, 1) /* Not cached. */
  /* For M profile only, set if FPCCR.S does not match current security state */
 -FIELD(TBFLAG_A32, FPCCR_S_WRONG, 20, 1)
 +FIELD(TBFLAG_A32, FPCCR_S_WRONG, 20, 1) /* Not cached. */
  /* For M profile only, Handler (ie not Thread) mode */
  FIELD(TBFLAG_A32, HANDLER, 21, 1)
  /* For M profile only, whether we should generate stack-limit checks */
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, SVEEXC_EL, 2, 2)
  FIELD(TBFLAG_A64, ZCR_LEN, 4, 4)
  FIELD(TBFLAG_A64, PAUTH_ACTIVE, 8, 1)
  FIELD(TBFLAG_A64, BT, 9, 1)
 -FIELD(TBFLAG_A64, BTYPE, 10, 2)
 +FIELD(TBFLAG_A64, BTYPE, 10, 2)         /* Not cached. */
  FIELD(TBFLAG_A64, TBID, 12, 2)
  static inline bool bswap_code(bool sctlr_b)
 diff --git a/target/arm/helper.c b/target/arm/helper.c
 index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
+--- a/target/arm/tcg/translate.h
-+++ b/target/arm/helper.c
++++ b/target/arm/tcg/translate.h
-@@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env)
+@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
- }
+     bool unpriv;
- #endif
+     /* True if v8.3-PAuth is active.  */
+     bool pauth_active;
-+static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el,
+-    /* True if v8.5-MTE access to tags is enabled.  */
-+                                      ARMMMUIdx mmu_idx, uint32_t flags)
+-    bool ata;
-+{
++    /* True if v8.5-MTE access to tags is enabled; index with is_unpriv.  */
-+    flags = FIELD_DP32(flags, TBFLAG_ANY, FPEXC_EL, fp_el);
++    bool ata[2];
-+    flags = FIELD_DP32(flags, TBFLAG_ANY, MMUIDX,
+     /* True if v8.5-MTE tag checks affect the PE; index with is_unpriv.  */
-+                       arm_to_core_mmu_idx(mmu_idx));
+     bool mte_active[2];
-+
+     /* True with v8.5-BTI and SCTLR_ELx.BT* set.  */
-+    if (arm_cpu_data_is_big_endian(env)) {
+diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
-+        flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1);
+index XXXXXXX..XXXXXXX 100644
-+    }
+--- a/target/arm/tcg/hflags.c
-+    if (arm_singlestep_active(env)) {
++++ b/target/arm/tcg/hflags.c
-+        flags = FIELD_DP32(flags, TBFLAG_ANY, SS_ACTIVE, 1);
+@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
-+    }
+             && allocation_tag_access_enabled(env, 0, sctlr)) {
-+    return flags;
+             DP_TBFLAG_A64(flags, MTE0_ACTIVE, 1);
 +}
 +
  void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                            target_ulong *cs_base, uint32_t *pflags)
  {
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
          }
++        /*
++         * For unpriv tag-setting accesses we also need ATA0. Again, in
++         * contexts where unpriv and normal insns are the same we
++         * duplicate the ATA bit to save effort for translate-a64.c.
++         */
++        if (EX_TBFLAG_A64(flags, UNPRIV)) {
++            if (allocation_tag_access_enabled(env, 0, sctlr)) {
++                DP_TBFLAG_A64(flags, ATA0, 1);
++            }
++        } else {
++            DP_TBFLAG_A64(flags, ATA0, EX_TBFLAG_A64(flags, ATA));
++        }
+         /* Cache TCMA as well as TBI. */
+         DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx));
      }
+diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
--    flags = FIELD_DP32(flags, TBFLAG_ANY, MMUIDX, arm_to_core_mmu_idx(mmu_idx));
+index XXXXXXX..XXXXXXX 100644
-+    flags = rebuild_hflags_common(env, fp_el, mmu_idx, flags);
+--- a/target/arm/tcg/translate-a64.c
++++ b/target/arm/tcg/translate-a64.c
-     /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
+@@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, bool isread,
-      * states defined in the ARM ARM for software singlestep:
+             clean_addr = clean_data_tbi(s, tcg_rt);
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
+             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
-      *     0            x       Inactive (the TB flag for SS is always 0)
-      *     1            0       Active-pending
+-            if (s->ata) {
-      *     1            1       Active-not-pending
++            if (s->ata[0]) {
-+     * SS_ACTIVE is set in hflags; PSTATE_SS is computed every TB.
+                 /* Extract the tag from the register to match STZGM.  */
-      */
+                 tag = tcg_temp_new_i64();
--    if (arm_singlestep_active(env)) {
+                 tcg_gen_shri_i64(tag, tcg_rt, 56);
--        flags = FIELD_DP32(flags, TBFLAG_ANY, SS_ACTIVE, 1);
+@@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, bool isread,
-+    if (FIELD_EX32(flags, TBFLAG_ANY, SS_ACTIVE)) {
+             clean_addr = clean_data_tbi(s, tcg_rt);
-         if (is_a64(env)) {
+             gen_helper_dc_zva(cpu_env, clean_addr);
-             if (env->pstate & PSTATE_SS) {
-                 flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE_SS, 1);
+-            if (s->ata) {
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
++            if (s->ata[0]) {
-             }
+                 /* Extract the tag from the register to match STZGM.  */
                  tag = tcg_temp_new_i64();
                  tcg_gen_shri_i64(tag, tcg_rt, 56);
@@ -XXX,XX +XXX,XX @@ static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
      tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
      /* Perform the tag store, if tag access enabled. */
 -    if (s->ata) {
 +    if (s->ata[0]) {
          if (tb_cflags(s->base.tb) & CF_PARALLEL) {
              gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
          } else {
@@ -XXX,XX +XXX,XX @@ static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
      tcg_gen_addi_i64(addr, addr, a->imm);
      tcg_rt = cpu_reg(s, a->rt);
 -    if (s->ata) {
 +    if (s->ata[0]) {
          gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
      }
      /*
@@ -XXX,XX +XXX,XX @@ static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
      tcg_gen_addi_i64(addr, addr, a->imm);
      tcg_rt = cpu_reg(s, a->rt);
 -    if (s->ata) {
 +    if (s->ata[0]) {
          gen_helper_stgm(cpu_env, addr, tcg_rt);
      } else {
          MMUAccessType acc = MMU_DATA_STORE;
@@ -XXX,XX +XXX,XX @@ static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
      tcg_gen_addi_i64(addr, addr, a->imm);
      tcg_rt = cpu_reg(s, a->rt);
 -    if (s->ata) {
 +    if (s->ata[0]) {
          gen_helper_ldgm(tcg_rt, cpu_env, addr);
      } else {
          MMUAccessType acc = MMU_DATA_LOAD;
@@ -XXX,XX +XXX,XX @@ static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
      tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
      tcg_rt = cpu_reg(s, a->rt);
 -    if (s->ata) {
 +    if (s->ata[0]) {
          gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
      } else {
          /*
@@ -XXX,XX +XXX,XX @@ static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
          tcg_gen_addi_i64(addr, addr, a->imm);
      }
      tcg_rt = cpu_reg_sp(s, a->rt);
 -    if (!s->ata) {
 +    if (!s->ata[0]) {
          /*
           * For STG and ST2G, we need to check alignment and probe memory.
           * TODO: For STZG and STZ2G, we could rely on the stores below,
@@ -XXX,XX +XXX,XX @@ static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
      tcg_rn = cpu_reg_sp(s, a->rn);
      tcg_rd = cpu_reg_sp(s, a->rd);
 -    if (s->ata) {
 +    if (s->ata[0]) {
          gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn,
                             tcg_constant_i32(imm),
                             tcg_constant_i32(a->uimm4));
@@ -XXX,XX +XXX,XX @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
          if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
              goto do_unallocated;
          }
-     }
+-        if (s->ata) {
--    if (arm_cpu_data_is_big_endian(env)) {
++        if (s->ata[0]) {
--        flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1);
+             gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
--    }
+                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
--    flags = FIELD_DP32(flags, TBFLAG_ANY, FPEXC_EL, fp_el);
+         } else {
+@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
-     if (arm_v7m_is_handler_mode(env)) {
+     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
-         flags = FIELD_DP32(flags, TBFLAG_A32, HANDLER, 1);
+     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
      dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
 -    dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
 +    dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
 +    dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
      dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
      dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
      dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
 --
-.20.1
+.34.1

-[PULL 40/51] target/arm/kvm64: max cpu: Enable SVE when available
+[PULL 19/30] target/arm: Implement the SETG* instructions
-From: Andrew Jones <drjones@redhat.com>
+The FEAT_MOPS SETG* instructions are very similar to the SET*
 instructions, but as well as setting memory contents they also
 set the MTE tags. They are architecturally required to operate
 on tag-granule aligned regions only.
-Enable SVE in the KVM guest when the 'max' cpu type is configured
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-and KVM supports it. KVM SVE requires use of the new finalize
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-vcpu ioctl, so we add that now too. For starters SVE can only be
+Message-id: 20230912140434.1333369-10-peter.maydell@linaro.org
-turned on or off, getting all vector lengths the host CPU supports
+---
-when on. We'll add the other SVE CPU properties in later patches.
+ target/arm/internals.h         | 10 ++++
  target/arm/tcg/helper-a64.h    |  3 ++
  target/arm/tcg/a64.decode      |  5 ++
  target/arm/tcg/helper-a64.c    | 86 ++++++++++++++++++++++++++++++++--
  target/arm/tcg/mte_helper.c    | 40 ++++++++++++++++
  target/arm/tcg/translate-a64.c | 20 +++++---
 files changed, 155 insertions(+), 9 deletions(-)
-Signed-off-by: Andrew Jones <drjones@redhat.com>
+diff --git a/target/arm/internals.h b/target/arm/internals.h
-Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+index XXXXXXX..XXXXXXX 100644
-Reviewed-by: Eric Auger <eric.auger@redhat.com>
+--- a/target/arm/internals.h
-Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
++++ b/target/arm/internals.h
-Reviewed-by: Beata Michalska <beata.michalska@linaro.org>
+@@ -XXX,XX +XXX,XX @@ uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size,
-Message-id: 20191024121808.9612-7-drjones@redhat.com
+ void mte_check_fail(CPUARMState *env, uint32_t desc,
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+                     uint64_t dirty_ptr, uintptr_t ra);
 ---
  target/arm/kvm_arm.h     | 27 +++++++++++++++++++++++++++
  target/arm/cpu64.c       | 17 ++++++++++++++---
  target/arm/kvm.c         |  5 +++++
  target/arm/kvm64.c       | 20 +++++++++++++++++++-
  tests/arm-cpu-features.c |  4 ++++
 files changed, 69 insertions(+), 4 deletions(-)
 diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/kvm_arm.h
 +++ b/target/arm/kvm_arm.h
@@ -XXX,XX +XXX,XX @@
   */
  int kvm_arm_vcpu_init(CPUState *cs);
 +/**
-+ * kvm_arm_vcpu_finalize
++ * mte_mops_set_tags: Set MTE tags for a portion of a FEAT_MOPS operation
-+ * @cs: CPUState
++ * @env: CPU env
-+ * @feature: int
++ * @dirty_ptr: Start address of memory region (dirty pointer)
-+ *
++ * @size: length of region (guaranteed not to cross page boundary)
-+ * Finalizes the configuration of the specified VCPU feature by
++ * @desc: MTEDESC descriptor word
 + * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring
 + * this are documented in the "KVM_ARM_VCPU_FINALIZE" section of
 + * KVM's API documentation.
 + *
 + * Returns: 0 if success else < 0 error code
 + */
-+int kvm_arm_vcpu_finalize(CPUState *cs, int feature);
++void mte_mops_set_tags(CPUARMState *env, uint64_t dirty_ptr, uint64_t size,
-+
++                       uint32_t desc);
- /**
++
-  * kvm_arm_register_device:
+ static inline int allocation_tag_from_addr(uint64_t ptr)
-  * @mr: memory region for this device
+ {
-@@ -XXX,XX +XXX,XX @@ bool kvm_arm_aarch32_supported(CPUState *cs);
+     return extract64(ptr, 56, 4);
-  */
+diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
- bool kvm_arm_pmu_supported(CPUState *cs);
+index XXXXXXX..XXXXXXX 100644
+--- a/target/arm/tcg/helper-a64.h
-+/**
++++ b/target/arm/tcg/helper-a64.h
-+ * bool kvm_arm_sve_supported:
+@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(unaligned_access, TCG_CALL_NO_WG,
-+ * @cs: CPUState
+ DEF_HELPER_3(setp, void, env, i32, i32)
-+ *
+ DEF_HELPER_3(setm, void, env, i32, i32)
-+ * Returns true if the KVM VCPU can enable SVE and false otherwise.
+ DEF_HELPER_3(sete, void, env, i32, i32)
 +DEF_HELPER_3(setgp, void, env, i32, i32)
 +DEF_HELPER_3(setgm, void, env, i32, i32)
 +DEF_HELPER_3(setge, void, env, i32, i32)
 diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/tcg/a64.decode
 +++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ STZ2G           11011001 11 1 ......... 11 ..... ..... @ldst_tag p=0 w=1
  SETP            00 011001110 ..... 00 . . 01 ..... ..... @set
  SETM            00 011001110 ..... 01 . . 01 ..... ..... @set
  SETE            00 011001110 ..... 10 . . 01 ..... ..... @set
 +
 +# Like SET, but also setting MTE tags
 +SETGP           00 011101110 ..... 00 . . 01 ..... ..... @set
 +SETGM           00 011101110 ..... 01 . . 01 ..... ..... @set
 +SETGE           00 011101110 ..... 10 . . 01 ..... ..... @set
 diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/tcg/helper-a64.c
 +++ b/target/arm/tcg/helper-a64.c
@@ -XXX,XX +XXX,XX @@ static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
      return setsize;
  }
 +/*
 + * Similar, but setting tags. The architecture requires us to do this
 + * in 16-byte chunks. SETG* accesses are not tag checked; they set
 + * the tags.
 + */
-+bool kvm_arm_sve_supported(CPUState *cs);
++static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr,
-+
++                              uint64_t setsize, uint32_t data, int memidx,
- /**
++                              uint32_t *mtedesc, uintptr_t ra)
-  * kvm_arm_get_max_vm_ipa_size - Returns the number of bits in the
++{
-  * IPA address space supported by KVM
++    void *mem;
-@@ -XXX,XX +XXX,XX @@ static inline bool kvm_arm_pmu_supported(CPUState *cs)
++    uint64_t cleanaddr;
-     return false;
++
- }
++    setsize = MIN(setsize, page_limit(toaddr));
++
-+static inline bool kvm_arm_sve_supported(CPUState *cs)
++    cleanaddr = useronly_clean_ptr(toaddr);
-+{
++    /*
-+    return false;
++     * Trapless lookup: returns NULL for invalid page, I/O,
-+}
++     * watchpoints, clean pages, etc.
-+
++     */
- static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms)
++    mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx);
- {
++
-     return -ENOENT;
++#ifndef CONFIG_USER_ONLY
-diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
++    if (unlikely(!mem)) {
-index XXXXXXX..XXXXXXX 100644
++        /*
---- a/target/arm/cpu64.c
++         * Slow-path: just do one write. This will handle the
-+++ b/target/arm/cpu64.c
++         * watchpoint, invalid page, etc handling correctly.
-@@ -XXX,XX +XXX,XX @@ static void cpu_arm_set_sve(Object *obj, Visitor *v, const char *name,
++         * The architecture requires that we do 16 bytes at a time,
-         return;
++         * and we know both ptr and size are 16 byte aligned.
-     }
++         * For clean code pages, the next iteration will see
++         * the page dirty and will use the fast path.
-+    if (value && kvm_enabled() && !kvm_arm_sve_supported(CPU(cpu))) {
++         */
-+        error_setg(errp, "'sve' feature not supported by KVM on this host");
++        uint64_t repldata = data * 0x0101010101010101ULL;
 +        MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx);
 +        cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra);
 +        mte_mops_set_tags(env, toaddr, 16, *mtedesc);
 +        return 16;
 +    }
 +#endif
 +    /* Easy case: just memset the host memory */
 +    memset(mem, data, setsize);
 +    mte_mops_set_tags(env, toaddr, setsize, *mtedesc);
 +    return setsize;
 +}
 +
  typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr,
                          uint64_t setsize, uint32_t data,
                          int memidx, uint32_t *mtedesc, uintptr_t ra);
@@ -XXX,XX +XXX,XX @@ static bool mte_checks_needed(uint64_t ptr, uint32_t desc)
      return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr));
  }
 +/* Take an exception if the SETG addr/size are not granule aligned */
 +static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size,
 +                                 uint32_t memidx, uintptr_t ra)
 +{
 +    if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) ||
 +        !QEMU_IS_ALIGNED(size, TAG_GRANULE)) {
 +        arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
 +                                    memidx, ra);
 +
 +    }
 +}
 +
  /*
   * For the Memory Set operation, our implementation chooses
   * always to use "option A", where we update Xd to the final
@@ -XXX,XX +XXX,XX @@ static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
      if (setsize > INT64_MAX) {
          setsize = INT64_MAX;
 +        if (is_setg) {
 +            setsize &= ~0xf;
 +        }
      }
 -    if (!mte_checks_needed(toaddr, mtedesc)) {
 +    if (unlikely(is_setg)) {
 +        check_setg_alignment(env, toaddr, setsize, memidx, ra);
 +    } else if (!mte_checks_needed(toaddr, mtedesc)) {
          mtedesc = 0;
      }
@@ -XXX,XX +XXX,XX @@ void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
      do_setp(env, syndrome, mtedesc, set_step, false, GETPC());
  }
 +void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
 +{
 +    do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC());
 +}
 +
  static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                      StepFn *stepfn, bool is_setg, uintptr_t ra)
  {
@@ -XXX,XX +XXX,XX @@ static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
       * have an IMPDEF check for alignment here.
       */
 -    if (!mte_checks_needed(toaddr, mtedesc)) {
 +    if (unlikely(is_setg)) {
 +        check_setg_alignment(env, toaddr, setsize, memidx, ra);
 +    } else if (!mte_checks_needed(toaddr, mtedesc)) {
          mtedesc = 0;
      }
@@ -XXX,XX +XXX,XX @@ void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
      do_setm(env, syndrome, mtedesc, set_step, false, GETPC());
  }
 +void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
 +{
 +    do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC());
 +}
 +
  static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                      StepFn *stepfn, bool is_setg, uintptr_t ra)
  {
@@ -XXX,XX +XXX,XX @@ static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
                             mops_mismatch_exception_target_el(env), ra);
      }
 -    if (!mte_checks_needed(toaddr, mtedesc)) {
 +    if (unlikely(is_setg)) {
 +        check_setg_alignment(env, toaddr, setsize, memidx, ra);
 +    } else if (!mte_checks_needed(toaddr, mtedesc)) {
          mtedesc = 0;
      }
@@ -XXX,XX +XXX,XX @@ void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
  {
      do_sete(env, syndrome, mtedesc, set_step, false, GETPC());
  }
 +
 +void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
 +{
 +    do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC());
 +}
 diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/tcg/mte_helper.c
 +++ b/target/arm/tcg/mte_helper.c
@@ -XXX,XX +XXX,XX @@ uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size,
          return n * TAG_GRANULE - (ptr - tag_first);
      }
  }
 +
 +void mte_mops_set_tags(CPUARMState *env, uint64_t ptr, uint64_t size,
 +                       uint32_t desc)
 +{
 +    int mmu_idx, tag_count;
 +    uint64_t ptr_tag;
 +    void *mem;
 +
 +    if (!desc) {
 +        /* Tags not actually enabled */
 +        return;
 +    }
 +
-     t = cpu->isar.id_aa64pfr0;
++    mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
-     t = FIELD_DP64(t, ID_AA64PFR0, SVE, value);
++    /* True probe: this will never fault */
-     cpu->isar.id_aa64pfr0 = t;
++    mem = allocation_tag_mem_probe(env, mmu_idx, ptr, MMU_DATA_STORE, size,
-@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
++                                   MMU_DATA_STORE, true, 0);
- {
++    if (!mem) {
-     ARMCPU *cpu = ARM_CPU(obj);
++        return;
-     uint32_t vq;
++    }
-+    uint64_t t;
++
++    /*
-     if (kvm_enabled()) {
++     * We know that ptr and size are both TAG_GRANULE aligned; store
-         kvm_arm_set_cpu_features_from_host(cpu);
++     * the tag from the pointer value into the tag memory.
-+        if (kvm_arm_sve_supported(CPU(cpu))) {
++     */
-+            t = cpu->isar.id_aa64pfr0;
++    ptr_tag = allocation_tag_from_addr(ptr);
-+            t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1);
++    tag_count = size / TAG_GRANULE;
-+            cpu->isar.id_aa64pfr0 = t;
++    if (ptr & TAG_GRANULE) {
-+        }
++        /* Not 2*TAG_GRANULE-aligned: store tag to first nibble */
-     } else {
++        store_tag1_parallel(TAG_GRANULE, mem, ptr_tag);
--        uint64_t t;
++        mem++;
-         uint32_t u;
++        tag_count--;
-         aarch64_a57_initfn(obj);
++    }
++    memset(mem, ptr_tag | (ptr_tag << 4), tag_count / 2);
-@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
++    if (tag_count & 1) {
++        /* Final trailing unaligned nibble */
-         object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_max_vq,
++        mem += tag_count / 2;
-                             cpu_max_set_sve_max_vq, NULL, NULL, &error_fatal);
++        store_tag1_parallel(0, mem, ptr_tag);
--        object_property_add(obj, "sve", "bool", cpu_arm_get_sve,
++    }
--                            cpu_arm_set_sve, NULL, NULL, &error_fatal);
++}
+diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
-         for (vq = 1; vq <= ARM_MAX_VQ; ++vq) {
+index XXXXXXX..XXXXXXX 100644
-             char name[8];
+--- a/target/arm/tcg/translate-a64.c
-@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
++++ b/target/arm/tcg/translate-a64.c
-                                 cpu_arm_set_sve_vq, NULL, NULL, &error_fatal);
+@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
-         }
-     }
+ typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
-+
-+    object_property_add(obj, "sve", "bool", cpu_arm_get_sve,
+-static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, SetFn fn)
-+                        cpu_arm_set_sve, NULL, NULL, &error_fatal);
++static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
- }
++                   bool is_setg, SetFn fn)
+ {
- struct ARMCPUInfo {
+     int memidx;
-diff --git a/target/arm/kvm.c b/target/arm/kvm.c
+     uint32_t syndrome, desc = 0;
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/kvm.c
++    if (is_setg && !dc_isar_feature(aa64_mte, s)) {
-+++ b/target/arm/kvm.c
++        return false;
@@ -XXX,XX +XXX,XX @@ int kvm_arm_vcpu_init(CPUState *cs)
      return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
  }
 +int kvm_arm_vcpu_finalize(CPUState *cs, int feature)
 +{
 +    return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_FINALIZE, &feature);
 +}
 +
  void kvm_arm_init_serror_injection(CPUState *cs)
  {
      cap_has_inject_serror_esr = kvm_check_extension(cs->kvm_state,
 diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/kvm64.c
 +++ b/target/arm/kvm64.c
@@ -XXX,XX +XXX,XX @@ bool kvm_arm_aarch32_supported(CPUState *cpu)
      return kvm_check_extension(s, KVM_CAP_ARM_EL1_32BIT);
  }
 +bool kvm_arm_sve_supported(CPUState *cpu)
 +{
 +    KVMState *s = KVM_STATE(current_machine->accelerator);
 +
 +    return kvm_check_extension(s, KVM_CAP_ARM_SVE);
 +}
 +
  #define ARM_CPU_ID_MPIDR       3, 0, 0, 0, 5
  int kvm_arch_init_vcpu(CPUState *cs)
@@ -XXX,XX +XXX,XX @@ int kvm_arch_init_vcpu(CPUState *cs)
          cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
      }
      if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
 -            cpu->has_pmu = false;
 +        cpu->has_pmu = false;
      }
      if (cpu->has_pmu) {
          cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
      } else {
          unset_feature(&env->features, ARM_FEATURE_PMU);
      }
 +    if (cpu_isar_feature(aa64_sve, cpu)) {
 +        assert(kvm_arm_sve_supported(cs));
 +        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE;
 +    }
      /* Do KVM_ARM_VCPU_INIT ioctl */
      ret = kvm_arm_vcpu_init(cs);
@@ -XXX,XX +XXX,XX @@ int kvm_arch_init_vcpu(CPUState *cs)
          return ret;
      }
 +    if (cpu_isar_feature(aa64_sve, cpu)) {
 +        ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_SVE);
 +        if (ret) {
 +            return ret;
 +        }
 +    }
 +
      /*
-      * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
+      * UNPREDICTABLE cases: we choose to UNDEF, which allows
-      * Currently KVM has its own idea about MPIDR assignment, so we
+      * us to pull this check before the CheckMOPSEnabled() test
-diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c
+@@ -XXX,XX +XXX,XX @@ static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, SetFn fn)
-index XXXXXXX..XXXXXXX 100644
+      * We pass option_a == true, matching our implementation;
---- a/tests/arm-cpu-features.c
+      * we pass wrong_option == false: helper function may set that bit.
-+++ b/tests/arm-cpu-features.c
+      */
-@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data)
+-    syndrome = syn_mop(true, false, (a->nontemp << 1) | a->unpriv,
-         assert_has_feature(qts, "host", "aarch64");
++    syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
-         assert_has_feature(qts, "host", "pmu");
+                        is_epilogue, false, true, a->rd, a->rs, a->rn);
-+        assert_has_feature(qts, "max", "sve");
+-    if (s->mte_active[a->unpriv]) {
-+
++    if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
-         assert_error(qts, "cortex-a15",
+         /* We may need to do MTE tag checking, so assemble the descriptor */
-             "We cannot guarantee the CPU type 'cortex-a15' works "
+         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
-             "with KVM on this host", NULL);
+         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
-     } else {
+@@ -XXX,XX +XXX,XX @@ static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, SetFn fn)
-         assert_has_not_feature(qts, "host", "aarch64");
+     return true;
-         assert_has_not_feature(qts, "host", "pmu");
+ }
-+
-+        assert_has_not_feature(qts, "max", "sve");
+-TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, gen_helper_setp)
-     }
+-TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, gen_helper_setm)
+-TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, gen_helper_sete)
-     qtest_quit(qts);
++TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
 +TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
 +TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
 +TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
 +TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
 +TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
  typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
 --
-.20.1
+.34.1

-[PULL 15/51] target/arm: Split out arm_mmu_idx_el
+[PULL 20/30] target/arm: Implement MTE tag-checking functions for FEAT_MOPS copies
-From: Richard Henderson <richard.henderson@linaro.org>
+The FEAT_MOPS memory copy operations need an extra helper routine
 for checking for MTE tag checking failures beyond the ones we
 already added for memory set operations:
  * mte_mops_probe_rev() does the same job as mte_mops_probe(), but
    it checks tags starting at the provided address and working
    backwards, rather than forwards
-Avoid calling arm_current_el() twice.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-14-richard.henderson@linaro.org
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+Message-id: 20230912140434.1333369-11-peter.maydell@linaro.org
 ---
- target/arm/internals.h |  9 +++++++++
+ target/arm/internals.h      | 17 +++++++
- target/arm/helper.c    | 12 +++++++-----
+ target/arm/tcg/mte_helper.c | 99 +++++++++++++++++++++++++++++++++++++
-files changed, 16 insertions(+), 5 deletions(-)
+files changed, 116 insertions(+)
 diff --git a/target/arm/internals.h b/target/arm/internals.h
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/internals.h
 +++ b/target/arm/internals.h
-@@ -XXX,XX +XXX,XX @@ void arm_cpu_update_virq(ARMCPU *cpu);
+@@ -XXX,XX +XXX,XX @@ uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra);
-  */
+ uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size,
- void arm_cpu_update_vfiq(ARMCPU *cpu);
+                         uint32_t desc);
 +/**
-+ * arm_mmu_idx_el:
++ * mte_mops_probe_rev: Check where the next MTE failure is for a FEAT_MOPS
-+ * @env: The cpu environment
++ *                     operation going in the reverse direction
-+ * @el: The EL to use.
++ * @env: CPU env
 + * @ptr: *end* address of memory region (dirty pointer)
 + * @size: length of region (guaranteed not to cross a page boundary)
 + * @desc: MTEDESC descriptor word (0 means no MTE checks)
 + * Returns: the size of the region that can be copied without hitting
 + *          an MTE tag failure
 + *
-+ * Return the full ARMMMUIdx for the translation regime for EL.
++ * Note that we assume that the caller has already checked the TBI
 + * and TCMA bits with mte_checks_needed() and an MTE check is definitely
 + * required.
 + */
-+ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el);
++uint64_t mte_mops_probe_rev(CPUARMState *env, uint64_t ptr, uint64_t size,
 +                            uint32_t desc);
 +
  /**
-  * arm_mmu_idx:
+  * mte_check_fail: Record an MTE tag check failure
-  * @env: The cpu environment
+  * @env: CPU env
-diff --git a/target/arm/helper.c b/target/arm/helper.c
+diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
 index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
+--- a/target/arm/tcg/mte_helper.c
-+++ b/target/arm/helper.c
++++ b/target/arm/tcg/mte_helper.c
-@@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
+@@ -XXX,XX +XXX,XX @@ static int checkN(uint8_t *mem, int odd, int cmp, int count)
      return n;
  }
- #endif
++/**
--ARMMMUIdx arm_mmu_idx(CPUARMState *env)
++ * checkNrev:
-+ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el)
++ * @mem: tag memory to test
- {
++ * @odd: true to begin testing at tags at odd nibble
--    int el;
++ * @cmp: the tag to compare against
--
++ * @count: number of tags to test
-     if (arm_feature(env, ARM_FEATURE_M)) {
++ *
-         return arm_v7m_mmu_idx_for_secstate(env, env->v7m.secure);
++ * Return the number of successful tests.
-     }
++ * Thus a return value < @count indicates a failure.
++ *
--    el = arm_current_el(env);
++ * This is like checkN, but it runs backwards, checking the
-     if (el < 2 && arm_is_secure_below_el3(env)) {
++ * tags starting with @mem and then the tags preceding it.
-         return ARMMMUIdx_S1SE0 + el;
++ * This is needed by the backwards-memory-copying operations.
-     } else {
++ */
-@@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_mmu_idx(CPUARMState *env)
++static int checkNrev(uint8_t *mem, int odd, int cmp, int count)
 +{
 +    int n = 0, diff;
 +
 +    /* Replicate the test tag and compare.  */
 +    cmp *= 0x11;
 +    diff = *mem-- ^ cmp;
 +
 +    if (!odd) {
 +        goto start_even;
 +    }
 +
 +    while (1) {
 +        /* Test odd tag. */
 +        if (unlikely((diff) & 0xf0)) {
 +            break;
 +        }
 +        if (++n == count) {
 +            break;
 +        }
 +
 +    start_even:
 +        /* Test even tag. */
 +        if (unlikely((diff) & 0x0f)) {
 +            break;
 +        }
 +        if (++n == count) {
 +            break;
 +        }
 +
 +        diff = *mem-- ^ cmp;
 +    }
 +    return n;
 +}
 +
  /**
   * mte_probe_int() - helper for mte_probe and mte_check
   * @env: CPU environment
@@ -XXX,XX +XXX,XX @@ uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size,
      }
  }
-+ARMMMUIdx arm_mmu_idx(CPUARMState *env)
++uint64_t mte_mops_probe_rev(CPUARMState *env, uint64_t ptr, uint64_t size,
 +                            uint32_t desc)
 +{
-+    return arm_mmu_idx_el(env, arm_current_el(env));
++    int mmu_idx, tag_count;
 +    uint64_t ptr_tag, tag_first, tag_last;
 +    void *mem;
 +    bool w = FIELD_EX32(desc, MTEDESC, WRITE);
 +    uint32_t n;
 +
 +    mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
 +    /* True probe; this will never fault */
 +    mem = allocation_tag_mem_probe(env, mmu_idx, ptr,
 +                                   w ? MMU_DATA_STORE : MMU_DATA_LOAD,
 +                                   size, MMU_DATA_LOAD, true, 0);
 +    if (!mem) {
 +        return size;
 +    }
 +
 +    /*
 +     * TODO: checkNrev() is not designed for checks of the size we expect
 +     * for FEAT_MOPS operations, so we should implement this differently.
 +     * Maybe we should do something like
 +     *   if (region start and size are aligned nicely) {
 +     *      do direct loads of 64 tag bits at a time;
 +     *   } else {
 +     *      call checkNrev()
 +     *   }
 +     */
 +    /* Round the bounds to the tag granule, and compute the number of tags. */
 +    ptr_tag = allocation_tag_from_addr(ptr);
 +    tag_first = QEMU_ALIGN_DOWN(ptr - (size - 1), TAG_GRANULE);
 +    tag_last = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
 +    tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;
 +    n = checkNrev(mem, ptr & TAG_GRANULE, ptr_tag, tag_count);
 +    if (likely(n == tag_count)) {
 +        return size;
 +    }
 +
 +    /*
 +     * Failure; for the first granule, it's at @ptr. Otherwise
 +     * it's at the last byte of the nth granule. Calculate how
 +     * many bytes we can access without hitting that failure.
 +     */
 +    if (n == 0) {
 +        return 0;
 +    } else {
 +        return (n - 1) * TAG_GRANULE + ((ptr + 1) - tag_last);
 +    }
 +}
 +
- int cpu_mmu_index(CPUARMState *env, bool ifetch)
+ void mte_mops_set_tags(CPUARMState *env, uint64_t ptr, uint64_t size,
                         uint32_t desc)
  {
-     return arm_to_core_mmu_idx(arm_mmu_idx(env));
-@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_internal(CPUARMState *env)
- {
-     int el = arm_current_el(env);
-     int fp_el = fp_exception_el(env, el);
--    ARMMMUIdx mmu_idx = arm_mmu_idx(env);
-+    ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
-     if (is_a64(env)) {
-         return rebuild_hflags_a64(env, el, fp_el, mmu_idx);
 --
-.20.1
+.34.1

-[PULL 39/51] target/arm/kvm64: Add kvm_arch_get/put_sve
+[PULL 21/30] target/arm: Implement the CPY* instructions
-From: Andrew Jones <drjones@redhat.com>
+The FEAT_MOPS CPY* instructions implement memory copies. These
 come in both "always forwards" (memcpy-style) and "overlap OK"
 (memmove-style) flavours.
-These are the SVE equivalents to kvm_arch_get/put_fpsimd. Note, the
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-swabbing is different than it is for fpsmid because the vector format
+Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-is a little-endian stream of words.
+Message-id: 20230912140434.1333369-12-peter.maydell@linaro.org
 ---
  target/arm/tcg/helper-a64.h    |   7 +
  target/arm/tcg/a64.decode      |  14 +
  target/arm/tcg/helper-a64.c    | 454 +++++++++++++++++++++++++++++++++
  target/arm/tcg/translate-a64.c |  60 +++++
 files changed, 535 insertions(+)
-Signed-off-by: Andrew Jones <drjones@redhat.com>
+diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 Reviewed-by: Eric Auger <eric.auger@redhat.com>
 Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
 Message-id: 20191024121808.9612-6-drjones@redhat.com
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
  target/arm/kvm64.c | 185 ++++++++++++++++++++++++++++++++++++++-------
 file changed, 156 insertions(+), 29 deletions(-)
 diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
 index XXXXXXX..XXXXXXX 100644
---- a/target/arm/kvm64.c
+--- a/target/arm/tcg/helper-a64.h
-+++ b/target/arm/kvm64.c
++++ b/target/arm/tcg/helper-a64.h
-@@ -XXX,XX +XXX,XX @@ int kvm_arch_destroy_vcpu(CPUState *cs)
+@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(sete, void, env, i32, i32)
- bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
+ DEF_HELPER_3(setgp, void, env, i32, i32)
  DEF_HELPER_3(setgm, void, env, i32, i32)
  DEF_HELPER_3(setge, void, env, i32, i32)
 +
 +DEF_HELPER_4(cpyp, void, env, i32, i32, i32)
 +DEF_HELPER_4(cpym, void, env, i32, i32, i32)
 +DEF_HELPER_4(cpye, void, env, i32, i32, i32)
 +DEF_HELPER_4(cpyfp, void, env, i32, i32, i32)
 +DEF_HELPER_4(cpyfm, void, env, i32, i32, i32)
 +DEF_HELPER_4(cpyfe, void, env, i32, i32, i32)
 diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/tcg/a64.decode
 +++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ SETE            00 011001110 ..... 10 . . 01 ..... ..... @set
  SETGP           00 011101110 ..... 00 . . 01 ..... ..... @set
  SETGM           00 011101110 ..... 01 . . 01 ..... ..... @set
  SETGE           00 011101110 ..... 10 . . 01 ..... ..... @set
 +
 +# Memmove/Memcopy: the CPY insns allow overlapping src/dest and
 +# copy in the correct direction; the CPYF insns always copy forwards.
 +#
 +# options has the nontemporal and unpriv bits for src and dest
 +&cpy rs rn rd options
 +@cpy            .. ... . ..... rs:5 options:4 .. rn:5 rd:5 &cpy
 +
 +CPYFP           00 011 0 01000 ..... .... 01 ..... ..... @cpy
 +CPYFM           00 011 0 01010 ..... .... 01 ..... ..... @cpy
 +CPYFE           00 011 0 01100 ..... .... 01 ..... ..... @cpy
 +CPYP            00 011 1 01000 ..... .... 01 ..... ..... @cpy
 +CPYM            00 011 1 01010 ..... .... 01 ..... ..... @cpy
 +CPYE            00 011 1 01100 ..... .... 01 ..... ..... @cpy
 diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/tcg/helper-a64.c
 +++ b/target/arm/tcg/helper-a64.c
@@ -XXX,XX +XXX,XX @@ static uint64_t page_limit(uint64_t addr)
      return TARGET_PAGE_ALIGN(addr + 1) - addr;
  }
 +/*
 + * Return the number of bytes we can copy starting from addr and working
 + * backwards without crossing a page boundary.
 + */
 +static uint64_t page_limit_rev(uint64_t addr)
 +{
 +    return (addr & ~TARGET_PAGE_MASK) + 1;
 +}
 +
  /*
   * Perform part of a memory set on an area of guest memory starting at
   * toaddr (a dirty address) and extending for setsize bytes.
@@ -XXX,XX +XXX,XX @@ void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
  {
-     /* Return true if the regidx is a register we should synchronize
+     do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC());
--     * via the cpreg_tuples array (ie is not a core reg we sync by
+ }
 -     * hand in kvm_arch_get/put_registers())
 +     * via the cpreg_tuples array (ie is not a core or sve reg that
 +     * we sync by hand in kvm_arch_get/put_registers())
       */
      switch (regidx & KVM_REG_ARM_COPROC_MASK) {
      case KVM_REG_ARM_CORE:
 +    case KVM_REG_ARM64_SVE:
          return false;
      default:
          return true;
@@ -XXX,XX +XXX,XX @@ int kvm_arm_cpreg_level(uint64_t regidx)
  static int kvm_arch_put_fpsimd(CPUState *cs)
  {
 -    ARMCPU *cpu = ARM_CPU(cs);
 -    CPUARMState *env = &cpu->env;
 +    CPUARMState *env = &ARM_CPU(cs)->env;
      struct kvm_one_reg reg;
 -    uint32_t fpr;
      int i, ret;
      for (i = 0; i < 32; i++) {
@@ -XXX,XX +XXX,XX @@ static int kvm_arch_put_fpsimd(CPUState *cs)
          }
      }
 -    reg.addr = (uintptr_t)(&fpr);
 -    fpr = vfp_get_fpsr(env);
 -    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
 -    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 -    if (ret) {
 -        return ret;
 +    return 0;
 +}
 +
 +/*
-+ * SVE registers are encoded in KVM's memory in an endianness-invariant format.
++ * Perform part of a memory copy from the guest memory at fromaddr
-+ * The byte at offset i from the start of the in-memory representation contains
++ * and extending for copysize bytes, to the guest memory at
-+ * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the
++ * toaddr. Both addresses are dirty.
-+ * lowest offsets are stored in the lowest memory addresses, then that nearly
++ *
-+ * matches QEMU's representation, which is to use an array of host-endian
++ * Returns the number of bytes actually copied, which might be less than
-+ * uint64_t's, where the lower offsets are at the lower indices. To complete
++ * copysize; the caller should loop until the whole copy has been done.
-+ * the translation we just need to byte swap the uint64_t's on big-endian hosts.
++ * The caller should ensure that the guest registers are correct
 + * for the possibility that the first byte of the copy encounters
 + * an exception or watchpoint. We guarantee not to take any faults
 + * for bytes other than the first.
 + */
-+static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
++static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr,
-+{
++                          uint64_t copysize, int wmemidx, int rmemidx,
-+#ifdef HOST_WORDS_BIGENDIAN
++                          uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
-+    int i;
++{
-+
++    void *rmem;
-+    for (i = 0; i < nr; ++i) {
++    void *wmem;
-+        dst[i] = bswap64(src[i]);
++
-     }
++    /* Don't cross a page boundary on either source or destination */
++    copysize = MIN(copysize, page_limit(toaddr));
--    reg.addr = (uintptr_t)(&fpr);
++    copysize = MIN(copysize, page_limit(fromaddr));
--    fpr = vfp_get_fpcr(env);
++    /*
--    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
++     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
-+    return dst;
++     * or else copy up to but not including the byte with the mismatch.
-+#else
++     */
-+    return src;
++    if (*rdesc) {
 +        uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc);
 +        if (mtesize == 0) {
 +            mte_check_fail(env, *rdesc, fromaddr, ra);
 +            *rdesc = 0;
 +        } else {
 +            copysize = MIN(copysize, mtesize);
 +        }
 +    }
 +    if (*wdesc) {
 +        uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc);
 +        if (mtesize == 0) {
 +            mte_check_fail(env, *wdesc, toaddr, ra);
 +            *wdesc = 0;
 +        } else {
 +            copysize = MIN(copysize, mtesize);
 +        }
 +    }
 +
 +    toaddr = useronly_clean_ptr(toaddr);
 +    fromaddr = useronly_clean_ptr(fromaddr);
 +    /* Trapless lookup of whether we can get a host memory pointer */
 +    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
 +    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);
 +
 +#ifndef CONFIG_USER_ONLY
 +    /*
 +     * If we don't have host memory for both source and dest then just
 +     * do a single byte copy. This will handle watchpoints, invalid pages,
 +     * etc correctly. For clean code pages, the next iteration will see
 +     * the page dirty and will use the fast path.
 +     */
 +    if (unlikely(!rmem || !wmem)) {
 +        uint8_t byte;
 +        if (rmem) {
 +            byte = *(uint8_t *)rmem;
 +        } else {
 +            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
 +        }
 +        if (wmem) {
 +            *(uint8_t *)wmem = byte;
 +        } else {
 +            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
 +        }
 +        return 1;
 +    }
 +#endif
++    /* Easy case: just memmove the host memory */
++    memmove(wmem, rmem, copysize);
++    return copysize;
 +}
 +
 +/*
-+ * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
++ * Do part of a backwards memory copy. Here toaddr and fromaddr point
-+ * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
++ * to the *last* byte to be copied.
 + * code the slice index to zero for now as it's unlikely we'll need more than
 + * one slice for quite some time.
 + */
-+static int kvm_arch_put_sve(CPUState *cs)
++static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr,
-+{
++                              uint64_t fromaddr,
-+    ARMCPU *cpu = ARM_CPU(cs);
++                              uint64_t copysize, int wmemidx, int rmemidx,
-+    CPUARMState *env = &cpu->env;
++                              uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
-+    uint64_t tmp[ARM_MAX_VQ * 2];
++{
-+    uint64_t *r;
++    void *rmem;
-+    struct kvm_one_reg reg;
++    void *wmem;
-+    int n, ret;
++
-+
++    /* Don't cross a page boundary on either source or destination */
-+    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
++    copysize = MIN(copysize, page_limit_rev(toaddr));
-+        r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2);
++    copysize = MIN(copysize, page_limit_rev(fromaddr));
-+        reg.addr = (uintptr_t)r;
++
-+        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
++    /*
-+        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
++     * Handle MTE tag checks: either handle the tag mismatch for byte 0,
-+        if (ret) {
++     * or else copy up to but not including the byte with the mismatch.
-+            return ret;
++     */
-+        }
++    if (*rdesc) {
-+    }
++        uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc);
-+
++        if (mtesize == 0) {
-+    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
++            mte_check_fail(env, *rdesc, fromaddr, ra);
-+        r = sve_bswap64(tmp, r = &env->vfp.pregs[n].p[0],
++            *rdesc = 0;
-+                        DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
++        } else {
-+        reg.addr = (uintptr_t)r;
++            copysize = MIN(copysize, mtesize);
-+        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
++        }
-+        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
++    }
-+        if (ret) {
++    if (*wdesc) {
-+            return ret;
++        uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc);
-+        }
++        if (mtesize == 0) {
-+    }
++            mte_check_fail(env, *wdesc, toaddr, ra);
-+
++            *wdesc = 0;
-+    r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0],
++        } else {
-+                    DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
++            copysize = MIN(copysize, mtesize);
-+    reg.addr = (uintptr_t)r;
++        }
-+    reg.id = KVM_REG_ARM64_SVE_FFR(0);
++    }
-     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
++
-     if (ret) {
++    toaddr = useronly_clean_ptr(toaddr);
-         return ret;
++    fromaddr = useronly_clean_ptr(fromaddr);
-@@ -XXX,XX +XXX,XX @@ int kvm_arch_put_registers(CPUState *cs, int level)
++    /* Trapless lookup of whether we can get a host memory pointer */
- {
++    wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
-     struct kvm_one_reg reg;
++    rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);
-     uint64_t val;
++
-+    uint32_t fpr;
++#ifndef CONFIG_USER_ONLY
-     int i, ret;
++    /*
-     unsigned int el;
++     * If we don't have host memory for both source and dest then just
++     * do a single byte copy. This will handle watchpoints, invalid pages,
-@@ -XXX,XX +XXX,XX @@ int kvm_arch_put_registers(CPUState *cs, int level)
++     * etc correctly. For clean code pages, the next iteration will see
-         }
++     * the page dirty and will use the fast path.
-     }
++     */
++    if (unlikely(!rmem || !wmem)) {
--    ret = kvm_arch_put_fpsimd(cs);
++        uint8_t byte;
-+    if (cpu_isar_feature(aa64_sve, cpu)) {
++        if (rmem) {
-+        ret = kvm_arch_put_sve(cs);
++            byte = *(uint8_t *)rmem;
 +        } else {
 +            byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
 +        }
 +        if (wmem) {
 +            *(uint8_t *)wmem = byte;
 +        } else {
 +            cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
 +        }
 +        return 1;
 +    }
 +#endif
 +    /*
 +     * Easy case: just memmove the host memory. Note that wmem and
 +     * rmem here point to the *last* byte to copy.
 +     */
 +    memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize);
 +    return copysize;
 +}
 +
 +/*
 + * For the Memory Copy operation, our implementation always chooses
 + * to use "option A", where we update Xd and Xs to the final addresses
 + * in the CPYP insn, and then in CPYM and CPYE only need to update Xn.
 + *
 + * @env: CPU
 + * @syndrome: syndrome value for mismatch exceptions
 + * (also contains the register numbers we need to use)
 + * @wdesc: MTE descriptor for the writes (destination)
 + * @rdesc: MTE descriptor for the reads (source)
 + * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards)
 + */
 +static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
 +                    uint32_t rdesc, uint32_t move, uintptr_t ra)
 +{
 +    int rd = mops_destreg(syndrome);
 +    int rs = mops_srcreg(syndrome);
 +    int rn = mops_sizereg(syndrome);
 +    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
 +    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
 +    bool forwards = true;
 +    uint64_t toaddr = env->xregs[rd];
 +    uint64_t fromaddr = env->xregs[rs];
 +    uint64_t copysize = env->xregs[rn];
 +    uint64_t stagecopysize, step;
 +
 +    check_mops_enabled(env, ra);
 +
 +
 +    if (move) {
 +        /*
 +         * Copy backwards if necessary. The direction for a non-overlapping
 +         * copy is IMPDEF; we choose forwards.
 +         */
 +        if (copysize > 0x007FFFFFFFFFFFFFULL) {
 +            copysize = 0x007FFFFFFFFFFFFFULL;
 +        }
 +        uint64_t fs = extract64(fromaddr, 0, 56);
 +        uint64_t ts = extract64(toaddr, 0, 56);
 +        uint64_t fe = extract64(fromaddr + copysize, 0, 56);
 +
 +        if (fs < ts && fe > ts) {
 +            forwards = false;
 +        }
 +    } else {
-+        ret = kvm_arch_put_fpsimd(cs);
++        if (copysize > INT64_MAX) {
-+    }
++            copysize = INT64_MAX;
-+    if (ret) {
++        }
-+        return ret;
++    }
-+    }
++
-+
++    if (!mte_checks_needed(fromaddr, rdesc)) {
-+    reg.addr = (uintptr_t)(&fpr);
++        rdesc = 0;
-+    fpr = vfp_get_fpsr(env);
++    }
-+    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
++    if (!mte_checks_needed(toaddr, wdesc)) {
-+    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
++        wdesc = 0;
-+    if (ret) {
++    }
-+        return ret;
++
-+    }
++    if (forwards) {
-+
++        stagecopysize = MIN(copysize, page_limit(toaddr));
-+    reg.addr = (uintptr_t)(&fpr);
++        stagecopysize = MIN(stagecopysize, page_limit(fromaddr));
-+    fpr = vfp_get_fpcr(env);
++        while (stagecopysize) {
-+    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
++            env->xregs[rd] = toaddr;
-+    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
++            env->xregs[rs] = fromaddr;
-     if (ret) {
++            env->xregs[rn] = copysize;
-         return ret;
++            step = copy_step(env, toaddr, fromaddr, stagecopysize,
-     }
++                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
-@@ -XXX,XX +XXX,XX @@ int kvm_arch_put_registers(CPUState *cs, int level)
++            toaddr += step;
++            fromaddr += step;
- static int kvm_arch_get_fpsimd(CPUState *cs)
++            copysize -= step;
- {
++            stagecopysize -= step;
--    ARMCPU *cpu = ARM_CPU(cs);
++        }
--    CPUARMState *env = &cpu->env;
++        /* Insn completed, so update registers to the Option A format */
-+    CPUARMState *env = &ARM_CPU(cs)->env;
++        env->xregs[rd] = toaddr + copysize;
-     struct kvm_one_reg reg;
++        env->xregs[rs] = fromaddr + copysize;
--    uint32_t fpr;
++        env->xregs[rn] = -copysize;
      int i, ret;
      for (i = 0; i < 32; i++) {
@@ -XXX,XX +XXX,XX @@ static int kvm_arch_get_fpsimd(CPUState *cs)
          }
      }
 -    reg.addr = (uintptr_t)(&fpr);
 -    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
 -    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 -    if (ret) {
 -        return ret;
 -    }
 -    vfp_set_fpsr(env, fpr);
 +    return 0;
 +}
 -    reg.addr = (uintptr_t)(&fpr);
 -    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
 +/*
 + * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
 + * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
 + * code the slice index to zero for now as it's unlikely we'll need more than
 + * one slice for quite some time.
 + */
 +static int kvm_arch_get_sve(CPUState *cs)
 +{
 +    ARMCPU *cpu = ARM_CPU(cs);
 +    CPUARMState *env = &cpu->env;
 +    struct kvm_one_reg reg;
 +    uint64_t *r;
 +    int n, ret;
 +
 +    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
 +        r = &env->vfp.zregs[n].d[0];
 +        reg.addr = (uintptr_t)r;
 +        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
 +        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 +        if (ret) {
 +            return ret;
 +        }
 +        sve_bswap64(r, r, cpu->sve_max_vq * 2);
 +    }
 +
 +    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
 +        r = &env->vfp.pregs[n].p[0];
 +        reg.addr = (uintptr_t)r;
 +        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
 +        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 +        if (ret) {
 +            return ret;
 +        }
 +        sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
 +    }
 +
 +    r = &env->vfp.pregs[FFR_PRED_NUM].p[0];
 +    reg.addr = (uintptr_t)r;
 +    reg.id = KVM_REG_ARM64_SVE_FFR(0);
      ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
      if (ret) {
          return ret;
      }
 -    vfp_set_fpcr(env, fpr);
 +    sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
      return 0;
  }
@@ -XXX,XX +XXX,XX @@ int kvm_arch_get_registers(CPUState *cs)
      struct kvm_one_reg reg;
      uint64_t val;
      unsigned int el;
 +    uint32_t fpr;
      int i, ret;
      ARMCPU *cpu = ARM_CPU(cs);
@@ -XXX,XX +XXX,XX @@ int kvm_arch_get_registers(CPUState *cs)
          env->spsr = env->banked_spsr[i];
      }
 -    ret = kvm_arch_get_fpsimd(cs);
 +    if (cpu_isar_feature(aa64_sve, cpu)) {
 +        ret = kvm_arch_get_sve(cs);
 +    } else {
-+        ret = kvm_arch_get_fpsimd(cs);
++        /*
-+    }
++         * In a reverse copy the to and from addrs in Xs and Xd are the start
-     if (ret) {
++         * of the range, but it's more convenient for us to work with pointers
-         return ret;
++         * to the last byte being copied.
-     }
++         */
++        toaddr += copysize - 1;
-+    reg.addr = (uintptr_t)(&fpr);
++        fromaddr += copysize - 1;
-+    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
++        stagecopysize = MIN(copysize, page_limit_rev(toaddr));
-+    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
++        stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr));
-+    if (ret) {
++        while (stagecopysize) {
-+        return ret;
++            env->xregs[rn] = copysize;
-+    }
++            step = copy_step_rev(env, toaddr, fromaddr, stagecopysize,
-+    vfp_set_fpsr(env, fpr);
++                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
-+
++            copysize -= step;
-+    reg.addr = (uintptr_t)(&fpr);
++            stagecopysize -= step;
-+    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
++            toaddr -= step;
-+    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
++            fromaddr -= step;
-+    if (ret) {
++        }
-+        return ret;
++        /*
-+    }
++         * Insn completed, so update registers to the Option A format.
-+    vfp_set_fpcr(env, fpr);
++         * For a reverse copy this is no different to the CPYP input format.
-+
++         */
-     ret = kvm_get_vcpu_events(cpu);
++        env->xregs[rn] = copysize;
-     if (ret) {
++    }
-         return ret;
++
 +    /* Set NZCV = 0000 to indicate we are an Option A implementation */
 +    env->NF = 0;
 +    env->ZF = 1; /* our env->ZF encoding is inverted */
 +    env->CF = 0;
 +    env->VF = 0;
 +    return;
 +}
 +
 +void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
 +                  uint32_t rdesc)
 +{
 +    do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC());
 +}
 +
 +void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
 +                   uint32_t rdesc)
 +{
 +    do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC());
 +}
 +
 +static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
 +                    uint32_t rdesc, uint32_t move, uintptr_t ra)
 +{
 +    /* Main: we choose to copy until less than a page remaining */
 +    CPUState *cs = env_cpu(env);
 +    int rd = mops_destreg(syndrome);
 +    int rs = mops_srcreg(syndrome);
 +    int rn = mops_sizereg(syndrome);
 +    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
 +    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
 +    bool forwards = true;
 +    uint64_t toaddr, fromaddr, copysize, step;
 +
 +    check_mops_enabled(env, ra);
 +
 +    /* We choose to NOP out "no data to copy" before consistency checks */
 +    if (env->xregs[rn] == 0) {
 +        return;
 +    }
 +
 +    check_mops_wrong_option(env, syndrome, ra);
 +
 +    if (move) {
 +        forwards = (int64_t)env->xregs[rn] < 0;
 +    }
 +
 +    if (forwards) {
 +        toaddr = env->xregs[rd] + env->xregs[rn];
 +        fromaddr = env->xregs[rs] + env->xregs[rn];
 +        copysize = -env->xregs[rn];
 +    } else {
 +        copysize = env->xregs[rn];
 +        /* This toaddr and fromaddr point to the *last* byte to copy */
 +        toaddr = env->xregs[rd] + copysize - 1;
 +        fromaddr = env->xregs[rs] + copysize - 1;
 +    }
 +
 +    if (!mte_checks_needed(fromaddr, rdesc)) {
 +        rdesc = 0;
 +    }
 +    if (!mte_checks_needed(toaddr, wdesc)) {
 +        wdesc = 0;
 +    }
 +
 +    /* Our implementation has no particular parameter requirements for CPYM */
 +
 +    /* Do the actual memmove */
 +    if (forwards) {
 +        while (copysize >= TARGET_PAGE_SIZE) {
 +            step = copy_step(env, toaddr, fromaddr, copysize,
 +                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
 +            toaddr += step;
 +            fromaddr += step;
 +            copysize -= step;
 +            env->xregs[rn] = -copysize;
 +            if (copysize >= TARGET_PAGE_SIZE &&
 +                unlikely(cpu_loop_exit_requested(cs))) {
 +                cpu_loop_exit_restore(cs, ra);
 +            }
 +        }
 +    } else {
 +        while (copysize >= TARGET_PAGE_SIZE) {
 +            step = copy_step_rev(env, toaddr, fromaddr, copysize,
 +                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
 +            toaddr -= step;
 +            fromaddr -= step;
 +            copysize -= step;
 +            env->xregs[rn] = copysize;
 +            if (copysize >= TARGET_PAGE_SIZE &&
 +                unlikely(cpu_loop_exit_requested(cs))) {
 +                cpu_loop_exit_restore(cs, ra);
 +            }
 +        }
 +    }
 +}
 +
 +void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
 +                  uint32_t rdesc)
 +{
 +    do_cpym(env, syndrome, wdesc, rdesc, true, GETPC());
 +}
 +
 +void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
 +                   uint32_t rdesc)
 +{
 +    do_cpym(env, syndrome, wdesc, rdesc, false, GETPC());
 +}
 +
 +static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
 +                    uint32_t rdesc, uint32_t move, uintptr_t ra)
 +{
 +    /* Epilogue: do the last partial page */
 +    int rd = mops_destreg(syndrome);
 +    int rs = mops_srcreg(syndrome);
 +    int rn = mops_sizereg(syndrome);
 +    uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
 +    uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
 +    bool forwards = true;
 +    uint64_t toaddr, fromaddr, copysize, step;
 +
 +    check_mops_enabled(env, ra);
 +
 +    /* We choose to NOP out "no data to copy" before consistency checks */
 +    if (env->xregs[rn] == 0) {
 +        return;
 +    }
 +
 +    check_mops_wrong_option(env, syndrome, ra);
 +
 +    if (move) {
 +        forwards = (int64_t)env->xregs[rn] < 0;
 +    }
 +
 +    if (forwards) {
 +        toaddr = env->xregs[rd] + env->xregs[rn];
 +        fromaddr = env->xregs[rs] + env->xregs[rn];
 +        copysize = -env->xregs[rn];
 +    } else {
 +        copysize = env->xregs[rn];
 +        /* This toaddr and fromaddr point to the *last* byte to copy */
 +        toaddr = env->xregs[rd] + copysize - 1;
 +        fromaddr = env->xregs[rs] + copysize - 1;
 +    }
 +
 +    if (!mte_checks_needed(fromaddr, rdesc)) {
 +        rdesc = 0;
 +    }
 +    if (!mte_checks_needed(toaddr, wdesc)) {
 +        wdesc = 0;
 +    }
 +
 +    /* Check the size; we don't want to have to do a check-for-interrupts */
 +    if (copysize >= TARGET_PAGE_SIZE) {
 +        raise_exception_ra(env, EXCP_UDEF, syndrome,
 +                           mops_mismatch_exception_target_el(env), ra);
 +    }
 +
 +    /* Do the actual memmove */
 +    if (forwards) {
 +        while (copysize > 0) {
 +            step = copy_step(env, toaddr, fromaddr, copysize,
 +                             wmemidx, rmemidx, &wdesc, &rdesc, ra);
 +            toaddr += step;
 +            fromaddr += step;
 +            copysize -= step;
 +            env->xregs[rn] = -copysize;
 +        }
 +    } else {
 +        while (copysize > 0) {
 +            step = copy_step_rev(env, toaddr, fromaddr, copysize,
 +                                 wmemidx, rmemidx, &wdesc, &rdesc, ra);
 +            toaddr -= step;
 +            fromaddr -= step;
 +            copysize -= step;
 +            env->xregs[rn] = copysize;
 +        }
 +    }
 +}
 +
 +void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
 +                  uint32_t rdesc)
 +{
 +    do_cpye(env, syndrome, wdesc, rdesc, true, GETPC());
 +}
 +
 +void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
 +                   uint32_t rdesc)
 +{
 +    do_cpye(env, syndrome, wdesc, rdesc, false, GETPC());
 +}
 diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/tcg/translate-a64.c
 +++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
  TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
  TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
 +typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
 +
 +static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
 +{
 +    int rmemidx, wmemidx;
 +    uint32_t syndrome, rdesc = 0, wdesc = 0;
 +    bool wunpriv = extract32(a->options, 0, 1);
 +    bool runpriv = extract32(a->options, 1, 1);
 +
 +    /*
 +     * UNPREDICTABLE cases: we choose to UNDEF, which allows
 +     * us to pull this check before the CheckMOPSEnabled() test
 +     * (which we do in the helper function)
 +     */
 +    if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
 +        a->rd == 31 || a->rs == 31 || a->rn == 31) {
 +        return false;
 +    }
 +
 +    rmemidx = get_a64_user_mem_index(s, runpriv);
 +    wmemidx = get_a64_user_mem_index(s, wunpriv);
 +
 +    /*
 +     * We pass option_a == true, matching our implementation;
 +     * we pass wrong_option == false: helper function may set that bit.
 +     */
 +    syndrome = syn_mop(false, false, a->options, is_epilogue,
 +                       false, true, a->rd, a->rs, a->rn);
 +
 +    /* If we need to do MTE tag checking, assemble the descriptors */
 +    if (s->mte_active[runpriv]) {
 +        rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
 +        rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
 +    }
 +    if (s->mte_active[wunpriv]) {
 +        wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
 +        wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
 +        wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
 +    }
 +    /* The helper function needs these parts of the descriptor regardless */
 +    rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
 +    wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
 +
 +    /*
 +     * The helper needs the register numbers, but since they're in
 +     * the syndrome anyway, we let it extract them from there rather
 +     * than passing in an extra three integer arguments.
 +     */
 +    fn(cpu_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
 +       tcg_constant_i32(rdesc));
 +    return true;
 +}
 +
 +TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
 +TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
 +TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
 +TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
 +TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
 +TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
 +
  typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
  static bool gen_rri(DisasContext *s, arg_rri_sf *a,
 --
-.20.1
+.34.1

-[PULL 29/51] hw/dma/xilinx_axidma.c: Switch to transaction-based ptimer API
+[PULL 22/30] target/arm: Enable FEAT_MOPS for CPU 'max'
-Switch the xilinx_axidma code away from bottom-half based ptimers to
+Enable FEAT_MOPS on the AArch64 'max' CPU, and add it to
-the new transaction-based ptimer API.  This just requires adding
+the list of features we implement.
 begin/commit calls around the various places that modify the ptimer
 state, and using the new ptimer_init() function to create the timer.
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
+Message-id: 20230912140434.1333369-13-peter.maydell@linaro.org
 Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
 Message-id: 20191017132122.4402-4-peter.maydell@linaro.org
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- hw/dma/xilinx_axidma.c | 9 +++++----
+ docs/system/arm/emulation.rst | 1 +
-file changed, 5 insertions(+), 4 deletions(-)
+ linux-user/elfload.c          | 1 +
  target/arm/tcg/cpu64.c        | 1 +
 files changed, 3 insertions(+)
-diff --git a/hw/dma/xilinx_axidma.c b/hw/dma/xilinx_axidma.c
+diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
 index XXXXXXX..XXXXXXX 100644
---- a/hw/dma/xilinx_axidma.c
+--- a/docs/system/arm/emulation.rst
-+++ b/hw/dma/xilinx_axidma.c
++++ b/docs/system/arm/emulation.rst
-@@ -XXX,XX +XXX,XX @@
+@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
- #include "hw/ptimer.h"
+ - FEAT_LSE (Large System Extensions)
- #include "hw/qdev-properties.h"
+ - FEAT_LSE2 (Large System Extensions v2)
- #include "qemu/log.h"
+ - FEAT_LVA (Large Virtual Address space)
--#include "qemu/main-loop.h"
++- FEAT_MOPS (Standardization of memory operations)
- #include "qemu/module.h"
+ - FEAT_MTE (Memory Tagging Extension)
+ - FEAT_MTE2 (Memory Tagging Extension)
- #include "hw/stream.h"
+ - FEAT_MTE3 (MTE Asymmetric Fault Handling)
-@@ -XXX,XX +XXX,XX @@ enum {
+diff --git a/linux-user/elfload.c b/linux-user/elfload.c
- };
+index XXXXXXX..XXXXXXX 100644
+--- a/linux-user/elfload.c
- struct Stream {
++++ b/linux-user/elfload.c
--    QEMUBH *bh;
+@@ -XXX,XX +XXX,XX @@ uint32_t get_elf_hwcap2(void)
-     ptimer_state *ptimer;
+     GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64);
-     qemu_irq irq;
+     GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64);
+     GET_FEATURE_ID(aa64_hbc, ARM_HWCAP2_A64_HBC);
-@@ -XXX,XX +XXX,XX @@ static void stream_complete(struct Stream *s)
++    GET_FEATURE_ID(aa64_mops, ARM_HWCAP2_A64_MOPS);
-     unsigned int comp_delay;
+     return hwcaps;
      /* Start the delayed timer.  */
 +    ptimer_transaction_begin(s->ptimer);
      comp_delay = s->regs[R_DMACR] >> 24;
      if (comp_delay) {
          ptimer_stop(s->ptimer);
@@ -XXX,XX +XXX,XX @@ static void stream_complete(struct Stream *s)
          s->regs[R_DMASR] |= DMASR_IOC_IRQ;
          stream_reload_complete_cnt(s);
      }
 +    ptimer_transaction_commit(s->ptimer);
  }
+diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
- static void stream_process_mem2s(struct Stream *s, StreamSlave *tx_data_dev,
+index XXXXXXX..XXXXXXX 100644
-@@ -XXX,XX +XXX,XX @@ static void xilinx_axidma_realize(DeviceState *dev, Error **errp)
+--- a/target/arm/tcg/cpu64.c
-         struct Stream *st = &s->streams[i];
++++ b/target/arm/tcg/cpu64.c
+@@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj)
-         st->nr = i;
+     cpu->isar.id_aa64isar1 = t;
--        st->bh = qemu_bh_new(timer_hit, st);
--        st->ptimer = ptimer_init_with_bh(st->bh, PTIMER_POLICY_DEFAULT);
+     t = cpu->isar.id_aa64isar2;
-+        st->ptimer = ptimer_init(timer_hit, st, PTIMER_POLICY_DEFAULT);
++    t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1);     /* FEAT_MOPS */
-+        ptimer_transaction_begin(st->ptimer);
+     t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1);      /* FEAT_HBC */
-         ptimer_set_freq(st->ptimer, s->freqhz);
+     cpu->isar.id_aa64isar2 = t;
 +        ptimer_transaction_commit(st->ptimer);
      }
      return;
 --
-.20.1
+.34.1

-[PULL 02/51] aspeed: Add an AST2600 eval board
+[PULL 23/30] audio/jackaudio: Avoid dynamic stack allocation in qjack_client_init
-From: Cédric Le Goater <clg@kaod.org>
+Avoid a dynamic stack allocation in qjack_client_init(), by using
 a g_autofree heap allocation instead.
-Signed-off-by: Cédric Le Goater <clg@kaod.org>
+(We stick with allocate + snprintf() because the JACK API requires
-Reviewed-by: Joel Stanley <joel@jms.id.au>
+the name to be no more than its maximum size, so g_strdup_printf()
-Message-id: 20191023130455.1347-3-clg@kaod.org
+would require an extra truncation step.)
 The codebase has very few VLAs, and if we can get rid of them all we
 can make the compiler error on new additions.  This is a defensive
 measure against security bugs where an on-stack dynamic allocation
 isn't correctly size-checked (e.g.  CVE-2021-3527).
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
+Reviewed-by: Francisco Iglesias <frasse.iglesias@gmail.com>
+Reviewed-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
+Message-id: 20230818155846.1651287-2-peter.maydell@linaro.org
 ---
- include/hw/arm/aspeed.h |  1 +
+ audio/jackaudio.c | 5 +++--
- hw/arm/aspeed.c         | 23 +++++++++++++++++++++++
+file changed, 3 insertions(+), 2 deletions(-)
 files changed, 24 insertions(+)
-diff --git a/include/hw/arm/aspeed.h b/include/hw/arm/aspeed.h
+diff --git a/audio/jackaudio.c b/audio/jackaudio.c
 index XXXXXXX..XXXXXXX 100644
---- a/include/hw/arm/aspeed.h
+--- a/audio/jackaudio.c
-+++ b/include/hw/arm/aspeed.h
++++ b/audio/jackaudio.c
-@@ -XXX,XX +XXX,XX @@ typedef struct AspeedBoardConfig {
+@@ -XXX,XX +XXX,XX @@ static void qjack_client_connect_ports(QJackClient *c)
-     const char *desc;
+ static int qjack_client_init(QJackClient *c)
      const char *soc_name;
      uint32_t hw_strap1;
 +    uint32_t hw_strap2;
      const char *fmc_model;
      const char *spi_model;
      uint32_t num_cs;
 diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
 index XXXXXXX..XXXXXXX 100644
 --- a/hw/arm/aspeed.c
 +++ b/hw/arm/aspeed.c
@@ -XXX,XX +XXX,XX @@ struct AspeedBoardState {
  /* Witherspoon hardware value: 0xF10AD216 (but use romulus definition) */
  #define WITHERSPOON_BMC_HW_STRAP1 ROMULUS_BMC_HW_STRAP1
 +/* AST2600 evb hardware value */
 +#define AST2600_EVB_HW_STRAP1 0x000000C0
 +#define AST2600_EVB_HW_STRAP2 0x00000003
 +
  /*
   * The max ram region is for firmwares that scan the address space
   * with load/store to guess how much RAM the SoC has.
@@ -XXX,XX +XXX,XX @@ static void aspeed_board_init(MachineState *machine,
                               &error_abort);
      object_property_set_int(OBJECT(&bmc->soc), cfg->hw_strap1, "hw-strap1",
                              &error_abort);
 +    object_property_set_int(OBJECT(&bmc->soc), cfg->hw_strap2, "hw-strap2",
 +                            &error_abort);
      object_property_set_int(OBJECT(&bmc->soc), cfg->num_cs, "num-cs",
                              &error_abort);
      object_property_set_int(OBJECT(&bmc->soc), machine->smp.cpus, "num-cpus",
@@ -XXX,XX +XXX,XX @@ static void ast2500_evb_i2c_init(AspeedBoardState *bmc)
      i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 11), "ds1338", 0x32);
  }
 +static void ast2600_evb_i2c_init(AspeedBoardState *bmc)
 +{
 +    /* Start with some devices on our I2C busses */
 +    ast2500_evb_i2c_init(bmc);
 +}
 +
  static void romulus_bmc_i2c_init(AspeedBoardState *bmc)
  {
-     AspeedSoCState *soc = &bmc->soc;
+     jack_status_t status;
-@@ -XXX,XX +XXX,XX @@ static const AspeedBoardConfig aspeed_boards[] = {
+-    char client_name[jack_client_name_size()];
-         .num_cs    = 2,
++    int client_name_len = jack_client_name_size(); /* includes NUL */
-         .i2c_init  = witherspoon_bmc_i2c_init,
++    g_autofree char *client_name = g_new(char, client_name_len);
-         .ram       = 512 * MiB,
+     jack_options_t options = JackNullOption;
-+    }, {
-+        .name      = MACHINE_TYPE_NAME("ast2600-evb"),
+     if (c->state == QJACK_STATE_RUNNING) {
-+        .desc      = "Aspeed AST2600 EVB (Cortex A7)",
+@@ -XXX,XX +XXX,XX @@ static int qjack_client_init(QJackClient *c)
-+        .soc_name  = "ast2600-a0",
-+        .hw_strap1 = AST2600_EVB_HW_STRAP1,
+     c->connect_ports = true;
-+        .hw_strap2 = AST2600_EVB_HW_STRAP2,
-+        .fmc_model = "w25q512jv",
+-    snprintf(client_name, sizeof(client_name), "%s-%s",
-+        .spi_model = "mx66u51235f",
++    snprintf(client_name, client_name_len, "%s-%s",
-+        .num_cs    = 1,
+         c->out ? "out" : "in",
-+        .i2c_init  = ast2600_evb_i2c_init,
+         c->opt->client_name ? c->opt->client_name : audio_application_name());
 +        .ram       = 1 * GiB,
      },
  };
 --
-.20.1
+.34.1

-[PULL 04/51] target/arm: Split out rebuild_hflags_a64
+Deleted patch
-From: Richard Henderson <richard.henderson@linaro.org>
-Create a function to compute the values of the TBFLAG_A64 bits
-that will be cached.  For now, the env->hflags variable is not
-used, and the results are fed back to cpu_get_tb_cpu_state.
-Note that not all BTI related flags are cached, so we have to
-test the BTI feature twice -- once for those bits moved out to
-rebuild_hflags_a64 and once for those bits that remain in
-cpu_get_tb_cpu_state.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-3-richard.henderson@linaro.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- target/arm/helper.c | 131 +++++++++++++++++++++++---------------------
-file changed, 69 insertions(+), 62 deletions(-)
-diff --git a/target/arm/helper.c b/target/arm/helper.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
-+++ b/target/arm/helper.c
-@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el,
-     return flags;
- }
-+static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
-+                                   ARMMMUIdx mmu_idx)
-+{
-+    ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
-+    ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
-+    uint32_t flags = 0;
-+    uint64_t sctlr;
-+    int tbii, tbid;
-+
-+    flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1);
-+
-+    /* FIXME: ARMv8.1-VHE S2 translation regime.  */
-+    if (regime_el(env, stage1) < 2) {
-+        ARMVAParameters p1 = aa64_va_parameters_both(env, -1, stage1);
-+        tbid = (p1.tbi << 1) | p0.tbi;
-+        tbii = tbid & ~((p1.tbid << 1) | p0.tbid);
-+    } else {
-+        tbid = p0.tbi;
-+        tbii = tbid & !p0.tbid;
-+    }
-+
-+    flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii);
-+    flags = FIELD_DP32(flags, TBFLAG_A64, TBID, tbid);
-+
-+    if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
-+        int sve_el = sve_exception_el(env, el);
-+        uint32_t zcr_len;
-+
-+        /*
-+         * If SVE is disabled, but FP is enabled,
-+         * then the effective len is 0.
-+         */
-+        if (sve_el != 0 && fp_el == 0) {
-+            zcr_len = 0;
-+        } else {
-+            zcr_len = sve_zcr_len_for_el(env, el);
-+        }
-+        flags = FIELD_DP32(flags, TBFLAG_A64, SVEEXC_EL, sve_el);
-+        flags = FIELD_DP32(flags, TBFLAG_A64, ZCR_LEN, zcr_len);
-+    }
-+
-+    sctlr = arm_sctlr(env, el);
-+
-+    if (cpu_isar_feature(aa64_pauth, env_archcpu(env))) {
-+        /*
-+         * In order to save space in flags, we record only whether
-+         * pauth is "inactive", meaning all insns are implemented as
-+         * a nop, or "active" when some action must be performed.
-+         * The decision of which action to take is left to a helper.
-+         */
-+        if (sctlr & (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)) {
-+            flags = FIELD_DP32(flags, TBFLAG_A64, PAUTH_ACTIVE, 1);
-+        }
-+    }
-+
-+    if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
-+        /* Note that SCTLR_EL[23].BT == SCTLR_BT1.  */
-+        if (sctlr & (el == 0 ? SCTLR_BT0 : SCTLR_BT1)) {
-+            flags = FIELD_DP32(flags, TBFLAG_A64, BT, 1);
-+        }
-+    }
-+
-+    return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
-+}
-+
- void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
-                           target_ulong *cs_base, uint32_t *pflags)
- {
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
-     uint32_t flags = 0;
-     if (is_a64(env)) {
--        ARMCPU *cpu = env_archcpu(env);
--        uint64_t sctlr;
--
-         *pc = env->pc;
--        flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1);
--
--        /* Get control bits for tagged addresses.  */
--        {
--            ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
--            ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
--            int tbii, tbid;
--
--            /* FIXME: ARMv8.1-VHE S2 translation regime.  */
--            if (regime_el(env, stage1) < 2) {
--                ARMVAParameters p1 = aa64_va_parameters_both(env, -1, stage1);
--                tbid = (p1.tbi << 1) | p0.tbi;
--                tbii = tbid & ~((p1.tbid << 1) | p0.tbid);
--            } else {
--                tbid = p0.tbi;
--                tbii = tbid & !p0.tbid;
--            }
--
--            flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii);
--            flags = FIELD_DP32(flags, TBFLAG_A64, TBID, tbid);
--        }
--
--        if (cpu_isar_feature(aa64_sve, cpu)) {
--            int sve_el = sve_exception_el(env, current_el);
--            uint32_t zcr_len;
--
--            /* If SVE is disabled, but FP is enabled,
--             * then the effective len is 0.
--             */
--            if (sve_el != 0 && fp_el == 0) {
--                zcr_len = 0;
--            } else {
--                zcr_len = sve_zcr_len_for_el(env, current_el);
--            }
--            flags = FIELD_DP32(flags, TBFLAG_A64, SVEEXC_EL, sve_el);
--            flags = FIELD_DP32(flags, TBFLAG_A64, ZCR_LEN, zcr_len);
--        }
--
--        sctlr = arm_sctlr(env, current_el);
--
--        if (cpu_isar_feature(aa64_pauth, cpu)) {
--            /*
--             * In order to save space in flags, we record only whether
--             * pauth is "inactive", meaning all insns are implemented as
--             * a nop, or "active" when some action must be performed.
--             * The decision of which action to take is left to a helper.
--             */
--            if (sctlr & (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)) {
--                flags = FIELD_DP32(flags, TBFLAG_A64, PAUTH_ACTIVE, 1);
--            }
--        }
--
--        if (cpu_isar_feature(aa64_bti, cpu)) {
--            /* Note that SCTLR_EL[23].BT == SCTLR_BT1.  */
--            if (sctlr & (current_el == 0 ? SCTLR_BT0 : SCTLR_BT1)) {
--                flags = FIELD_DP32(flags, TBFLAG_A64, BT, 1);
--            }
-+        flags = rebuild_hflags_a64(env, current_el, fp_el, mmu_idx);
-+        if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
-             flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
-         }
-     } else {
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
-             flags = FIELD_DP32(flags, TBFLAG_A32,
-                                XSCALE_CPAR, env->cp15.c15_cpar);
-         }
--    }
--    flags = rebuild_hflags_common(env, fp_el, mmu_idx, flags);
-+        flags = rebuild_hflags_common(env, fp_el, mmu_idx, flags);
-+    }
-     /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
-      * states defined in the ARM ARM for software singlestep:
---
-.20.1

-[PULL 05/51] target/arm: Split out rebuild_hflags_common_32
+[PULL 24/30] audio/jackaudio: Avoid dynamic stack allocation in qjack_process()
-From: Richard Henderson <richard.henderson@linaro.org>
+Avoid a dynamic stack allocation in qjack_process().  Since this
 function is a JACK process callback, we are not permitted to malloc()
 here, so we allocate a working buffer in qjack_client_init() instead.
-Create a function to compute the values of the TBFLAG_A32 bits
+The codebase has very few VLAs, and if we can get rid of them all we
-that will be cached, and are used by all profiles.
+can make the compiler error on new additions.  This is a defensive
 measure against security bugs where an on-stack dynamic allocation
 isn't correctly size-checked (e.g.  CVE-2021-3527).
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-4-richard.henderson@linaro.org
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
+Reviewed-by: Francisco Iglesias <frasse.iglesias@gmail.com>
+Reviewed-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
+Message-id: 20230818155846.1651287-3-peter.maydell@linaro.org
 ---
- target/arm/helper.c | 16 +++++++++++-----
+ audio/jackaudio.c | 16 +++++++++++-----
 file changed, 11 insertions(+), 5 deletions(-)
-diff --git a/target/arm/helper.c b/target/arm/helper.c
+diff --git a/audio/jackaudio.c b/audio/jackaudio.c
 index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
+--- a/audio/jackaudio.c
-+++ b/target/arm/helper.c
++++ b/audio/jackaudio.c
-@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el,
+@@ -XXX,XX +XXX,XX @@ typedef struct QJackClient {
-     return flags;
+     int             buffersize;
      jack_port_t   **port;
      QJackBuffer     fifo;
 +
 +    /* Used as workspace by qjack_process() */
 +    float **process_buffers;
  }
+ QJackClient;
-+static uint32_t rebuild_hflags_common_32(CPUARMState *env, int fp_el,
-+                                         ARMMMUIdx mmu_idx, uint32_t flags)
+@@ -XXX,XX +XXX,XX @@ static int qjack_process(jack_nframes_t nframes, void *arg)
-+{
+     }
-+    flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR_B, arm_sctlr_b(env));
-+    flags = FIELD_DP32(flags, TBFLAG_A32, NS, !access_secure_reg(env));
+     /* get the buffers for the ports */
-+
+-    float *buffers[c->nchannels];
-+    return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
+     for (int i = 0; i < c->nchannels; ++i) {
-+}
+-        buffers[i] = jack_port_get_buffer(c->port[i], nframes);
-+
++        c->process_buffers[i] = jack_port_get_buffer(c->port[i], nframes);
- static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
+     }
-                                    ARMMMUIdx mmu_idx)
- {
+     if (c->out) {
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
+         if (likely(c->enabled)) {
-     ARMMMUIdx mmu_idx = arm_mmu_idx(env);
+-            qjack_buffer_read_l(&c->fifo, buffers, nframes);
-     int current_el = arm_current_el(env);
++            qjack_buffer_read_l(&c->fifo, c->process_buffers, nframes);
-     int fp_el = fp_exception_el(env, current_el);
+         } else {
--    uint32_t flags = 0;
+             for (int i = 0; i < c->nchannels; ++i) {
-+    uint32_t flags;
+-                memset(buffers[i], 0, nframes * sizeof(float));
++                memset(c->process_buffers[i], 0, nframes * sizeof(float));
-     if (is_a64(env)) {
+             }
          *pc = env->pc;
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
          }
      } else {
-         *pc = env->regs[15];
+         if (likely(c->enabled)) {
-+        flags = rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
+-            qjack_buffer_write_l(&c->fifo, buffers, nframes);
-         flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
++            qjack_buffer_write_l(&c->fifo, c->process_buffers, nframes);
          flags = FIELD_DP32(flags, TBFLAG_A32, VECLEN, env->vfp.vec_len);
          flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE, env->vfp.vec_stride);
          flags = FIELD_DP32(flags, TBFLAG_A32, CONDEXEC, env->condexec_bits);
 -        flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR_B, arm_sctlr_b(env));
 -        flags = FIELD_DP32(flags, TBFLAG_A32, NS, !access_secure_reg(env));
          if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
              || arm_el_is_aa64(env, 1) || arm_feature(env, ARM_FEATURE_M)) {
              flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
              flags = FIELD_DP32(flags, TBFLAG_A32,
                                 XSCALE_CPAR, env->cp15.c15_cpar);
          }
--
--        flags = rebuild_hflags_common(env, fp_el, mmu_idx, flags);
      }
-     /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
+@@ -XXX,XX +XXX,XX @@ static int qjack_client_init(QJackClient *c)
            jack_get_client_name(c->client));
      }
 +    /* Allocate working buffer for process callback */
 +    c->process_buffers = g_new(float *, c->nchannels);
 +
      jack_set_process_callback(c->client, qjack_process , c);
      jack_set_port_registration_callback(c->client, qjack_port_registration, c);
      jack_set_xrun_callback(c->client, qjack_xrun, c);
@@ -XXX,XX +XXX,XX @@ static void qjack_client_fini_locked(QJackClient *c)
          qjack_buffer_free(&c->fifo);
          g_free(c->port);
 +        g_free(c->process_buffers);
          c->state = QJACK_STATE_DISCONNECTED;
          /* fallthrough */
 --
-.20.1
+.34.1

-[PULL 24/51] linux-user/aarch64: Rebuild hflags for TARGET_WORDS_BIGENDIAN
+[PULL 25/30] sbsa-ref: add non-secure EL2 virtual timer
-From: Richard Henderson <richard.henderson@linaro.org>
+From: Marcin Juszkiewicz <marcin.juszkiewicz@linaro.org>
-Continue setting, but not relying upon, env->hflags.
+Armv8.1+ cpus have Virtual Host Extension (VHE) which added non-secure
 EL2 virtual timer.
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+This change adds it to fulfil Arm BSA (Base System Architecture)
-Message-id: 20191023150057.25731-23-richard.henderson@linaro.org
+requirements.
 Signed-off-by: Marcin Juszkiewicz <marcin.juszkiewicz@linaro.org>
 Message-id: 20230913140610.214893-2-marcin.juszkiewicz@linaro.org
 Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- linux-user/aarch64/cpu_loop.c | 1 +
+ hw/arm/sbsa-ref.c | 2 ++
-file changed, 1 insertion(+)
+file changed, 2 insertions(+)
-diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
+diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
 index XXXXXXX..XXXXXXX 100644
---- a/linux-user/aarch64/cpu_loop.c
+--- a/hw/arm/sbsa-ref.c
-+++ b/linux-user/aarch64/cpu_loop.c
++++ b/hw/arm/sbsa-ref.c
-@@ -XXX,XX +XXX,XX @@ void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
+@@ -XXX,XX +XXX,XX @@
-     for (i = 1; i < 4; ++i) {
+ #define ARCH_TIMER_S_EL1_IRQ   13
-         env->cp15.sctlr_el[i] |= SCTLR_EE;
+ #define ARCH_TIMER_NS_EL1_IRQ  14
-     }
+ #define ARCH_TIMER_NS_EL2_IRQ  10
-+    arm_rebuild_hflags(env);
++#define ARCH_TIMER_NS_EL2_VIRT_IRQ  12
- #endif
+ enum {
-     if (cpu_isar_feature(aa64_pauth, cpu)) {
+     SBSA_FLASH,
@@ -XXX,XX +XXX,XX @@ static void create_gic(SBSAMachineState *sms, MemoryRegion *mem)
              [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ,
              [GTIMER_HYP]  = ARCH_TIMER_NS_EL2_IRQ,
              [GTIMER_SEC]  = ARCH_TIMER_S_EL1_IRQ,
 +            [GTIMER_HYPVIRT] = ARCH_TIMER_NS_EL2_VIRT_IRQ,
          };
          for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) {
 --
-.20.1
+.34.1

-[PULL 49/51] hw/arm/bcm2836: Rename cpus[] as cpu[].core
+[PULL 26/30] elf2dmp: replace PE export name check with PDB name check
-From: Philippe Mathieu-Daudé <f4bug@amsat.org>
+From: Viktor Prutyanov <viktor@daynix.com>
-As we are going to add more core-specific fields, add a 'cpu'
+PE export name check introduced in d399d6b179 isn't reliable enough,
-structure and move the ARMCPU field there as 'core'.
+because a page with the export directory may not be present for some
 reason. On the other hand, elf2dmp retrieves the PDB name in any case.
 It can also be used to check that a PE image is the kernel image. So,
 check PDB name when searching for Windows kernel image.
-Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
+Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2165917
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Message-id: 20191019234715.25750-7-f4bug@amsat.org
+Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
 Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
 Message-id: 20230915170153.10959-2-viktor@daynix.com
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- include/hw/arm/bcm2836.h |  4 +++-
+ contrib/elf2dmp/main.c | 93 +++++++++++++++---------------------------
- hw/arm/bcm2836.c         | 26 ++++++++++++++------------
+file changed, 33 insertions(+), 60 deletions(-)
 files changed, 17 insertions(+), 13 deletions(-)
-diff --git a/include/hw/arm/bcm2836.h b/include/hw/arm/bcm2836.h
+diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c
 index XXXXXXX..XXXXXXX 100644
---- a/include/hw/arm/bcm2836.h
+--- a/contrib/elf2dmp/main.c
-+++ b/include/hw/arm/bcm2836.h
++++ b/contrib/elf2dmp/main.c
-@@ -XXX,XX +XXX,XX @@ typedef struct BCM283XState {
+@@ -XXX,XX +XXX,XX @@ static int write_dump(struct pa_space *ps,
-     char *cpu_type;
+     return fclose(dmp_file);
-     uint32_t enabled_cpus;
+ }
--    ARMCPU cpus[BCM283X_NCPUS];
+-static bool pe_check_export_name(uint64_t base, void *start_addr,
-+    struct {
+-        struct va_space *vs)
-+        ARMCPU core;
+-{
-+    } cpu[BCM283X_NCPUS];
+-    IMAGE_EXPORT_DIRECTORY export_dir;
-     BCM2836ControlState control;
+-    const char *pe_name;
-     BCM2835PeripheralState peripherals;
+-
- } BCM283XState;
+-    if (pe_get_data_dir_entry(base, start_addr, IMAGE_FILE_EXPORT_DIRECTORY,
-diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c
+-                &export_dir, sizeof(export_dir), vs)) {
-index XXXXXXX..XXXXXXX 100644
+-        return false;
---- a/hw/arm/bcm2836.c
+-    }
-+++ b/hw/arm/bcm2836.c
+-
-@@ -XXX,XX +XXX,XX @@ static void bcm2836_init(Object *obj)
+-    pe_name = va_space_resolve(vs, base + export_dir.Name);
-     int n;
+-    if (!pe_name) {
+-        return false;
-     for (n = 0; n < BCM283X_NCPUS; n++) {
+-    }
--        object_initialize_child(obj, "cpu[*]", &s->cpus[n], sizeof(s->cpus[n]),
+-
--                                info->cpu_type, &error_abort, NULL);
+-    return !strcmp(pe_name, PE_NAME);
-+        object_initialize_child(obj, "cpu[*]", &s->cpu[n].core,
+-}
-+                                sizeof(s->cpu[n].core), info->cpu_type,
+-
-+                                &error_abort, NULL);
+-static int pe_get_pdb_symstore_hash(uint64_t base, void *start_addr,
 -        char *hash, struct va_space *vs)
 +static bool pe_check_pdb_name(uint64_t base, void *start_addr,
 +        struct va_space *vs, OMFSignatureRSDS *rsds)
  {
      const char sign_rsds[4] = "RSDS";
      IMAGE_DEBUG_DIRECTORY debug_dir;
 -    OMFSignatureRSDS rsds;
 -    char *pdb_name;
 -    size_t pdb_name_sz;
 -    size_t i;
 +    char pdb_name[sizeof(PDB_NAME)];
      if (pe_get_data_dir_entry(base, start_addr, IMAGE_FILE_DEBUG_DIRECTORY,
                  &debug_dir, sizeof(debug_dir), vs)) {
          eprintf("Failed to get Debug Directory\n");
 -        return 1;
 +        return false;
      }
-     sysbus_init_child_obj(obj, "control", &s->control, sizeof(s->control),
+     if (debug_dir.Type != IMAGE_DEBUG_TYPE_CODEVIEW) {
-@@ -XXX,XX +XXX,XX @@ static void bcm2836_realize(DeviceState *dev, Error **errp)
+-        return 1;
++        eprintf("Debug Directory type is not CodeView\n");
-     for (n = 0; n < BCM283X_NCPUS; n++) {
++        return false;
-         /* TODO: this should be converted to a property of ARM_CPU */
+     }
--        s->cpus[n].mp_affinity = (info->clusterid << 8) | n;
-+        s->cpu[n].core.mp_affinity = (info->clusterid << 8) | n;
+     if (va_space_rw(vs,
+                 base + debug_dir.AddressOfRawData,
-         /* set periphbase/CBAR value for CPU-local registers */
+-                &rsds, sizeof(rsds), 0)) {
--        object_property_set_int(OBJECT(&s->cpus[n]),
+-        return 1;
-+        object_property_set_int(OBJECT(&s->cpu[n].core),
++                rsds, sizeof(*rsds), 0)) {
-                                 info->peri_base,
++        eprintf("Failed to resolve OMFSignatureRSDS\n");
-                                 "reset-cbar", &err);
++        return false;
-         if (err) {
+     }
-@@ -XXX,XX +XXX,XX @@ static void bcm2836_realize(DeviceState *dev, Error **errp)
 -    printf("CodeView signature is \'%.4s\'\n", rsds.Signature);
 -
 -    if (memcmp(&rsds.Signature, sign_rsds, sizeof(sign_rsds))) {
 -        return 1;
 +    if (memcmp(&rsds->Signature, sign_rsds, sizeof(sign_rsds))) {
 +        eprintf("CodeView signature is \'%.4s\', \'%s\' expected\n",
 +                rsds->Signature, sign_rsds);
 +        return false;
      }
 -    pdb_name_sz = debug_dir.SizeOfData - sizeof(rsds);
 -    pdb_name = malloc(pdb_name_sz);
 -    if (!pdb_name) {
 -        return 1;
 +    if (debug_dir.SizeOfData - sizeof(*rsds) != sizeof(PDB_NAME)) {
 +        eprintf("PDB name size doesn't match\n");
 +        return false;
      }
      if (va_space_rw(vs, base + debug_dir.AddressOfRawData +
 -                offsetof(OMFSignatureRSDS, name), pdb_name, pdb_name_sz, 0)) {
 -        free(pdb_name);
 -        return 1;
 +                offsetof(OMFSignatureRSDS, name), pdb_name, sizeof(PDB_NAME),
 +                0)) {
 +        eprintf("Failed to resolve PDB name\n");
 +        return false;
      }
      printf("PDB name is \'%s\', \'%s\' expected\n", pdb_name, PDB_NAME);
 -    if (strcmp(pdb_name, PDB_NAME)) {
 -        eprintf("Unexpected PDB name, it seems the kernel isn't found\n");
 -        free(pdb_name);
 -        return 1;
 -    }
 +    return !strcmp(pdb_name, PDB_NAME);
 +}
 -    free(pdb_name);
 -
 -    sprintf(hash, "%.08x%.04x%.04x%.02x%.02x", rsds.guid.a, rsds.guid.b,
 -            rsds.guid.c, rsds.guid.d[0], rsds.guid.d[1]);
 +static void pe_get_pdb_symstore_hash(OMFSignatureRSDS *rsds, char *hash)
 +{
 +    sprintf(hash, "%.08x%.04x%.04x%.02x%.02x", rsds->guid.a, rsds->guid.b,
 +            rsds->guid.c, rsds->guid.d[0], rsds->guid.d[1]);
      hash += 20;
 -    for (i = 0; i < 6; i++, hash += 2) {
 -        sprintf(hash, "%.02x", rsds.guid.e[i]);
 +    for (unsigned int i = 0; i < 6; i++, hash += 2) {
 +        sprintf(hash, "%.02x", rsds->guid.e[i]);
      }
 -    sprintf(hash, "%.01x", rsds.age);
 -
 -    return 0;
 +    sprintf(hash, "%.01x", rsds->age);
  }
  int main(int argc, char *argv[])
@@ -XXX,XX +XXX,XX @@ int main(int argc, char *argv[])
      KDDEBUGGER_DATA64 *kdbg;
      uint64_t KdVersionBlock;
      bool kernel_found = false;
 +    OMFSignatureRSDS rsds;
      if (argc != 3) {
          eprintf("usage:\n\t%s elf_file dmp_file\n", argv[0]);
@@ -XXX,XX +XXX,XX @@ int main(int argc, char *argv[])
          }
-         /* start powered off if not enabled */
+         if (*(uint16_t *)nt_start_addr == 0x5a4d) { /* MZ */
--        object_property_set_bool(OBJECT(&s->cpus[n]), n >= s->enabled_cpus,
+-            if (pe_check_export_name(KernBase, nt_start_addr, &vs)) {
-+        object_property_set_bool(OBJECT(&s->cpu[n].core), n >= s->enabled_cpus,
++            printf("Checking candidate KernBase = 0x%016"PRIx64"\n", KernBase);
-                                  "start-powered-off", &err);
++            if (pe_check_pdb_name(KernBase, nt_start_addr, &vs, &rsds)) {
-         if (err) {
+                 kernel_found = true;
-             error_propagate(errp, err);
+                 break;
-             return;
+             }
-         }
+@@ -XXX,XX +XXX,XX @@ int main(int argc, char *argv[])
+     printf("KernBase = 0x%016"PRIx64", signature is \'%.2s\'\n", KernBase,
--        object_property_set_bool(OBJECT(&s->cpus[n]), true, "realized", &err);
+             (char *)nt_start_addr);
-+        object_property_set_bool(OBJECT(&s->cpu[n].core), true,
-+                                 "realized", &err);
+-    if (pe_get_pdb_symstore_hash(KernBase, nt_start_addr, pdb_hash, &vs)) {
-         if (err) {
+-        eprintf("Failed to get PDB symbol store hash\n");
-             error_propagate(errp, err);
+-        err = 1;
-             return;
+-        goto out_ps;
-@@ -XXX,XX +XXX,XX @@ static void bcm2836_realize(DeviceState *dev, Error **errp)
+-    }
++    pe_get_pdb_symstore_hash(&rsds, pdb_hash);
-         /* Connect irq/fiq outputs from the interrupt controller. */
-         qdev_connect_gpio_out_named(DEVICE(&s->control), "irq", n,
+     sprintf(pdb_url, "%s%s/%s/%s", SYM_URL_BASE, PDB_NAME, pdb_hash, PDB_NAME);
--                qdev_get_gpio_in(DEVICE(&s->cpus[n]), ARM_CPU_IRQ));
+     printf("PDB URL is %s\n", pdb_url);
 +                qdev_get_gpio_in(DEVICE(&s->cpu[n].core), ARM_CPU_IRQ));
          qdev_connect_gpio_out_named(DEVICE(&s->control), "fiq", n,
 -                qdev_get_gpio_in(DEVICE(&s->cpus[n]), ARM_CPU_FIQ));
 +                qdev_get_gpio_in(DEVICE(&s->cpu[n].core), ARM_CPU_FIQ));
          /* Connect timers from the CPU to the interrupt controller */
 -        qdev_connect_gpio_out(DEVICE(&s->cpus[n]), GTIMER_PHYS,
 +        qdev_connect_gpio_out(DEVICE(&s->cpu[n].core), GTIMER_PHYS,
                  qdev_get_gpio_in_named(DEVICE(&s->control), "cntpnsirq", n));
 -        qdev_connect_gpio_out(DEVICE(&s->cpus[n]), GTIMER_VIRT,
 +        qdev_connect_gpio_out(DEVICE(&s->cpu[n].core), GTIMER_VIRT,
                  qdev_get_gpio_in_named(DEVICE(&s->control), "cntvirq", n));
 -        qdev_connect_gpio_out(DEVICE(&s->cpus[n]), GTIMER_HYP,
 +        qdev_connect_gpio_out(DEVICE(&s->cpu[n].core), GTIMER_HYP,
                  qdev_get_gpio_in_named(DEVICE(&s->control), "cnthpirq", n));
 -        qdev_connect_gpio_out(DEVICE(&s->cpus[n]), GTIMER_SEC,
 +        qdev_connect_gpio_out(DEVICE(&s->cpu[n].core), GTIMER_SEC,
                  qdev_get_gpio_in_named(DEVICE(&s->control), "cntpsirq", n));
      }
  }
 --
-.20.1
+.34.1

-[PULL 07/51] target/arm: Split out rebuild_hflags_m32
+[PULL 27/30] elf2dmp: introduce physical block alignment
-From: Richard Henderson <richard.henderson@linaro.org>
+From: Viktor Prutyanov <viktor@daynix.com>
-Create a function to compute the values of the TBFLAG_A32 bits
+Physical memory ranges may not be aligned to page size in QEMU ELF, but
-that will be cached, and are used by M-profile.
+DMP can only contain page-aligned runs. So, align them.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
-Message-id: 20191023150057.25731-6-richard.henderson@linaro.org
+Message-id: 20230915170153.10959-3-viktor@daynix.com
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- target/arm/helper.c | 45 ++++++++++++++++++++++++++++++---------------
+ contrib/elf2dmp/addrspace.h |  1 +
-file changed, 30 insertions(+), 15 deletions(-)
+ contrib/elf2dmp/addrspace.c | 31 +++++++++++++++++++++++++++++--
  contrib/elf2dmp/main.c      |  5 +++--
 files changed, 33 insertions(+), 4 deletions(-)
-diff --git a/target/arm/helper.c b/target/arm/helper.c
+diff --git a/contrib/elf2dmp/addrspace.h b/contrib/elf2dmp/addrspace.h
 index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
+--- a/contrib/elf2dmp/addrspace.h
-+++ b/target/arm/helper.c
++++ b/contrib/elf2dmp/addrspace.h
-@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_common_32(CPUARMState *env, int fp_el,
+@@ -XXX,XX +XXX,XX @@
-     return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
  #define ELF2DMP_PAGE_BITS 12
  #define ELF2DMP_PAGE_SIZE (1ULL << ELF2DMP_PAGE_BITS)
 +#define ELF2DMP_PAGE_MASK (ELF2DMP_PAGE_SIZE - 1)
  #define ELF2DMP_PFN_MASK (~(ELF2DMP_PAGE_SIZE - 1))
  #define INVALID_PA  UINT64_MAX
 diff --git a/contrib/elf2dmp/addrspace.c b/contrib/elf2dmp/addrspace.c
 index XXXXXXX..XXXXXXX 100644
 --- a/contrib/elf2dmp/addrspace.c
 +++ b/contrib/elf2dmp/addrspace.c
@@ -XXX,XX +XXX,XX @@ static struct pa_block *pa_space_find_block(struct pa_space *ps, uint64_t pa)
      for (i = 0; i < ps->block_nr; i++) {
          if (ps->block[i].paddr <= pa &&
 -                pa <= ps->block[i].paddr + ps->block[i].size) {
 +                pa < ps->block[i].paddr + ps->block[i].size) {
              return ps->block + i;
          }
      }
@@ -XXX,XX +XXX,XX @@ static uint8_t *pa_space_resolve(struct pa_space *ps, uint64_t pa)
      return block->addr + (pa - block->paddr);
  }
-+static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el,
++static void pa_block_align(struct pa_block *b)
 +                                   ARMMMUIdx mmu_idx)
 +{
-+    uint32_t flags = 0;
++    uint64_t low_align = ((b->paddr - 1) | ELF2DMP_PAGE_MASK) + 1 - b->paddr;
 +    uint64_t high_align = (b->paddr + b->size) & ELF2DMP_PAGE_MASK;
 +
-+    if (arm_v7m_is_handler_mode(env)) {
++    if (low_align == 0 && high_align == 0) {
-+        flags = FIELD_DP32(flags, TBFLAG_A32, HANDLER, 1);
++        return;
 +    }
 +
-+    /*
++    if (low_align + high_align < b->size) {
-+     * v8M always applies stack limit checks unless CCR.STKOFHFNMIGN
++        printf("Block 0x%"PRIx64"+:0x%"PRIx64" will be aligned to "
-+     * is suppressing them because the requested execution priority
++                "0x%"PRIx64"+:0x%"PRIx64"\n", b->paddr, b->size,
-+     * is less than 0.
++                b->paddr + low_align, b->size - low_align - high_align);
-+     */
++        b->size -= low_align + high_align;
-+    if (arm_feature(env, ARM_FEATURE_V8) &&
++    } else {
-+        !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) &&
++        printf("Block 0x%"PRIx64"+:0x%"PRIx64" is too small to align\n",
-+          (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) {
++                b->paddr, b->size);
-+        flags = FIELD_DP32(flags, TBFLAG_A32, STACKCHECK, 1);
++        b->size = 0;
 +    }
 +
-+    return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
++    b->addr += low_align;
 +    b->paddr += low_align;
 +}
 +
- static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
+ int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf)
                                     ARMMMUIdx mmu_idx)
  {
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
+     Elf64_Half phdr_nr = elf_getphdrnum(qemu_elf->map);
-         }
+@@ -XXX,XX +XXX,XX @@ int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf)
-     } else {
+                 .paddr = phdr[i].p_paddr,
-         *pc = env->regs[15];
+                 .size = phdr[i].p_filesz,
--        flags = rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
+             };
-+
+-            block_i++;
-+        if (arm_feature(env, ARM_FEATURE_M)) {
++            pa_block_align(&ps->block[block_i]);
-+            flags = rebuild_hflags_m32(env, fp_el, mmu_idx);
++            block_i = ps->block[block_i].size ? (block_i + 1) : block_i;
 +        } else {
 +            flags = rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
 +        }
 +
          flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
          flags = FIELD_DP32(flags, TBFLAG_A32, VECLEN, env->vfp.vec_len);
          flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE, env->vfp.vec_stride);
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
          }
      }
--    if (arm_v7m_is_handler_mode(env)) {
++    ps->block_nr = block_i;
--        flags = FIELD_DP32(flags, TBFLAG_A32, HANDLER, 1);
++
--    }
+     return 0;
--
+ }
--    /* v8M always applies stack limit checks unless CCR.STKOFHFNMIGN is
--     * suppressing them because the requested execution priority is less than 0.
+diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c
--     */
+index XXXXXXX..XXXXXXX 100644
--    if (arm_feature(env, ARM_FEATURE_V8) &&
+--- a/contrib/elf2dmp/main.c
--        arm_feature(env, ARM_FEATURE_M) &&
++++ b/contrib/elf2dmp/main.c
--        !((mmu_idx  & ARM_MMU_IDX_M_NEGPRI) &&
+@@ -XXX,XX +XXX,XX @@ static int write_dump(struct pa_space *ps,
--          (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) {
+     for (i = 0; i < ps->block_nr; i++) {
--        flags = FIELD_DP32(flags, TBFLAG_A32, STACKCHECK, 1);
+         struct pa_block *b = &ps->block[i];
--    }
--
+-        printf("Writing block #%zu/%zu to file...\n", i, ps->block_nr);
-     if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
++        printf("Writing block #%zu/%zu of %"PRIu64" bytes to file...\n", i,
-         FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S) != env->v7m.secure) {
++                ps->block_nr, b->size);
-         flags = FIELD_DP32(flags, TBFLAG_A32, FPCCR_S_WRONG, 1);
+         if (fwrite(b->addr, b->size, 1, dmp_file) != 1) {
 -            eprintf("Failed to write dump header\n");
 +            eprintf("Failed to write block\n");
              fclose(dmp_file);
              return 1;
          }
 --
-.20.1
+.34.1

-[PULL 43/51] target/arm/kvm: host cpu: Add support for sve<N> properties
+[PULL 28/30] elf2dmp: introduce merging of physical memory runs
-From: Andrew Jones <drjones@redhat.com>
+From: Viktor Prutyanov <viktor@daynix.com>
-Allow cpu 'host' to enable SVE when it's available, unless the
+DMP supports 42 physical memory runs at most. So, merge adjacent
-user chooses to disable it with the added 'sve=off' cpu property.
+physical memory ranges from QEMU ELF when possible to minimize total
-Also give the user the ability to select vector lengths with the
+number of runs.
 sve<N> properties. We don't adopt 'max' cpu's other sve property,
 sve-max-vq, because that property is difficult to use with KVM.
 That property assumes all vector lengths in the range from 1 up
 to and including the specified maximum length are supported, but
 there may be optional lengths not supported by the host in that
 range. With KVM one must be more specific when enabling vector
 lengths.
-Signed-off-by: Andrew Jones <drjones@redhat.com>
+Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
-Reviewed-by: Eric Auger <eric.auger@redhat.com>
+Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
-Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
+Message-id: 20230915170153.10959-4-viktor@daynix.com
-Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
+[PMM: fixed format string for printing size_t values]
 Message-id: 20191024121808.9612-10-drjones@redhat.com
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- target/arm/cpu.h          |  2 ++
+ contrib/elf2dmp/main.c | 56 ++++++++++++++++++++++++++++++++++++------
- target/arm/cpu.c          |  3 +++
+file changed, 48 insertions(+), 8 deletions(-)
  target/arm/cpu64.c        | 33 +++++++++++++++++----------------
  target/arm/kvm64.c        | 14 +++++++++++++-
  tests/arm-cpu-features.c  | 23 +++++++++++------------
  docs/arm-cpu-features.rst | 19 ++++++++++++-------
 files changed, 58 insertions(+), 36 deletions(-)
-diff --git a/target/arm/cpu.h b/target/arm/cpu.h
+diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c
 index XXXXXXX..XXXXXXX 100644
---- a/target/arm/cpu.h
+--- a/contrib/elf2dmp/main.c
-+++ b/target/arm/cpu.h
++++ b/contrib/elf2dmp/main.c
-@@ -XXX,XX +XXX,XX @@ int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+@@ -XXX,XX +XXX,XX @@
- void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
+ #define PE_NAME     "ntoskrnl.exe"
- void aarch64_sve_change_el(CPUARMState *env, int old_el,
-                            int new_el, bool el0_a64);
+ #define INITIAL_MXCSR   0x1f80
-+void aarch64_add_sve_properties(Object *obj);
++#define MAX_NUMBER_OF_RUNS  42
- #else
- static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
+ typedef struct idt_desc {
- static inline void aarch64_sve_change_el(CPUARMState *env, int o,
+     uint16_t offset1;   /* offset bits 0..15 */
-                                          int n, bool a)
+@@ -XXX,XX +XXX,XX @@ static int fix_dtb(struct va_space *vs, QEMU_Elf *qe)
- { }
+     return 1;
 +static inline void aarch64_add_sve_properties(Object *obj) { }
  #endif
  #if !defined(CONFIG_TCG)
 diff --git a/target/arm/cpu.c b/target/arm/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/cpu.c
 +++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static void arm_host_initfn(Object *obj)
      ARMCPU *cpu = ARM_CPU(obj);
      kvm_arm_set_cpu_features_from_host(cpu);
 +    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
 +        aarch64_add_sve_properties(obj);
 +    }
      arm_cpu_post_init(obj);
  }
-diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
++static void try_merge_runs(struct pa_space *ps,
-index XXXXXXX..XXXXXXX 100644
++        WinDumpPhyMemDesc64 *PhysicalMemoryBlock)
 --- a/target/arm/cpu64.c
 +++ b/target/arm/cpu64.c
@@ -XXX,XX +XXX,XX @@ static void cpu_arm_set_sve(Object *obj, Visitor *v, const char *name,
      cpu->isar.id_aa64pfr0 = t;
  }
 +void aarch64_add_sve_properties(Object *obj)
 +{
-+    uint32_t vq;
++    unsigned int merge_cnt = 0, run_idx = 0;
 +
-+    object_property_add(obj, "sve", "bool", cpu_arm_get_sve,
++    PhysicalMemoryBlock->NumberOfRuns = 0;
 +                        cpu_arm_set_sve, NULL, NULL, &error_fatal);
 +
-+    for (vq = 1; vq <= ARM_MAX_VQ; ++vq) {
++    for (size_t idx = 0; idx < ps->block_nr; idx++) {
-+        char name[8];
++        struct pa_block *blk = ps->block + idx;
-+        sprintf(name, "sve%d", vq * 128);
++        struct pa_block *next = blk + 1;
-+        object_property_add(obj, name, "bool", cpu_arm_get_sve_vq,
++
-+                            cpu_arm_set_sve_vq, NULL, NULL, &error_fatal);
++        PhysicalMemoryBlock->NumberOfPages += blk->size / ELF2DMP_PAGE_SIZE;
 +
 +        if (idx + 1 != ps->block_nr && blk->paddr + blk->size == next->paddr) {
 +            printf("Block #%zu 0x%"PRIx64"+:0x%"PRIx64" and %u previous will be"
 +                    " merged\n", idx, blk->paddr, blk->size, merge_cnt);
 +            merge_cnt++;
 +        } else {
 +            struct pa_block *first_merged = blk - merge_cnt;
 +
 +            printf("Block #%zu 0x%"PRIx64"+:0x%"PRIx64" and %u previous will be"
 +                    " merged to 0x%"PRIx64"+:0x%"PRIx64" (run #%u)\n",
 +                    idx, blk->paddr, blk->size, merge_cnt, first_merged->paddr,
 +                    blk->paddr + blk->size - first_merged->paddr, run_idx);
 +            PhysicalMemoryBlock->Run[run_idx] = (WinDumpPhyMemRun64) {
 +                .BasePage = first_merged->paddr / ELF2DMP_PAGE_SIZE,
 +                .PageCount = (blk->paddr + blk->size - first_merged->paddr) /
 +                        ELF2DMP_PAGE_SIZE,
 +            };
 +            PhysicalMemoryBlock->NumberOfRuns++;
 +            run_idx++;
 +            merge_cnt = 0;
 +        }
 +    }
 +}
 +
- /* -cpu max: if KVM is enabled, like -cpu host (best possible with this host);
+ static int fill_header(WinDumpHeader64 *hdr, struct pa_space *ps,
-  * otherwise, a CPU with as many features enabled as our emulation supports.
+         struct va_space *vs, uint64_t KdDebuggerDataBlock,
-  * The version of '-cpu max' for qemu-system-arm is defined in cpu.c;
+         KDDEBUGGER_DATA64 *kdbg, uint64_t KdVersionBlock, int nr_cpus)
-@@ -XXX,XX +XXX,XX @@ static void cpu_arm_set_sve(Object *obj, Visitor *v, const char *name,
+@@ -XXX,XX +XXX,XX @@ static int fill_header(WinDumpHeader64 *hdr, struct pa_space *ps,
- static void aarch64_max_initfn(Object *obj)
+             KUSD_OFFSET_PRODUCT_TYPE);
- {
+     DBGKD_GET_VERSION64 kvb;
-     ARMCPU *cpu = ARM_CPU(obj);
+     WinDumpHeader64 h;
--    uint32_t vq;
+-    size_t i;
--    uint64_t t;
+     QEMU_BUILD_BUG_ON(KUSD_OFFSET_SUITE_MASK >= ELF2DMP_PAGE_SIZE);
-     if (kvm_enabled()) {
+     QEMU_BUILD_BUG_ON(KUSD_OFFSET_PRODUCT_TYPE >= ELF2DMP_PAGE_SIZE);
-         kvm_arm_set_cpu_features_from_host(cpu);
+@@ -XXX,XX +XXX,XX @@ static int fill_header(WinDumpHeader64 *hdr, struct pa_space *ps,
--        if (kvm_arm_sve_supported(CPU(cpu))) {
+         .RequiredDumpSpace = sizeof(h),
--            t = cpu->isar.id_aa64pfr0;
+     };
--            t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1);
--            cpu->isar.id_aa64pfr0 = t;
+-    for (i = 0; i < ps->block_nr; i++) {
--        }
+-        h.PhysicalMemoryBlock.NumberOfPages +=
-     } else {
+-                ps->block[i].size / ELF2DMP_PAGE_SIZE;
-+        uint64_t t;
+-        h.PhysicalMemoryBlock.Run[i] = (WinDumpPhyMemRun64) {
-         uint32_t u;
+-            .BasePage = ps->block[i].paddr / ELF2DMP_PAGE_SIZE,
-         aarch64_a57_initfn(obj);
+-            .PageCount = ps->block[i].size / ELF2DMP_PAGE_SIZE,
+-        };
-@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
++    if (h.PhysicalMemoryBlock.NumberOfRuns <= MAX_NUMBER_OF_RUNS) {
- #endif
++        for (size_t idx = 0; idx < ps->block_nr; idx++) {
 +            h.PhysicalMemoryBlock.NumberOfPages +=
 +                    ps->block[idx].size / ELF2DMP_PAGE_SIZE;
 +            h.PhysicalMemoryBlock.Run[idx] = (WinDumpPhyMemRun64) {
 +                .BasePage = ps->block[idx].paddr / ELF2DMP_PAGE_SIZE,
 +                .PageCount = ps->block[idx].size / ELF2DMP_PAGE_SIZE,
 +            };
 +        }
 +    } else {
 +        try_merge_runs(ps, &h.PhysicalMemoryBlock);
      }
--    object_property_add(obj, "sve", "bool", cpu_arm_get_sve,
+     h.RequiredDumpSpace +=
 -                        cpu_arm_set_sve, NULL, NULL, &error_fatal);
 +    aarch64_add_sve_properties(obj);
      object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_max_vq,
                          cpu_max_set_sve_max_vq, NULL, NULL, &error_fatal);
 -
 -    for (vq = 1; vq <= ARM_MAX_VQ; ++vq) {
 -        char name[8];
 -        sprintf(name, "sve%d", vq * 128);
 -        object_property_add(obj, name, "bool", cpu_arm_get_sve_vq,
 -                            cpu_arm_set_sve_vq, NULL, NULL, &error_fatal);
 -    }
  }
  struct ARMCPUInfo {
 diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/kvm64.c
 +++ b/target/arm/kvm64.c
@@ -XXX,XX +XXX,XX @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
       * and then query that CPU for the relevant ID registers.
       */
      int fdarray[3];
 +    bool sve_supported;
      uint64_t features = 0;
 +    uint64_t t;
      int err;
      /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
@@ -XXX,XX +XXX,XX @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
                                ARM64_SYS_REG(3, 0, 0, 3, 2));
      }
 +    sve_supported = ioctl(fdarray[0], KVM_CHECK_EXTENSION, KVM_CAP_ARM_SVE) > 0;
 +
      kvm_arm_destroy_scratch_host_vcpu(fdarray);
      if (err < 0) {
          return false;
      }
 -   /* We can assume any KVM supporting CPU is at least a v8
 +    /* Add feature bits that can't appear until after VCPU init. */
 +    if (sve_supported) {
 +        t = ahcf->isar.id_aa64pfr0;
 +        t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1);
 +        ahcf->isar.id_aa64pfr0 = t;
 +    }
 +
 +    /*
 +     * We can assume any KVM supporting CPU is at least a v8
       * with VFPv4+Neon; this in turn implies most of the other
       * feature bits.
       */
 diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c
 index XXXXXXX..XXXXXXX 100644
 --- a/tests/arm-cpu-features.c
 +++ b/tests/arm-cpu-features.c
@@ -XXX,XX +XXX,XX @@ static void sve_tests_sve_off_kvm(const void *data)
  {
      QTestState *qts;
 -    qts = qtest_init(MACHINE "-accel kvm -cpu max,sve=off");
 +    qts = qtest_init(MACHINE "-accel kvm -cpu host,sve=off");
      /*
       * We don't know if this host supports SVE so we don't
@@ -XXX,XX +XXX,XX @@ static void sve_tests_sve_off_kvm(const void *data)
       * and that using sve<N>=off to explicitly disable vector
       * lengths is OK too.
       */
 -    assert_sve_vls(qts, "max", 0, NULL);
 -    assert_sve_vls(qts, "max", 0, "{ 'sve128': false }");
 +    assert_sve_vls(qts, "host", 0, NULL);
 +    assert_sve_vls(qts, "host", 0, "{ 'sve128': false }");
      qtest_quit(qts);
  }
@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data)
              "We cannot guarantee the CPU type 'cortex-a15' works "
              "with KVM on this host", NULL);
 -        assert_has_feature(qts, "max", "sve");
 -        resp = do_query_no_props(qts, "max");
 +        assert_has_feature(qts, "host", "sve");
 +        resp = do_query_no_props(qts, "host");
          kvm_supports_sve = resp_get_feature(resp, "sve");
          vls = resp_get_sve_vls(resp);
          qobject_unref(resp);
@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data)
              sprintf(max_name, "sve%d", max_vq * 128);
              /* Enabling a supported length is of course fine. */
 -            assert_sve_vls(qts, "max", vls, "{ %s: true }", max_name);
 +            assert_sve_vls(qts, "host", vls, "{ %s: true }", max_name);
              /* Get the next supported length smaller than max-vq. */
              vq = 64 - __builtin_clzll(vls & ~BIT_ULL(max_vq - 1));
@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data)
                   * We have at least one length smaller than max-vq,
                   * so we can disable max-vq.
                   */
 -                assert_sve_vls(qts, "max", (vls & ~BIT_ULL(max_vq - 1)),
 +                assert_sve_vls(qts, "host", (vls & ~BIT_ULL(max_vq - 1)),
                                 "{ %s: false }", max_name);
                  /*
@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data)
                   */
                  sprintf(name, "sve%d", vq * 128);
                  error = g_strdup_printf("cannot disable %s", name);
 -                assert_error(qts, "max", error,
 +                assert_error(qts, "host", error,
                               "{ %s: true, %s: false }",
                               max_name, name);
                  g_free(error);
@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data)
              vq = __builtin_ffsll(vls);
              sprintf(name, "sve%d", vq * 128);
              error = g_strdup_printf("cannot disable %s", name);
 -            assert_error(qts, "max", error, "{ %s: false }", name);
 +            assert_error(qts, "host", error, "{ %s: false }", name);
              g_free(error);
              /* Get an unsupported length. */
@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data)
              if (vq <= SVE_MAX_VQ) {
                  sprintf(name, "sve%d", vq * 128);
                  error = g_strdup_printf("cannot enable %s", name);
 -                assert_error(qts, "max", error, "{ %s: true }", name);
 +                assert_error(qts, "host", error, "{ %s: true }", name);
                  g_free(error);
              }
          } else {
@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data)
      } else {
          assert_has_not_feature(qts, "host", "aarch64");
          assert_has_not_feature(qts, "host", "pmu");
 -
 -        assert_has_not_feature(qts, "max", "sve");
 +        assert_has_not_feature(qts, "host", "sve");
      }
      qtest_quit(qts);
 diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst
 index XXXXXXX..XXXXXXX 100644
 --- a/docs/arm-cpu-features.rst
 +++ b/docs/arm-cpu-features.rst
@@ -XXX,XX +XXX,XX @@ SVE CPU Property Examples
       $ qemu-system-aarch64 -M virt -cpu max
 -  3) Only enable the 128-bit vector length::
 +  3) When KVM is enabled, implicitly enable all host CPU supported vector
 +     lengths with the `host` CPU type::
 +
 +     $ qemu-system-aarch64 -M virt,accel=kvm -cpu host
 +
 +  4) Only enable the 128-bit vector length::
       $ qemu-system-aarch64 -M virt -cpu max,sve128=on
 -  4) Disable the 512-bit vector length and all larger vector lengths,
 +  5) Disable the 512-bit vector length and all larger vector lengths,
       since 512 is a power-of-two.  This results in all the smaller,
       uninitialized lengths (128, 256, and 384) defaulting to enabled::
       $ qemu-system-aarch64 -M virt -cpu max,sve512=off
 -  5) Enable the 128-bit, 256-bit, and 512-bit vector lengths::
 +  6) Enable the 128-bit, 256-bit, and 512-bit vector lengths::
       $ qemu-system-aarch64 -M virt -cpu max,sve128=on,sve256=on,sve512=on
 -  6) The same as (5), but since the 128-bit and 256-bit vector
 +  7) The same as (6), but since the 128-bit and 256-bit vector
       lengths are required for the 512-bit vector length to be enabled,
       then allow them to be auto-enabled::
       $ qemu-system-aarch64 -M virt -cpu max,sve512=on
 -  7) Do the same as (6), but by first disabling SVE and then re-enabling it::
 +  8) Do the same as (7), but by first disabling SVE and then re-enabling it::
       $ qemu-system-aarch64 -M virt -cpu max,sve=off,sve512=on,sve=on
 -  8) Force errors regarding the last vector length::
 +  9) Force errors regarding the last vector length::
       $ qemu-system-aarch64 -M virt -cpu max,sve128=off
       $ qemu-system-aarch64 -M virt -cpu max,sve=off,sve128=off,sve=on
@@ -XXX,XX +XXX,XX @@ The examples in "SVE CPU Property Examples" exhibit many ways to select
  vector lengths which developers may find useful in order to avoid overly
  verbose command lines.  However, the recommended way to select vector
  lengths is to explicitly enable each desired length.  Therefore only
 -example's (1), (3), and (5) exhibit recommended uses of the properties.
 +example's (1), (4), and (6) exhibit recommended uses of the properties.
 --
-.20.1
+.34.1

-[PULL 42/51] target/arm/cpu64: max cpu: Support sve properties with KVM
+[PULL 29/30] elf2dmp: use Linux mmap with MAP_NORESERVE when possible
-From: Andrew Jones <drjones@redhat.com>
+From: Viktor Prutyanov <viktor@daynix.com>
-Extend the SVE vq map initialization and validation with KVM's
+Glib's g_mapped_file_new maps file with PROT_READ|PROT_WRITE and
-supported vector lengths when KVM is enabled. In order to determine
+MAP_PRIVATE. This leads to premature physical memory allocation of dump
-and select supported lengths we add two new KVM functions for getting
+file size on Linux hosts and may fail. On Linux, mapping the file with
-and setting the KVM_REG_ARM64_SVE_VLS pseudo-register.
+MAP_NORESERVE limits the allocation by available memory.
-This patch has been co-authored with Richard Henderson, who reworked
+Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
-the target/arm/cpu64.c changes in order to push all the validation and
+Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
-auto-enabling/disabling steps into the finalizer, resulting in a nice
+Message-id: 20230915170153.10959-5-viktor@daynix.com
 LOC reduction.
 Signed-off-by: Andrew Jones <drjones@redhat.com>
 Reviewed-by: Eric Auger <eric.auger@redhat.com>
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
 Message-id: 20191024121808.9612-9-drjones@redhat.com
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- target/arm/kvm_arm.h      |  12 +++
+ contrib/elf2dmp/qemu_elf.h |  2 ++
- target/arm/cpu64.c        | 176 ++++++++++++++++++++++++++++----------
+ contrib/elf2dmp/qemu_elf.c | 68 +++++++++++++++++++++++++++++++-------
- target/arm/kvm64.c        | 100 +++++++++++++++++++++-
+files changed, 58 insertions(+), 12 deletions(-)
  tests/arm-cpu-features.c  | 106 ++++++++++++++++++++++-
  docs/arm-cpu-features.rst |  45 +++++++---
 files changed, 381 insertions(+), 58 deletions(-)
-diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
+diff --git a/contrib/elf2dmp/qemu_elf.h b/contrib/elf2dmp/qemu_elf.h
 index XXXXXXX..XXXXXXX 100644
---- a/target/arm/kvm_arm.h
+--- a/contrib/elf2dmp/qemu_elf.h
-+++ b/target/arm/kvm_arm.h
++++ b/contrib/elf2dmp/qemu_elf.h
-@@ -XXX,XX +XXX,XX @@ typedef struct ARMHostCPUFeatures {
+@@ -XXX,XX +XXX,XX @@ typedef struct QEMUCPUState {
-  */
+ int is_system(QEMUCPUState *s);
- bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf);
+ typedef struct QEMU_Elf {
-+/**
++#ifndef CONFIG_LINUX
-+ * kvm_arm_sve_get_vls:
+     GMappedFile *gmf;
-+ * @cs: CPUState
++#endif
-+ * @map: bitmap to fill in
+     size_t size;
-+ *
+     void *map;
-+ * Get all the SVE vector lengths supported by the KVM host, setting
+     QEMUCPUState **state;
-+ * the bits corresponding to their length in quadwords minus one
+diff --git a/contrib/elf2dmp/qemu_elf.c b/contrib/elf2dmp/qemu_elf.c
-+ * (vq - 1) in @map up to ARM_MAX_VQ.
+index XXXXXXX..XXXXXXX 100644
-+ */
+--- a/contrib/elf2dmp/qemu_elf.c
-+void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map);
++++ b/contrib/elf2dmp/qemu_elf.c
@@ -XXX,XX +XXX,XX @@ static bool check_ehdr(QEMU_Elf *qe)
      return true;
  }
 -int QEMU_Elf_init(QEMU_Elf *qe, const char *filename)
 +static int QEMU_Elf_map(QEMU_Elf *qe, const char *filename)
  {
 +#ifdef CONFIG_LINUX
 +    struct stat st;
 +    int fd;
 +
- /**
++    printf("Using Linux mmap\n");
-  * kvm_arm_set_cpu_features_from_host:
++
-  * @cpu: ARMCPU to set the features for
++    fd = open(filename, O_RDONLY, 0);
-@@ -XXX,XX +XXX,XX @@ static inline int kvm_arm_vgic_probe(void)
++    if (fd == -1) {
- static inline void kvm_arm_pmu_set_irq(CPUState *cs, int irq) {}
++        eprintf("Failed to open ELF dump file \'%s\'\n", filename);
- static inline void kvm_arm_pmu_init(CPUState *cs) {}
++        return 1;
 +static inline void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) {}
  #endif
  static inline const char *gic_class_name(void)
 diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/cpu64.c
 +++ b/target/arm/cpu64.c
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
       * any of the above.  Finally, if SVE is not disabled, then at least one
       * vector length must be enabled.
       */
 +    DECLARE_BITMAP(kvm_supported, ARM_MAX_VQ);
      DECLARE_BITMAP(tmp, ARM_MAX_VQ);
      uint32_t vq, max_vq = 0;
 +    /* Collect the set of vector lengths supported by KVM. */
 +    bitmap_zero(kvm_supported, ARM_MAX_VQ);
 +    if (kvm_enabled() && kvm_arm_sve_supported(CPU(cpu))) {
 +        kvm_arm_sve_get_vls(CPU(cpu), kvm_supported);
 +    } else if (kvm_enabled()) {
 +        assert(!cpu_isar_feature(aa64_sve, cpu));
 +    }
 +
-     /*
++    if (fstat(fd, &st)) {
-      * Process explicit sve<N> properties.
++        eprintf("Failed to get size of ELF dump file\n");
-      * From the properties, sve_vq_map<N> implies sve_vq_init<N>.
++        close(fd);
-@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
++        return 1;
-             return;
++    }
-         }
++    qe->size = st.st_size;
 -        /* Propagate enabled bits down through required powers-of-two. */
 -        for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) {
 -            if (!test_bit(vq - 1, cpu->sve_vq_init)) {
 -                set_bit(vq - 1, cpu->sve_vq_map);
 +        if (kvm_enabled()) {
 +            /*
 +             * For KVM we have to automatically enable all supported uninitialized
 +             * lengths, even when the smaller lengths are not all powers-of-two.
 +             */
 +            bitmap_andnot(tmp, kvm_supported, cpu->sve_vq_init, max_vq);
 +            bitmap_or(cpu->sve_vq_map, cpu->sve_vq_map, tmp, max_vq);
 +        } else {
 +            /* Propagate enabled bits down through required powers-of-two. */
 +            for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) {
 +                if (!test_bit(vq - 1, cpu->sve_vq_init)) {
 +                    set_bit(vq - 1, cpu->sve_vq_map);
 +                }
              }
          }
      } else if (cpu->sve_max_vq == 0) {
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
              return;
          }
 -        /* Disabling a power-of-two disables all larger lengths. */
 -        if (test_bit(0, cpu->sve_vq_init)) {
 -            error_setg(errp, "cannot disable sve128");
 -            error_append_hint(errp, "Disabling sve128 results in all vector "
 -                              "lengths being disabled.\n");
 -            error_append_hint(errp, "With SVE enabled, at least one vector "
 -                              "length must be enabled.\n");
 -            return;
 -        }
 -        for (vq = 2; vq <= ARM_MAX_VQ; vq <<= 1) {
 -            if (test_bit(vq - 1, cpu->sve_vq_init)) {
 -                break;
 +        if (kvm_enabled()) {
 +            /* Disabling a supported length disables all larger lengths. */
 +            for (vq = 1; vq <= ARM_MAX_VQ; ++vq) {
 +                if (test_bit(vq - 1, cpu->sve_vq_init) &&
 +                    test_bit(vq - 1, kvm_supported)) {
 +                    break;
 +                }
              }
 +            max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ;
 +            bitmap_andnot(cpu->sve_vq_map, kvm_supported,
 +                          cpu->sve_vq_init, max_vq);
 +            if (max_vq == 0 || bitmap_empty(cpu->sve_vq_map, max_vq)) {
 +                error_setg(errp, "cannot disable sve%d", vq * 128);
 +                error_append_hint(errp, "Disabling sve%d results in all "
 +                                  "vector lengths being disabled.\n",
 +                                  vq * 128);
 +                error_append_hint(errp, "With SVE enabled, at least one "
 +                                  "vector length must be enabled.\n");
 +                return;
 +            }
 +        } else {
 +            /* Disabling a power-of-two disables all larger lengths. */
 +            if (test_bit(0, cpu->sve_vq_init)) {
 +                error_setg(errp, "cannot disable sve128");
 +                error_append_hint(errp, "Disabling sve128 results in all "
 +                                  "vector lengths being disabled.\n");
 +                error_append_hint(errp, "With SVE enabled, at least one "
 +                                  "vector length must be enabled.\n");
 +                return;
 +            }
 +            for (vq = 2; vq <= ARM_MAX_VQ; vq <<= 1) {
 +                if (test_bit(vq - 1, cpu->sve_vq_init)) {
 +                    break;
 +                }
 +            }
 +            max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ;
 +            bitmap_complement(cpu->sve_vq_map, cpu->sve_vq_init, max_vq);
          }
 -        max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ;
 -        bitmap_complement(cpu->sve_vq_map, cpu->sve_vq_init, max_vq);
          max_vq = find_last_bit(cpu->sve_vq_map, max_vq) + 1;
      }
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
      assert(max_vq != 0);
      bitmap_clear(cpu->sve_vq_map, max_vq, ARM_MAX_VQ - max_vq);
 -    /* Ensure all required powers-of-two are enabled. */
 -    for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) {
 -        if (!test_bit(vq - 1, cpu->sve_vq_map)) {
 -            error_setg(errp, "cannot disable sve%d", vq * 128);
 -            error_append_hint(errp, "sve%d is required as it "
 -                              "is a power-of-two length smaller than "
 -                              "the maximum, sve%d\n",
 -                              vq * 128, max_vq * 128);
 +    if (kvm_enabled()) {
 +        /* Ensure the set of lengths matches what KVM supports. */
 +        bitmap_xor(tmp, cpu->sve_vq_map, kvm_supported, max_vq);
 +        if (!bitmap_empty(tmp, max_vq)) {
 +            vq = find_last_bit(tmp, max_vq) + 1;
 +            if (test_bit(vq - 1, cpu->sve_vq_map)) {
 +                if (cpu->sve_max_vq) {
 +                    error_setg(errp, "cannot set sve-max-vq=%d",
 +                               cpu->sve_max_vq);
 +                    error_append_hint(errp, "This KVM host does not support "
 +                                      "the vector length %d-bits.\n",
 +                                      vq * 128);
 +                    error_append_hint(errp, "It may not be possible to use "
 +                                      "sve-max-vq with this KVM host. Try "
 +                                      "using only sve<N> properties.\n");
 +                } else {
 +                    error_setg(errp, "cannot enable sve%d", vq * 128);
 +                    error_append_hint(errp, "This KVM host does not support "
 +                                      "the vector length %d-bits.\n",
 +                                      vq * 128);
 +                }
 +            } else {
 +                error_setg(errp, "cannot disable sve%d", vq * 128);
 +                error_append_hint(errp, "The KVM host requires all "
 +                                  "supported vector lengths smaller "
 +                                  "than %d bits to also be enabled.\n",
 +                                  max_vq * 128);
 +            }
              return;
          }
 +    } else {
 +        /* Ensure all required powers-of-two are enabled. */
 +        for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) {
 +            if (!test_bit(vq - 1, cpu->sve_vq_map)) {
 +                error_setg(errp, "cannot disable sve%d", vq * 128);
 +                error_append_hint(errp, "sve%d is required as it "
 +                                  "is a power-of-two length smaller than "
 +                                  "the maximum, sve%d\n",
 +                                  vq * 128, max_vq * 128);
 +                return;
 +            }
 +        }
      }
      /*
@@ -XXX,XX +XXX,XX @@ static void cpu_max_set_sve_max_vq(Object *obj, Visitor *v, const char *name,
  {
      ARMCPU *cpu = ARM_CPU(obj);
      Error *err = NULL;
 +    uint32_t max_vq;
 -    visit_type_uint32(v, name, &cpu->sve_max_vq, &err);
 -
 -    if (!err && (cpu->sve_max_vq == 0 || cpu->sve_max_vq > ARM_MAX_VQ)) {
 -        error_setg(&err, "unsupported SVE vector length");
 -        error_append_hint(&err, "Valid sve-max-vq in range [1-%d]\n",
 -                          ARM_MAX_VQ);
 +    visit_type_uint32(v, name, &max_vq, &err);
 +    if (err) {
 +        error_propagate(errp, err);
 +        return;
      }
 -    error_propagate(errp, err);
 +
-+    if (kvm_enabled() && !kvm_arm_sve_supported(CPU(cpu))) {
++    qe->map = mmap(NULL, qe->size, PROT_READ | PROT_WRITE,
-+        error_setg(errp, "cannot set sve-max-vq");
++            MAP_PRIVATE | MAP_NORESERVE, fd, 0);
-+        error_append_hint(errp, "SVE not supported by KVM on this host\n");
++    if (qe->map == MAP_FAILED) {
-+        return;
++        eprintf("Failed to map ELF file\n");
 +        close(fd);
 +        return 1;
 +    }
 +
-+    if (max_vq == 0 || max_vq > ARM_MAX_VQ) {
++    close(fd);
-+        error_setg(errp, "unsupported SVE vector length");
++#else
-+        error_append_hint(errp, "Valid sve-max-vq in range [1-%d]\n",
+     GError *gerr = NULL;
-+                          ARM_MAX_VQ);
+-    int err = 0;
 +        return;
 +    }
 +
-+    cpu->sve_max_vq = max_vq;
++    printf("Using GLib mmap\n");
- }
+     qe->gmf = g_mapped_file_new(filename, TRUE, &gerr);
- static void cpu_arm_get_sve_vq(Object *obj, Visitor *v, const char *name,
+     if (gerr) {
-@@ -XXX,XX +XXX,XX @@ static void cpu_arm_set_sve_vq(Object *obj, Visitor *v, const char *name,
+@@ -XXX,XX +XXX,XX @@ int QEMU_Elf_init(QEMU_Elf *qe, const char *filename)
-         return;
-     }
+     qe->map = g_mapped_file_get_contents(qe->gmf);
+     qe->size = g_mapped_file_get_length(qe->gmf);
-+    if (value && kvm_enabled() && !kvm_arm_sve_supported(CPU(cpu))) {
++#endif
 +        error_setg(errp, "cannot enable %s", name);
 +        error_append_hint(errp, "SVE not supported by KVM on this host\n");
 +        return;
 +    }
 +
-     if (value) {
++    return 0;
          set_bit(vq - 1, cpu->sve_vq_map);
      } else {
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
          cpu->ctr = 0x80038003; /* 32 byte I and D cacheline size, VIPT icache */
          cpu->dcz_blocksize = 7; /*  512 bytes */
  #endif
 -
 -        object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_max_vq,
 -                            cpu_max_set_sve_max_vq, NULL, NULL, &error_fatal);
 -
 -        for (vq = 1; vq <= ARM_MAX_VQ; ++vq) {
 -            char name[8];
 -            sprintf(name, "sve%d", vq * 128);
 -            object_property_add(obj, name, "bool", cpu_arm_get_sve_vq,
 -                                cpu_arm_set_sve_vq, NULL, NULL, &error_fatal);
 -        }
      }
      object_property_add(obj, "sve", "bool", cpu_arm_get_sve,
                          cpu_arm_set_sve, NULL, NULL, &error_fatal);
 +    object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_max_vq,
 +                        cpu_max_set_sve_max_vq, NULL, NULL, &error_fatal);
 +
 +    for (vq = 1; vq <= ARM_MAX_VQ; ++vq) {
 +        char name[8];
 +        sprintf(name, "sve%d", vq * 128);
 +        object_property_add(obj, name, "bool", cpu_arm_get_sve_vq,
 +                            cpu_arm_set_sve_vq, NULL, NULL, &error_fatal);
 +    }
  }
  struct ARMCPUInfo {
 diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/kvm64.c
 +++ b/target/arm/kvm64.c
@@ -XXX,XX +XXX,XX @@ bool kvm_arm_sve_supported(CPUState *cpu)
      return kvm_check_extension(s, KVM_CAP_ARM_SVE);
  }
 +QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
 +
 +void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)
 +{
 +    /* Only call this function if kvm_arm_sve_supported() returns true. */
 +    static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS];
 +    static bool probed;
 +    uint32_t vq = 0;
 +    int i, j;
 +
 +    bitmap_clear(map, 0, ARM_MAX_VQ);
 +
 +    /*
 +     * KVM ensures all host CPUs support the same set of vector lengths.
 +     * So we only need to create the scratch VCPUs once and then cache
 +     * the results.
 +     */
 +    if (!probed) {
 +        struct kvm_vcpu_init init = {
 +            .target = -1,
 +            .features[0] = (1 << KVM_ARM_VCPU_SVE),
 +        };
 +        struct kvm_one_reg reg = {
 +            .id = KVM_REG_ARM64_SVE_VLS,
 +            .addr = (uint64_t)&vls[0],
 +        };
 +        int fdarray[3], ret;
 +
 +        probed = true;
 +
 +        if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) {
 +            error_report("failed to create scratch VCPU with SVE enabled");
 +            abort();
 +        }
 +        ret = ioctl(fdarray[2], KVM_GET_ONE_REG, &reg);
 +        kvm_arm_destroy_scratch_host_vcpu(fdarray);
 +        if (ret) {
 +            error_report("failed to get KVM_REG_ARM64_SVE_VLS: %s",
 +                         strerror(errno));
 +            abort();
 +        }
 +
 +        for (i = KVM_ARM64_SVE_VLS_WORDS - 1; i >= 0; --i) {
 +            if (vls[i]) {
 +                vq = 64 - clz64(vls[i]) + i * 64;
 +                break;
 +            }
 +        }
 +        if (vq > ARM_MAX_VQ) {
 +            warn_report("KVM supports vector lengths larger than "
 +                        "QEMU can enable");
 +        }
 +    }
 +
 +    for (i = 0; i < KVM_ARM64_SVE_VLS_WORDS; ++i) {
 +        if (!vls[i]) {
 +            continue;
 +        }
 +        for (j = 1; j <= 64; ++j) {
 +            vq = j + i * 64;
 +            if (vq > ARM_MAX_VQ) {
 +                return;
 +            }
 +            if (vls[i] & (1UL << (j - 1))) {
 +                set_bit(vq - 1, map);
 +            }
 +        }
 +    }
 +}
 +
-+static int kvm_arm_sve_set_vls(CPUState *cs)
++static void QEMU_Elf_unmap(QEMU_Elf *qe)
 +{
-+    uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = {0};
++#ifdef CONFIG_LINUX
-+    struct kvm_one_reg reg = {
++    munmap(qe->map, qe->size);
-+        .id = KVM_REG_ARM64_SVE_VLS,
++#else
-+        .addr = (uint64_t)&vls[0],
++    g_mapped_file_unref(qe->gmf);
-+    };
++#endif
 +    ARMCPU *cpu = ARM_CPU(cs);
 +    uint32_t vq;
 +    int i, j;
 +
 +    assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX);
 +
 +    for (vq = 1; vq <= cpu->sve_max_vq; ++vq) {
 +        if (test_bit(vq - 1, cpu->sve_vq_map)) {
 +            i = (vq - 1) / 64;
 +            j = (vq - 1) % 64;
 +            vls[i] |= 1UL << j;
 +        }
 +    }
 +
 +    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 +}
 +
- #define ARM_CPU_ID_MPIDR       3, 0, 0, 0, 5
++int QEMU_Elf_init(QEMU_Elf *qe, const char *filename)
++{
- int kvm_arch_init_vcpu(CPUState *cs)
++    if (QEMU_Elf_map(qe, filename)) {
-@@ -XXX,XX +XXX,XX @@ int kvm_arch_init_vcpu(CPUState *cs)
++        return 1;
++    }
-     if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
-         !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
+     if (!check_ehdr(qe)) {
--        fprintf(stderr, "KVM is not supported for this guest CPU type\n");
+         eprintf("Input file has the wrong format\n");
-+        error_report("KVM is not supported for this guest CPU type");
+-        err = 1;
-         return -EINVAL;
+-        goto out_unmap;
 +        QEMU_Elf_unmap(qe);
 +        return 1;
      }
-@@ -XXX,XX +XXX,XX @@ int kvm_arch_init_vcpu(CPUState *cs)
+     if (init_states(qe)) {
          eprintf("Failed to extract QEMU CPU states\n");
 -        err = 1;
 -        goto out_unmap;
 +        QEMU_Elf_unmap(qe);
 +        return 1;
      }
-     if (cpu_isar_feature(aa64_sve, cpu)) {
+     return 0;
-+        ret = kvm_arm_sve_set_vls(cs);
+-
-+        if (ret) {
+-out_unmap:
-+            return ret;
+-    g_mapped_file_unref(qe->gmf);
-+        }
+-
-         ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_SVE);
+-    return err;
          if (ret) {
              return ret;
 diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c
 index XXXXXXX..XXXXXXX 100644
 --- a/tests/arm-cpu-features.c
 +++ b/tests/arm-cpu-features.c
@@ -XXX,XX +XXX,XX @@ static QDict *resp_get_props(QDict *resp)
      return qdict;
  }
-+static bool resp_get_feature(QDict *resp, const char *feature)
+ void QEMU_Elf_exit(QEMU_Elf *qe)
-+{
+ {
-+    QDict *props;
+     exit_states(qe);
-+
+-    g_mapped_file_unref(qe->gmf);
-+    g_assert(resp);
++    QEMU_Elf_unmap(qe);
 +    g_assert(resp_has_props(resp));
 +    props = resp_get_props(resp);
 +    g_assert(qdict_get(props, feature));
 +    return qdict_get_bool(props, feature);
 +}
 +
  #define assert_has_feature(qts, cpu_type, feature)                     \
  ({                                                                     \
      QDict *_resp = do_query_no_props(qts, cpu_type);                   \
@@ -XXX,XX +XXX,XX @@ static void sve_tests_sve_off(const void *data)
      qtest_quit(qts);
  }
-+static void sve_tests_sve_off_kvm(const void *data)
-+{
-+    QTestState *qts;
-+
-+    qts = qtest_init(MACHINE "-accel kvm -cpu max,sve=off");
-+
-+    /*
-+     * We don't know if this host supports SVE so we don't
-+     * attempt to test enabling anything. We only test that
-+     * everything is disabled (as it should be with sve=off)
-+     * and that using sve<N>=off to explicitly disable vector
-+     * lengths is OK too.
-+     */
-+    assert_sve_vls(qts, "max", 0, NULL);
-+    assert_sve_vls(qts, "max", 0, "{ 'sve128': false }");
-+
-+    qtest_quit(qts);
-+}
-+
- static void test_query_cpu_model_expansion(const void *data)
- {
-     QTestState *qts;
-@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data)
-     qts = qtest_init(MACHINE "-accel kvm -cpu host");
-     if (g_str_equal(qtest_get_arch(), "aarch64")) {
-+        bool kvm_supports_sve;
-+        char max_name[8], name[8];
-+        uint32_t max_vq, vq;
-+        uint64_t vls;
-+        QDict *resp;
-+        char *error;
-+
-         assert_has_feature(qts, "host", "aarch64");
-         assert_has_feature(qts, "host", "pmu");
--        assert_has_feature(qts, "max", "sve");
--
-         assert_error(qts, "cortex-a15",
-             "We cannot guarantee the CPU type 'cortex-a15' works "
-             "with KVM on this host", NULL);
-+
-+        assert_has_feature(qts, "max", "sve");
-+        resp = do_query_no_props(qts, "max");
-+        kvm_supports_sve = resp_get_feature(resp, "sve");
-+        vls = resp_get_sve_vls(resp);
-+        qobject_unref(resp);
-+
-+        if (kvm_supports_sve) {
-+            g_assert(vls != 0);
-+            max_vq = 64 - __builtin_clzll(vls);
-+            sprintf(max_name, "sve%d", max_vq * 128);
-+
-+            /* Enabling a supported length is of course fine. */
-+            assert_sve_vls(qts, "max", vls, "{ %s: true }", max_name);
-+
-+            /* Get the next supported length smaller than max-vq. */
-+            vq = 64 - __builtin_clzll(vls & ~BIT_ULL(max_vq - 1));
-+            if (vq) {
-+                /*
-+                 * We have at least one length smaller than max-vq,
-+                 * so we can disable max-vq.
-+                 */
-+                assert_sve_vls(qts, "max", (vls & ~BIT_ULL(max_vq - 1)),
-+                               "{ %s: false }", max_name);
-+
-+                /*
-+                 * Smaller, supported vector lengths cannot be disabled
-+                 * unless all larger, supported vector lengths are also
-+                 * disabled.
-+                 */
-+                sprintf(name, "sve%d", vq * 128);
-+                error = g_strdup_printf("cannot disable %s", name);
-+                assert_error(qts, "max", error,
-+                             "{ %s: true, %s: false }",
-+                             max_name, name);
-+                g_free(error);
-+            }
-+
-+            /*
-+             * The smallest, supported vector length is required, because
-+             * we need at least one vector length enabled.
-+             */
-+            vq = __builtin_ffsll(vls);
-+            sprintf(name, "sve%d", vq * 128);
-+            error = g_strdup_printf("cannot disable %s", name);
-+            assert_error(qts, "max", error, "{ %s: false }", name);
-+            g_free(error);
-+
-+            /* Get an unsupported length. */
-+            for (vq = 1; vq <= max_vq; ++vq) {
-+                if (!(vls & BIT_ULL(vq - 1))) {
-+                    break;
-+                }
-+            }
-+            if (vq <= SVE_MAX_VQ) {
-+                sprintf(name, "sve%d", vq * 128);
-+                error = g_strdup_printf("cannot enable %s", name);
-+                assert_error(qts, "max", error, "{ %s: true }", name);
-+                g_free(error);
-+            }
-+        } else {
-+            g_assert(vls == 0);
-+        }
-     } else {
-         assert_has_not_feature(qts, "host", "aarch64");
-         assert_has_not_feature(qts, "host", "pmu");
-@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
-     if (kvm_available) {
-         qtest_add_data_func("/arm/kvm/query-cpu-model-expansion",
-                             NULL, test_query_cpu_model_expansion_kvm);
-+        if (g_str_equal(qtest_get_arch(), "aarch64")) {
-+            qtest_add_data_func("/arm/kvm/query-cpu-model-expansion/sve-off",
-+                                NULL, sve_tests_sve_off_kvm);
-+        }
-     }
-     return g_test_run();
-diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst
-index XXXXXXX..XXXXXXX 100644
---- a/docs/arm-cpu-features.rst
-+++ b/docs/arm-cpu-features.rst
-@@ -XXX,XX +XXX,XX @@ SVE CPU Property Dependencies and Constraints
-) At least one vector length must be enabled when `sve` is enabled.
--  2) If a vector length `N` is enabled, then all power-of-two vector
--     lengths smaller than `N` must also be enabled.  E.g. if `sve512`
--     is enabled, then the 128-bit and 256-bit vector lengths must also
--     be enabled.
-+  2) If a vector length `N` is enabled, then, when KVM is enabled, all
-+     smaller, host supported vector lengths must also be enabled.  If
-+     KVM is not enabled, then only all the smaller, power-of-two vector
-+     lengths must be enabled.  E.g. with KVM if the host supports all
-+     vector lengths up to 512-bits (128, 256, 384, 512), then if `sve512`
-+     is enabled, the 128-bit vector length, 256-bit vector length, and
-+     384-bit vector length must also be enabled. Without KVM, the 384-bit
-+     vector length would not be required.
-+
-+  3) If KVM is enabled then only vector lengths that the host CPU type
-+     supports may be enabled.  If SVE is not supported by the host, then
-+     no `sve*` properties may be enabled.
- SVE CPU Property Parsing Semantics
- ----------------------------------
-@@ -XXX,XX +XXX,XX @@ SVE CPU Property Parsing Semantics
-      an error is generated.
-) If SVE is enabled (`sve=on`), but no `sve<N>` CPU properties are
--     provided, then all supported vector lengths are enabled, including
--     the non-power-of-two lengths.
-+     provided, then all supported vector lengths are enabled, which when
-+     KVM is not in use means including the non-power-of-two lengths, and,
-+     when KVM is in use, it means all vector lengths supported by the host
-+     processor.
-) If SVE is enabled, then an error is generated when attempting to
-      disable the last enabled vector length (see constraint (1) of "SVE
-@@ -XXX,XX +XXX,XX @@ SVE CPU Property Parsing Semantics
-      has been explicitly disabled, then an error is generated (see
-      constraint (2) of "SVE CPU Property Dependencies and Constraints").
--  5) If one or more `sve<N>` CPU properties are set `off`, but no `sve<N>`,
-+  5) When KVM is enabled, if the host does not support SVE, then an error
-+     is generated when attempting to enable any `sve*` properties (see
-+     constraint (3) of "SVE CPU Property Dependencies and Constraints").
-+
-+  6) When KVM is enabled, if the host does support SVE, then an error is
-+     generated when attempting to enable any vector lengths not supported
-+     by the host (see constraint (3) of "SVE CPU Property Dependencies and
-+     Constraints").
-+
-+  7) If one or more `sve<N>` CPU properties are set `off`, but no `sve<N>`,
-      CPU properties are set `on`, then the specified vector lengths are
-      disabled but the default for any unspecified lengths remains enabled.
--     Disabling a power-of-two vector length also disables all vector
--     lengths larger than the power-of-two length (see constraint (2) of
--     "SVE CPU Property Dependencies and Constraints").
-+     When KVM is not enabled, disabling a power-of-two vector length also
-+     disables all vector lengths larger than the power-of-two length.
-+     When KVM is enabled, then disabling any supported vector length also
-+     disables all larger vector lengths (see constraint (2) of "SVE CPU
-+     Property Dependencies and Constraints").
--  6) If one or more `sve<N>` CPU properties are set to `on`, then they
-+  8) If one or more `sve<N>` CPU properties are set to `on`, then they
-      are enabled and all unspecified lengths default to disabled, except
-      for the required lengths per constraint (2) of "SVE CPU Property
-      Dependencies and Constraints", which will even be auto-enabled if
-      they were not explicitly enabled.
--  7) If SVE was disabled (`sve=off`), allowing all vector lengths to be
-+  9) If SVE was disabled (`sve=off`), allowing all vector lengths to be
-      explicitly disabled (i.e. avoiding the error specified in (3) of
-      "SVE CPU Property Parsing Semantics"), then if later an `sve=on` is
-      provided an error will be generated.  To avoid this error, one must
 --
-.20.1
+.34.1

-[PULL 08/51] target/arm: Reduce tests vs M-profile in cpu_get_tb_cpu_state
+[PULL 30/30] elf2dmp: rework PDB_STREAM_INDEXES::segments obtaining
-From: Richard Henderson <richard.henderson@linaro.org>
+From: Viktor Prutyanov <viktor@daynix.com>
-Hoist the computation of some TBFLAG_A32 bits that only apply to
+PDB for Windows 11 kernel has slightly different structure compared to
-M-profile under a single test for ARM_FEATURE_M.
+previous versions. Since elf2dmp doesn't use the other fields, copy only
 'segments' field from PDB_STREAM_INDEXES.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
-Message-id: 20191023150057.25731-7-richard.henderson@linaro.org
+Message-id: 20230915170153.10959-6-viktor@daynix.com
 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
 ---
- target/arm/helper.c | 49 +++++++++++++++++++++------------------------
+ contrib/elf2dmp/pdb.h |  2 +-
-file changed, 23 insertions(+), 26 deletions(-)
+ contrib/elf2dmp/pdb.c | 15 ++++-----------
 files changed, 5 insertions(+), 12 deletions(-)
-diff --git a/target/arm/helper.c b/target/arm/helper.c
+diff --git a/contrib/elf2dmp/pdb.h b/contrib/elf2dmp/pdb.h
 index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
+--- a/contrib/elf2dmp/pdb.h
-+++ b/target/arm/helper.c
++++ b/contrib/elf2dmp/pdb.h
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
+@@ -XXX,XX +XXX,XX @@ struct pdb_reader {
+     } ds;
-         if (arm_feature(env, ARM_FEATURE_M)) {
+     uint32_t file_used[1024];
-             flags = rebuild_hflags_m32(env, fp_el, mmu_idx);
+     PDB_SYMBOLS *symbols;
-+
+-    PDB_STREAM_INDEXES sidx;
-+            if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
++    uint16_t segments;
-+                FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S)
+     uint8_t *modimage;
-+                != env->v7m.secure) {
+     char *segs;
-+                flags = FIELD_DP32(flags, TBFLAG_A32, FPCCR_S_WRONG, 1);
+     size_t segs_size;
-+            }
+diff --git a/contrib/elf2dmp/pdb.c b/contrib/elf2dmp/pdb.c
-+
+index XXXXXXX..XXXXXXX 100644
-+            if ((env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) &&
+--- a/contrib/elf2dmp/pdb.c
-+                (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) ||
++++ b/contrib/elf2dmp/pdb.c
-+                 (env->v7m.secure &&
+@@ -XXX,XX +XXX,XX @@ static void *pdb_ds_read_file(struct pdb_reader* r, uint32_t file_number)
-+                  !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) {
+ static int pdb_init_segments(struct pdb_reader *r)
-+                /*
+ {
-+                 * ASPEN is set, but FPCA/SFPA indicate that there is no
+     char *segs;
-+                 * active FP context; we must create a new FP context before
+-    unsigned stream_idx = r->sidx.segments;
-+                 * executing any FP insn.
++    unsigned stream_idx = r->segments;
-+                 */
-+                flags = FIELD_DP32(flags, TBFLAG_A32, NEW_FP_CTXT_NEEDED, 1);
+     segs = pdb_ds_read_file(r, stream_idx);
-+            }
+     if (!segs) {
-+
+@@ -XXX,XX +XXX,XX @@ static int pdb_init_symbols(struct pdb_reader *r)
-+            bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
+ {
-+            if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) {
+     int err = 0;
-+                flags = FIELD_DP32(flags, TBFLAG_A32, LSPACT, 1);
+     PDB_SYMBOLS *symbols;
-+            }
+-    PDB_STREAM_INDEXES *sidx = &r->sidx;
-         } else {
+-
-             flags = rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
+-    memset(sidx, -1, sizeof(*sidx));
-         }
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
+     symbols = pdb_ds_read_file(r, 3);
-         }
+     if (!symbols) {
-     }
+@@ -XXX,XX +XXX,XX @@ static int pdb_init_symbols(struct pdb_reader *r)
--    if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
+     r->symbols = symbols;
--        FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S) != env->v7m.secure) {
--        flags = FIELD_DP32(flags, TBFLAG_A32, FPCCR_S_WRONG, 1);
+-    if (symbols->stream_index_size != sizeof(PDB_STREAM_INDEXES)) {
 -        err = 1;
 -        goto out_symbols;
 -    }
 -
--    if (arm_feature(env, ARM_FEATURE_M) &&
+-    memcpy(sidx, (const char *)symbols + sizeof(PDB_SYMBOLS) +
--        (env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) &&
++    r->segments = *(uint16_t *)((const char *)symbols + sizeof(PDB_SYMBOLS) +
--        (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) ||
+             symbols->module_size + symbols->offset_size +
--         (env->v7m.secure &&
+             symbols->hash_size + symbols->srcmodule_size +
--          !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) {
+-            symbols->pdbimport_size + symbols->unknown2_size, sizeof(*sidx));
--        /*
++            symbols->pdbimport_size + symbols->unknown2_size +
--         * ASPEN is set, but FPCA/SFPA indicate that there is no active
++            offsetof(PDB_STREAM_INDEXES, segments));
--         * FP context; we must create a new FP context before executing
--         * any FP insn.
+     /* Read global symbol table */
--         */
+     r->modimage = pdb_ds_read_file(r, symbols->gsym_file);
 -        flags = FIELD_DP32(flags, TBFLAG_A32, NEW_FP_CTXT_NEEDED, 1);
 -    }
 -
 -    if (arm_feature(env, ARM_FEATURE_M)) {
 -        bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
 -
 -        if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) {
 -            flags = FIELD_DP32(flags, TBFLAG_A32, LSPACT, 1);
 -        }
 -    }
 -
      if (!arm_feature(env, ARM_FEATURE_M)) {
          int target_el = arm_debug_target_el(env);
 --
-.20.1
+.34.1

-[PULL 09/51] target/arm: Split out rebuild_hflags_a32
+Deleted patch
-From: Richard Henderson <richard.henderson@linaro.org>
-Currently a trivial wrapper for rebuild_hflags_common_32.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-8-richard.henderson@linaro.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- target/arm/helper.c | 8 +++++++-
-file changed, 7 insertions(+), 1 deletion(-)
-diff --git a/target/arm/helper.c b/target/arm/helper.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
-+++ b/target/arm/helper.c
-@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el,
-     return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
- }
-+static uint32_t rebuild_hflags_a32(CPUARMState *env, int fp_el,
-+                                   ARMMMUIdx mmu_idx)
-+{
-+    return rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
-+}
-+
- static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
-                                    ARMMMUIdx mmu_idx)
- {
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
-                 flags = FIELD_DP32(flags, TBFLAG_A32, LSPACT, 1);
-             }
-         } else {
--            flags = rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
-+            flags = rebuild_hflags_a32(env, fp_el, mmu_idx);
-         }
-         flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
---
-.20.1

-[PULL 10/51] target/arm: Split out rebuild_hflags_aprofile
+Deleted patch
-From: Richard Henderson <richard.henderson@linaro.org>
-Create a function to compute the values of the TBFLAG_ANY bits
-that will be cached, and are used by A-profile.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-9-richard.henderson@linaro.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- target/arm/helper.c | 20 ++++++++++++--------
-file changed, 12 insertions(+), 8 deletions(-)
-diff --git a/target/arm/helper.c b/target/arm/helper.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
-+++ b/target/arm/helper.c
-@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el,
-     return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
- }
-+static uint32_t rebuild_hflags_aprofile(CPUARMState *env)
-+{
-+    int flags = 0;
-+
-+    flags = FIELD_DP32(flags, TBFLAG_ANY, DEBUG_TARGET_EL,
-+                       arm_debug_target_el(env));
-+    return flags;
-+}
-+
- static uint32_t rebuild_hflags_a32(CPUARMState *env, int fp_el,
-                                    ARMMMUIdx mmu_idx)
- {
--    return rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
-+    uint32_t flags = rebuild_hflags_aprofile(env);
-+    return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
- }
- static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
-                                    ARMMMUIdx mmu_idx)
- {
-+    uint32_t flags = rebuild_hflags_aprofile(env);
-     ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
-     ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
--    uint32_t flags = 0;
-     uint64_t sctlr;
-     int tbii, tbid;
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
-         }
-     }
--    if (!arm_feature(env, ARM_FEATURE_M)) {
--        int target_el = arm_debug_target_el(env);
--
--        flags = FIELD_DP32(flags, TBFLAG_ANY, DEBUG_TARGET_EL, target_el);
--    }
--
-     *pflags = flags;
-     *cs_base = 0;
- }
---
-.20.1

-[PULL 11/51] target/arm: Hoist XSCALE_CPAR, VECLEN, VECSTRIDE in cpu_get_tb_cpu_state
+Deleted patch
-From: Richard Henderson <richard.henderson@linaro.org>
-We do not need to compute any of these values for M-profile.
-Further, XSCALE_CPAR overlaps VECSTRIDE so obviously the two
-sets must be mutually exclusive.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-10-richard.henderson@linaro.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- target/arm/helper.c | 21 ++++++++++++++-------
-file changed, 14 insertions(+), 7 deletions(-)
-diff --git a/target/arm/helper.c b/target/arm/helper.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
-+++ b/target/arm/helper.c
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
-             }
-         } else {
-             flags = rebuild_hflags_a32(env, fp_el, mmu_idx);
-+
-+            /*
-+             * Note that XSCALE_CPAR shares bits with VECSTRIDE.
-+             * Note that VECLEN+VECSTRIDE are RES0 for M-profile.
-+             */
-+            if (arm_feature(env, ARM_FEATURE_XSCALE)) {
-+                flags = FIELD_DP32(flags, TBFLAG_A32,
-+                                   XSCALE_CPAR, env->cp15.c15_cpar);
-+            } else {
-+                flags = FIELD_DP32(flags, TBFLAG_A32, VECLEN,
-+                                   env->vfp.vec_len);
-+                flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE,
-+                                   env->vfp.vec_stride);
-+            }
-         }
-         flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
--        flags = FIELD_DP32(flags, TBFLAG_A32, VECLEN, env->vfp.vec_len);
--        flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE, env->vfp.vec_stride);
-         flags = FIELD_DP32(flags, TBFLAG_A32, CONDEXEC, env->condexec_bits);
-         if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
-             || arm_el_is_aa64(env, 1) || arm_feature(env, ARM_FEATURE_M)) {
-             flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
-         }
--        /* Note that XSCALE_CPAR shares bits with VECSTRIDE */
--        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
--            flags = FIELD_DP32(flags, TBFLAG_A32,
--                               XSCALE_CPAR, env->cp15.c15_cpar);
--        }
-     }
-     /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
---
-.20.1

-[PULL 12/51] target/arm: Simplify set of PSTATE_SS in cpu_get_tb_cpu_state
+Deleted patch
-From: Richard Henderson <richard.henderson@linaro.org>
-Hoist the variable load for PSTATE into the existing test vs is_a64.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-11-richard.henderson@linaro.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- target/arm/helper.c | 20 ++++++++------------
-file changed, 8 insertions(+), 12 deletions(-)
-diff --git a/target/arm/helper.c b/target/arm/helper.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
-+++ b/target/arm/helper.c
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
-     ARMMMUIdx mmu_idx = arm_mmu_idx(env);
-     int current_el = arm_current_el(env);
-     int fp_el = fp_exception_el(env, current_el);
--    uint32_t flags;
-+    uint32_t flags, pstate_for_ss;
-     if (is_a64(env)) {
-         *pc = env->pc;
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
-         if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
-             flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
-         }
-+        pstate_for_ss = env->pstate;
-     } else {
-         *pc = env->regs[15];
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
-             || arm_el_is_aa64(env, 1) || arm_feature(env, ARM_FEATURE_M)) {
-             flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
-         }
-+        pstate_for_ss = env->uncached_cpsr;
-     }
--    /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
-+    /*
-+     * The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
-      * states defined in the ARM ARM for software singlestep:
-      *  SS_ACTIVE   PSTATE.SS   State
-      *     0            x       Inactive (the TB flag for SS is always 0)
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
-      *     1            1       Active-not-pending
-      * SS_ACTIVE is set in hflags; PSTATE_SS is computed every TB.
-      */
--    if (FIELD_EX32(flags, TBFLAG_ANY, SS_ACTIVE)) {
--        if (is_a64(env)) {
--            if (env->pstate & PSTATE_SS) {
--                flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE_SS, 1);
--            }
--        } else {
--            if (env->uncached_cpsr & PSTATE_SS) {
--                flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE_SS, 1);
--            }
--        }
-+    if (FIELD_EX32(flags, TBFLAG_ANY, SS_ACTIVE) &&
-+        (pstate_for_ss & PSTATE_SS)) {
-+        flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE_SS, 1);
-     }
-     *pflags = flags;
---
-.20.1

-[PULL 16/51] target/arm: Hoist store to cs_base in cpu_get_tb_cpu_state
+Deleted patch
-From: Richard Henderson <richard.henderson@linaro.org>
-By performing this store early, we avoid having to save and restore
-the register holding the address around any function calls.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-15-richard.henderson@linaro.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- target/arm/helper.c | 2 +-
-file changed, 1 insertion(+), 1 deletion(-)
-diff --git a/target/arm/helper.c b/target/arm/helper.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
-+++ b/target/arm/helper.c
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
- {
-     uint32_t flags, pstate_for_ss;
-+    *cs_base = 0;
-     flags = rebuild_hflags_internal(env);
-     if (is_a64(env)) {
-@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
-     }
-     *pflags = flags;
--    *cs_base = 0;
- }
- #ifdef TARGET_AARCH64
---
-.20.1

-[PULL 17/51] target/arm: Add HELPER(rebuild_hflags_{a32, a64, m32})
+Deleted patch
-From: Richard Henderson <richard.henderson@linaro.org>
-These functions are given the mode and el state of the cpu
-and write the computed value to env->hflags.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-16-richard.henderson@linaro.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- target/arm/helper.h |  4 ++++
- target/arm/helper.c | 24 ++++++++++++++++++++++++
-files changed, 28 insertions(+)
-diff --git a/target/arm/helper.h b/target/arm/helper.h
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.h
-+++ b/target/arm/helper.h
-@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(msr_banked, void, env, i32, i32, i32)
- DEF_HELPER_2(get_user_reg, i32, env, i32)
- DEF_HELPER_3(set_user_reg, void, env, i32, i32)
-+DEF_HELPER_FLAGS_2(rebuild_hflags_m32, TCG_CALL_NO_RWG, void, env, int)
-+DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int)
-+DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int)
-+
- DEF_HELPER_1(vfp_get_fpscr, i32, env)
- DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
-diff --git a/target/arm/helper.c b/target/arm/helper.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
-+++ b/target/arm/helper.c
-@@ -XXX,XX +XXX,XX @@ void arm_rebuild_hflags(CPUARMState *env)
-     env->hflags = rebuild_hflags_internal(env);
- }
-+void HELPER(rebuild_hflags_m32)(CPUARMState *env, int el)
-+{
-+    int fp_el = fp_exception_el(env, el);
-+    ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
-+
-+    env->hflags = rebuild_hflags_m32(env, fp_el, mmu_idx);
-+}
-+
-+void HELPER(rebuild_hflags_a32)(CPUARMState *env, int el)
-+{
-+    int fp_el = fp_exception_el(env, el);
-+    ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
-+
-+    env->hflags = rebuild_hflags_a32(env, fp_el, mmu_idx);
-+}
-+
-+void HELPER(rebuild_hflags_a64)(CPUARMState *env, int el)
-+{
-+    int fp_el = fp_exception_el(env, el);
-+    ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
-+
-+    env->hflags = rebuild_hflags_a64(env, el, fp_el, mmu_idx);
-+}
-+
- void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
-                           target_ulong *cs_base, uint32_t *pflags)
- {
---
-.20.1

-[PULL 18/51] target/arm: Rebuild hflags at EL changes
+Deleted patch
-From: Richard Henderson <richard.henderson@linaro.org>
-Begin setting, but not relying upon, env->hflags.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-17-richard.henderson@linaro.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- linux-user/syscall.c    | 1 +
- target/arm/cpu.c        | 1 +
- target/arm/helper-a64.c | 3 +++
- target/arm/helper.c     | 2 ++
- target/arm/machine.c    | 1 +
- target/arm/op_helper.c  | 1 +
-files changed, 9 insertions(+)
-diff --git a/linux-user/syscall.c b/linux-user/syscall.c
-index XXXXXXX..XXXXXXX 100644
---- a/linux-user/syscall.c
-+++ b/linux-user/syscall.c
-@@ -XXX,XX +XXX,XX @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1,
-                     aarch64_sve_narrow_vq(env, vq);
-                 }
-                 env->vfp.zcr_el[1] = vq - 1;
-+                arm_rebuild_hflags(env);
-                 ret = vq * 16;
-             }
-             return ret;
-diff --git a/target/arm/cpu.c b/target/arm/cpu.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/cpu.c
-+++ b/target/arm/cpu.c
-@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(CPUState *s)
-     hw_breakpoint_update_all(cpu);
-     hw_watchpoint_update_all(cpu);
-+    arm_rebuild_hflags(env);
- }
- bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
-diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper-a64.c
-+++ b/target/arm/helper-a64.c
-@@ -XXX,XX +XXX,XX @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
-         } else {
-             env->regs[15] = new_pc & ~0x3;
-         }
-+        helper_rebuild_hflags_a32(env, new_el);
-         qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
-                       "AArch32 EL%d PC 0x%" PRIx32 "\n",
-                       cur_el, new_el, env->regs[15]);
-@@ -XXX,XX +XXX,XX @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
-         }
-         aarch64_restore_sp(env, new_el);
-         env->pc = new_pc;
-+        helper_rebuild_hflags_a64(env, new_el);
-         qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
-                       "AArch64 EL%d PC 0x%" PRIx64 "\n",
-                       cur_el, new_el, env->pc);
-     }
-+
-     /*
-      * Note that cur_el can never be 0.  If new_el is 0, then
-      * el0_a64 is return_to_aa64, else el0_a64 is ignored.
-diff --git a/target/arm/helper.c b/target/arm/helper.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
-+++ b/target/arm/helper.c
-@@ -XXX,XX +XXX,XX @@ static void take_aarch32_exception(CPUARMState *env, int new_mode,
-         env->regs[14] = env->regs[15] + offset;
-     }
-     env->regs[15] = newpc;
-+    arm_rebuild_hflags(env);
- }
- static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs)
-@@ -XXX,XX +XXX,XX @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
-     pstate_write(env, PSTATE_DAIF | new_mode);
-     env->aarch64 = 1;
-     aarch64_restore_sp(env, new_el);
-+    helper_rebuild_hflags_a64(env, new_el);
-     env->pc = addr;
-diff --git a/target/arm/machine.c b/target/arm/machine.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/machine.c
-+++ b/target/arm/machine.c
-@@ -XXX,XX +XXX,XX @@ static int cpu_post_load(void *opaque, int version_id)
-     if (!kvm_enabled()) {
-         pmu_op_finish(&cpu->env);
-     }
-+    arm_rebuild_hflags(&cpu->env);
-     return 0;
- }
-diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/op_helper.c
-+++ b/target/arm/op_helper.c
-@@ -XXX,XX +XXX,XX @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val)
-      * state. Do the masking now.
-      */
-     env->regs[15] &= (env->thumb ? ~1 : ~3);
-+    arm_rebuild_hflags(env);
-     qemu_mutex_lock_iothread();
-     arm_call_el_change_hook(env_archcpu(env));
---
-.20.1

-[PULL 19/51] target/arm: Rebuild hflags at MSR writes
+Deleted patch
-From: Richard Henderson <richard.henderson@linaro.org>
-Continue setting, but not relying upon, env->hflags.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-18-richard.henderson@linaro.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- target/arm/translate-a64.c | 13 +++++++++++--
- target/arm/translate.c     | 28 +++++++++++++++++++++++-----
-files changed, 34 insertions(+), 7 deletions(-)
-diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/translate-a64.c
-+++ b/target/arm/translate-a64.c
-@@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
-     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
-         /* I/O operations must end the TB here (whether read or write) */
-         s->base.is_jmp = DISAS_UPDATE;
--    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
--        /* We default to ending the TB on a coprocessor register write,
-+    }
-+    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
-+        /*
-+         * A write to any coprocessor register that ends a TB
-+         * must rebuild the hflags for the next TB.
-+         */
-+        TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
-+        gen_helper_rebuild_hflags_a64(cpu_env, tcg_el);
-+        tcg_temp_free_i32(tcg_el);
-+        /*
-+         * We default to ending the TB on a coprocessor register write,
-          * but allow this to be suppressed by the register definition
-          * (usually only necessary to work around guest bugs).
-          */
-diff --git a/target/arm/translate.c b/target/arm/translate.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/translate.c
-+++ b/target/arm/translate.c
-@@ -XXX,XX +XXX,XX @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn)
-     ri = get_arm_cp_reginfo(s->cp_regs,
-             ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
-     if (ri) {
-+        bool need_exit_tb;
-+
-         /* Check access permissions */
-         if (!cp_access_ok(s->current_el, ri, isread)) {
-             return 1;
-@@ -XXX,XX +XXX,XX @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn)
-             }
-         }
--        if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
--            /* I/O operations must end the TB here (whether read or write) */
--            gen_lookup_tb(s);
--        } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
--            /* We default to ending the TB on a coprocessor register write,
-+        /* I/O operations must end the TB here (whether read or write) */
-+        need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
-+                        (ri->type & ARM_CP_IO));
-+
-+        if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
-+            /*
-+             * A write to any coprocessor register that ends a TB
-+             * must rebuild the hflags for the next TB.
-+             */
-+            TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
-+            if (arm_dc_feature(s, ARM_FEATURE_M)) {
-+                gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
-+            } else {
-+                gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
-+            }
-+            tcg_temp_free_i32(tcg_el);
-+            /*
-+             * We default to ending the TB on a coprocessor register write,
-              * but allow this to be suppressed by the register definition
-              * (usually only necessary to work around guest bugs).
-              */
-+            need_exit_tb = true;
-+        }
-+        if (need_exit_tb) {
-             gen_lookup_tb(s);
-         }
---
-.20.1

-[PULL 20/51] target/arm: Rebuild hflags at CPSR writes
+Deleted patch
-From: Richard Henderson <richard.henderson@linaro.org>
-Continue setting, but not relying upon, env->hflags.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-19-richard.henderson@linaro.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- target/arm/op_helper.c | 3 +++
-file changed, 3 insertions(+)
-diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/op_helper.c
-+++ b/target/arm/op_helper.c
-@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(usat16)(CPUARMState *env, uint32_t x, uint32_t shift)
- void HELPER(setend)(CPUARMState *env)
- {
-     env->uncached_cpsr ^= CPSR_E;
-+    arm_rebuild_hflags(env);
- }
- /* Function checks whether WFx (WFI/WFE) instructions are set up to be trapped.
-@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(cpsr_read)(CPUARMState *env)
- void HELPER(cpsr_write)(CPUARMState *env, uint32_t val, uint32_t mask)
- {
-     cpsr_write(env, val, mask, CPSRWriteByInstr);
-+    /* TODO: Not all cpsr bits are relevant to hflags.  */
-+    arm_rebuild_hflags(env);
- }
- /* Write the CPSR for a 32-bit exception return */
---
-.20.1

-[PULL 21/51] target/arm: Rebuild hflags at Xscale SCTLR writes
+Deleted patch
-From: Richard Henderson <richard.henderson@linaro.org>
-Continue setting, but not relying upon, env->hflags.
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-20-richard.henderson@linaro.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- target/arm/helper.c | 10 ++++++++++
-file changed, 10 insertions(+)
-diff --git a/target/arm/helper.c b/target/arm/helper.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper.c
-+++ b/target/arm/helper.c
-@@ -XXX,XX +XXX,XX @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-     /* ??? Lots of these bits are not implemented.  */
-     /* This may enable/disable the MMU, so do a TLB flush.  */
-     tlb_flush(CPU(cpu));
-+
-+    if (ri->type & ARM_CP_SUPPRESS_TB_END) {
-+        /*
-+         * Normally we would always end the TB on an SCTLR write; see the
-+         * comment in ARMCPRegInfo sctlr initialization below for why Xscale
-+         * is special.  Setting ARM_CP_SUPPRESS_TB_END also stops the rebuild
-+         * of hflags from the translator, so do it here.
-+         */
-+        arm_rebuild_hflags(env);
-+    }
- }
- static CPAccessResult fpexc32_access(CPUARMState *env, const ARMCPRegInfo *ri,
---
-.20.1

-[PULL 22/51] target/arm: Rebuild hflags for M-profile
+Deleted patch
-From: Richard Henderson <richard.henderson@linaro.org>
-Continue setting, but not relying upon, env->hflags.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-Message-id: 20191023150057.25731-21-richard.henderson@linaro.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- target/arm/m_helper.c  | 6 ++++++
- target/arm/translate.c | 5 ++++-
-files changed, 10 insertions(+), 1 deletion(-)
-diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/m_helper.c
-+++ b/target/arm/m_helper.c
-@@ -XXX,XX +XXX,XX @@ void HELPER(v7m_bxns)(CPUARMState *env, uint32_t dest)
-     switch_v7m_security_state(env, dest & 1);
-     env->thumb = 1;
-     env->regs[15] = dest & ~1;
-+    arm_rebuild_hflags(env);
- }
- void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest)
-@@ -XXX,XX +XXX,XX @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest)
-     switch_v7m_security_state(env, 0);
-     env->thumb = 1;
-     env->regs[15] = dest;
-+    arm_rebuild_hflags(env);
- }
- static uint32_t *get_v7m_sp_ptr(CPUARMState *env, bool secure, bool threadmode,
-@@ -XXX,XX +XXX,XX @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain,
-     env->regs[14] = lr;
-     env->regs[15] = addr & 0xfffffffe;
-     env->thumb = addr & 1;
-+    arm_rebuild_hflags(env);
- }
- static void v7m_update_fpccr(CPUARMState *env, uint32_t frameptr,
-@@ -XXX,XX +XXX,XX @@ static void do_v7m_exception_exit(ARMCPU *cpu)
-     /* Otherwise, we have a successful exception exit. */
-     arm_clear_exclusive(env);
-+    arm_rebuild_hflags(env);
-     qemu_log_mask(CPU_LOG_INT, "...successful exception return\n");
- }
-@@ -XXX,XX +XXX,XX @@ static bool do_v7m_function_return(ARMCPU *cpu)
-     xpsr_write(env, 0, XPSR_IT);
-     env->thumb = newpc & 1;
-     env->regs[15] = newpc & ~1;
-+    arm_rebuild_hflags(env);
-     qemu_log_mask(CPU_LOG_INT, "...function return successful\n");
-     return true;
-@@ -XXX,XX +XXX,XX @@ static bool v7m_handle_execute_nsc(ARMCPU *cpu)
-     switch_v7m_security_state(env, true);
-     xpsr_write(env, 0, XPSR_IT);
-     env->regs[15] += 4;
-+    arm_rebuild_hflags(env);
-     return true;
- gen_invep:
-diff --git a/target/arm/translate.c b/target/arm/translate.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/translate.c
-+++ b/target/arm/translate.c
-@@ -XXX,XX +XXX,XX @@ static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
- static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
- {
--    TCGv_i32 addr, reg;
-+    TCGv_i32 addr, reg, el;
-     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
-         return false;
-@@ -XXX,XX +XXX,XX @@ static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
-     gen_helper_v7m_msr(cpu_env, addr, reg);
-     tcg_temp_free_i32(addr);
-     tcg_temp_free_i32(reg);
-+    el = tcg_const_i32(s->current_el);
-+    gen_helper_rebuild_hflags_m32(cpu_env, el);
-+    tcg_temp_free_i32(el);
-     gen_lookup_tb(s);
-     return true;
- }
---
-.20.1

-[PULL 36/51] tests: arm: Introduce cpu feature tests
+Deleted patch
-From: Andrew Jones <drjones@redhat.com>
-Now that Arm CPUs have advertised features, let's add tests to ensure
-we maintain their expected availability with and without KVM.
-Signed-off-by: Andrew Jones <drjones@redhat.com>
-Reviewed-by: Eric Auger <eric.auger@redhat.com>
-Message-id: 20191024121808.9612-3-drjones@redhat.com
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- tests/Makefile.include   |   5 +-
- tests/arm-cpu-features.c | 240 +++++++++++++++++++++++++++++++++++++++
-files changed, 244 insertions(+), 1 deletion(-)
- create mode 100644 tests/arm-cpu-features.c
-diff --git a/tests/Makefile.include b/tests/Makefile.include
-index XXXXXXX..XXXXXXX 100644
---- a/tests/Makefile.include
-+++ b/tests/Makefile.include
-@@ -XXX,XX +XXX,XX @@ check-qtest-sparc64-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF)
- check-qtest-sparc64-y += tests/prom-env-test$(EXESUF)
- check-qtest-sparc64-y += tests/boot-serial-test$(EXESUF)
-+check-qtest-arm-y += tests/arm-cpu-features$(EXESUF)
- check-qtest-arm-y += tests/microbit-test$(EXESUF)
- check-qtest-arm-y += tests/m25p80-test$(EXESUF)
- check-qtest-arm-y += tests/test-arm-mptimer$(EXESUF)
-@@ -XXX,XX +XXX,XX @@ check-qtest-arm-y += tests/boot-serial-test$(EXESUF)
- check-qtest-arm-y += tests/hexloader-test$(EXESUF)
- check-qtest-arm-$(CONFIG_PFLASH_CFI02) += tests/pflash-cfi02-test$(EXESUF)
--check-qtest-aarch64-y = tests/numa-test$(EXESUF)
-+check-qtest-aarch64-y += tests/arm-cpu-features$(EXESUF)
-+check-qtest-aarch64-y += tests/numa-test$(EXESUF)
- check-qtest-aarch64-y += tests/boot-serial-test$(EXESUF)
- check-qtest-aarch64-y += tests/migration-test$(EXESUF)
- # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make test unconditional
-@@ -XXX,XX +XXX,XX @@ tests/test-qapi-util$(EXESUF): tests/test-qapi-util.o $(test-util-obj-y)
- tests/numa-test$(EXESUF): tests/numa-test.o
- tests/vmgenid-test$(EXESUF): tests/vmgenid-test.o tests/boot-sector.o tests/acpi-utils.o
- tests/cdrom-test$(EXESUF): tests/cdrom-test.o tests/boot-sector.o $(libqos-obj-y)
-+tests/arm-cpu-features$(EXESUF): tests/arm-cpu-features.o
- tests/migration/stress$(EXESUF): tests/migration/stress.o
-     $(call quiet-command, $(LINKPROG) -static -O3 $(PTHREAD_LIB) -o $@ $< ,"LINK","$(TARGET_DIR)$@")
-diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c
-new file mode 100644
-index XXXXXXX..XXXXXXX
---- /dev/null
-+++ b/tests/arm-cpu-features.c
-@@ -XXX,XX +XXX,XX @@
-+/*
-+ * Arm CPU feature test cases
-+ *
-+ * Copyright (c) 2019 Red Hat Inc.
-+ * Authors:
-+ *  Andrew Jones <drjones@redhat.com>
-+ *
-+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
-+ * See the COPYING file in the top-level directory.
-+ */
-+#include "qemu/osdep.h"
-+#include "libqtest.h"
-+#include "qapi/qmp/qdict.h"
-+#include "qapi/qmp/qjson.h"
-+
-+#define MACHINE    "-machine virt,gic-version=max "
-+#define QUERY_HEAD "{ 'execute': 'query-cpu-model-expansion', " \
-+                     "'arguments': { 'type': 'full', "
-+#define QUERY_TAIL "}}"
-+
-+static QDict *do_query_no_props(QTestState *qts, const char *cpu_type)
-+{
-+    return qtest_qmp(qts, QUERY_HEAD "'model': { 'name': %s }"
-+                          QUERY_TAIL, cpu_type);
-+}
-+
-+static QDict *do_query(QTestState *qts, const char *cpu_type,
-+                       const char *fmt, ...)
-+{
-+    QDict *resp;
-+
-+    if (fmt) {
-+        QDict *args;
-+        va_list ap;
-+
-+        va_start(ap, fmt);
-+        args = qdict_from_vjsonf_nofail(fmt, ap);
-+        va_end(ap);
-+
-+        resp = qtest_qmp(qts, QUERY_HEAD "'model': { 'name': %s, "
-+                                                    "'props': %p }"
-+                              QUERY_TAIL, cpu_type, args);
-+    } else {
-+        resp = do_query_no_props(qts, cpu_type);
-+    }
-+
-+    return resp;
-+}
-+
-+static const char *resp_get_error(QDict *resp)
-+{
-+    QDict *qdict;
-+
-+    g_assert(resp);
-+
-+    qdict = qdict_get_qdict(resp, "error");
-+    if (qdict) {
-+        return qdict_get_str(qdict, "desc");
-+    }
-+    return NULL;
-+}
-+
-+#define assert_error(qts, cpu_type, expected_error, fmt, ...)          \
-+({                                                                     \
-+    QDict *_resp;                                                      \
-+    const char *_error;                                                \
-+                                                                       \
-+    _resp = do_query(qts, cpu_type, fmt, ##__VA_ARGS__);               \
-+    g_assert(_resp);                                                   \
-+    _error = resp_get_error(_resp);                                    \
-+    g_assert(_error);                                                  \
-+    g_assert(g_str_equal(_error, expected_error));                     \
-+    qobject_unref(_resp);                                              \
-+})
-+
-+static bool resp_has_props(QDict *resp)
-+{
-+    QDict *qdict;
-+
-+    g_assert(resp);
-+
-+    if (!qdict_haskey(resp, "return")) {
-+        return false;
-+    }
-+    qdict = qdict_get_qdict(resp, "return");
-+
-+    if (!qdict_haskey(qdict, "model")) {
-+        return false;
-+    }
-+    qdict = qdict_get_qdict(qdict, "model");
-+
-+    return qdict_haskey(qdict, "props");
-+}
-+
-+static QDict *resp_get_props(QDict *resp)
-+{
-+    QDict *qdict;
-+
-+    g_assert(resp);
-+    g_assert(resp_has_props(resp));
-+
-+    qdict = qdict_get_qdict(resp, "return");
-+    qdict = qdict_get_qdict(qdict, "model");
-+    qdict = qdict_get_qdict(qdict, "props");
-+    return qdict;
-+}
-+
-+#define assert_has_feature(qts, cpu_type, feature)                     \
-+({                                                                     \
-+    QDict *_resp = do_query_no_props(qts, cpu_type);                   \
-+    g_assert(_resp);                                                   \
-+    g_assert(resp_has_props(_resp));                                   \
-+    g_assert(qdict_get(resp_get_props(_resp), feature));               \
-+    qobject_unref(_resp);                                              \
-+})
-+
-+#define assert_has_not_feature(qts, cpu_type, feature)                 \
-+({                                                                     \
-+    QDict *_resp = do_query_no_props(qts, cpu_type);                   \
-+    g_assert(_resp);                                                   \
-+    g_assert(!resp_has_props(_resp) ||                                 \
-+             !qdict_get(resp_get_props(_resp), feature));              \
-+    qobject_unref(_resp);                                              \
-+})
-+
-+static void assert_type_full(QTestState *qts)
-+{
-+    const char *error;
-+    QDict *resp;
-+
-+    resp = qtest_qmp(qts, "{ 'execute': 'query-cpu-model-expansion', "
-+                            "'arguments': { 'type': 'static', "
-+                                           "'model': { 'name': 'foo' }}}");
-+    g_assert(resp);
-+    error = resp_get_error(resp);
-+    g_assert(error);
-+    g_assert(g_str_equal(error,
-+                         "The requested expansion type is not supported"));
-+    qobject_unref(resp);
-+}
-+
-+static void assert_bad_props(QTestState *qts, const char *cpu_type)
-+{
-+    const char *error;
-+    QDict *resp;
-+
-+    resp = qtest_qmp(qts, "{ 'execute': 'query-cpu-model-expansion', "
-+                            "'arguments': { 'type': 'full', "
-+                                           "'model': { 'name': %s, "
-+                                                      "'props': false }}}",
-+                     cpu_type);
-+    g_assert(resp);
-+    error = resp_get_error(resp);
-+    g_assert(error);
-+    g_assert(g_str_equal(error,
-+                         "Invalid parameter type for 'props', expected: dict"));
-+    qobject_unref(resp);
-+}
-+
-+static void test_query_cpu_model_expansion(const void *data)
-+{
-+    QTestState *qts;
-+
-+    qts = qtest_init(MACHINE "-cpu max");
-+
-+    /* Test common query-cpu-model-expansion input validation */
-+    assert_type_full(qts);
-+    assert_bad_props(qts, "max");
-+    assert_error(qts, "foo", "The CPU type 'foo' is not a recognized "
-+                 "ARM CPU type", NULL);
-+    assert_error(qts, "max", "Parameter 'not-a-prop' is unexpected",
-+                 "{ 'not-a-prop': false }");
-+    assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL);
-+
-+    /* Test expected feature presence/absence for some cpu types */
-+    assert_has_feature(qts, "max", "pmu");
-+    assert_has_feature(qts, "cortex-a15", "pmu");
-+    assert_has_not_feature(qts, "cortex-a15", "aarch64");
-+
-+    if (g_str_equal(qtest_get_arch(), "aarch64")) {
-+        assert_has_feature(qts, "max", "aarch64");
-+        assert_has_feature(qts, "cortex-a57", "pmu");
-+        assert_has_feature(qts, "cortex-a57", "aarch64");
-+
-+        /* Test that features that depend on KVM generate errors without. */
-+        assert_error(qts, "max",
-+                     "'aarch64' feature cannot be disabled "
-+                     "unless KVM is enabled and 32-bit EL1 "
-+                     "is supported",
-+                     "{ 'aarch64': false }");
-+    }
-+
-+    qtest_quit(qts);
-+}
-+
-+static void test_query_cpu_model_expansion_kvm(const void *data)
-+{
-+    QTestState *qts;
-+
-+    qts = qtest_init(MACHINE "-accel kvm -cpu host");
-+
-+    if (g_str_equal(qtest_get_arch(), "aarch64")) {
-+        assert_has_feature(qts, "host", "aarch64");
-+        assert_has_feature(qts, "host", "pmu");
-+
-+        assert_error(qts, "cortex-a15",
-+            "We cannot guarantee the CPU type 'cortex-a15' works "
-+            "with KVM on this host", NULL);
-+    } else {
-+        assert_has_not_feature(qts, "host", "aarch64");
-+        assert_has_not_feature(qts, "host", "pmu");
-+    }
-+
-+    qtest_quit(qts);
-+}
-+
-+int main(int argc, char **argv)
-+{
-+    bool kvm_available = false;
-+
-+    if (!access("/dev/kvm",  R_OK | W_OK)) {
-+#if defined(HOST_AARCH64)
-+        kvm_available = g_str_equal(qtest_get_arch(), "aarch64");
-+#elif defined(HOST_ARM)
-+        kvm_available = g_str_equal(qtest_get_arch(), "arm");
-+#endif
-+    }
-+
-+    g_test_init(&argc, &argv, NULL);
-+
-+    qtest_add_data_func("/arm/query-cpu-model-expansion",
-+                        NULL, test_query_cpu_model_expansion);
-+
-+    if (kvm_available) {
-+        qtest_add_data_func("/arm/kvm/query-cpu-model-expansion",
-+                            NULL, test_query_cpu_model_expansion_kvm);
-+    }
-+
-+    return g_test_run();
-+}
---
-.20.1

-[PULL 41/51] target/arm/kvm: scratch vcpu: Preserve input kvm_vcpu_init features
+Deleted patch
-From: Andrew Jones <drjones@redhat.com>
-kvm_arm_create_scratch_host_vcpu() takes a struct kvm_vcpu_init
-parameter. Rather than just using it as an output parameter to
-pass back the preferred target, use it also as an input parameter,
-allowing a caller to pass a selected target if they wish and to
-also pass cpu features. If the caller doesn't want to select a
-target they can pass -1 for the target which indicates they want
-to use the preferred target and have it passed back like before.
-Signed-off-by: Andrew Jones <drjones@redhat.com>
-Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-Reviewed-by: Eric Auger <eric.auger@redhat.com>
-Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
-Reviewed-by: Beata Michalska <beata.michalska@linaro.org>
-Message-id: 20191024121808.9612-8-drjones@redhat.com
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- target/arm/kvm.c   | 20 +++++++++++++++-----
- target/arm/kvm32.c |  6 +++++-
- target/arm/kvm64.c |  6 +++++-
- 3 files changed, 25 insertions(+), 7 deletions(-)
-diff --git a/target/arm/kvm.c b/target/arm/kvm.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/kvm.c
-+++ b/target/arm/kvm.c
-@@ -XXX,XX +XXX,XX @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
-                                       int *fdarray,
-                                       struct kvm_vcpu_init *init)
- {
--    int ret, kvmfd = -1, vmfd = -1, cpufd = -1;
-+    int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1;
-     kvmfd = qemu_open("/dev/kvm", O_RDWR);
-     if (kvmfd < 0) {
-@@ -XXX,XX +XXX,XX @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
-         goto finish;
-     }
--    ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, init);
-+    if (init->target == -1) {
-+        struct kvm_vcpu_init preferred;
-+
-+        ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred);
-+        if (!ret) {
-+            init->target = preferred.target;
-+        }
-+    }
-     if (ret >= 0) {
-         ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
-         if (ret < 0) {
-@@ -XXX,XX +XXX,XX @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
-          * creating one kind of guest CPU which is its preferred
-          * CPU type.
-          */
-+        struct kvm_vcpu_init try;
-+
-         while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) {
--            init->target = *cpus_to_try++;
--            memset(init->features, 0, sizeof(init->features));
--            ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
-+            try.target = *cpus_to_try++;
-+            memcpy(try.features, init->features, sizeof(init->features));
-+            ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, &try);
-             if (ret >= 0) {
-                 break;
-             }
-@@ -XXX,XX +XXX,XX @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
-         if (ret < 0) {
-             goto err;
-         }
-+        init->target = try.target;
-     } else {
-         /* Treat a NULL cpus_to_try argument the same as an empty
-          * list, which means we will fail the call since this must
-diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/kvm32.c
-+++ b/target/arm/kvm32.c
-@@ -XXX,XX +XXX,XX @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
-         QEMU_KVM_ARM_TARGET_CORTEX_A15,
-         QEMU_KVM_ARM_TARGET_NONE
-     };
--    struct kvm_vcpu_init init;
-+    /*
-+     * target = -1 informs kvm_arm_create_scratch_host_vcpu()
-+     * to use the preferred target
-+     */
-+    struct kvm_vcpu_init init = { .target = -1, };
-     if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
-         return false;
-diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/arm/kvm64.c
-+++ b/target/arm/kvm64.c
-@@ -XXX,XX +XXX,XX @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
-         KVM_ARM_TARGET_CORTEX_A57,
-         QEMU_KVM_ARM_TARGET_NONE
-     };
--    struct kvm_vcpu_init init;
-+    /*
-+     * target = -1 informs kvm_arm_create_scratch_host_vcpu()
-+     * to use the preferred target
-+     */
-+    struct kvm_vcpu_init init = { .target = -1, };
-     if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
-         return false;
---
-.20.1

-[PULL 44/51] hw/misc/bcm2835_thermal: Add a dummy BCM2835 thermal sensor
+Deleted patch
-From: Philippe Mathieu-Daudé <f4bug@amsat.org>
-We will soon implement the SYS_timer. This timer is used by Linux
-in the thermal subsystem, so once available, the subsystem will be
-enabled and poll the temperature sensors. We need to provide the
-minimum required to keep Linux booting.
-Add a dummy thermal sensor returning ~25°C based on:
-https://github.com/raspberrypi/linux/blob/rpi-5.3.y/drivers/thermal/broadcom/bcm2835_thermal.c
-Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Message-id: 20191019234715.25750-2-f4bug@amsat.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- hw/misc/Makefile.objs             |   1 +
- include/hw/misc/bcm2835_thermal.h |  27 ++++++
- hw/misc/bcm2835_thermal.c         | 135 ++++++++++++++++++++++++++++++
- 3 files changed, 163 insertions(+)
- create mode 100644 include/hw/misc/bcm2835_thermal.h
- create mode 100644 hw/misc/bcm2835_thermal.c
-diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
-index XXXXXXX..XXXXXXX 100644
---- a/hw/misc/Makefile.objs
-+++ b/hw/misc/Makefile.objs
-@@ -XXX,XX +XXX,XX @@ common-obj-$(CONFIG_OMAP) += omap_tap.o
- common-obj-$(CONFIG_RASPI) += bcm2835_mbox.o
- common-obj-$(CONFIG_RASPI) += bcm2835_property.o
- common-obj-$(CONFIG_RASPI) += bcm2835_rng.o
-+common-obj-$(CONFIG_RASPI) += bcm2835_thermal.o
- common-obj-$(CONFIG_SLAVIO) += slavio_misc.o
- common-obj-$(CONFIG_ZYNQ) += zynq_slcr.o
- common-obj-$(CONFIG_ZYNQ) += zynq-xadc.o
-diff --git a/include/hw/misc/bcm2835_thermal.h b/include/hw/misc/bcm2835_thermal.h
-new file mode 100644
-index XXXXXXX..XXXXXXX
---- /dev/null
-+++ b/include/hw/misc/bcm2835_thermal.h
-@@ -XXX,XX +XXX,XX @@
-+/*
-+ * BCM2835 dummy thermal sensor
-+ *
-+ * Copyright (C) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
-+ *
-+ * SPDX-License-Identifier: GPL-2.0-or-later
-+ */
-+
-+#ifndef HW_MISC_BCM2835_THERMAL_H
-+#define HW_MISC_BCM2835_THERMAL_H
-+
-+#include "hw/sysbus.h"
-+
-+#define TYPE_BCM2835_THERMAL "bcm2835-thermal"
-+
-+#define BCM2835_THERMAL(obj) \
-+    OBJECT_CHECK(Bcm2835ThermalState, (obj), TYPE_BCM2835_THERMAL)
-+
-+typedef struct {
-+    /*< private >*/
-+    SysBusDevice parent_obj;
-+    /*< public >*/
-+    MemoryRegion iomem;
-+    uint32_t ctl;
-+} Bcm2835ThermalState;
-+
-+#endif
-diff --git a/hw/misc/bcm2835_thermal.c b/hw/misc/bcm2835_thermal.c
-new file mode 100644
-index XXXXXXX..XXXXXXX
---- /dev/null
-+++ b/hw/misc/bcm2835_thermal.c
-@@ -XXX,XX +XXX,XX @@
-+/*
-+ * BCM2835 dummy thermal sensor
-+ *
-+ * Copyright (C) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
-+ *
-+ * SPDX-License-Identifier: GPL-2.0-or-later
-+ */
-+
-+#include "qemu/osdep.h"
-+#include "qemu/log.h"
-+#include "qapi/error.h"
-+#include "hw/misc/bcm2835_thermal.h"
-+#include "hw/registerfields.h"
-+#include "migration/vmstate.h"
-+
-+REG32(CTL, 0)
-+FIELD(CTL, POWER_DOWN, 0, 1)
-+FIELD(CTL, RESET, 1, 1)
-+FIELD(CTL, BANDGAP_CTRL, 2, 3)
-+FIELD(CTL, INTERRUPT_ENABLE, 5, 1)
-+FIELD(CTL, DIRECT, 6, 1)
-+FIELD(CTL, INTERRUPT_CLEAR, 7, 1)
-+FIELD(CTL, HOLD, 8, 10)
-+FIELD(CTL, RESET_DELAY, 18, 8)
-+FIELD(CTL, REGULATOR_ENABLE, 26, 1)
-+
-+REG32(STAT, 4)
-+FIELD(STAT, DATA, 0, 10)
-+FIELD(STAT, VALID, 10, 1)
-+FIELD(STAT, INTERRUPT, 11, 1)
-+
-+#define THERMAL_OFFSET_C 412
-+#define THERMAL_COEFF  (-0.538f)
-+
-+static uint16_t bcm2835_thermal_temp2adc(int temp_C)
-+{
-+    return (temp_C - THERMAL_OFFSET_C) / THERMAL_COEFF;
-+}
-+
-+static uint64_t bcm2835_thermal_read(void *opaque, hwaddr addr, unsigned size)
-+{
-+    Bcm2835ThermalState *s = BCM2835_THERMAL(opaque);
-+    uint32_t val = 0;
-+
-+    switch (addr) {
-+    case A_CTL:
-+        val = s->ctl;
-+        break;
-+    case A_STAT:
-+        /* Temperature is constantly 25°C. */
-+        val = FIELD_DP32(bcm2835_thermal_temp2adc(25), STAT, VALID, true);
-+        break;
-+    default:
-+        /* MemoryRegionOps are aligned, so this can not happen. */
-+        g_assert_not_reached();
-+    }
-+    return val;
-+}
-+
-+static void bcm2835_thermal_write(void *opaque, hwaddr addr,
-+                                  uint64_t value, unsigned size)
-+{
-+    Bcm2835ThermalState *s = BCM2835_THERMAL(opaque);
-+
-+    switch (addr) {
-+    case A_CTL:
-+        s->ctl = value;
-+        break;
-+    case A_STAT:
-+        qemu_log_mask(LOG_GUEST_ERROR, "%s: write 0x%" PRIx64
-+                                       " to 0x%" HWADDR_PRIx "\n",
-+                       __func__, value, addr);
-+        break;
-+    default:
-+        /* MemoryRegionOps are aligned, so this can not happen. */
-+        g_assert_not_reached();
-+    }
-+}
-+
-+static const MemoryRegionOps bcm2835_thermal_ops = {
-+    .read = bcm2835_thermal_read,
-+    .write = bcm2835_thermal_write,
-+    .impl.max_access_size = 4,
-+    .valid.min_access_size = 4,
-+    .endianness = DEVICE_NATIVE_ENDIAN,
-+};
-+
-+static void bcm2835_thermal_reset(DeviceState *dev)
-+{
-+    Bcm2835ThermalState *s = BCM2835_THERMAL(dev);
-+
-+    s->ctl = 0;
-+}
-+
-+static void bcm2835_thermal_realize(DeviceState *dev, Error **errp)
-+{
-+    Bcm2835ThermalState *s = BCM2835_THERMAL(dev);
-+
-+    memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_thermal_ops,
-+                          s, TYPE_BCM2835_THERMAL, 8);
-+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
-+}
-+
-+static const VMStateDescription bcm2835_thermal_vmstate = {
-+    .name = "bcm2835_thermal",
-+    .version_id = 1,
-+    .minimum_version_id = 1,
-+    .fields = (VMStateField[]) {
-+        VMSTATE_UINT32(ctl, Bcm2835ThermalState),
-+        VMSTATE_END_OF_LIST()
-+    }
-+};
-+
-+static void bcm2835_thermal_class_init(ObjectClass *klass, void *data)
-+{
-+    DeviceClass *dc = DEVICE_CLASS(klass);
-+
-+    dc->realize = bcm2835_thermal_realize;
-+    dc->reset = bcm2835_thermal_reset;
-+    dc->vmsd = &bcm2835_thermal_vmstate;
-+}
-+
-+static const TypeInfo bcm2835_thermal_info = {
-+    .name = TYPE_BCM2835_THERMAL,
-+    .parent = TYPE_SYS_BUS_DEVICE,
-+    .instance_size = sizeof(Bcm2835ThermalState),
-+    .class_init = bcm2835_thermal_class_init,
-+};
-+
-+static void bcm2835_thermal_register_types(void)
-+{
-+    type_register_static(&bcm2835_thermal_info);
-+}
-+
-+type_init(bcm2835_thermal_register_types)
---
-.20.1

-[PULL 45/51] hw/arm/bcm2835_peripherals: Use the thermal sensor block
+Deleted patch
-From: Philippe Mathieu-Daudé <f4bug@amsat.org>
-Map the thermal sensor in the BCM2835 block.
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
-Message-id: 20191019234715.25750-3-f4bug@amsat.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- include/hw/arm/bcm2835_peripherals.h |  2 ++
- include/hw/arm/raspi_platform.h      |  1 +
- hw/arm/bcm2835_peripherals.c         | 13 +++++++++++++
- 3 files changed, 16 insertions(+)
-diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h
-index XXXXXXX..XXXXXXX 100644
---- a/include/hw/arm/bcm2835_peripherals.h
-+++ b/include/hw/arm/bcm2835_peripherals.h
-@@ -XXX,XX +XXX,XX @@
- #include "hw/misc/bcm2835_property.h"
- #include "hw/misc/bcm2835_rng.h"
- #include "hw/misc/bcm2835_mbox.h"
-+#include "hw/misc/bcm2835_thermal.h"
- #include "hw/sd/sdhci.h"
- #include "hw/sd/bcm2835_sdhost.h"
- #include "hw/gpio/bcm2835_gpio.h"
-@@ -XXX,XX +XXX,XX @@ typedef struct BCM2835PeripheralState {
-     SDHCIState sdhci;
-     BCM2835SDHostState sdhost;
-     BCM2835GpioState gpio;
-+    Bcm2835ThermalState thermal;
-     UnimplementedDeviceState i2s;
-     UnimplementedDeviceState spi[1];
-     UnimplementedDeviceState i2c[3];
-diff --git a/include/hw/arm/raspi_platform.h b/include/hw/arm/raspi_platform.h
-index XXXXXXX..XXXXXXX 100644
---- a/include/hw/arm/raspi_platform.h
-+++ b/include/hw/arm/raspi_platform.h
-@@ -XXX,XX +XXX,XX @@
- #define SPI0_OFFSET             0x204000
- #define BSC0_OFFSET             0x205000 /* BSC0 I2C/TWI */
- #define OTP_OFFSET              0x20f000
-+#define THERMAL_OFFSET          0x212000
- #define BSC_SL_OFFSET           0x214000 /* SPI slave */
- #define AUX_OFFSET              0x215000 /* AUX: UART1/SPI1/SPI2 */
- #define EMMC1_OFFSET            0x300000
-diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c
-index XXXXXXX..XXXXXXX 100644
---- a/hw/arm/bcm2835_peripherals.c
-+++ b/hw/arm/bcm2835_peripherals.c
-@@ -XXX,XX +XXX,XX @@ static void bcm2835_peripherals_init(Object *obj)
-     object_property_add_const_link(OBJECT(&s->dma), "dma-mr",
-                                    OBJECT(&s->gpu_bus_mr), &error_abort);
-+    /* Thermal */
-+    sysbus_init_child_obj(obj, "thermal", &s->thermal, sizeof(s->thermal),
-+                          TYPE_BCM2835_THERMAL);
-+
-     /* GPIO */
-     sysbus_init_child_obj(obj, "gpio", &s->gpio, sizeof(s->gpio),
-                           TYPE_BCM2835_GPIO);
-@@ -XXX,XX +XXX,XX @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
-                                                   INTERRUPT_DMA0 + n));
-     }
-+    /* THERMAL */
-+    object_property_set_bool(OBJECT(&s->thermal), true, "realized", &err);
-+    if (err) {
-+        error_propagate(errp, err);
-+        return;
-+    }
-+    memory_region_add_subregion(&s->peri_mr, THERMAL_OFFSET,
-+                sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->thermal), 0));
-+
-     /* GPIO */
-     object_property_set_bool(OBJECT(&s->gpio), true, "realized", &err);
-     if (err) {
---
-.20.1

-[PULL 46/51] hw/timer/bcm2835: Add the BCM2835 SYS_timer
+Deleted patch
-From: Philippe Mathieu-Daudé <f4bug@amsat.org>
-Add the 64-bit free running timer. Do not model the COMPARE register
-(no IRQ generated).
-This timer is used by Linux kernel and recently U-Boot:
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/clocksource/bcm2835_timer.c?h=v3.7
-https://github.com/u-boot/u-boot/blob/v2019.07/include/configs/rpi.h#L19
-Datasheet used:
-https://www.raspberrypi.org/app/uploads/2012/02/BCM2835-ARM-Peripherals.pdf
-Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Message-id: 20191019234715.25750-4-f4bug@amsat.org
-[PMM: squashed in switch to using memset in reset]
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- hw/timer/Makefile.objs            |   1 +
- include/hw/timer/bcm2835_systmr.h |  33 ++++++
- hw/timer/bcm2835_systmr.c         | 163 ++++++++++++++++++++++++++++++
- hw/timer/trace-events             |   5 +
- 4 files changed, 202 insertions(+)
- create mode 100644 include/hw/timer/bcm2835_systmr.h
- create mode 100644 hw/timer/bcm2835_systmr.c
-diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs
-index XXXXXXX..XXXXXXX 100644
---- a/hw/timer/Makefile.objs
-+++ b/hw/timer/Makefile.objs
-@@ -XXX,XX +XXX,XX @@ common-obj-$(CONFIG_SUN4V_RTC) += sun4v-rtc.o
- common-obj-$(CONFIG_CMSDK_APB_TIMER) += cmsdk-apb-timer.o
- common-obj-$(CONFIG_CMSDK_APB_DUALTIMER) += cmsdk-apb-dualtimer.o
- common-obj-$(CONFIG_MSF2) += mss-timer.o
-+common-obj-$(CONFIG_RASPI) += bcm2835_systmr.o
-diff --git a/include/hw/timer/bcm2835_systmr.h b/include/hw/timer/bcm2835_systmr.h
-new file mode 100644
-index XXXXXXX..XXXXXXX
---- /dev/null
-+++ b/include/hw/timer/bcm2835_systmr.h
-@@ -XXX,XX +XXX,XX @@
-+/*
-+ * BCM2835 SYS timer emulation
-+ *
-+ * Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
-+ *
-+ * SPDX-License-Identifier: GPL-2.0-or-later
-+ */
-+
-+#ifndef BCM2835_SYSTIMER_H
-+#define BCM2835_SYSTIMER_H
-+
-+#include "hw/sysbus.h"
-+#include "hw/irq.h"
-+
-+#define TYPE_BCM2835_SYSTIMER "bcm2835-sys-timer"
-+#define BCM2835_SYSTIMER(obj) \
-+    OBJECT_CHECK(BCM2835SystemTimerState, (obj), TYPE_BCM2835_SYSTIMER)
-+
-+typedef struct {
-+    /*< private >*/
-+    SysBusDevice parent_obj;
-+
-+    /*< public >*/
-+    MemoryRegion iomem;
-+    qemu_irq irq;
-+
-+    struct {
-+        uint32_t status;
-+        uint32_t compare[4];
-+    } reg;
-+} BCM2835SystemTimerState;
-+
-+#endif
-diff --git a/hw/timer/bcm2835_systmr.c b/hw/timer/bcm2835_systmr.c
-new file mode 100644
-index XXXXXXX..XXXXXXX
---- /dev/null
-+++ b/hw/timer/bcm2835_systmr.c
-@@ -XXX,XX +XXX,XX @@
-+/*
-+ * BCM2835 SYS timer emulation
-+ *
-+ * Copyright (C) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
-+ *
-+ * SPDX-License-Identifier: GPL-2.0-or-later
-+ *
-+ * Datasheet: BCM2835 ARM Peripherals (C6357-M-1398)
-+ * https://www.raspberrypi.org/app/uploads/2012/02/BCM2835-ARM-Peripherals.pdf
-+ *
-+ * Only the free running 64-bit counter is implemented.
-+ * The 4 COMPARE registers and the interruption are not implemented.
-+ */
-+
-+#include "qemu/osdep.h"
-+#include "qemu/log.h"
-+#include "qemu/timer.h"
-+#include "hw/timer/bcm2835_systmr.h"
-+#include "hw/registerfields.h"
-+#include "migration/vmstate.h"
-+#include "trace.h"
-+
-+REG32(CTRL_STATUS,  0x00)
-+REG32(COUNTER_LOW,  0x04)
-+REG32(COUNTER_HIGH, 0x08)
-+REG32(COMPARE0,     0x0c)
-+REG32(COMPARE1,     0x10)
-+REG32(COMPARE2,     0x14)
-+REG32(COMPARE3,     0x18)
-+
-+static void bcm2835_systmr_update_irq(BCM2835SystemTimerState *s)
-+{
-+    bool enable = !!s->reg.status;
-+
-+    trace_bcm2835_systmr_irq(enable);
-+    qemu_set_irq(s->irq, enable);
-+}
-+
-+static void bcm2835_systmr_update_compare(BCM2835SystemTimerState *s,
-+                                          unsigned timer_index)
-+{
-+    /* TODO for now, since neither Linux nor U-boot use these timers. */
-+    qemu_log_mask(LOG_UNIMP, "COMPARE register %u not implemented\n",
-+                  timer_index);
-+}
-+
-+static uint64_t bcm2835_systmr_read(void *opaque, hwaddr offset,
-+                                    unsigned size)
-+{
-+    BCM2835SystemTimerState *s = BCM2835_SYSTIMER(opaque);
-+    uint64_t r = 0;
-+
-+    switch (offset) {
-+    case A_CTRL_STATUS:
-+        r = s->reg.status;
-+        break;
-+    case A_COMPARE0 ... A_COMPARE3:
-+        r = s->reg.compare[(offset - A_COMPARE0) >> 2];
-+        break;
-+    case A_COUNTER_LOW:
-+    case A_COUNTER_HIGH:
-+        /* Free running counter at 1MHz */
-+        r = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL);
-+        r >>= 8 * (offset - A_COUNTER_LOW);
-+        r &= UINT32_MAX;
-+        break;
-+    default:
-+        qemu_log_mask(LOG_GUEST_ERROR, "%s: bad offset 0x%" HWADDR_PRIx "\n",
-+                      __func__, offset);
-+        break;
-+    }
-+    trace_bcm2835_systmr_read(offset, r);
-+
-+    return r;
-+}
-+
-+static void bcm2835_systmr_write(void *opaque, hwaddr offset,
-+                                 uint64_t value, unsigned size)
-+{
-+    BCM2835SystemTimerState *s = BCM2835_SYSTIMER(opaque);
-+
-+    trace_bcm2835_systmr_write(offset, value);
-+    switch (offset) {
-+    case A_CTRL_STATUS:
-+        s->reg.status &= ~value; /* Ack */
-+        bcm2835_systmr_update_irq(s);
-+        break;
-+    case A_COMPARE0 ... A_COMPARE3:
-+        s->reg.compare[(offset - A_COMPARE0) >> 2] = value;
-+        bcm2835_systmr_update_compare(s, (offset - A_COMPARE0) >> 2);
-+        break;
-+    case A_COUNTER_LOW:
-+    case A_COUNTER_HIGH:
-+        qemu_log_mask(LOG_GUEST_ERROR, "%s: read-only ofs 0x%" HWADDR_PRIx "\n",
-+                      __func__, offset);
-+        break;
-+    default:
-+        qemu_log_mask(LOG_GUEST_ERROR, "%s: bad offset 0x%" HWADDR_PRIx "\n",
-+                      __func__, offset);
-+        break;
-+    }
-+}
-+
-+static const MemoryRegionOps bcm2835_systmr_ops = {
-+    .read = bcm2835_systmr_read,
-+    .write = bcm2835_systmr_write,
-+    .endianness = DEVICE_LITTLE_ENDIAN,
-+    .impl = {
-+        .min_access_size = 4,
-+        .max_access_size = 4,
-+    },
-+};
-+
-+static void bcm2835_systmr_reset(DeviceState *dev)
-+{
-+    BCM2835SystemTimerState *s = BCM2835_SYSTIMER(dev);
-+
-+    memset(&s->reg, 0, sizeof(s->reg));
-+}
-+
-+static void bcm2835_systmr_realize(DeviceState *dev, Error **errp)
-+{
-+    BCM2835SystemTimerState *s = BCM2835_SYSTIMER(dev);
-+
-+    memory_region_init_io(&s->iomem, OBJECT(dev), &bcm2835_systmr_ops,
-+                          s, "bcm2835-sys-timer", 0x20);
-+    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem);
-+    sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq);
-+}
-+
-+static const VMStateDescription bcm2835_systmr_vmstate = {
-+    .name = "bcm2835_sys_timer",
-+    .version_id = 1,
-+    .minimum_version_id = 1,
-+    .fields = (VMStateField[]) {
-+        VMSTATE_UINT32(reg.status, BCM2835SystemTimerState),
-+        VMSTATE_UINT32_ARRAY(reg.compare, BCM2835SystemTimerState, 4),
-+        VMSTATE_END_OF_LIST()
-+    }
-+};
-+
-+static void bcm2835_systmr_class_init(ObjectClass *klass, void *data)
-+{
-+    DeviceClass *dc = DEVICE_CLASS(klass);
-+
-+    dc->realize = bcm2835_systmr_realize;
-+    dc->reset = bcm2835_systmr_reset;
-+    dc->vmsd = &bcm2835_systmr_vmstate;
-+}
-+
-+static const TypeInfo bcm2835_systmr_info = {
-+    .name = TYPE_BCM2835_SYSTIMER,
-+    .parent = TYPE_SYS_BUS_DEVICE,
-+    .instance_size = sizeof(BCM2835SystemTimerState),
-+    .class_init = bcm2835_systmr_class_init,
-+};
-+
-+static void bcm2835_systmr_register_types(void)
-+{
-+    type_register_static(&bcm2835_systmr_info);
-+}
-+
-+type_init(bcm2835_systmr_register_types);
-diff --git a/hw/timer/trace-events b/hw/timer/trace-events
-index XXXXXXX..XXXXXXX 100644
---- a/hw/timer/trace-events
-+++ b/hw/timer/trace-events
-@@ -XXX,XX +XXX,XX @@ pl031_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x"
- pl031_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x"
- pl031_alarm_raised(void) "alarm raised"
- pl031_set_alarm(uint32_t ticks) "alarm set for %u ticks"
-+
-+# bcm2835_systmr.c
-+bcm2835_systmr_irq(bool enable) "timer irq state %u"
-+bcm2835_systmr_read(uint64_t offset, uint64_t data) "timer read: offset 0x%" PRIx64 " data 0x%" PRIx64
-+bcm2835_systmr_write(uint64_t offset, uint64_t data) "timer write: offset 0x%" PRIx64 " data 0x%" PRIx64
---
-.20.1

-[PULL 47/51] hw/arm/bcm2835_peripherals: Use the SYS_timer
+Deleted patch
-From: Philippe Mathieu-Daudé <f4bug@amsat.org>
-Connect the recently added SYS_timer.
-Now U-Boot does not hang anymore polling a free running counter
-stuck at 0.
-This timer is also used by the Linux kernel thermal subsystem.
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
-Message-id: 20191019234715.25750-5-f4bug@amsat.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- include/hw/arm/bcm2835_peripherals.h |  3 ++-
- hw/arm/bcm2835_peripherals.c         | 17 ++++++++++++++++-
- 2 files changed, 18 insertions(+), 2 deletions(-)
-diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h
-index XXXXXXX..XXXXXXX 100644
---- a/include/hw/arm/bcm2835_peripherals.h
-+++ b/include/hw/arm/bcm2835_peripherals.h
-@@ -XXX,XX +XXX,XX @@
- #include "hw/sd/sdhci.h"
- #include "hw/sd/bcm2835_sdhost.h"
- #include "hw/gpio/bcm2835_gpio.h"
-+#include "hw/timer/bcm2835_systmr.h"
- #include "hw/misc/unimp.h"
- #define TYPE_BCM2835_PERIPHERALS "bcm2835-peripherals"
-@@ -XXX,XX +XXX,XX @@ typedef struct BCM2835PeripheralState {
-     MemoryRegion ram_alias[4];
-     qemu_irq irq, fiq;
--    UnimplementedDeviceState systmr;
-+    BCM2835SystemTimerState systmr;
-     UnimplementedDeviceState armtmr;
-     UnimplementedDeviceState cprman;
-     UnimplementedDeviceState a2w;
-diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c
-index XXXXXXX..XXXXXXX 100644
---- a/hw/arm/bcm2835_peripherals.c
-+++ b/hw/arm/bcm2835_peripherals.c
-@@ -XXX,XX +XXX,XX @@ static void bcm2835_peripherals_init(Object *obj)
-     /* Interrupt Controller */
-     sysbus_init_child_obj(obj, "ic", &s->ic, sizeof(s->ic), TYPE_BCM2835_IC);
-+    /* SYS Timer */
-+    sysbus_init_child_obj(obj, "systimer", &s->systmr, sizeof(s->systmr),
-+                          TYPE_BCM2835_SYSTIMER);
-+
-     /* UART0 */
-     sysbus_init_child_obj(obj, "uart0", &s->uart0, sizeof(s->uart0),
-                           TYPE_PL011);
-@@ -XXX,XX +XXX,XX @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
-                 sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->ic), 0));
-     sysbus_pass_irq(SYS_BUS_DEVICE(s), SYS_BUS_DEVICE(&s->ic));
-+    /* Sys Timer */
-+    object_property_set_bool(OBJECT(&s->systmr), true, "realized", &err);
-+    if (err) {
-+        error_propagate(errp, err);
-+        return;
-+    }
-+    memory_region_add_subregion(&s->peri_mr, ST_OFFSET,
-+                sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->systmr), 0));
-+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->systmr), 0,
-+        qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_ARM_IRQ,
-+                               INTERRUPT_ARM_TIMER));
-+
-     /* UART0 */
-     qdev_prop_set_chr(DEVICE(&s->uart0), "chardev", serial_hd(0));
-     object_property_set_bool(OBJECT(&s->uart0), true, "realized", &err);
-@@ -XXX,XX +XXX,XX @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
-     }
-     create_unimp(s, &s->armtmr, "bcm2835-sp804", ARMCTRL_TIMER0_1_OFFSET, 0x40);
--    create_unimp(s, &s->systmr, "bcm2835-systimer", ST_OFFSET, 0x20);
-     create_unimp(s, &s->cprman, "bcm2835-cprman", CPRMAN_OFFSET, 0x1000);
-     create_unimp(s, &s->a2w, "bcm2835-a2w", A2W_OFFSET, 0x1000);
-     create_unimp(s, &s->i2s, "bcm2835-i2s", I2S_OFFSET, 0x100);
---
-.20.1

-[PULL 48/51] hw/arm/bcm2836: Make the SoC code modular
+Deleted patch
-From: Philippe Mathieu-Daudé <f4bug@amsat.org>
-This file creates the BCM2836/BCM2837 blocks.
-The biggest differences with the BCM2838 we are going to add, are
-the base addresses of the interrupt controller and the peripherals.
-Add these addresses in the BCM283XInfo structure to make this
-block more modular. Remove the MCORE_OFFSET offset as it is
-not useful and rather confusing.
-Reviewed-by: Esteban Bosse <estebanbosse@gmail.com>
-Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Message-id: 20191019234715.25750-6-f4bug@amsat.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- hw/arm/bcm2836.c | 18 +++++++++---------
- 1 file changed, 9 insertions(+), 9 deletions(-)
-diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c
-index XXXXXXX..XXXXXXX 100644
---- a/hw/arm/bcm2836.c
-+++ b/hw/arm/bcm2836.c
-@@ -XXX,XX +XXX,XX @@
- #include "hw/arm/raspi_platform.h"
- #include "hw/sysbus.h"
--/* Peripheral base address seen by the CPU */
--#define BCM2836_PERI_BASE       0x3F000000
--
--/* "QA7" (Pi2) interrupt controller and mailboxes etc. */
--#define BCM2836_CONTROL_BASE    0x40000000
--
- struct BCM283XInfo {
-     const char *name;
-     const char *cpu_type;
-+    hwaddr peri_base; /* Peripheral base address seen by the CPU */
-+    hwaddr ctrl_base; /* Interrupt controller and mailboxes etc. */
-     int clusterid;
- };
-@@ -XXX,XX +XXX,XX @@ static const BCM283XInfo bcm283x_socs[] = {
-     {
-         .name = TYPE_BCM2836,
-         .cpu_type = ARM_CPU_TYPE_NAME("cortex-a7"),
-+        .peri_base = 0x3f000000,
-+        .ctrl_base = 0x40000000,
-         .clusterid = 0xf,
-     },
- #ifdef TARGET_AARCH64
-     {
-         .name = TYPE_BCM2837,
-         .cpu_type = ARM_CPU_TYPE_NAME("cortex-a53"),
-+        .peri_base = 0x3f000000,
-+        .ctrl_base = 0x40000000,
-         .clusterid = 0x0,
-     },
- #endif
-@@ -XXX,XX +XXX,XX @@ static void bcm2836_realize(DeviceState *dev, Error **errp)
-     }
-     sysbus_mmio_map_overlap(SYS_BUS_DEVICE(&s->peripherals), 0,
--                            BCM2836_PERI_BASE, 1);
-+                            info->peri_base, 1);
-     /* bcm2836 interrupt controller (and mailboxes, etc.) */
-     object_property_set_bool(OBJECT(&s->control), true, "realized", &err);
-@@ -XXX,XX +XXX,XX @@ static void bcm2836_realize(DeviceState *dev, Error **errp)
-         return;
-     }
--    sysbus_mmio_map(SYS_BUS_DEVICE(&s->control), 0, BCM2836_CONTROL_BASE);
-+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->control), 0, info->ctrl_base);
-     sysbus_connect_irq(SYS_BUS_DEVICE(&s->peripherals), 0,
-         qdev_get_gpio_in_named(DEVICE(&s->control), "gpu-irq", 0));
-@@ -XXX,XX +XXX,XX @@ static void bcm2836_realize(DeviceState *dev, Error **errp)
-         /* set periphbase/CBAR value for CPU-local registers */
-         object_property_set_int(OBJECT(&s->cpus[n]),
--                                BCM2836_PERI_BASE + MSYNC_OFFSET,
-+                                info->peri_base,
-                                 "reset-cbar", &err);
-         if (err) {
-             error_propagate(errp, err);
---
-2.20.1

-[PULL 51/51] hw/arm/highbank: Use AddressSpace when using write_secondary_boot()
+Deleted patch
-From: Philippe Mathieu-Daudé <f4bug@amsat.org>
-write_secondary_boot() is used in SMP configurations where the
-CPU address space might not be the main System Bus.
-The rom_add_blob_fixed_as() function allows us to specify an
-address space. Use it to write each boot blob in the corresponding
-CPU address space.
-Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Message-id: 20191019234715.25750-15-f4bug@amsat.org
-Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
----
- hw/arm/highbank.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
-index XXXXXXX..XXXXXXX 100644
---- a/hw/arm/highbank.c
-+++ b/hw/arm/highbank.c
-@@ -XXX,XX +XXX,XX @@ static void hb_write_secondary(ARMCPU *cpu, const struct arm_boot_info *info)
-     for (n = 0; n < ARRAY_SIZE(smpboot); n++) {
-         smpboot[n] = tswap32(smpboot[n]);
-     }
--    rom_add_blob_fixed("smpboot", smpboot, sizeof(smpboot), SMP_BOOT_ADDR);
-+    rom_add_blob_fixed_as("smpboot", smpboot, sizeof(smpboot), SMP_BOOT_ADDR,
-+                          arm_boot_address_space(cpu, info));
- }
- static void hb_reset_secondary(ARMCPU *cpu, const struct arm_boot_info *info)
---
-2.20.1

Probably the last arm pullreq before softfreeze...

The following changes since commit 58560ad254fbda71d4daa6622d71683190070ee2:

Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-4.2-20191024' into staging (2019-10-24 16:22:58 +0100)

are available in the Git repository at:

https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20191024

for you to fetch changes up to a01a4a3e85ae8f6fe21adbedc80f7013faabdcf4:

hw/arm/highbank: Use AddressSpace when using write_secondary_boot() (2019-10-24 17:16:30 +0100)

----------------------------------------------------------------
target-arm queue:
 * raspi boards: some cleanup
 * raspi: implement the bcm2835 system timer device
 * raspi: implement a dummy thermal sensor
 * KVM: support providing SVE to the guest
 * misc devices: switch to ptimer transaction API
 * cache TB flag state to improve performance of cpu_get_tb_cpu_state
 * aspeed: Add an AST2600 eval board

----------------------------------------------------------------
Andrew Jones (9):
      target/arm/monitor: Introduce qmp_query_cpu_model_expansion
      tests: arm: Introduce cpu feature tests
      target/arm: Allow SVE to be disabled via a CPU property
      target/arm/cpu64: max cpu: Introduce sve<N> properties
      target/arm/kvm64: Add kvm_arch_get/put_sve
      target/arm/kvm64: max cpu: Enable SVE when available
      target/arm/kvm: scratch vcpu: Preserve input kvm_vcpu_init features
      target/arm/cpu64: max cpu: Support sve properties with KVM
      target/arm/kvm: host cpu: Add support for sve<N> properties

Cédric Le Goater (2):
      hw/gpio: Fix property accessors of the AST2600 GPIO 1.8V model
      aspeed: Add an AST2600 eval board

Peter Maydell (8):
      hw/net/fsl_etsec/etsec.c: Switch to transaction-based ptimer API
      hw/timer/xilinx_timer.c: Switch to transaction-based ptimer API
      hw/dma/xilinx_axidma.c: Switch to transaction-based ptimer API
      hw/timer/slavio_timer: Remove useless check for NULL t->timer
      hw/timer/slavio_timer.c: Switch to transaction-based ptimer API
      hw/timer/grlib_gptimer.c: Switch to transaction-based ptimer API
      hw/m68k/mcf5206.c: Switch to transaction-based ptimer API
      hw/watchdog/milkymist-sysctl.c: Switch to transaction-based ptimer API

Philippe Mathieu-Daudé (8):
      hw/misc/bcm2835_thermal: Add a dummy BCM2835 thermal sensor
      hw/arm/bcm2835_peripherals: Use the thermal sensor block
      hw/timer/bcm2835: Add the BCM2835 SYS_timer
      hw/arm/bcm2835_peripherals: Use the SYS_timer
      hw/arm/bcm2836: Make the SoC code modular
      hw/arm/bcm2836: Rename cpus[] as cpu[].core
      hw/arm/raspi: Use AddressSpace when using arm_boot::write_secondary_boot
      hw/arm/highbank: Use AddressSpace when using write_secondary_boot()

Richard Henderson (24):
      target/arm: Split out rebuild_hflags_common
      target/arm: Split out rebuild_hflags_a64
      target/arm: Split out rebuild_hflags_common_32
      target/arm: Split arm_cpu_data_is_big_endian
      target/arm: Split out rebuild_hflags_m32
      target/arm: Reduce tests vs M-profile in cpu_get_tb_cpu_state
      target/arm: Split out rebuild_hflags_a32
      target/arm: Split out rebuild_hflags_aprofile
      target/arm: Hoist XSCALE_CPAR, VECLEN, VECSTRIDE in cpu_get_tb_cpu_state
      target/arm: Simplify set of PSTATE_SS in cpu_get_tb_cpu_state
      target/arm: Hoist computation of TBFLAG_A32.VFPEN
      target/arm: Add arm_rebuild_hflags
      target/arm: Split out arm_mmu_idx_el
      target/arm: Hoist store to cs_base in cpu_get_tb_cpu_state
      target/arm: Add HELPER(rebuild_hflags_{a32, a64, m32})
      target/arm: Rebuild hflags at EL changes
      target/arm: Rebuild hflags at MSR writes
      target/arm: Rebuild hflags at CPSR writes
      target/arm: Rebuild hflags at Xscale SCTLR writes
      target/arm: Rebuild hflags for M-profile
      target/arm: Rebuild hflags for M-profile NVIC
      linux-user/aarch64: Rebuild hflags for TARGET_WORDS_BIGENDIAN
      linux-user/arm: Rebuild hflags for TARGET_WORDS_BIGENDIAN
      target/arm: Rely on hflags correct in cpu_get_tb_cpu_state

From: Cédric Le Goater <clg@kaod.org>

The property names of the AST2600 GPIO 1.8V model are one character longer
than the names of the other ASPEED GPIO models. Increase the string
buffer size by one and be more strict on the expected pattern of the
property name.

This fixes the QOM test of the ast2600-evb machine under:

Apple LLVM version 10.0.0 (clang-1000.10.44.4)
  Target: x86_64-apple-darwin17.7.0
  Thread model: posix
  InstalledDir: /Library/Developer/CommandLineTools/usr/bin

Cc: Rashmica Gupta <rashmica.g@gmail.com>
Fixes: 36d737ee82b2 ("hw/gpio: Add in AST2600 specific implementation")
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 20191023130455.1347-2-clg@kaod.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/gpio/aspeed_gpio.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/gpio/aspeed_gpio.c b/hw/gpio/aspeed_gpio.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/gpio/aspeed_gpio.c
+++ b/hw/gpio/aspeed_gpio.c
@@ -XXX,XX +XXX,XX @@ static void aspeed_gpio_get_pin(Object *obj, Visitor *v, const char *name,
 {
     int pin = 0xfff;
     bool level = true;
-    char group[3];
+    char group[4];
     AspeedGPIOState *s = ASPEED_GPIO(obj);
     int set_idx, group_idx = 0;
 
     if (sscanf(name, "gpio%2[A-Z]%1d", group, &pin) != 2) {
         /* 1.8V gpio */
-        if (sscanf(name, "gpio%3s%1d", group, &pin) != 2) {
+        if (sscanf(name, "gpio%3[18A-E]%1d", group, &pin) != 2) {
             error_setg(errp, "%s: error reading %s", __func__, name);
             return;
         }
@@ -XXX,XX +XXX,XX @@ static void aspeed_gpio_set_pin(Object *obj, Visitor *v, const char *name,
     Error *local_err = NULL;
     bool level;
     int pin = 0xfff;
-    char group[3];
+    char group[4];
     AspeedGPIOState *s = ASPEED_GPIO(obj);
     int set_idx, group_idx = 0;
 
@@ -XXX,XX +XXX,XX @@ static void aspeed_gpio_set_pin(Object *obj, Visitor *v, const char *name,
     }
     if (sscanf(name, "gpio%2[A-Z]%1d", group, &pin) != 2) {
         /* 1.8V gpio */
-        if (sscanf(name, "gpio%3s%1d", group, &pin) != 2) {
+        if (sscanf(name, "gpio%3[18A-E]%1d", group, &pin) != 2) {
             error_setg(errp, "%s: error reading %s", __func__, name);
             return;
         }
-- 
2.20.1

From: Cédric Le Goater <clg@kaod.org>

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Message-id: 20191023130455.1347-3-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 include/hw/arm/aspeed.h |  1 +
 hw/arm/aspeed.c         | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/include/hw/arm/aspeed.h b/include/hw/arm/aspeed.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/arm/aspeed.h
+++ b/include/hw/arm/aspeed.h
@@ -XXX,XX +XXX,XX @@ typedef struct AspeedBoardConfig {
     const char *desc;
     const char *soc_name;
     uint32_t hw_strap1;
+    uint32_t hw_strap2;
     const char *fmc_model;
     const char *spi_model;
     uint32_t num_cs;
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -XXX,XX +XXX,XX @@ struct AspeedBoardState {
 /* Witherspoon hardware value: 0xF10AD216 (but use romulus definition) */
 #define WITHERSPOON_BMC_HW_STRAP1 ROMULUS_BMC_HW_STRAP1
 
+/* AST2600 evb hardware value */
+#define AST2600_EVB_HW_STRAP1 0x000000C0
+#define AST2600_EVB_HW_STRAP2 0x00000003
+
 /*
  * The max ram region is for firmwares that scan the address space
  * with load/store to guess how much RAM the SoC has.
@@ -XXX,XX +XXX,XX @@ static void aspeed_board_init(MachineState *machine,
                              &error_abort);
     object_property_set_int(OBJECT(&bmc->soc), cfg->hw_strap1, "hw-strap1",
                             &error_abort);
+    object_property_set_int(OBJECT(&bmc->soc), cfg->hw_strap2, "hw-strap2",
+                            &error_abort);
     object_property_set_int(OBJECT(&bmc->soc), cfg->num_cs, "num-cs",
                             &error_abort);
     object_property_set_int(OBJECT(&bmc->soc), machine->smp.cpus, "num-cpus",
@@ -XXX,XX +XXX,XX @@ static void ast2500_evb_i2c_init(AspeedBoardState *bmc)
     i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 11), "ds1338", 0x32);
 }
 
+static void ast2600_evb_i2c_init(AspeedBoardState *bmc)
+{
+    /* Start with some devices on our I2C busses */
+    ast2500_evb_i2c_init(bmc);
+}
+
 static void romulus_bmc_i2c_init(AspeedBoardState *bmc)
 {
     AspeedSoCState *soc = &bmc->soc;
@@ -XXX,XX +XXX,XX @@ static const AspeedBoardConfig aspeed_boards[] = {
         .num_cs    = 2,
         .i2c_init  = witherspoon_bmc_i2c_init,
         .ram       = 512 * MiB,
+    }, {
+        .name      = MACHINE_TYPE_NAME("ast2600-evb"),
+        .desc      = "Aspeed AST2600 EVB (Cortex A7)",
+        .soc_name  = "ast2600-a0",
+        .hw_strap1 = AST2600_EVB_HW_STRAP1,
+        .hw_strap2 = AST2600_EVB_HW_STRAP2,
+        .fmc_model = "w25q512jv",
+        .spi_model = "mx66u51235f",
+        .num_cs    = 1,
+        .i2c_init  = ast2600_evb_i2c_init,
+        .ram       = 1 * GiB,
     },
 };
 
-- 
2.20.1