[PATCH 7/8] tests/tcg/aarch64: fixes for WFX instructions system test

Alex Bennée posted 8 patches 3 weeks, 3 days ago
Maintainers: Peter Maydell <peter.maydell@linaro.org>
[PATCH 7/8] tests/tcg/aarch64: fixes for WFX instructions system test
Posted by Alex Bennée 3 weeks, 3 days ago
This required some iteration with the model to add bits it missed in
the initial implementation. I ended up debugging and then prompting
for the missing bits:

  - a basic gicv3 implementation
  - a couple of rounds of tweaking the config
  - mapping the GIC via page tables
  - fixing the compilation of the support library
  - extending the elapsed time check for WFE
  - light re-ordering of Makefile, setting correct machine opts

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
 tests/tcg/aarch64/system/lib/gicv3.h      | 56 +++++++++++++++++
 tests/tcg/aarch64/system/lib/gicv3.c      | 77 +++++++++++++++++++++++
 tests/tcg/aarch64/system/wfx.c            | 17 ++++-
 tests/tcg/aarch64/Makefile.softmmu-target | 13 +++-
 tests/tcg/aarch64/system/boot.S           | 55 ++++++++++------
 5 files changed, 196 insertions(+), 22 deletions(-)
 create mode 100644 tests/tcg/aarch64/system/lib/gicv3.h
 create mode 100644 tests/tcg/aarch64/system/lib/gicv3.c

diff --git a/tests/tcg/aarch64/system/lib/gicv3.h b/tests/tcg/aarch64/system/lib/gicv3.h
new file mode 100644
index 00000000000..9a1268937c6
--- /dev/null
+++ b/tests/tcg/aarch64/system/lib/gicv3.h
@@ -0,0 +1,56 @@
+/*
+ * GICv3 Helper Library
+ *
+ * Copyright (c) 2024 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef GICV3_H
+#define GICV3_H
+
+#include <stdint.h>
+
+/* Virt machine GICv3 base addresses */
+#define GICD_BASE       0x08000000  /* c.f. VIRT_GIC_DIST */
+#define GICR_BASE       0x080a0000  /* c.f. VIRT_GIC_REDIST */
+
+/* Distributor registers */
+#define GICD_CTLR       (GICD_BASE + 0x0000)
+#define GICD_TYPER      (GICD_BASE + 0x0004)
+#define GICD_IIDR       (GICD_BASE + 0x0008)
+
+/* Redistributor registers (per-CPU) */
+#define GICR_SGI_OFFSET 0x00010000
+
+#define GICR_CTLR       0x0000
+#define GICR_WAKER      0x0014
+#define GICR_IGROUPR0   (GICR_SGI_OFFSET + 0x0080)
+#define GICR_ISENABLER0 (GICR_SGI_OFFSET + 0x0100)
+#define GICR_IPRIORITYR0 (GICR_SGI_OFFSET + 0x0400)
+
+/* GICD_CTLR bits */
+#define GICD_CTLR_ARE_NS (1U << 4)
+#define GICD_CTLR_ENA_G1NS (1U << 1)
+#define GICD_CTLR_ENA_G0 (1U << 0)
+
+/* GICR_WAKER bits */
+#define GICR_WAKER_ChildrenAsleep (1U << 2)
+#define GICR_WAKER_ProcessorSleep (1U << 1)
+
+/**
+ * gicv3_init:
+ *
+ * Initialize the GICv3 distributor, redistributor 0 and the CPU interface.
+ */
+void gicv3_init(void);
+
+/**
+ * gicv3_enable_irq:
+ * @irq: The IRQ number to enable
+ *
+ * Enable an IRQ below 32 via redistributor 0; SPIs (>= 32) are ignored.
+ */
+void gicv3_enable_irq(unsigned int irq);
+
+#endif /* GICV3_H */
diff --git a/tests/tcg/aarch64/system/lib/gicv3.c b/tests/tcg/aarch64/system/lib/gicv3.c
new file mode 100644
index 00000000000..a09a0e430e6
--- /dev/null
+++ b/tests/tcg/aarch64/system/lib/gicv3.c
@@ -0,0 +1,77 @@
+/*
+ * GICv3 Helper Library Implementation
+ *
+ * Copyright (c) 2024 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "gicv3.h"
+
+#define write_sysreg(r, v) do {                     \
+        uint64_t __val = (uint64_t)(v);             \
+        asm volatile("msr " #r ", %x0"              \
+                 : : "rZ" (__val));                 \
+} while (0)
+
+#define isb() asm volatile("isb" : : : "memory")
+
+static inline void write_reg(uintptr_t addr, uint32_t val)
+{
+    *(volatile uint32_t *)addr = val;
+}
+
+static inline uint32_t read_reg(uintptr_t addr)
+{
+    return *(volatile uint32_t *)addr;
+}
+
+void gicv3_init(void)
+{
+    uint32_t val;
+
+    /* 1. Enable Distributor ARE and Group 1 NS */
+    val = read_reg(GICD_CTLR);
+    val |= GICD_CTLR_ARE_NS | GICD_CTLR_ENA_G1NS;
+    write_reg(GICD_CTLR, val);
+
+    /* 2. Wake up Redistributor 0 (the frame at GICR_BASE) */
+    /* Clear ProcessorSleep */
+    val = read_reg(GICR_BASE + GICR_WAKER);
+    val &= ~GICR_WAKER_ProcessorSleep;
+    write_reg(GICR_BASE + GICR_WAKER, val);
+
+    /* Wait for ChildrenAsleep to be cleared */
+    while (read_reg(GICR_BASE + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
+        /* spin until the bit reads back as zero */
+    }
+
+    /* 3. Enable CPU interface */
+    /* Set Priority Mask to allow all interrupts */
+    write_sysreg(ICC_PMR_EL1, 0xff);
+    /* Enable Group 1 Non-Secure interrupts */
+    write_sysreg(ICC_IGRPEN1_EL1, 1);
+    isb();
+}
+
+void gicv3_enable_irq(unsigned int irq)
+{
+    if (irq < 32) {
+        /* IRQs below 32 are configured through Redistributor 0 */
+        uintptr_t addr;
+
+        /* Set Group 1 */
+        addr = GICR_BASE + GICR_IGROUPR0;
+        write_reg(addr, read_reg(addr) | (1U << irq));
+
+        /* Set priority (0xa0) with a byte write to this IRQ's field */
+        addr = GICR_BASE + GICR_IPRIORITYR0 + irq;
+        *(volatile uint8_t *)addr = 0xa0;
+
+        /* Enable it: only this IRQ's bit is written, no read-modify-write */
+        addr = GICR_BASE + GICR_ISENABLER0;
+        write_reg(addr, 1U << irq);
+    } else {
+        /* SPI: not implemented yet, the request is silently ignored */
+    }
+}
diff --git a/tests/tcg/aarch64/system/wfx.c b/tests/tcg/aarch64/system/wfx.c
index 59436c381fd..567d9e59c70 100644
--- a/tests/tcg/aarch64/system/wfx.c
+++ b/tests/tcg/aarch64/system/wfx.c
@@ -8,6 +8,7 @@
 
 #include <stdint.h>
 #include <minilib.h>
+#include "gicv3.h"
 
 #define __stringify_1(x...) #x
 #define __stringify(x...)   __stringify_1(x)
@@ -31,6 +32,9 @@
 #define wfit(reg) asm volatile("wfit %0" : : "r" (reg) : "memory")
 #define wfet(reg) asm volatile("wfet %0" : : "r" (reg) : "memory")
 
+#define enable_irq()  asm volatile("msr daifclr, #2" : : : "memory")
+#define disable_irq() asm volatile("msr daifset, #2" : : : "memory")
+
 static void wait_ticks(uint64_t ticks)
 {
     uint64_t start = read_sysreg(cntvct_el0);
@@ -44,6 +48,9 @@ int main(void)
     uint64_t start, end, elapsed;
     uint64_t timeout;
 
+    gicv3_init();
+    gicv3_enable_irq(27); /* Virtual Timer PPI */
+
     ml_printf("WFX Test\n");
 
     /* 1. Test WFI with timer interrupt */
@@ -58,8 +65,13 @@ int main(void)
      * We don't have a full interrupt handler, but WFI should wake up
      * when the interrupt is pending even if we have it masked at the CPU.
      * PSTATE.I is set by boot code.
+     *
+     * We unmask IRQs around the WFI so the CPU can take the interrupt and
+     * run the minimal handler in boot.S, which masks the timer.
      */
+    enable_irq();
     wfi();
+    disable_irq();
     end = read_sysreg(cntvct_el0);
     elapsed = end - start;
     if (elapsed < 100000) {
@@ -76,11 +88,12 @@ int main(void)
     wfe(); /* Should return immediately */
     end = read_sysreg(cntvct_el0);
     elapsed = end - start;
-    if (elapsed > 1000) { /* Should be very fast */
+    /* while this should be fast there is some overhead from TCG */
+    if (elapsed > 20000) {
         ml_printf("FAILED: WFE slept despite SEV (%ld ticks)\n", elapsed);
         return 1;
     }
-    ml_printf("PASSED\n");
+    ml_printf("PASSED (%ld ticks)\n", elapsed);
 
     /* 3. Test WFIT */
     ml_printf("Testing WFIT...");
diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target
index 84342c52cd7..9a5b95de621 100644
--- a/tests/tcg/aarch64/Makefile.softmmu-target
+++ b/tests/tcg/aarch64/Makefile.softmmu-target
@@ -4,8 +4,9 @@
 
 AARCH64_SRC=$(SRC_PATH)/tests/tcg/aarch64
 AARCH64_SYSTEM_SRC=$(AARCH64_SRC)/system
+AARCH64_SYSTEM_LIB_SRC=$(AARCH64_SYSTEM_SRC)/lib
 
-VPATH+=$(AARCH64_SYSTEM_SRC)
+VPATH+=$(AARCH64_SYSTEM_SRC) $(AARCH64_SYSTEM_LIB_SRC)
 
 # These objects provide the basic boot code and helper functions for all tests
 CRT_OBJS=boot.o
@@ -24,7 +25,7 @@ LINK_SCRIPT=$(AARCH64_SYSTEM_SRC)/kernel.ld
 LDFLAGS=-Wl,-T$(LINK_SCRIPT)
 TESTS+=$(AARCH64_TESTS) $(MULTIARCH_TESTS)
 EXTRA_RUNS+=$(MULTIARCH_RUNS)
-CFLAGS+=-nostdlib -ggdb -O0 $(MINILIB_INC)
+CFLAGS+=-nostdlib -ggdb -O0 $(MINILIB_INC) -I$(AARCH64_SYSTEM_LIB_SRC)
 LDFLAGS+=-static -nostdlib $(CRT_OBJS) $(MINILIB_OBJS) -lgcc
 
 config-cc.mak: Makefile
@@ -102,7 +103,15 @@ run-pauth-3:
 	$(call skip-test, "RUN of pauth-3", "not built")
 endif
 
+gicv3.o: gicv3.c gicv3.h
+	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -c $< -o $@
+
 wfx: CFLAGS += -march=armv8.7-a
+wfx: LDFLAGS += gicv3.o
+wfx: gicv3.o
+
+QEMU_GICV3_MACHINE=-M virt,gic-version=3 -cpu max -display none
+run-wfx: QEMU_OPTS=$(QEMU_GICV3_MACHINE) $(QEMU_BASE_ARGS) -kernel
 
 ifneq ($(CROSS_CC_HAS_ARMV8_MTE),)
 QEMU_MTE_ENABLED_MACHINE=-M virt,mte=on -cpu max -display none
diff --git a/tests/tcg/aarch64/system/boot.S b/tests/tcg/aarch64/system/boot.S
index 8bfa4e4efc7..6a71fc0da5a 100644
--- a/tests/tcg/aarch64/system/boot.S
+++ b/tests/tcg/aarch64/system/boot.S
@@ -60,7 +60,6 @@ curr_sp0_irq:
 curr_sp0_fiq:
 curr_sp0_serror:
 curr_spx_sync:
-curr_spx_irq:
 curr_spx_fiq:
 curr_spx_serror:
 lower_a64_sync:
@@ -248,29 +247,34 @@ at_testel:
 	msr	ttbr0_el1, x0
 
 	/*
-	 * Setup a flat address mapping page-tables. Stage one simply
-	 * maps RAM to the first Gb. The stage2 tables have two 2mb
-	 * translation block entries covering a series of adjacent
-	 * 4k pages.
+	 * Setup a flat address mapping page-tables.
+	 *
+	 * ttb (Level 1):
+	 *   - Entry 0 [0 - 1GB]: 1GB Device block (for GIC and other H/W)
+	 *   - Entry 1 [1GB - 2GB]: Table entry pointing to ttb_stage2 (for RAM)
 	 */
 
-	/* Stage 1 entry: indexed by IA[38:30] */
-	adr	x1, .				/* phys address */
-	bic	x1, x1, #(1 << 30) - 1		/* 1GB alignment*/
-	add	x2, x0, x1, lsr #(30 - 3)	/* offset in l1 page table */
+	/* Entry 0: 1GB Device block mapping at 0x0 */
+	ldr	x1, =0x401 | (1 << 2)		/* AF=1, block, AttrIndx=Attr1 (Device) */
+	str	x1, [x0]
 
-	/* point to stage 2 table [47:12] */
-	adrp	x0, ttb_stage2
-	orr 	x1, x0, #3 			/* ptr to stage 2 */
-	str	x1, [x2]
+	/* Entry 1: Table entry pointing to ttb_stage2 */
+	adrp	x1, ttb_stage2
+	orr 	x1, x1, #3 			/* ptr to table (type=3) */
+	str	x1, [x0, #8]
 
-	/* Stage 2 entries: indexed by IA[29:21] */
+	/* Stage 2 entries: indexed by IA[29:21] (within 1GB-2GB range) */
+	adrp	x0, ttb_stage2
+	add	x0, x0, :lo12:ttb_stage2
 	ldr	x5, =(((1 << 9) - 1) << 21)
 
 	/* First block: .text/RO/execute enabled */
 	adr	x1, .				/* phys address */
 	bic	x1, x1, #(1 << 21) - 1		/* 2mb block alignment	*/
-	and	x4, x1, x5			/* IA[29:21] */
+	/* Note: we assume RAM is in the 1GB-2GB range, so IA[30] is 1 */
+	mov	x4, x1
+	bic	x4, x4, #(1 << 30)		/* remove 1GB offset for L2 index */
+	and	x4, x4, x5			/* IA[29:21] */
 	add	x2, x0, x4, lsr #(21 - 3)	/* offset in l2 page table */
 	ldr	x3, =0x401			/* attr(AF, block) */
 	orr	x1, x1, x3
@@ -280,7 +284,9 @@ at_testel:
 	adrp	x1, .data
 	add	x1, x1, :lo12:.data
 	bic	x1, x1, #(1 << 21) - 1		/* 2mb block alignment */
-	and	x4, x1, x5			/* IA[29:21] */
+	mov	x4, x1
+	bic	x4, x4, #(1 << 30)		/* remove 1GB offset for L2 index */
+	and	x4, x4, x5			/* IA[29:21] */
 	add	x2, x0, x4, lsr #(21 - 3)	/* offset in l2 page table */
 	ldr	x3, =(3 << 53) | 0x401		/* attr(AF, NX, block) */
 	orr	x1, x1, x3
@@ -290,7 +296,9 @@ at_testel:
 	adrp	x1, mte_page
 	add	x1, x1, :lo12:mte_page
 	bic	x1, x1, #(1 << 21) - 1
-	and 	x4, x1, x5
+	mov	x4, x1
+	bic	x4, x4, #(1 << 30)		/* remove 1GB offset for L2 index */
+	and 	x4, x4, x5
 	add	x2, x0, x4, lsr #(21 - 3)
 	/* attr(AF, NX, block, AttrIndx=Attr1) */
 	ldr	x3, =(3 << 53) | 0x401 | (1 << 2)
@@ -317,7 +325,7 @@ at_testel:
 	ldr	x0, = (2 << 32) | 25 | (3 << 10) | (3 << 8)
 	msr	tcr_el1, x0
 
-	mov	x0, #0xee			/* Inner/outer cacheable WB */
+	ldr	x0, =0x04ee			/* Attr1: Device-nGnRE, Attr0: Normal WB */
 	msr	mair_el1, x0
 	isb
 
@@ -370,6 +378,17 @@ _exit:
 	semihosting_call
 	/* never returns */
 
+	/* Minimal IRQ handler: mask the virtual timer (IMASK) and return. */
+	/* x0 is spilled to the stack so the interrupted code's value survives. */
+	.global curr_spx_irq
+curr_spx_irq:
+	str	x0, [sp, #-16]!		/* keep sp 16-byte aligned */
+	mrs	x0, cntv_ctl_el0
+	orr	x0, x0, #2		/* IMASK=1 */
+	msr	cntv_ctl_el0, x0
+	ldr	x0, [sp], #16
+	eret
+
 	/*
 	 * Helper Functions
 	*/
-- 
2.47.3