[v2] target/arm/tcg: refine cache descriptions with a wrapper

[PATCH v2] target/arm/tcg: refine cache descriptions with a wrapper

Posted by Alireza Sanaee via 1 year, 5 months ago

This patch allows for easier manipulation of the cache description
register, CCSIDR, which is helpful for testing as well. Currently the
register values are hard-coded, which is error-prone.

Therefore, this patch adds wrappers that the different CPU types
available in TCG use to describe their caches. Two functions,
`make_ccsidr32` and `make_ccsidr64`, build the register value from the
cache geometry. The 32-bit version takes an extra `flags` parameter,
placed in bits [31:28], which became unknown later in the 64-bit format.

For the CCSIDR register, the 32-bit version follows specification [1],
while the 64-bit version follows specification [2].

[1] B4.1.19, ARM Architecture Reference Manual ARMv7-A and ARMv7-R
edition, https://developer.arm.com/documentation/ddi0406
[2] D23.2.29, ARM Architecture Reference Manual for A-profile Architecture,
https://developer.arm.com/documentation/ddi0487/latest/

Signed-off-by: Alireza Sanaee <alireza.sanaee@huawei.com>
---
 target/arm/cpu-features.h | 53 ++++++++++++++++++++++++
 target/arm/cpu64.c        | 19 ++++++---
 target/arm/tcg/cpu64.c    | 86 ++++++++++++++++++---------------------
 3 files changed, 105 insertions(+), 53 deletions(-)

diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index c59ca104fe..00a0f0d963 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -1022,6 +1022,59 @@ static inline bool isar_feature_any_evt(const ARMISARegisters *id)
     return isar_feature_aa64_evt(id) || isar_feature_aa32_evt(id);
 }
 
+static inline uint64_t make_ccsidr32(unsigned assoc, unsigned linesize,
+                                     unsigned cachesize, uint8_t flags)
+{
+    unsigned lg_linesize = ctz32(linesize);
+    unsigned sets;
+
+    /*
+     * The 32-bit CCSIDR_EL1 format is:
+     *   [27:13] number of sets - 1
+     *   [12:3]  associativity - 1
+     *   [2:0]   log2(linesize) - 4
+     *           so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
+     */
+    assert(assoc != 0);
+    assert(is_power_of_2(linesize));
+    assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
+
+    /* sets * associativity * linesize == cachesize. */
+    sets = cachesize / (assoc * linesize);
+    assert(cachesize % (assoc * linesize) == 0);
+
+    return ((uint64_t)(flags) << 28)
+        | ((sets - 1) << 13)
+        | ((assoc - 1) << 3)
+        | (lg_linesize - 4);
+}
+
+static inline uint64_t make_ccsidr64(unsigned assoc, unsigned linesize,
+                              unsigned cachesize)
+{
+    unsigned lg_linesize = ctz32(linesize);
+    unsigned sets;
+
+    /*
+     * The 64-bit CCSIDR_EL1 format is:
+     *   [55:32] number of sets - 1
+     *   [23:3]  associativity - 1
+     *   [2:0]   log2(linesize) - 4
+     *           so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
+     */
+    assert(assoc != 0);
+    assert(is_power_of_2(linesize));
+    assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
+
+    /* sets * associativity * linesize == cachesize. */
+    sets = cachesize / (assoc * linesize);
+    assert(cachesize % (assoc * linesize) == 0);
+
+    return ((uint64_t)(sets - 1) << 32)
+         | ((assoc - 1) << 3)
+         | (lg_linesize - 4);
+}
+
 /*
  * Forward to the above feature tests given an ARMCPU pointer.
  */
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 262a1d6c0b..57ebc1b979 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -23,6 +23,7 @@
 #include "cpu.h"
 #include "cpregs.h"
 #include "qemu/module.h"
+#include "qemu/units.h"
 #include "sysemu/kvm.h"
 #include "sysemu/hvf.h"
 #include "sysemu/qtest.h"
@@ -642,9 +643,12 @@ static void aarch64_a57_initfn(Object *obj)
     cpu->isar.dbgdevid1 = 0x2;
     cpu->isar.reset_pmcr_el0 = 0x41013000;
     cpu->clidr = 0x0a200023;
-    cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
-    cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
-    cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */
+    /* 32KB L1 dcache */
+    cpu->ccsidr[0] = make_ccsidr32(4, 64, 32 * KiB, 7);
+    /* 48KB L1 icache */
+    cpu->ccsidr[1] = make_ccsidr32(3, 64, 48 * KiB, 2);
+    /* 2048KB L2 cache */
+    cpu->ccsidr[2] = make_ccsidr32(16, 64, 2 * MiB, 7);
     cpu->dcz_blocksize = 4; /* 64 bytes */
     cpu->gic_num_lrs = 4;
     cpu->gic_vpribits = 5;
@@ -700,9 +704,12 @@ static void aarch64_a53_initfn(Object *obj)
     cpu->isar.dbgdevid1 = 0x1;
     cpu->isar.reset_pmcr_el0 = 0x41033000;
     cpu->clidr = 0x0a200023;
-    cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */
-    cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */
-    cpu->ccsidr[2] = 0x707fe07a; /* 1024KB L2 cache */
+    /* 32KB L1 dcache */
+    cpu->ccsidr[0] = make_ccsidr32(4, 64, 32 * KiB, 7);
+    /* 32KB L1 icache */
+    cpu->ccsidr[1] = make_ccsidr32(1, 64, 32 * KiB, 2);
+    /* 1024KB L2 cache */
+    cpu->ccsidr[2] = make_ccsidr32(16, 64, 1 * MiB, 7);
     cpu->dcz_blocksize = 4; /* 64 bytes */
     cpu->gic_num_lrs = 4;
     cpu->gic_vpribits = 5;
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index fe232eb306..85d8b1a5b4 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -29,32 +29,6 @@
 #include "cpu-features.h"
 #include "cpregs.h"
 
-static uint64_t make_ccsidr64(unsigned assoc, unsigned linesize,
-                              unsigned cachesize)
-{
-    unsigned lg_linesize = ctz32(linesize);
-    unsigned sets;
-
-    /*
-     * The 64-bit CCSIDR_EL1 format is:
-     *   [55:32] number of sets - 1
-     *   [23:3]  associativity - 1
-     *   [2:0]   log2(linesize) - 4
-     *           so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
-     */
-    assert(assoc != 0);
-    assert(is_power_of_2(linesize));
-    assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
-
-    /* sets * associativity * linesize == cachesize. */
-    sets = cachesize / (assoc * linesize);
-    assert(cachesize % (assoc * linesize) == 0);
-
-    return ((uint64_t)(sets - 1) << 32)
-         | ((assoc - 1) << 3)
-         | (lg_linesize - 4);
-}
-
 static void aarch64_a35_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
@@ -106,9 +80,12 @@ static void aarch64_a35_initfn(Object *obj)
     cpu->isar.reset_pmcr_el0 = 0x410a3000;
 
     /* From B2.29 Cache ID registers */
-    cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */
-    cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */
-    cpu->ccsidr[2] = 0x703fe03a; /* 512KB L2 cache */
+    /* 32KB L1 dcache */
+    cpu->ccsidr[0] = make_ccsidr32(4, 64, 32 * KiB, 7);
+    /* 32KB L1 icache */
+    cpu->ccsidr[1] = make_ccsidr32(4, 64, 32 * KiB, 2);
+    /* 512KB L2 cache */
+    cpu->ccsidr[2] = make_ccsidr32(16, 64, 512 * KiB, 7);
 
     /* From B3.5 VGIC Type register */
     cpu->gic_num_lrs = 4;
@@ -272,9 +249,12 @@ static void aarch64_a55_initfn(Object *obj)
     cpu->revidr = 0;
 
     /* From B2.23 CCSIDR_EL1 */
-    cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */
-    cpu->ccsidr[1] = 0x200fe01a; /* 32KB L1 icache */
-    cpu->ccsidr[2] = 0x703fe07a; /* 512KB L2 cache */
+    /* 32KB L1 dcache */
+    cpu->ccsidr[0] = make_ccsidr32(4, 64, 32 * KiB, 7);
+    /* 32KB L1 icache */
+    cpu->ccsidr[1] = make_ccsidr32(4, 64, 32 * KiB, 2);
+    /* 512KB L2 cache */
+    cpu->ccsidr[2] = make_ccsidr32(16, 64, 512 * KiB, 7);
 
     /* From B2.96 SCTLR_EL3 */
     cpu->reset_sctlr = 0x30c50838;
@@ -338,9 +318,12 @@ static void aarch64_a72_initfn(Object *obj)
     cpu->isar.dbgdevid1 = 0x2;
     cpu->isar.reset_pmcr_el0 = 0x41023000;
     cpu->clidr = 0x0a200023;
-    cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
-    cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
-    cpu->ccsidr[2] = 0x707fe07a; /* 1MB L2 cache */
+    /* 32KB L1 dcache */
+    cpu->ccsidr[0] = make_ccsidr32(4, 64, 32 * KiB, 7);
+    /* 48KB L1 icache */
+    cpu->ccsidr[1] = make_ccsidr32(3, 64, 48 * KiB, 2);
+    /* 1MB L2 cache */
+    cpu->ccsidr[2] = make_ccsidr32(16, 64, 1 * MiB, 7);
     cpu->dcz_blocksize = 4; /* 64 bytes */
     cpu->gic_num_lrs = 4;
     cpu->gic_vpribits = 5;
@@ -397,9 +380,12 @@ static void aarch64_a76_initfn(Object *obj)
     cpu->revidr = 0;
 
     /* From B2.18 CCSIDR_EL1 */
-    cpu->ccsidr[0] = 0x701fe01a; /* 64KB L1 dcache */
-    cpu->ccsidr[1] = 0x201fe01a; /* 64KB L1 icache */
-    cpu->ccsidr[2] = 0x707fe03a; /* 512KB L2 cache */
+    /* 64KB L1 dcache */
+    cpu->ccsidr[0] = make_ccsidr32(4, 64, 64 * KiB, 7);
+    /* 64KB L1 icache */
+    cpu->ccsidr[1] = make_ccsidr32(4, 64, 64 * KiB, 2);
+    /* 512KB L2 cache */
+    cpu->ccsidr[2] = make_ccsidr32(8, 64, 512 * KiB, 7);
 
     /* From B2.93 SCTLR_EL3 */
     cpu->reset_sctlr = 0x30c50838;
@@ -449,9 +435,12 @@ static void aarch64_a64fx_initfn(Object *obj)
     cpu->isar.id_aa64isar1 = 0x0000000000010001;
     cpu->isar.id_aa64zfr0 = 0x0000000000000000;
     cpu->clidr = 0x0000000080000023;
-    cpu->ccsidr[0] = 0x7007e01c; /* 64KB L1 dcache */
-    cpu->ccsidr[1] = 0x2007e01c; /* 64KB L1 icache */
-    cpu->ccsidr[2] = 0x70ffe07c; /* 8MB L2 cache */
+    /* 64KB L1 dcache */
+    cpu->ccsidr[0] = make_ccsidr32(4, 256, 64 * KiB, 7);
+    /* 64KB L1 icache */
+    cpu->ccsidr[1] = make_ccsidr32(4, 256, 64 * KiB, 2);
+    /* 8MB L2 cache */
+    cpu->ccsidr[2] = make_ccsidr32(16, 256, 8 * MiB, 7);
     cpu->dcz_blocksize = 6; /* 256 bytes */
     cpu->gic_num_lrs = 4;
     cpu->gic_vpribits = 5;
@@ -637,9 +626,12 @@ static void aarch64_neoverse_n1_initfn(Object *obj)
     cpu->revidr = 0;
 
     /* From B2.23 CCSIDR_EL1 */
-    cpu->ccsidr[0] = 0x701fe01a; /* 64KB L1 dcache */
-    cpu->ccsidr[1] = 0x201fe01a; /* 64KB L1 icache */
-    cpu->ccsidr[2] = 0x70ffe03a; /* 1MB L2 cache */
+    /* 64KB L1 dcache */
+    cpu->ccsidr[0] = make_ccsidr32(4, 64, 64 * KiB, 7);
+    /* 64KB L1 icache */
+    cpu->ccsidr[1] = make_ccsidr32(4, 64, 64 * KiB, 2);
+    /* 1MB L2 cache */
+    cpu->ccsidr[2] = make_ccsidr32(8, 64, 1 * MiB, 7);
 
     /* From B2.98 SCTLR_EL3 */
     cpu->reset_sctlr = 0x30c50838;
@@ -721,9 +713,9 @@ static void aarch64_neoverse_v1_initfn(Object *obj)
      * L2: 8-way set associative, 64 byte line size, either 512K or 1MB.
      * L3: No L3 (this matches the CLIDR_EL1 value).
      */
-    cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */
-    cpu->ccsidr[1] = cpu->ccsidr[0];                 /* L1 icache */
-    cpu->ccsidr[2] = make_ccsidr64(8, 64, 1 * MiB);  /* L2 cache */
+    cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* 64KB L1 dcache */
+    cpu->ccsidr[1] = cpu->ccsidr[0];                 /* 64KB L1 icache */
+    cpu->ccsidr[2] = make_ccsidr64(8, 64, 1 * MiB);  /* 1MB L2 cache */
 
     /* From 3.2.115 SCTLR_EL3 */
     cpu->reset_sctlr = 0x30c50838;
-- 
2.34.1

Re: [PATCH v2] target/arm/tcg: refine cache descriptions with a wrapper

Posted by Alex Bennée 1 year, 5 months ago

Alireza Sanaee via <qemu-arm@nongnu.org> writes:

> This patch allows for easier manipulation of the cache description
> register, CCSIDR. Which is helpful for testing as well. Currently
> numbers get hard-coded and might be prone to errors.
>
> Therefore, this patch adds wrappers for different types of CPUs
> available in tcg to decribe caches. Two functions `make_ccsidr32` and
> `make_ccsidr64` describing descriptions. The 32 bit version receives
> extra parameters that became unknown later in 64 bit.
>
> For CCSIDR register, 32 bit version follows specification [1].
> Conversely, 64 bit version follows specification [2].
>
> [1] B4.1.19, ARM Architecture Reference Manual ARMv7-A and ARMv7-R
> edition, https://developer.arm.com/documentation/ddi0406
> [2] D23.2.29, ARM Architecture Reference Manual for A-profile Architecture,
> https://developer.arm.com/documentation/ddi0487/latest/
>
> Signed-off-by: Alireza Sanaee <alireza.sanaee@huawei.com>
> ---
>  target/arm/cpu-features.h | 53 ++++++++++++++++++++++++
>  target/arm/cpu64.c        | 19 ++++++---
>  target/arm/tcg/cpu64.c    | 86 ++++++++++++++++++---------------------
>  3 files changed, 105 insertions(+), 53 deletions(-)
>
> diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
> index c59ca104fe..00a0f0d963 100644
> --- a/target/arm/cpu-features.h
> +++ b/target/arm/cpu-features.h
> @@ -1022,6 +1022,59 @@ static inline bool isar_feature_any_evt(const ARMISARegisters *id)
>      return isar_feature_aa64_evt(id) || isar_feature_aa32_evt(id);
>  }
>  
> +static inline uint64_t make_ccsidr32(unsigned assoc, unsigned linesize,
> +                                     unsigned cachesize, uint8_t flags)
> +{

isn't this returning a 32 bit value?

> +    unsigned lg_linesize = ctz32(linesize);
> +    unsigned sets;
> +
> +    /*
> +     * The 32-bit CCSIDR_EL1 format is:
> +     *   [27:13] number of sets - 1
> +     *   [12:3]  associativity - 1
> +     *   [2:0]   log2(linesize) - 4
> +     *           so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
> +     */
> +    assert(assoc != 0);
> +    assert(is_power_of_2(linesize));
> +    assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
> +
> +    /* sets * associativity * linesize == cachesize. */
> +    sets = cachesize / (assoc * linesize);
> +    assert(cachesize % (assoc * linesize) == 0);
> +
> +    return ((uint64_t)(flags) << 28)
> +        | ((sets - 1) << 13)
> +        | ((assoc - 1) << 3)
> +        | (lg_linesize - 4);

This is a nice improvement but using deposit() will ensure you don't
accidentally overflow fields with the shift/or combos. So something
like:

  uint32_t ccsidr32 = 0;
  ..
  ccsidr32 = deposit32(ccsidr32, 28,  4, flags);
  ccsidr32 = deposit32(ccsidr32, 13, 15, sets - 1);
  ccsidr32 = deposit32(ccsidr32,  3, 10, assoc - 1);
  ccsidr32 = deposit32(ccsidr32,  0,  3, lg_linesize - 4);

And leave the compiler to simplify everything (not that it matters that
much for an init function).

Actually I note CCSIDR already has some field definitions so it would
be:

  ccsidr32 = FIELD_DP32(ccsidr32, CCSIDR_EL1, LINESIZE, lg_linesize - 4);

etc. Although I notice it has two sets of defines to account for FEAT_CCIDX.

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: [PATCH v2] target/arm/tcg: refine cache descriptions with a wrapper

Posted by Peter Maydell 1 year, 5 months ago

On Mon, 2 Sept 2024 at 11:28, Alex Bennée <alex.bennee@linaro.org> wrote:
>
> Alireza Sanaee via <qemu-arm@nongnu.org> writes:
>
> > This patch allows for easier manipulation of the cache description
> > register, CCSIDR. Which is helpful for testing as well. Currently
> > numbers get hard-coded and might be prone to errors.
> >
> > Therefore, this patch adds wrappers for different types of CPUs
> > available in tcg to decribe caches. Two functions `make_ccsidr32` and
> > `make_ccsidr64` describing descriptions. The 32 bit version receives
> > extra parameters that became unknown later in 64 bit.
> >
> > For CCSIDR register, 32 bit version follows specification [1].
> > Conversely, 64 bit version follows specification [2].
> >
> > [1] B4.1.19, ARM Architecture Reference Manual ARMv7-A and ARMv7-R
> > edition, https://developer.arm.com/documentation/ddi0406
> > [2] D23.2.29, ARM Architecture Reference Manual for A-profile Architecture,
> > https://developer.arm.com/documentation/ddi0487/latest/
> >
> > Signed-off-by: Alireza Sanaee <alireza.sanaee@huawei.com>
> > ---
> >  target/arm/cpu-features.h | 53 ++++++++++++++++++++++++
> >  target/arm/cpu64.c        | 19 ++++++---
> >  target/arm/tcg/cpu64.c    | 86 ++++++++++++++++++---------------------
> >  3 files changed, 105 insertions(+), 53 deletions(-)
> >
> > diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
> > index c59ca104fe..00a0f0d963 100644
> > --- a/target/arm/cpu-features.h
> > +++ b/target/arm/cpu-features.h
> > @@ -1022,6 +1022,59 @@ static inline bool isar_feature_any_evt(const ARMISARegisters *id)
> >      return isar_feature_aa64_evt(id) || isar_feature_aa32_evt(id);
> >  }
> >
> > +static inline uint64_t make_ccsidr32(unsigned assoc, unsigned linesize,
> > +                                     unsigned cachesize, uint8_t flags)
> > +{
>
> isn't this returning a 32 bit value?
>
> > +    unsigned lg_linesize = ctz32(linesize);
> > +    unsigned sets;
> > +
> > +    /*
> > +     * The 32-bit CCSIDR_EL1 format is:
> > +     *   [27:13] number of sets - 1
> > +     *   [12:3]  associativity - 1
> > +     *   [2:0]   log2(linesize) - 4
> > +     *           so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
> > +     */
> > +    assert(assoc != 0);
> > +    assert(is_power_of_2(linesize));
> > +    assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
> > +
> > +    /* sets * associativity * linesize == cachesize. */
> > +    sets = cachesize / (assoc * linesize);
> > +    assert(cachesize % (assoc * linesize) == 0);
> > +
> > +    return ((uint64_t)(flags) << 28)
> > +        | ((sets - 1) << 13)
> > +        | ((assoc - 1) << 3)
> > +        | (lg_linesize - 4);
>
> This is a nice improvement but using deposit() will ensure you don't
> accidentally overflow fields with the shift/or combos. So something
> like:
>
>   uint32_t ccsidr32 = 0;
>   ..
>   ccsidr32 = deposit32(ccsidr32, 28,  4, flags);
>   ccsidr32 = deposit32(ccsidr32, 13, 14, sets - 1);
>   ccsidr32 = deposit32(ccsidr32,  3, 10, assoc - 1);
>   ccsidr32 = deposit32(ccsidr32,  0,  3, lg_linesize - 1);
>
> And leave the compiler to simplify everything (not that it matters that
> much for an init function).
>
> Actually I note CCSIDR already has some field definitions so it would
> be:
>
>   ccsidr32 = FIELD_DP32(ccsidr32, CCSIDR_EL1, LINESIZE, lg_linesize -1);
>
> etc. Although I notice it two sets of defines to account for FEAT_CCIDX

Mmm. Though I feel like we (me absolutely included) are rather
getting into bikeshedding a fairly simple refactoring patch...

thanks
-- PMM

Re: [PATCH v2] target/arm/tcg: refine cache descriptions with a wrapper

Posted by Philippe Mathieu-Daudé 1 year, 5 months ago

Hi Alireza,

On 30/8/24 20:47, Alireza Sanaee via wrote:
> This patch allows for easier manipulation of the cache description
> register, CCSIDR. Which is helpful for testing as well. Currently
> numbers get hard-coded and might be prone to errors.
> 
> Therefore, this patch adds wrappers for different types of CPUs
> available in tcg to decribe caches. Two functions `make_ccsidr32` and
> `make_ccsidr64` describing descriptions. The 32 bit version receives
> extra parameters that became unknown later in 64 bit.
> 
> For CCSIDR register, 32 bit version follows specification [1].
> Conversely, 64 bit version follows specification [2].
> 
> [1] B4.1.19, ARM Architecture Reference Manual ARMv7-A and ARMv7-R
> edition, https://developer.arm.com/documentation/ddi0406
> [2] D23.2.29, ARM Architecture Reference Manual for A-profile Architecture,
> https://developer.arm.com/documentation/ddi0487/latest/
> 
> Signed-off-by: Alireza Sanaee <alireza.sanaee@huawei.com>
> ---
>   target/arm/cpu-features.h | 53 ++++++++++++++++++++++++
>   target/arm/cpu64.c        | 19 ++++++---
>   target/arm/tcg/cpu64.c    | 86 ++++++++++++++++++---------------------
>   3 files changed, 105 insertions(+), 53 deletions(-)
> 
> diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
> index c59ca104fe..00a0f0d963 100644
> --- a/target/arm/cpu-features.h
> +++ b/target/arm/cpu-features.h
> @@ -1022,6 +1022,59 @@ static inline bool isar_feature_any_evt(const ARMISARegisters *id)
>       return isar_feature_aa64_evt(id) || isar_feature_aa32_evt(id);
>   }
>   
> +static inline uint64_t make_ccsidr32(unsigned assoc, unsigned linesize,
> +                                     unsigned cachesize, uint8_t flags)
> +{
> +    unsigned lg_linesize = ctz32(linesize);
> +    unsigned sets;
> +
> +    /*
> +     * The 32-bit CCSIDR_EL1 format is:
> +     *   [27:13] number of sets - 1
> +     *   [12:3]  associativity - 1
> +     *   [2:0]   log2(linesize) - 4
> +     *           so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
> +     */
> +    assert(assoc != 0);
> +    assert(is_power_of_2(linesize));
> +    assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
> +
> +    /* sets * associativity * linesize == cachesize. */
> +    sets = cachesize / (assoc * linesize);
> +    assert(cachesize % (assoc * linesize) == 0);
> +
> +    return ((uint64_t)(flags) << 28)
> +        | ((sets - 1) << 13)
> +        | ((assoc - 1) << 3)
> +        | (lg_linesize - 4);
> +}
> +
> +static inline uint64_t make_ccsidr64(unsigned assoc, unsigned linesize,
> +                              unsigned cachesize)
> +{
> +    unsigned lg_linesize = ctz32(linesize);
> +    unsigned sets;
> +
> +    /*
> +     * The 64-bit CCSIDR_EL1 format is:
> +     *   [55:32] number of sets - 1
> +     *   [23:3]  associativity - 1
> +     *   [2:0]   log2(linesize) - 4
> +     *           so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
> +     */
> +    assert(assoc != 0);
> +    assert(is_power_of_2(linesize));
> +    assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
> +
> +    /* sets * associativity * linesize == cachesize. */
> +    sets = cachesize / (assoc * linesize);
> +    assert(cachesize % (assoc * linesize) == 0);
> +
> +    return ((uint64_t)(sets - 1) << 32)
> +         | ((assoc - 1) << 3)
> +         | (lg_linesize - 4);
> +}
> +
>   /*
>    * Forward to the above feature tests given an ARMCPU pointer.
>    */
> diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
> index 262a1d6c0b..57ebc1b979 100644
> --- a/target/arm/cpu64.c
> +++ b/target/arm/cpu64.c
> @@ -23,6 +23,7 @@
>   #include "cpu.h"
>   #include "cpregs.h"
>   #include "qemu/module.h"
> +#include "qemu/units.h"
>   #include "sysemu/kvm.h"
>   #include "sysemu/hvf.h"
>   #include "sysemu/qtest.h"
> @@ -642,9 +643,12 @@ static void aarch64_a57_initfn(Object *obj)
>       cpu->isar.dbgdevid1 = 0x2;
>       cpu->isar.reset_pmcr_el0 = 0x41013000;
>       cpu->clidr = 0x0a200023;
> -    cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
> -    cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
> -    cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */
> +    /* 32KB L1 dcache */
> +    cpu->ccsidr[0] = make_ccsidr32(4, 64, 32 * KiB, 7);
> +    /* 48KB L1 icache */
> +    cpu->ccsidr[1] = make_ccsidr32(3, 64, 48 * KiB, 2);
> +    /* 2048KB L2 cache */
> +    cpu->ccsidr[2] = make_ccsidr32(16, 64, 2 * MiB, 7);

I like the uses of make_ccsidrXX() instead of the magic values.

I don't much like the code duplication between the make_ccsidrXX()
definitions; I'd prefer that both call a common (static?) one.

I'm not keen on adding inlined functions in target/arm/cpu-features.h.
Just expose the prototype declarations, and keep the definition in .c
files. That way we don't need to include "qemu/host-utils.h" to get
is_power_of_2() declaration.

Regards,

Phil.

Re: [PATCH v2] target/arm/tcg: refine cache descriptions with a wrapper

Posted by Peter Maydell 1 year, 5 months ago

On Mon, 2 Sept 2024 at 11:07, Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
>
> Hi Alireza,
>
> On 30/8/24 20:47, Alireza Sanaee via wrote:
> > This patch allows for easier manipulation of the cache description
> > register, CCSIDR. Which is helpful for testing as well. Currently
> > numbers get hard-coded and might be prone to errors.
> >
> > Therefore, this patch adds wrappers for different types of CPUs
> > available in tcg to decribe caches. Two functions `make_ccsidr32` and
> > `make_ccsidr64` describing descriptions. The 32 bit version receives
> > extra parameters that became unknown later in 64 bit.
> >
> > For CCSIDR register, 32 bit version follows specification [1].
> > Conversely, 64 bit version follows specification [2].
> >
> > [1] B4.1.19, ARM Architecture Reference Manual ARMv7-A and ARMv7-R
> > edition, https://developer.arm.com/documentation/ddi0406
> > [2] D23.2.29, ARM Architecture Reference Manual for A-profile Architecture,
> > https://developer.arm.com/documentation/ddi0487/latest/
> >
> > Signed-off-by: Alireza Sanaee <alireza.sanaee@huawei.com>
> > ---
> >   target/arm/cpu-features.h | 53 ++++++++++++++++++++++++
> >   target/arm/cpu64.c        | 19 ++++++---
> >   target/arm/tcg/cpu64.c    | 86 ++++++++++++++++++---------------------
> >   3 files changed, 105 insertions(+), 53 deletions(-)
> >
> > diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
> > index c59ca104fe..00a0f0d963 100644
> > --- a/target/arm/cpu-features.h
> > +++ b/target/arm/cpu-features.h
> > @@ -1022,6 +1022,59 @@ static inline bool isar_feature_any_evt(const ARMISARegisters *id)
> >       return isar_feature_aa64_evt(id) || isar_feature_aa32_evt(id);
> >   }
> >
> > +static inline uint64_t make_ccsidr32(unsigned assoc, unsigned linesize,
> > +                                     unsigned cachesize, uint8_t flags)
> > +{
> > +    unsigned lg_linesize = ctz32(linesize);
> > +    unsigned sets;
> > +
> > +    /*
> > +     * The 32-bit CCSIDR_EL1 format is:
> > +     *   [27:13] number of sets - 1
> > +     *   [12:3]  associativity - 1
> > +     *   [2:0]   log2(linesize) - 4
> > +     *           so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
> > +     */
> > +    assert(assoc != 0);
> > +    assert(is_power_of_2(linesize));
> > +    assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
> > +
> > +    /* sets * associativity * linesize == cachesize. */
> > +    sets = cachesize / (assoc * linesize);
> > +    assert(cachesize % (assoc * linesize) == 0);
> > +
> > +    return ((uint64_t)(flags) << 28)
> > +        | ((sets - 1) << 13)
> > +        | ((assoc - 1) << 3)
> > +        | (lg_linesize - 4);
> > +}
> > +
> > +static inline uint64_t make_ccsidr64(unsigned assoc, unsigned linesize,
> > +                              unsigned cachesize)
> > +{
> > +    unsigned lg_linesize = ctz32(linesize);
> > +    unsigned sets;
> > +
> > +    /*
> > +     * The 64-bit CCSIDR_EL1 format is:
> > +     *   [55:32] number of sets - 1
> > +     *   [23:3]  associativity - 1
> > +     *   [2:0]   log2(linesize) - 4
> > +     *           so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
> > +     */
> > +    assert(assoc != 0);
> > +    assert(is_power_of_2(linesize));
> > +    assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
> > +
> > +    /* sets * associativity * linesize == cachesize. */
> > +    sets = cachesize / (assoc * linesize);
> > +    assert(cachesize % (assoc * linesize) == 0);
> > +
> > +    return ((uint64_t)(sets - 1) << 32)
> > +         | ((assoc - 1) << 3)
> > +         | (lg_linesize - 4);
> > +}
> > +
> >   /*
> >    * Forward to the above feature tests given an ARMCPU pointer.
> >    */
> > diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
> > index 262a1d6c0b..57ebc1b979 100644
> > --- a/target/arm/cpu64.c
> > +++ b/target/arm/cpu64.c
> > @@ -23,6 +23,7 @@
> >   #include "cpu.h"
> >   #include "cpregs.h"
> >   #include "qemu/module.h"
> > +#include "qemu/units.h"
> >   #include "sysemu/kvm.h"
> >   #include "sysemu/hvf.h"
> >   #include "sysemu/qtest.h"
> > @@ -642,9 +643,12 @@ static void aarch64_a57_initfn(Object *obj)
> >       cpu->isar.dbgdevid1 = 0x2;
> >       cpu->isar.reset_pmcr_el0 = 0x41013000;
> >       cpu->clidr = 0x0a200023;
> > -    cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
> > -    cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
> > -    cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */
> > +    /* 32KB L1 dcache */
> > +    cpu->ccsidr[0] = make_ccsidr32(4, 64, 32 * KiB, 7);
> > +    /* 48KB L1 icache */
> > +    cpu->ccsidr[1] = make_ccsidr32(3, 64, 48 * KiB, 2);
> > +    /* 2048KB L2 cache */
> > +    cpu->ccsidr[2] = make_ccsidr32(16, 64, 2 * MiB, 7);
>
> I like the uses of make_ccsidrXX() instead of the magic values.
>
> I don't like much the code duplication between make_ccsidrXX()
> definitions, I'd prefer both call a common (static?) one.

How about we have
typedef enum {
    CCSIDR_FORMAT_LEGACY,
    CCSIDR_FORMAT_CCIDX,
} CCSIDRFormat;

and a single
uint64_t make_ccsidr(CCSIDRFormat format, unsigned assoc, unsigned linesize,
                     unsigned cachesize, unsigned flags);

? Since the only difference between the two functions is the final
line that assembles the return value, that seems like maybe
a better way to avoid the code duplication than a common
sub-function.

-- PMM

Re: [PATCH v2] target/arm/tcg: refine cache descriptions with a wrapper

Posted by Alireza Sanaee via 1 year, 5 months ago

On Mon, 2 Sep 2024 11:25:36 +0100
Peter Maydell <peter.maydell@linaro.org> wrote:

> On Mon, 2 Sept 2024 at 11:07, Philippe Mathieu-Daudé
> <philmd@linaro.org> wrote:
> >
> > Hi Alireza,
> >
> > On 30/8/24 20:47, Alireza Sanaee via wrote:  
> > > This patch allows for easier manipulation of the cache description
> > > register, CCSIDR. Which is helpful for testing as well. Currently
> > > numbers get hard-coded and might be prone to errors.
> > >
> > > Therefore, this patch adds wrappers for different types of CPUs
> > > available in tcg to decribe caches. Two functions `make_ccsidr32`
> > > and `make_ccsidr64` describing descriptions. The 32 bit version
> > > receives extra parameters that became unknown later in 64 bit.
> > >
> > > For CCSIDR register, 32 bit version follows specification [1].
> > > Conversely, 64 bit version follows specification [2].
> > >
> > > [1] B4.1.19, ARM Architecture Reference Manual ARMv7-A and ARMv7-R
> > > edition, https://developer.arm.com/documentation/ddi0406
> > > [2] D23.2.29, ARM Architecture Reference Manual for A-profile
> > > Architecture, https://developer.arm.com/documentation/ddi0487/latest/
> > >
> > > Signed-off-by: Alireza Sanaee <alireza.sanaee@huawei.com>
> > > ---
> > >   target/arm/cpu-features.h | 53 ++++++++++++++++++++++++
> > >   target/arm/cpu64.c        | 19 ++++++---
> > >   target/arm/tcg/cpu64.c    | 86
> > > ++++++++++++++++++--------------------- 3 files changed, 105
> > > insertions(+), 53 deletions(-)
> > >
> > > diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
> > > index c59ca104fe..00a0f0d963 100644
> > > --- a/target/arm/cpu-features.h
> > > +++ b/target/arm/cpu-features.h
> > > @@ -1022,6 +1022,59 @@ static inline bool
> > > isar_feature_any_evt(const ARMISARegisters *id) return
> > > isar_feature_aa64_evt(id) || isar_feature_aa32_evt(id); }
> > >
> > > +static inline uint64_t make_ccsidr32(unsigned assoc, unsigned
> > > linesize,
> > > +                                     unsigned cachesize, uint8_t
> > > flags) +{
> > > +    unsigned lg_linesize = ctz32(linesize);
> > > +    unsigned sets;
> > > +
> > > +    /*
> > > +     * The 32-bit CCSIDR_EL1 format is:
> > > +     *   [27:13] number of sets - 1
> > > +     *   [12:3]  associativity - 1
> > > +     *   [2:0]   log2(linesize) - 4
> > > +     *           so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes,
> > > etc
> > > +     */
> > > +    assert(assoc != 0);
> > > +    assert(is_power_of_2(linesize));
> > > +    assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
> > > +
> > > +    /* sets * associativity * linesize == cachesize. */
> > > +    sets = cachesize / (assoc * linesize);
> > > +    assert(cachesize % (assoc * linesize) == 0);
> > > +
> > > +    return ((uint64_t)(flags) << 28)
> > > +        | ((sets - 1) << 13)
> > > +        | ((assoc - 1) << 3)
> > > +        | (lg_linesize - 4);
> > > +}
> > > +
> > > +static inline uint64_t make_ccsidr64(unsigned assoc, unsigned
> > > linesize,
> > > +                              unsigned cachesize)
> > > +{
> > > +    unsigned lg_linesize = ctz32(linesize);
> > > +    unsigned sets;
> > > +
> > > +    /*
> > > +     * The 64-bit CCSIDR_EL1 format is:
> > > +     *   [55:32] number of sets - 1
> > > +     *   [23:3]  associativity - 1
> > > +     *   [2:0]   log2(linesize) - 4
> > > +     *           so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes,
> > > etc
> > > +     */
> > > +    assert(assoc != 0);
> > > +    assert(is_power_of_2(linesize));
> > > +    assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
> > > +
> > > +    /* sets * associativity * linesize == cachesize. */
> > > +    sets = cachesize / (assoc * linesize);
> > > +    assert(cachesize % (assoc * linesize) == 0);
> > > +
> > > +    return ((uint64_t)(sets - 1) << 32)
> > > +         | ((assoc - 1) << 3)
> > > +         | (lg_linesize - 4);
> > > +}
> > > +
> > >   /*
> > >    * Forward to the above feature tests given an ARMCPU pointer.
> > >    */
> > > diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
> > > index 262a1d6c0b..57ebc1b979 100644
> > > --- a/target/arm/cpu64.c
> > > +++ b/target/arm/cpu64.c
> > > @@ -23,6 +23,7 @@
> > >   #include "cpu.h"
> > >   #include "cpregs.h"
> > >   #include "qemu/module.h"
> > > +#include "qemu/units.h"
> > >   #include "sysemu/kvm.h"
> > >   #include "sysemu/hvf.h"
> > >   #include "sysemu/qtest.h"
> > > @@ -642,9 +643,12 @@ static void aarch64_a57_initfn(Object *obj)
> > >       cpu->isar.dbgdevid1 = 0x2;
> > >       cpu->isar.reset_pmcr_el0 = 0x41013000;
> > >       cpu->clidr = 0x0a200023;
> > > -    cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
> > > -    cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
> > > -    cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */
> > > +    /* 32KB L1 dcache */
> > > +    cpu->ccsidr[0] = make_ccsidr32(4, 64, 32 * KiB, 7);
> > > +    /* 48KB L1 icache */
> > > +    cpu->ccsidr[1] = make_ccsidr32(3, 64, 48 * KiB, 2);
> > > +    /* 2048KB L2 cache */
> > > +    cpu->ccsidr[2] = make_ccsidr32(16, 64, 2 * MiB, 7);  
> >
> > I like the uses of make_ccsidrXX() instead of the magic values.
> >
> > I don't like much the code duplication between make_ccsidrXX()
> > definitions, I'd prefer both call a common (static?) one.  
> 
> How about we have
> typedef enum {
>     CCSIDR_FORMAT_LEGACY,
>     CCSIDR_FORMAT_CCIDX,
> } CCSIDRFormat;
> 
> and a single
> uint64_t make_ccsidr(CCSIDRFormat format, unsigned assoc, unsigned
>                      linesize, unsigned cachesize, unsigned flags);
> 
> ? Since the only difference between the two functions is the final
> line that assembles the return value, that seems like maybe
> a better way to avoid the code duplication than a common
> sub-function.
> 
> -- PMM

I like this suggestion. I can address Philippe's concern too if I move
the functions around. I thought a bit about how to avoid the duplication,
but in the end decided to see what others might say.

Alireza