[PATCH 5/5] target/loongarch/tcg: Add hardware page table walker support

Song Gao posted 5 patches 3 months, 4 weeks ago
There is a newer version of this series
[PATCH 5/5] target/loongarch/tcg: Add hardware page table walker support
Posted by Song Gao 3 months, 4 weeks ago
Add hardware page table walker (HPTW) feature for la664.
Set CPUCFG2.HPTW = 1 to indicate that HPTW is implemented on this CPU.
Set PWCH.HPTW_EN = 1 to enable HPTW.

Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 target/loongarch/cpu-csr.h        |   3 +
 target/loongarch/cpu.c            |   1 +
 target/loongarch/cpu.h            |   1 +
 target/loongarch/cpu_helper.c     |  20 ++++-
 target/loongarch/internals.h      |   2 +
 target/loongarch/tcg/tlb_helper.c | 122 ++++++++++++++++++++++++++++++
 6 files changed, 147 insertions(+), 2 deletions(-)

diff --git a/target/loongarch/cpu-csr.h b/target/loongarch/cpu-csr.h
index 0834e91f30..1aa015dc44 100644
--- a/target/loongarch/cpu-csr.h
+++ b/target/loongarch/cpu-csr.h
@@ -68,6 +68,8 @@ FIELD(TLBENTRY, PLV, 2, 2)
 FIELD(TLBENTRY, MAT, 4, 2)
 FIELD(TLBENTRY, G, 6, 1)
 FIELD(TLBENTRY, HUGE, 6, 1)
+FIELD(TLBENTRY, PRESENT, 7, 1)
+FIELD(TLBENTRY, WRITE, 8, 1)
 FIELD(TLBENTRY, HGLOBAL, 12, 1)
 FIELD(TLBENTRY, LEVEL, 13, 2)
 FIELD(TLBENTRY_32, PPN, 8, 24)
@@ -103,6 +105,7 @@ FIELD(CSR_PWCH, DIR3_BASE, 0, 6)
 FIELD(CSR_PWCH, DIR3_WIDTH, 6, 6)
 FIELD(CSR_PWCH, DIR4_BASE, 12, 6)
 FIELD(CSR_PWCH, DIR4_WIDTH, 18, 6)
+FIELD(CSR_PWCH, HPTW_EN, 24, 1)
 
 #define LOONGARCH_CSR_STLBPS         0x1e /* Stlb page size */
 FIELD(CSR_STLBPS, PS, 0, 5)
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 1b975f1de8..df355eee79 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -477,6 +477,7 @@ static void loongarch_la664_initfn(Object *obj)
     env->cpucfg[0] = 0x14d000; /* PRID */
 
     loongarch_common_initfn(env, obj);
+    env->cpucfg[2] = FIELD_DP32(env->cpucfg[2], CPUCFG2, HPTW, 1);
 }
 
 static void loongarch_la464_initfn(Object *obj)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 6c41fafb70..84f92507d6 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -155,6 +155,7 @@ FIELD(CPUCFG2, LBT_ARM, 19, 1)
 FIELD(CPUCFG2, LBT_MIPS, 20, 1)
 FIELD(CPUCFG2, LSPW, 21, 1)
 FIELD(CPUCFG2, LAM, 22, 1)
+FIELD(CPUCFG2, HPTW, 24, 1)
 
 /* cpucfg[3] bits */
 FIELD(CPUCFG3, CCDMA, 0, 1)
diff --git a/target/loongarch/cpu_helper.c b/target/loongarch/cpu_helper.c
index 580362ac3e..fed0fd8788 100644
--- a/target/loongarch/cpu_helper.c
+++ b/target/loongarch/cpu_helper.c
@@ -182,6 +182,7 @@ int get_physical_address(CPULoongArchState *env, hwaddr *physical,
 {
     int user_mode = mmu_idx == MMU_USER_IDX;
     int kernel_mode = mmu_idx == MMU_KERNEL_IDX;
+    int ret;
     uint32_t plv, base_c, base_v;
     int64_t addr_high;
     uint8_t da = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, DA);
@@ -221,8 +222,23 @@ int get_physical_address(CPULoongArchState *env, hwaddr *physical,
     }
 
     /* Mapped address */
-    return loongarch_map_address(env, physical, prot, address,
-                                 access_type, mmu_idx);
+    ret = loongarch_map_address(env, physical, prot, address,
+                                access_type, mmu_idx);
+#ifdef CONFIG_TCG
+    if (!FIELD_EX32(env->cpucfg[2], CPUCFG2, HPTW)) {
+        return ret;
+    }
+
+    if (!FIELD_EX32(env->CSR_PWCH, CSR_PWCH, HPTW_EN)) {
+        return ret;
+    }
+
+    if (do_page_walk(env, address, access_type, ret)) {
+        ret = loongarch_map_address(env, physical, prot, address,
+                                    access_type, mmu_idx);
+    }
+#endif
+    return ret;
 }
 
 hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
index 944153b180..6aa15fa36d 100644
--- a/target/loongarch/internals.h
+++ b/target/loongarch/internals.h
@@ -63,6 +63,8 @@ hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
                             MMUAccessType access_type, int mmu_idx,
                             bool probe, uintptr_t retaddr);
+bool do_page_walk(CPULoongArchState *env, vaddr address,
+                  MMUAccessType, int tlb_error);
 #endif
 #endif /* !CONFIG_USER_ONLY */
 
diff --git a/target/loongarch/tcg/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c
index 463e9be7f2..a4f9f996fd 100644
--- a/target/loongarch/tcg/tlb_helper.c
+++ b/target/loongarch/tcg/tlb_helper.c
@@ -651,3 +651,125 @@ void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd,
     }
     env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI, PS, ps);
 }
+
+static target_ulong get_pte_base(CPULoongArchState *env, vaddr address)
+{
+    uint64_t dir_base, dir_width;
+    target_ulong base;
+    int i;
+
+    /* Get PGD */
+    base = ((address >> 63) & 0x1) ? env->CSR_PGDH : env->CSR_PGDL;
+
+    for (i = 4; i > 0; i--) {
+        get_dir_base_width(env, &dir_base, &dir_width, i);
+        /*
+         * LDDIR: level = 2 corresponds to Dir1 in PWCL.
+         * PWCL/PWCH: dir >= 1 && dir_width == 0 means no such level.
+         */
+        if (i >= 2 && dir_width == 0) {
+            continue;
+        }
+        base = do_lddir(env, base, address, i);
+    }
+
+    return base;
+}
+
+bool do_page_walk(CPULoongArchState *env, vaddr address,
+                  MMUAccessType access_type, int tlb_error)
+{
+    CPUState *cs = env_cpu(env);
+    target_ulong base, ps, tmp0, tmp1, ptindex, ptoffset, entry;
+    uint64_t entrylo0, entrylo1, tlbehi, vppn;
+    uint64_t ptbase = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE);
+    uint64_t ptwidth = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH);
+    int index, shift;
+    bool ret = false;
+
+    /*
+     * tlb error map :
+     * TLBRET_NOMATCH : tlb fill
+     * TLBRET_INVALID : access_type = 0/2  tlb_load
+     *                : access_type = 1    tlb_store
+     * TLBRET_DIRTY   : tlb_modify
+     */
+    switch (tlb_error) {
+    case TLBRET_NOMATCH:
+        base = get_pte_base(env, address);
+        if (base == 0) {
+            return ret;
+        }
+        do_ldpte(env, base, address, &tmp0, &tmp1, &ps);
+        entrylo0 = tmp0;
+        entrylo1 = tmp1;
+        tlbehi = address & (TARGET_PAGE_MASK << 1);
+        vppn = FIELD_EX64(tlbehi, CSR_TLBEHI_64, VPPN);
+        index = get_random_tlb_index(env, tlbehi, ps);
+        invalidate_tlb(env, index);
+        do_fill_tlb_entry(env, vppn, entrylo0, entrylo1, index, ps);
+        ret = true;
+        break;
+    case TLBRET_DIRTY:
+    case TLBRET_INVALID:
+        base = get_pte_base(env, address);
+
+        /* 0:64bit, 1:128bit, 2:192bit, 3:256bit */
+        shift = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTEWIDTH);
+        shift = (shift + 1) * 3;
+        ptindex = (address >> ptbase) & ((1 << ptwidth) -1);
+        ptoffset = ptindex << shift;
+        tmp0 = base | ptoffset;
+        entry = ldq_phys(cs->as, tmp0) & TARGET_PHYS_MASK;
+
+        if (entry == 0) {
+            return ret;
+        }
+
+        /* Check entry, and do tlb modify. */
+        if ((tlb_error == TLBRET_INVALID) &&
+            (access_type == MMU_DATA_LOAD ||
+             access_type == MMU_INST_FETCH )) {
+            if (!(FIELD_EX64(entry, TLBENTRY, PRESENT))) {
+                break;
+            }
+            entry = FIELD_DP64(entry, TLBENTRY, V, 1);
+        } else if ((tlb_error == TLBRET_INVALID) &&
+                   access_type == MMU_DATA_STORE) {
+            if (!((FIELD_EX64(entry, TLBENTRY, PRESENT) &&
+                  (FIELD_EX64(entry, TLBENTRY, WRITE))))){
+                break;
+            }
+            entry = FIELD_DP64(entry, TLBENTRY, V, 1);
+            entry = FIELD_DP64(entry, TLBENTRY, D, 1);
+        } else if (tlb_error ==  TLBRET_DIRTY) {
+            if (!(FIELD_EX64(entry, TLBENTRY, WRITE))) {
+                break;
+            }
+            entry = FIELD_DP64(entry, TLBENTRY, D, 1);
+            entry = FIELD_DP64(entry, TLBENTRY, V, 1);
+        }
+        stq_phys(cs->as, tmp0, entry);
+        tmp0 = tmp0 & (~0x8);
+        entrylo0 = ldq_phys(cs->as, tmp0) & TARGET_PHYS_MASK;
+        entrylo1 = ldq_phys(cs->as, tmp0 | 0x8) & TARGET_PHYS_MASK;
+        tlbehi = address & (TARGET_PAGE_MASK << 1);
+        ps = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE);
+        vppn = FIELD_EX64(tlbehi, CSR_TLBEHI_64, VPPN);
+
+        /*
+         * srch tlb index with tlb entryhi
+         * if no match, we use get_random_tlb_index() to get random index.
+         */
+        if (!loongarch_tlb_search(env, tlbehi, &index)) {
+            index = get_random_tlb_index(env, tlbehi, ps);
+        }
+        invalidate_tlb(env, index);
+        do_fill_tlb_entry(env, vppn, entrylo0, entrylo1, index, ps);
+        ret = true;
+        break;
+    default:
+        ;
+    }
+    return ret;
+}
-- 
2.33.0
Re: [PATCH 5/5] target/loongarch/tcg: Add hardware page table walker support
Posted by Richard Henderson 3 months, 4 weeks ago
On 7/29/24 11:39, Song Gao wrote:
>       /* Mapped address */
> -    return loongarch_map_address(env, physical, prot, address,
> -                                 access_type, mmu_idx);
> +    ret = loongarch_map_address(env, physical, prot, address,
> +                                access_type, mmu_idx);
> +#ifdef CONFIG_TCG
> +    if (!FIELD_EX32(env->cpucfg[2], CPUCFG2, HPTW)) {
> +        return ret;
> +    }
> +
> +    if (!FIELD_EX32(env->CSR_PWCH, CSR_PWCH, HPTW_EN)) {
> +        return ret;
> +    }
> +
> +    if (do_page_walk(env, address, access_type, ret)) {

When called from loongarch_cpu_get_phys_page_debug, you do not want ...

> +        index = get_random_tlb_index(env, tlbehi, ps);
> +        invalidate_tlb(env, index);
> +        do_fill_tlb_entry(env, vppn, entrylo0, entrylo1, index, ps);

... to modify the TLB.  This will cause gdbstub to modify the behaviour of the guest, 
which you do not want.

> +        entry = ldq_phys(cs->as, tmp0) & TARGET_PHYS_MASK;
> +
> +        if (entry == 0) {
> +            return ret;
> +        }
> +
> +        /* Check entry, and do tlb modify. */
> +        if ((tlb_error == TLBRET_INVALID) &&
> +            (access_type == MMU_DATA_LOAD ||
> +             access_type == MMU_INST_FETCH )) {
> +            if (!(FIELD_EX64(entry, TLBENTRY, PRESENT))) {
> +                break;
> +            }
> +            entry = FIELD_DP64(entry, TLBENTRY, V, 1);
> +        } else if ((tlb_error == TLBRET_INVALID) &&
> +                   access_type == MMU_DATA_STORE) {
> +            if (!((FIELD_EX64(entry, TLBENTRY, PRESENT) &&
> +                  (FIELD_EX64(entry, TLBENTRY, WRITE))))){
> +                break;
> +            }
> +            entry = FIELD_DP64(entry, TLBENTRY, V, 1);
> +            entry = FIELD_DP64(entry, TLBENTRY, D, 1);
> +        } else if (tlb_error ==  TLBRET_DIRTY) {
> +            if (!(FIELD_EX64(entry, TLBENTRY, WRITE))) {
> +                break;
> +            }
> +            entry = FIELD_DP64(entry, TLBENTRY, D, 1);
> +            entry = FIELD_DP64(entry, TLBENTRY, V, 1);
> +        }
> +        stq_phys(cs->as, tmp0, entry);

You certainly want to use a compare and swap here, restarting if the compare fails.


r~
Re: [PATCH 5/5] target/loongarch/tcg: Add hardware page table walker support
Posted by gaosong 2 months, 2 weeks ago
在 2024/7/29 上午11:57, Richard Henderson 写道:
> On 7/29/24 11:39, Song Gao wrote:
>>       /* Mapped address */
>> -    return loongarch_map_address(env, physical, prot, address,
>> -                                 access_type, mmu_idx);
>> +    ret = loongarch_map_address(env, physical, prot, address,
>> +                                access_type, mmu_idx);
>> +#ifdef CONFIG_TCG
>> +    if (!FIELD_EX32(env->cpucfg[2], CPUCFG2, HPTW)) {
>> +        return ret;
>> +    }
>> +
>> +    if (!FIELD_EX32(env->CSR_PWCH, CSR_PWCH, HPTW_EN)) {
>> +        return ret;
>> +    }
>> +
>> +    if (do_page_walk(env, address, access_type, ret)) {
>
> When called from loongarch_cpu_get_phys_page_debug, you do not want ...
>
>> +        index = get_random_tlb_index(env, tlbehi, ps);
>> +        invalidate_tlb(env, index);
>> +        do_fill_tlb_entry(env, vppn, entrylo0, entrylo1, index, ps);
>
> ... to modify the TLB.  This will cause gdbstub to modify the 
> behaviour of the guest, which you do not want.
>
Hi, sorry for the late reply. I have been very busy recently.

How about adding a variable to determine if tlb needs to be modified?
like this:

@@ -248,7 +250,7 @@ hwaddr loongarch_cpu_get_phys_page_debug(CPUState 
*cs, vaddr addr)
      int prot;

      if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD,
-                             cpu_mmu_index(cs, false)) != 0) {
+                             cpu_mmu_index(cs, false) != 0, false)) {
          return -1;
      }

[..]

@@ -233,9 +233,11 @@ int get_physical_address(CPULoongArchState *env, 
hwaddr *physical,
          return ret;
      }

-    if (do_page_walk(env, address, access_type, ret)) {
-        ret = loongarch_map_address(env, physical, prot, address,
-                                    access_type, mmu_idx);
+    if (do_page_walk(env, address, access_type, ret, physical, 
is_modify)) {
+       if (is_modify) {
+            ret = loongarch_map_address(env, physical, prot, address,
+                                        access_type, mmu_idx);
+        }
      }

  bool do_page_walk(CPULoongArchState *env, vaddr address,
-                  MMUAccessType access_type, int tlb_error)
+                  MMUAccessType access_type, int tlb_error,
+                  hwaddr *physical, bool is_modify)
  {
      CPUState *cs = env_cpu(env);
      target_ulong base, ps, tmp0, tmp1, ptindex, ptoffset, entry;
@@ -705,9 +706,21 @@ bool do_page_walk(CPULoongArchState *env, vaddr 
address,
          entrylo1 = tmp1;
          tlbehi = address & (TARGET_PAGE_MASK << 1);
          vppn = FIELD_EX64(tlbehi, CSR_TLBEHI_64, VPPN);
-        index = get_random_tlb_index(env, tlbehi, ps);
-        invalidate_tlb(env, index);
-        do_fill_tlb_entry(env, vppn, entrylo0, entrylo1, index, ps);
+
+        if (is_modify) {
+            index = get_random_tlb_index(env, tlbehi, ps);
+            invalidate_tlb(env, index);
+            do_fill_tlb_entry(env, vppn, entrylo0, entrylo1, index, ps);
+        } else {
+            uint64_t tlb_entry, tlb_ppn;
+            uint8_t n;
+            n = (address >> ps) & 0x1;
+
+            tlb_entry = n ? entrylo1 : entrylo0;
+            tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_64, PPN);
+            tlb_ppn = tlb_ppn & ~(((0x1UL << (ps - 12)) -1));
+            *physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) | (address 
& MAKE_64BIT_MASK(0, ps));
+        }
          ret = true;
          break;

>> +        entry = ldq_phys(cs->as, tmp0) & TARGET_PHYS_MASK;
>> +
>> +        if (entry == 0) {
>> +            return ret;
>> +        }
>> +
>> +        /* Check entry, and do tlb modify. */
>> +        if ((tlb_error == TLBRET_INVALID) &&
>> +            (access_type == MMU_DATA_LOAD ||
>> +             access_type == MMU_INST_FETCH )) {
>> +            if (!(FIELD_EX64(entry, TLBENTRY, PRESENT))) {
>> +                break;
>> +            }
>> +            entry = FIELD_DP64(entry, TLBENTRY, V, 1);
>> +        } else if ((tlb_error == TLBRET_INVALID) &&
>> +                   access_type == MMU_DATA_STORE) {
>> +            if (!((FIELD_EX64(entry, TLBENTRY, PRESENT) &&
>> +                  (FIELD_EX64(entry, TLBENTRY, WRITE))))){
>> +                break;
>> +            }
>> +            entry = FIELD_DP64(entry, TLBENTRY, V, 1);
>> +            entry = FIELD_DP64(entry, TLBENTRY, D, 1);
>> +        } else if (tlb_error ==  TLBRET_DIRTY) {
>> +            if (!(FIELD_EX64(entry, TLBENTRY, WRITE))) {
>> +                break;
>> +            }
>> +            entry = FIELD_DP64(entry, TLBENTRY, D, 1);
>> +            entry = FIELD_DP64(entry, TLBENTRY, V, 1);
>> +        }
>> +        stq_phys(cs->as, tmp0, entry);
>
> You certainly want to use a compare and swap here, restarting if the 
> compare fails.
>
Sorry ,  I don't understand here, could you explain it in detail?

Thanks.
Song Gao
>
> r~


Re: [PATCH 5/5] target/loongarch/tcg: Add hardware page table walker support
Posted by Richard Henderson 2 months, 2 weeks ago
On 9/5/24 01:27, gaosong wrote:
> How about adding a variable to determine if tlb needs to be modified?
> like this:
> 
> @@ -248,7 +250,7 @@ hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
>       int prot;
> 
>       if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD,
> -                             cpu_mmu_index(cs, false)) != 0) {
> +                             cpu_mmu_index(cs, false) != 0, false)) {
>           return -1;
>       }

Yes, that sort of thing.  In other targets the flag is called 'debug'.

>>> +        entry = ldq_phys(cs->as, tmp0) & TARGET_PHYS_MASK;
>>> +
>>> +        if (entry == 0) {
>>> +            return ret;
>>> +        }
>>> +
>>> +        /* Check entry, and do tlb modify. */
>>> +        if ((tlb_error == TLBRET_INVALID) &&
>>> +            (access_type == MMU_DATA_LOAD ||
>>> +             access_type == MMU_INST_FETCH )) {
>>> +            if (!(FIELD_EX64(entry, TLBENTRY, PRESENT))) {
>>> +                break;
>>> +            }
>>> +            entry = FIELD_DP64(entry, TLBENTRY, V, 1);
>>> +        } else if ((tlb_error == TLBRET_INVALID) &&
>>> +                   access_type == MMU_DATA_STORE) {
>>> +            if (!((FIELD_EX64(entry, TLBENTRY, PRESENT) &&
>>> +                  (FIELD_EX64(entry, TLBENTRY, WRITE))))){
>>> +                break;
>>> +            }
>>> +            entry = FIELD_DP64(entry, TLBENTRY, V, 1);
>>> +            entry = FIELD_DP64(entry, TLBENTRY, D, 1);
>>> +        } else if (tlb_error ==  TLBRET_DIRTY) {
>>> +            if (!(FIELD_EX64(entry, TLBENTRY, WRITE))) {
>>> +                break;
>>> +            }
>>> +            entry = FIELD_DP64(entry, TLBENTRY, D, 1);
>>> +            entry = FIELD_DP64(entry, TLBENTRY, V, 1);
>>> +        }
>>> +        stq_phys(cs->as, tmp0, entry);
>>
>> You certainly want to use a compare and swap here, restarting if the compare fails.
>>
> Sorry ,  I don't understand here, could you explain it in detail?

A plain store will have an SMP race condition with the guest kernel.
The update needs to be atomic.

Compare:

   target/arm/ptw.c, arm_casq_ptw()
   target/riscv/cpu_helper.c, get_physical_address(), s/cmpxchg/
   target/i386/tcg/sysemu/excp_helper.c, ptw_setl()


r~