[PATCH v2 18/54] accel/tcg: Remove the victim tlb

Richard Henderson posted 54 patches 1 week, 2 days ago
[PATCH v2 18/54] accel/tcg: Remove the victim tlb
Posted by Richard Henderson 1 week, 2 days ago
The victim TLB has been functionally replaced by the IntervalTree;
remove it together with the helpers that only it used.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/hw/core/cpu.h |  8 -----
 accel/tcg/cputlb.c    | 74 -------------------------------------------
 2 files changed, 82 deletions(-)

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 1ebc999a73..8eda0574b2 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -201,9 +201,6 @@ struct CPUClass {
  */
 #define NB_MMU_MODES 16
 
-/* Use a fully associative victim tlb of 8 entries. */
-#define CPU_VTLB_SIZE 8
-
 /*
  * The full TLB entry, which is not accessed by generated TCG code,
  * so the layout is not as critical as that of CPUTLBEntry. This is
@@ -285,11 +282,6 @@ typedef struct CPUTLBDesc {
     /* maximum number of entries observed in the window */
     size_t window_max_entries;
     size_t n_used_entries;
-    /* The next index to use in the tlb victim table.  */
-    size_t vindex;
-    /* The tlb victim table, in two parts.  */
-    CPUTLBEntry vtable[CPU_VTLB_SIZE];
-    CPUTLBEntryFull vfulltlb[CPU_VTLB_SIZE];
     CPUTLBEntryFull *fulltlb;
     /* All active tlb entries for this address space. */
     IntervalTreeRoot iroot;
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index ea4b78866b..8caa8c0f1d 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -328,8 +328,6 @@ static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
     tlbfast_flush_locked(desc, fast);
     desc->large_page_addr = -1;
     desc->large_page_mask = -1;
-    desc->vindex = 0;
-    memset(desc->vtable, -1, sizeof(desc->vtable));
     interval_tree_free_nodes(&desc->iroot, offsetof(CPUTLBEntryTree, itree));
 }
 
@@ -361,11 +359,6 @@ static inline void tlb_n_used_entries_inc(CPUState *cpu, uintptr_t mmu_idx)
     cpu->neg.tlb.d[mmu_idx].n_used_entries++;
 }
 
-static inline void tlb_n_used_entries_dec(CPUState *cpu, uintptr_t mmu_idx)
-{
-    cpu->neg.tlb.d[mmu_idx].n_used_entries--;
-}
-
 void tlb_init(CPUState *cpu)
 {
     int64_t now = get_clock_realtime();
@@ -496,20 +489,6 @@ static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
             page == (tlb_entry->addr_code & mask));
 }
 
-static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, vaddr page)
-{
-    return tlb_hit_page_mask_anyprot(tlb_entry, page, -1);
-}
-
-/**
- * tlb_entry_is_empty - return true if the entry is not in use
- * @te: pointer to CPUTLBEntry
- */
-static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
-{
-    return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
-}
-
 /* Called with tlb_c.lock held */
 static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
                                         vaddr page,
@@ -522,28 +501,6 @@ static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
     return false;
 }
 
-/* Called with tlb_c.lock held */
-static void tlb_flush_vtlb_page_mask_locked(CPUState *cpu, int mmu_idx,
-                                            vaddr page,
-                                            vaddr mask)
-{
-    CPUTLBDesc *d = &cpu->neg.tlb.d[mmu_idx];
-    int k;
-
-    assert_cpu_is_self(cpu);
-    for (k = 0; k < CPU_VTLB_SIZE; k++) {
-        if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) {
-            tlb_n_used_entries_dec(cpu, mmu_idx);
-        }
-    }
-}
-
-static inline void tlb_flush_vtlb_page_locked(CPUState *cpu, int mmu_idx,
-                                              vaddr page)
-{
-    tlb_flush_vtlb_page_mask_locked(cpu, mmu_idx, page, -1);
-}
-
 static void tlbfast_flush_range_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
                                        vaddr addr, vaddr len, vaddr mask)
 {
@@ -588,7 +545,6 @@ static void tlb_flush_page_locked(CPUState *cpu, int midx, vaddr page)
 
     tlbfast_flush_range_locked(desc, &cpu->neg.tlb.f[midx],
                                page, TARGET_PAGE_SIZE, -1);
-    tlb_flush_vtlb_page_locked(cpu, midx, page);
 
     node = tlbtree_lookup_addr(desc, page);
     if (node) {
@@ -764,11 +720,6 @@ static void tlb_flush_range_locked(CPUState *cpu, int midx,
 
     tlbfast_flush_range_locked(d, f, addr, len, mask);
 
-    for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) {
-        vaddr page = addr + i;
-        tlb_flush_vtlb_page_mask_locked(cpu, midx, page, mask);
-    }
-
     addr_mask = addr & mask;
     last_mask = addr_mask + len - 1;
     last_imask = last_mask | ~mask;
@@ -1017,10 +968,6 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
             tlb_reset_dirty_range_locked(&fast->table[i], start1, length);
         }
 
-        for (size_t i = 0; i < CPU_VTLB_SIZE; i++) {
-            tlb_reset_dirty_range_locked(&desc->vtable[i], start1, length);
-        }
-
         for (CPUTLBEntryTree *t = tlbtree_lookup_range(desc, 0, -1); t;
              t = tlbtree_lookup_range_next(t, 0, -1)) {
             tlb_reset_dirty_range_locked(&t->copy, start1, length);
@@ -1054,10 +1001,6 @@ static void tlb_set_dirty(CPUState *cpu, vaddr addr)
 
         tlb_set_dirty1_locked(tlb_entry(cpu, mmu_idx, addr), addr);
 
-        for (int k = 0; k < CPU_VTLB_SIZE; k++) {
-            tlb_set_dirty1_locked(&desc->vtable[k], addr);
-        }
-
         node = tlbtree_lookup_addr(desc, addr);
         if (node) {
             tlb_set_dirty1_locked(&node->copy, addr);
@@ -1216,23 +1159,6 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
     /* Note that the tlb is no longer clean.  */
     tlb->c.dirty |= 1 << mmu_idx;
 
-    /* Make sure there's no cached translation for the new page.  */
-    tlb_flush_vtlb_page_locked(cpu, mmu_idx, addr_page);
-
-    /*
-     * Only evict the old entry to the victim tlb if it's for a
-     * different page; otherwise just overwrite the stale data.
-     */
-    if (!tlb_hit_page_anyprot(te, addr_page) && !tlb_entry_is_empty(te)) {
-        unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
-        CPUTLBEntry *tv = &desc->vtable[vidx];
-
-        /* Evict the old entry into the victim tlb.  */
-        copy_tlb_helper_locked(tv, te);
-        desc->vfulltlb[vidx] = desc->fulltlb[index];
-        tlb_n_used_entries_dec(cpu, mmu_idx);
-    }
-
     /* Replace an old IntervalTree entry, or create a new one. */
     node = tlbtree_lookup_addr(desc, addr_page);
     if (!node) {
-- 
2.43.0
Re: [PATCH v2 18/54] accel/tcg: Remove the victim tlb
Posted by Pierrick Bouvier 1 week, 1 day ago

On 11/14/24 08:00, Richard Henderson wrote:
> This has been functionally replaced by the IntervalTree.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   include/hw/core/cpu.h |  8 -----
>   accel/tcg/cputlb.c    | 74 -------------------------------------------
>   2 files changed, 82 deletions(-)
> 
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index 1ebc999a73..8eda0574b2 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -201,9 +201,6 @@ struct CPUClass {
>    */
>   #define NB_MMU_MODES 16
>   
> -/* Use a fully associative victim tlb of 8 entries. */
> -#define CPU_VTLB_SIZE 8
> -
>   /*
>    * The full TLB entry, which is not accessed by generated TCG code,
>    * so the layout is not as critical as that of CPUTLBEntry. This is
> @@ -285,11 +282,6 @@ typedef struct CPUTLBDesc {
>       /* maximum number of entries observed in the window */
>       size_t window_max_entries;
>       size_t n_used_entries;
> -    /* The next index to use in the tlb victim table.  */
> -    size_t vindex;
> -    /* The tlb victim table, in two parts.  */
> -    CPUTLBEntry vtable[CPU_VTLB_SIZE];
> -    CPUTLBEntryFull vfulltlb[CPU_VTLB_SIZE];
>       CPUTLBEntryFull *fulltlb;
>       /* All active tlb entries for this address space. */
>       IntervalTreeRoot iroot;
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index ea4b78866b..8caa8c0f1d 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -328,8 +328,6 @@ static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
>       tlbfast_flush_locked(desc, fast);
>       desc->large_page_addr = -1;
>       desc->large_page_mask = -1;
> -    desc->vindex = 0;
> -    memset(desc->vtable, -1, sizeof(desc->vtable));
>       interval_tree_free_nodes(&desc->iroot, offsetof(CPUTLBEntryTree, itree));
>   }
>   
> @@ -361,11 +359,6 @@ static inline void tlb_n_used_entries_inc(CPUState *cpu, uintptr_t mmu_idx)
>       cpu->neg.tlb.d[mmu_idx].n_used_entries++;
>   }
>   
> -static inline void tlb_n_used_entries_dec(CPUState *cpu, uintptr_t mmu_idx)
> -{
> -    cpu->neg.tlb.d[mmu_idx].n_used_entries--;
> -}
> -
>   void tlb_init(CPUState *cpu)
>   {
>       int64_t now = get_clock_realtime();
> @@ -496,20 +489,6 @@ static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
>               page == (tlb_entry->addr_code & mask));
>   }
>   
> -static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, vaddr page)
> -{
> -    return tlb_hit_page_mask_anyprot(tlb_entry, page, -1);
> -}
> -
> -/**
> - * tlb_entry_is_empty - return true if the entry is not in use
> - * @te: pointer to CPUTLBEntry
> - */
> -static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
> -{
> -    return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
> -}
> -
>   /* Called with tlb_c.lock held */
>   static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
>                                           vaddr page,
> @@ -522,28 +501,6 @@ static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
>       return false;
>   }
>   
> -/* Called with tlb_c.lock held */
> -static void tlb_flush_vtlb_page_mask_locked(CPUState *cpu, int mmu_idx,
> -                                            vaddr page,
> -                                            vaddr mask)
> -{
> -    CPUTLBDesc *d = &cpu->neg.tlb.d[mmu_idx];
> -    int k;
> -
> -    assert_cpu_is_self(cpu);
> -    for (k = 0; k < CPU_VTLB_SIZE; k++) {
> -        if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) {
> -            tlb_n_used_entries_dec(cpu, mmu_idx);
> -        }
> -    }
> -}
> -
> -static inline void tlb_flush_vtlb_page_locked(CPUState *cpu, int mmu_idx,
> -                                              vaddr page)
> -{
> -    tlb_flush_vtlb_page_mask_locked(cpu, mmu_idx, page, -1);
> -}
> -
>   static void tlbfast_flush_range_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
>                                          vaddr addr, vaddr len, vaddr mask)
>   {
> @@ -588,7 +545,6 @@ static void tlb_flush_page_locked(CPUState *cpu, int midx, vaddr page)
>   
>       tlbfast_flush_range_locked(desc, &cpu->neg.tlb.f[midx],
>                                  page, TARGET_PAGE_SIZE, -1);
> -    tlb_flush_vtlb_page_locked(cpu, midx, page);
>   
>       node = tlbtree_lookup_addr(desc, page);
>       if (node) {
> @@ -764,11 +720,6 @@ static void tlb_flush_range_locked(CPUState *cpu, int midx,
>   
>       tlbfast_flush_range_locked(d, f, addr, len, mask);
>   
> -    for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) {
> -        vaddr page = addr + i;
> -        tlb_flush_vtlb_page_mask_locked(cpu, midx, page, mask);
> -    }
> -
>       addr_mask = addr & mask;
>       last_mask = addr_mask + len - 1;
>       last_imask = last_mask | ~mask;
> @@ -1017,10 +968,6 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
>               tlb_reset_dirty_range_locked(&fast->table[i], start1, length);
>           }
>   
> -        for (size_t i = 0; i < CPU_VTLB_SIZE; i++) {
> -            tlb_reset_dirty_range_locked(&desc->vtable[i], start1, length);
> -        }
> -
>           for (CPUTLBEntryTree *t = tlbtree_lookup_range(desc, 0, -1); t;
>                t = tlbtree_lookup_range_next(t, 0, -1)) {
>               tlb_reset_dirty_range_locked(&t->copy, start1, length);
> @@ -1054,10 +1001,6 @@ static void tlb_set_dirty(CPUState *cpu, vaddr addr)
>   
>           tlb_set_dirty1_locked(tlb_entry(cpu, mmu_idx, addr), addr);
>   
> -        for (int k = 0; k < CPU_VTLB_SIZE; k++) {
> -            tlb_set_dirty1_locked(&desc->vtable[k], addr);
> -        }
> -
>           node = tlbtree_lookup_addr(desc, addr);
>           if (node) {
>               tlb_set_dirty1_locked(&node->copy, addr);
> @@ -1216,23 +1159,6 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
>       /* Note that the tlb is no longer clean.  */
>       tlb->c.dirty |= 1 << mmu_idx;
>   
> -    /* Make sure there's no cached translation for the new page.  */
> -    tlb_flush_vtlb_page_locked(cpu, mmu_idx, addr_page);
> -
> -    /*
> -     * Only evict the old entry to the victim tlb if it's for a
> -     * different page; otherwise just overwrite the stale data.
> -     */
> -    if (!tlb_hit_page_anyprot(te, addr_page) && !tlb_entry_is_empty(te)) {
> -        unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
> -        CPUTLBEntry *tv = &desc->vtable[vidx];
> -
> -        /* Evict the old entry into the victim tlb.  */
> -        copy_tlb_helper_locked(tv, te);
> -        desc->vfulltlb[vidx] = desc->fulltlb[index];
> -        tlb_n_used_entries_dec(cpu, mmu_idx);
> -    }
> -
>       /* Replace an old IntervalTree entry, or create a new one. */
>       node = tlbtree_lookup_addr(desc, addr_page);
>       if (!node) {

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>