Cota" To: qemu-devel@nongnu.org Date: Tue, 9 Oct 2018 13:51:26 -0400 Message-Id: <20181009175129.17888-3-cota@braap.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20181009175129.17888-1-cota@braap.org> References: <20181009175129.17888-1-cota@braap.org> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 64.147.123.24 Subject: [Qemu-devel] [RFC v3 2/5] (XXX) cputlb: introduce indirection for TLB size X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: =?UTF-8?q?Alex=20Benn=C3=A9e?= , Richard Henderson Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" This paves the way for implementing dynamic TLB resizing. XXX: convert other TCG backends Signed-off-by: Emilio G. Cota --- include/exec/cpu-defs.h | 10 ++++++---- include/exec/cpu_ldst.h | 14 +++++++++++++- accel/tcg/cputlb.c | 18 +++++++++++++++--- tcg/i386/tcg-target.inc.c | 26 +++++++++++++------------- 4 files changed, 47 insertions(+), 21 deletions(-) diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index 4ff62f32bf..87cd015f60 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -141,13 +141,15 @@ typedef struct CPUIOTLBEntry { MemTxAttrs attrs; } CPUIOTLBEntry; =20 -#define CPU_COMMON_TLB \ +#define CPU_COMMON_TLB \ /* The meaning of the MMU modes is defined in the target code. */ \ - /* tlb_lock serializes updates to tlb_table and tlb_v_table */ \ + /* tlb_lock serializes updates to tlb_mask, tlb_table and tlb_v_table = */ \ QemuSpin tlb_lock; \ - CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE]; \ + /* tlb_mask[i] contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */ \ + uintptr_t tlb_mask[NB_MMU_MODES]; \ + CPUTLBEntry *tlb_table[NB_MMU_MODES]; \ CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \ - CPUIOTLBEntry iotlb[NB_MMU_MODES][CPU_TLB_SIZE]; \ + CPUIOTLBEntry *iotlb[NB_MMU_MODES]; \ CPUIOTLBEntry iotlb_v[NB_MMU_MODES][CPU_VTLB_SIZE]; \ size_t tlb_flush_count; \ target_ulong tlb_flush_addr; \ diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index e3d8d738aa..3ded1df9b7 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -130,7 +130,9 @@ extern __thread uintptr_t helper_retaddr; static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, target_ulong addr) { - return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + uintptr_t size_mask =3D env->tlb_mask[mmu_idx] >> CPU_TLB_ENTRY_BITS; + + return (addr >> TARGET_PAGE_BITS) & size_mask; } =20 /* Find the TLB entry corresponding to the mmu_idx + address pair. 
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index a5972773de..80406f1033 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -76,8 +76,16 @@ QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
 void tlb_init(CPUState *cpu)
 {
     CPUArchState *env = cpu->env_ptr;
+    int i;
 
     qemu_spin_init(&env->tlb_lock);
+    for (i = 0; i < NB_MMU_MODES; i++) {
+        size_t n_entries = CPU_TLB_SIZE;
+
+        env->tlb_mask[i] = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
+        env->tlb_table[i] = g_new(CPUTLBEntry, n_entries);
+        env->iotlb[i] = g_new0(CPUIOTLBEntry, n_entries);
+    }
 }
 
 /* flush_all_helper: run fn across all cpus
@@ -120,6 +128,7 @@ size_t tlb_flush_count(void)
 static void tlb_flush_nocheck(CPUState *cpu)
 {
     CPUArchState *env = cpu->env_ptr;
+    int i;
 
     /* The QOM tests will trigger tlb_flushes without setting up TCG
      * so we bug out here in that case.
@@ -139,7 +148,9 @@ static void tlb_flush_nocheck(CPUState *cpu)
      * that do not hold the lock are performed by the same owner thread.
      */
     qemu_spin_lock(&env->tlb_lock);
-    memset(env->tlb_table, -1, sizeof(env->tlb_table));
+    for (i = 0; i < NB_MMU_MODES; i++) {
+        memset(env->tlb_table[i], -1, sizeof_tlb(env, i));
+    }
     memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
     qemu_spin_unlock(&env->tlb_lock);
 
@@ -200,7 +211,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
         if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
             tlb_debug("%d\n", mmu_idx);
 
-            memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
+            memset(env->tlb_table[mmu_idx], -1, sizeof_tlb(env, mmu_idx));
             memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
         }
     }
@@ -523,8 +534,9 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
     qemu_spin_lock(&env->tlb_lock);
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
         unsigned int i;
+        unsigned int n = tlb_n_entries(env, mmu_idx);
 
-        for (i = 0; i < CPU_TLB_SIZE; i++) {
+        for (i = 0; i < n; i++) {
             tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1,
                                          length);
         }
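One consequence worth spelling out: now that tlb_table holds pointers,
sizeof(env->tlb_table) measures the pointer array rather than the entries,
which is why the flush paths above switch to sizeof_tlb(). A reduced,
hypothetical example of the distinction (Entry and the sizes are stand-ins,
not QEMU code):

    #include <stdio.h>

    typedef struct Entry { unsigned long w[4]; } Entry;  /* stand-in for CPUTLBEntry */

    struct Env {
        Entry *table[4];          /* new layout: one pointer per MMU mode */
        Entry old_table[4][256];  /* old layout: entries embedded in env */
    };

    int main(void)
    {
        struct Env env;
        /* 4 pointers vs 4 * 256 * sizeof(Entry) bytes: a memset of the new
         * layout using sizeof(env.table) would only clobber the pointers. */
        printf("%zu vs %zu\n", sizeof(env.table), sizeof(env.old_table));
        return 0;
    }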
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 436195894b..91bbabd6e8 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -330,6 +330,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
 #define OPC_ANDN        (0xf2 | P_EXT38)
 #define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
+#define OPC_AND_GvEv    (OPC_ARITH_GvEv | (ARITH_AND << 3))
 #define OPC_BLENDPS     (0x0c | P_EXT3A | P_DATA16)
 #define OPC_BSF         (0xbc | P_EXT)
 #define OPC_BSR         (0xbd | P_EXT)
@@ -1633,6 +1634,15 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     }
 
     tcg_out_mov(s, tlbtype, r0, addrlo);
+    tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
+                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+
+    tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, r0, TCG_AREG0,
+                         offsetof(CPUArchState, tlb_mask[mem_index]));
+
+    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r0, TCG_AREG0,
+                         offsetof(CPUArchState, tlb_table[mem_index]));
+
     /* If the required alignment is at least as large as the access, simply
        copy the address and mask.  For lesser alignments, check that we don't
        cross pages for the complete access. */
@@ -1642,20 +1652,10 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
         tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
     }
     tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
-
-    tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
-                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
-
     tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
-    tgen_arithi(s, ARITH_AND + tlbrexw, r0,
-                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
-
-    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
-                             offsetof(CPUArchState, tlb_table[mem_index][0])
-                             + which);
 
     /* cmp 0(r0), r1 */
-    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
+    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, which);
 
     /* Prepare for both the fast path add of the tlb addend, and the slow
        path function argument setup.  There are two cases worth note:
@@ -1672,7 +1672,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
 
     if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
         /* cmp 4(r0), addrhi */
-        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);
+        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, which + 4);
 
         /* jne slow_path */
         tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
@@ -1684,7 +1684,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
 
         /* add addend(r0), r1 */
         tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
-                             offsetof(CPUTLBEntry, addend) - which);
+                             offsetof(CPUTLBEntry, addend));
     }
 
     /*
-- 
2.17.1
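Finally, a reading aid for the i386 backend change: the emitted fast path
now computes the TLB entry address with a shift, a mask loaded from env, and
an add of the table pointer, instead of an AND against the compile-time
constant (CPU_TLB_SIZE - 1) plus an LEA into the array embedded in env. A C
rendering of the new shr/and/add sequence; all the definitions here are
hypothetical stand-ins for the QEMU ones (the real TARGET_PAGE_BITS,
CPU_TLB_ENTRY_BITS and CPUTLBEntry vary by target and host):

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical stand-ins for the QEMU definitions involved. */
    #define TARGET_PAGE_BITS   12
    #define CPU_TLB_ENTRY_BITS 4
    #define NB_MMU_MODES       4
    typedef uint64_t target_ulong;
    typedef struct CPUTLBEntry {
        uint64_t addr_read, addr_write, addr_code, addend;
    } CPUTLBEntry;
    typedef struct CPUArchState {
        uintptr_t tlb_mask[NB_MMU_MODES];
        CPUTLBEntry *tlb_table[NB_MMU_MODES];
    } CPUArchState;

    /* What the emitted code leaves in r0: the host address of the entry. */
    static uintptr_t tlb_entry_host_addr(CPUArchState *env, target_ulong addr,
                                         int mem_index)
    {
        uintptr_t r0 = addr;

        /* shr: scale the page number into units of sizeof(CPUTLBEntry) */
        r0 >>= TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS;

        /* and: tlb_mask is stored pre-shifted by CPU_TLB_ENTRY_BITS, so a
         * single AND both wraps the index at the current table size and
         * clears the low entry bits (which still hold sub-page address
         * bits). Loading the mask from env at run time is what makes the
         * table size changeable later. */
        r0 &= env->tlb_mask[mem_index];

        /* add: indirect through the per-mmu-mode table pointer, replacing
         * the old LEA into the embedded array. The comparisons then apply
         * 'which' as a displacement on r0, so the "- which" adjustment on
         * the addend load goes away. */
        return (uintptr_t)env->tlb_table[mem_index] + r0;
    }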