Date: Wed, 21 Dec 2022 18:34:56 -0800
From: Vipin Sharma <vipinsh@google.com>
To: seanjc@google.com, pbonzini@redhat.com, bgardon@google.com, dmatlack@google.com
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org, Vipin Sharma <vipinsh@google.com>
Subject: [Patch v3 8/9] KVM: x86/mmu: Make split_shadow_page_cache NUMA aware
Message-ID: <20221222023457.1764-9-vipinsh@google.com>
In-Reply-To: <20221222023457.1764-1-vipinsh@google.com>
References: <20221222023457.1764-1-vipinsh@google.com>

Make split_shadow_page_cache NUMA aware: when splitting a huge page,
allocate the new page table pages on the NUMA node of the underlying
physical page.
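The target node is derived from the huge page's SPTE rather than from
the vCPU performing the split, so the new lower-level page tables land
on the same node as the memory they map. A rough sketch of the lookup
this relies on (kvm_pfn_to_page_table_nid() is introduced earlier in
this series; the body below is an illustrative reconstruction, not the
verbatim helper):

	/*
	 * Illustrative sketch: resolve the NUMA node of the physical
	 * page backing @pfn. Falls back to the current node for pfns
	 * without a refcounted struct page (e.g. remapped MMIO).
	 */
	static int kvm_pfn_to_page_table_nid(kvm_pfn_t pfn)
	{
		struct page *page = kvm_pfn_to_refcounted_page(pfn);

		if (page)
			return page_to_nid(page);

		return numa_mem_id();
	}

The resulting node id indexes the per-node split_shadow_page_cache[]
array added below.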
Signed-off-by: Vipin Sharma <vipinsh@google.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/mmu/mmu.c          | 50 ++++++++++++++++++---------------
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b1f319ad6f89..7b3f36ae37a4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1410,7 +1410,7 @@ struct kvm_arch {
 	 *
 	 * Protected by kvm->slots_lock.
 	 */
-	struct kvm_mmu_memory_cache split_shadow_page_cache;
+	struct kvm_mmu_memory_cache split_shadow_page_cache[MAX_NUMNODES];
 	struct kvm_mmu_memory_cache split_page_header_cache;
 
 	/*
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 511c6ef265ee..7454bfc49a51 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6126,7 +6126,7 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
 int kvm_mmu_init_vm(struct kvm *kvm)
 {
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
-	int r;
+	int r, nid;
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
@@ -6145,8 +6145,9 @@ int kvm_mmu_init_vm(struct kvm *kvm)
 	INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_page_header_cache,
 				  mmu_page_header_cache, NUMA_NO_NODE);
 
-	INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache,
-				  NULL, NUMA_NO_NODE);
+	for_each_node(nid)
+		INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache[nid],
+					  NULL, NUMA_NO_NODE);
 	spin_lock_init(&kvm->arch.split_shadow_page_cache_lock);
 
 	INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_desc_cache,
@@ -6157,10 +6158,13 @@ int kvm_mmu_init_vm(struct kvm *kvm)
 
 static void mmu_free_vm_memory_caches(struct kvm *kvm)
 {
+	int nid;
+
 	kvm_mmu_free_memory_cache(&kvm->arch.split_desc_cache);
 	kvm_mmu_free_memory_cache(&kvm->arch.split_page_header_cache);
-	mmu_free_sp_memory_cache(&kvm->arch.split_shadow_page_cache,
-				 &kvm->arch.split_shadow_page_cache_lock);
+	for_each_node(nid)
+		mmu_free_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid],
+					 &kvm->arch.split_shadow_page_cache_lock);
 }
 
 void kvm_mmu_uninit_vm(struct kvm *kvm)
@@ -6269,7 +6273,7 @@ static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min)
 	return kvm_mmu_memory_cache_nr_free_objects(cache) < min;
 }
 
-static bool need_topup_split_caches_or_resched(struct kvm *kvm)
+static bool need_topup_split_caches_or_resched(struct kvm *kvm, int nid)
 {
 	if (need_resched() || rwlock_needbreak(&kvm->mmu_lock))
 		return true;
@@ -6281,10 +6285,10 @@ static bool need_topup_split_caches_or_resched(struct kvm *kvm)
 	 */
 	return need_topup(&kvm->arch.split_desc_cache, SPLIT_DESC_CACHE_MIN_NR_OBJECTS) ||
 	       need_topup(&kvm->arch.split_page_header_cache, 1) ||
-	       need_topup(&kvm->arch.split_shadow_page_cache, 1);
+	       need_topup(&kvm->arch.split_shadow_page_cache[nid], 1);
 }
 
-static int topup_split_caches(struct kvm *kvm)
+static int topup_split_caches(struct kvm *kvm, int nid)
 {
 	/*
 	 * Allocating rmap list entries when splitting huge pages for nested
@@ -6314,18 +6318,21 @@ static int topup_split_caches(struct kvm *kvm)
 	if (r)
 		return r;
 
-	return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache,
+	return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid],
 					 &kvm->arch.split_shadow_page_cache_lock,
 					 1);
 }
 
-static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep)
+static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm,
+							u64 *huge_sptep,
+							u64 huge_spte)
 {
 	struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep);
 	struct shadow_page_caches caches = {};
 	union kvm_mmu_page_role role;
 	unsigned int access;
 	gfn_t gfn;
+	int nid;
 
 	gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep));
 	access = kvm_mmu_page_get_access(huge_sp, spte_index(huge_sptep));
@@ -6338,9 +6345,11 @@ static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *hu
 	 */
 	role = kvm_mmu_child_role(huge_sptep, /*direct=*/true, access);
 
+	nid = kvm_pfn_to_page_table_nid(spte_to_pfn(huge_spte));
+
 	/* Direct SPs do not require a shadowed_info_cache. */
 	caches.page_header_cache = &kvm->arch.split_page_header_cache;
-	caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache;
+	caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache[nid];
 	caches.shadow_page_cache_lock = &kvm->arch.split_shadow_page_cache_lock;
 
 	/* Safe to pass NULL for vCPU since requesting a direct SP. */
@@ -6360,7 +6369,7 @@ static void shadow_mmu_split_huge_page(struct kvm *kvm,
 	gfn_t gfn;
 	int index;
 
-	sp = shadow_mmu_get_sp_for_split(kvm, huge_sptep);
+	sp = shadow_mmu_get_sp_for_split(kvm, huge_sptep, huge_spte);
 
 	for (index = 0; index < SPTE_ENT_PER_PAGE; index++) {
 		sptep = &sp->spt[index];
@@ -6398,7 +6407,7 @@ static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
 					  u64 *huge_sptep)
 {
 	struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep);
-	int level, r = 0;
+	int level, r = 0, nid;
 	gfn_t gfn;
 	u64 spte;
 
@@ -6406,13 +6415,14 @@ static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
 	gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep));
 	level = huge_sp->role.level;
 	spte = *huge_sptep;
+	nid = kvm_pfn_to_page_table_nid(spte_to_pfn(spte));
 
 	if (kvm_mmu_available_pages(kvm) <= KVM_MIN_FREE_MMU_PAGES) {
 		r = -ENOSPC;
 		goto out;
 	}
 
-	if (need_topup_split_caches_or_resched(kvm)) {
+	if (need_topup_split_caches_or_resched(kvm, nid)) {
 		write_unlock(&kvm->mmu_lock);
 		cond_resched();
 		/*
@@ -6420,7 +6430,7 @@ static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
 		 * rmap iterator should be restarted because the MMU lock was
 		 * dropped.
 		 */
-		r = topup_split_caches(kvm) ?: -EAGAIN;
+		r = topup_split_caches(kvm, nid) ?: -EAGAIN;
 		write_lock(&kvm->mmu_lock);
 		goto out;
 	}
@@ -6709,17 +6719,15 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 }
 
 static unsigned long mmu_shrink_cache(struct kvm_mmu_memory_cache *cache,
-				      int cache_count,
 				      spinlock_t *cache_lock)
 {
 	unsigned long freed = 0;
 	int nid;
 
 	spin_lock(cache_lock);
-	for (nid = 0; nid < cache_count; nid++) {
-		if (node_online(nid) && cache[nid].nobjs)
+	for_each_online_node(nid)
+		if (cache[nid].nobjs)
 			freed += kvm_mmu_empty_memory_cache(&cache[nid]);
-	}
 	spin_unlock(cache_lock);
 	return freed;
 }
@@ -6741,8 +6749,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 			first_kvm = kvm;
 		list_move_tail(&kvm->vm_list, &vm_list);
 
-		freed += mmu_shrink_cache(&kvm->arch.split_shadow_page_cache,
-					  1,
+		freed += mmu_shrink_cache(kvm->arch.split_shadow_page_cache,
 					  &kvm->arch.split_shadow_page_cache_lock);
 
 		if (freed >= sc->nr_to_scan)
@@ -6750,7 +6757,6 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			freed += mmu_shrink_cache(vcpu->arch.mmu_shadow_page_cache,
-						  MAX_NUMNODES,
 						  &vcpu->arch.mmu_shadow_page_cache_lock);
 		}
 
-- 
2.39.0.314.g84b9a713c41-goog