From: Tony Luck
To: Fenghua Yu, Reinette Chatre, Peter Newman, Jonathan Corbet, x86@kernel.org
Cc: Shaopeng Tan, James Morse, Jamie Iles, Babu Moger, linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org, patches@lists.linux.dev, Tony Luck
Subject: [PATCH 1/7] x86/resctrl: Refactor in preparation for node-scoped resources
Date: Thu, 26 Jan 2023 10:41:51 -0800
Message-Id: <20230126184157.27626-2-tony.luck@intel.com>
In-Reply-To: <20230126184157.27626-1-tony.luck@intel.com>
References: <20230126184157.27626-1-tony.luck@intel.com>

Sub-NUMA Cluster systems provide monitoring resources at the NUMA
node scope instead of the L3 cache scope.

Rename the cache_level field in struct rdt_resource to the more
generic "scope" and add symbolic names and a helper function.

No functional change.
Signed-off-by: Tony Luck
---
 include/linux/resctrl.h                   |  4 ++--
 arch/x86/kernel/cpu/resctrl/internal.h    |  5 +++++
 arch/x86/kernel/cpu/resctrl/core.c        | 15 ++++++++++-----
 arch/x86/kernel/cpu/resctrl/pseudo_lock.c |  2 +-
 arch/x86/kernel/cpu/resctrl/rdtgroup.c    |  2 +-
 5 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 0cee154abc9f..64ecfcafa0a2 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -150,7 +150,7 @@ struct resctrl_schema;
  * @alloc_capable:	Is allocation available on this machine
  * @mon_capable:	Is monitor feature available on this machine
  * @num_rmid:		Number of RMIDs available
- * @cache_level:	Which cache level defines scope of this resource
+ * @scope:		Scope of this resource (cache level or NUMA node)
  * @cache:		Cache allocation related data
  * @membw:		If the component has bandwidth controls, their properties.
  * @domains:		All domains for this resource
@@ -168,7 +168,7 @@ struct rdt_resource {
 	bool			alloc_capable;
 	bool			mon_capable;
 	int			num_rmid;
-	int			cache_level;
+	int			scope;
 	struct resctrl_cache	cache;
 	struct resctrl_membw	membw;
 	struct list_head	domains;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 5ebd28e6aa0c..15cea517efaa 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -414,6 +414,11 @@ enum resctrl_res_level {
 	RDT_NUM_RESOURCES,
 };
 
+enum resctrl_scope {
+	SCOPE_L2_CACHE = 2,
+	SCOPE_L3_CACHE = 3
+};
+
 static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res)
 {
 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res);
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index c98e52ff5f20..6914232acf84 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -65,7 +65,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
 		.r_resctrl = {
 			.rid			= RDT_RESOURCE_L3,
 			.name			= "L3",
-			.cache_level		= 3,
+			.scope			= SCOPE_L3_CACHE,
 			.domains		= domain_init(RDT_RESOURCE_L3),
 			.parse_ctrlval		= parse_cbm,
 			.format_str		= "%d=%0*x",
@@ -79,7 +79,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
 		.r_resctrl = {
 			.rid			= RDT_RESOURCE_L2,
 			.name			= "L2",
-			.cache_level		= 2,
+			.scope			= SCOPE_L2_CACHE,
 			.domains		= domain_init(RDT_RESOURCE_L2),
 			.parse_ctrlval		= parse_cbm,
 			.format_str		= "%d=%0*x",
@@ -93,7 +93,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
 		.r_resctrl = {
 			.rid			= RDT_RESOURCE_MBA,
 			.name			= "MB",
-			.cache_level		= 3,
+			.scope			= SCOPE_L3_CACHE,
 			.domains		= domain_init(RDT_RESOURCE_MBA),
 			.parse_ctrlval		= parse_bw,
 			.format_str		= "%d=%*u",
@@ -462,6 +462,11 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
 	return 0;
 }
 
+static int get_domain_id(int cpu, enum resctrl_scope scope)
+{
+	return get_cpu_cacheinfo_id(cpu, scope);
+}
+
 /*
  * domain_add_cpu - Add a cpu to a resource's domain list.
 *
@@ -477,7 +482,7 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
  */
 static void domain_add_cpu(int cpu, struct rdt_resource *r)
 {
-	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
+	int id = get_domain_id(cpu, r->scope);
 	struct list_head *add_pos = NULL;
 	struct rdt_hw_domain *hw_dom;
 	struct rdt_domain *d;
@@ -527,7 +532,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 
 static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 {
-	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
+	int id = get_domain_id(cpu, r->scope);
 	struct rdt_hw_domain *hw_dom;
 	struct rdt_domain *d;
 
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 524f8ff3e69c..d2ba4f7f6a79 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -297,7 +297,7 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
 	plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
 
 	for (i = 0; i < ci->num_leaves; i++) {
-		if (ci->info_list[i].level == plr->s->res->cache_level) {
+		if (ci->info_list[i].level == plr->s->res->scope) {
 			plr->line_size = ci->info_list[i].coherency_line_size;
 			return 0;
 		}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 5993da21d822..a6ba3080e5db 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1333,7 +1333,7 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
 	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
 	ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
 	for (i = 0; i < ci->num_leaves; i++) {
-		if (ci->info_list[i].level == r->cache_level) {
+		if (ci->info_list[i].level == r->scope) {
 			size = ci->info_list[i].size / r->cache.cbm_len * num_b;
 			break;
 		}
-- 
2.39.1
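For orientation before moving on: combined with patch 3 later in the
series, the refactor above is heading toward a scope enum whose cache
values double as cacheinfo levels, plus one helper that maps a CPU to a
domain id for either kind of scope. A condensed sketch, not itself a
patch (the SCOPE_NODE case only exists once patch 3 is applied):

	enum resctrl_scope {
		SCOPE_NODE,		/* NUMA node scope, added by patch 3 */
		SCOPE_L2_CACHE = 2,	/* numeric value == cacheinfo level  */
		SCOPE_L3_CACHE = 3
	};

	static int get_domain_id(int cpu, enum resctrl_scope scope)
	{
		if (scope == SCOPE_NODE)
			return cpu_to_node(cpu);		 /* NUMA node id     */
		return get_cpu_cacheinfo_id(cpu, scope); /* cache instance id */
	}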
From: Tony Luck
To: Fenghua Yu, Reinette Chatre, Peter Newman, Jonathan Corbet, x86@kernel.org
Cc: Shaopeng Tan, James Morse, Jamie Iles, Babu Moger, linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org, patches@lists.linux.dev, Tony Luck
Subject: [PATCH 2/7] x86/resctrl: Remove hard-coded RDT_RESOURCE_L3 in monitor.c
Date: Thu, 26 Jan 2023 10:41:52 -0800
Message-Id: <20230126184157.27626-3-tony.luck@intel.com>
In-Reply-To: <20230126184157.27626-1-tony.luck@intel.com>
References: <20230126184157.27626-1-tony.luck@intel.com>

Monitoring may be scoped at L3 cache granularity (legacy) or at the
node level (systems with Sub-NUMA Cluster enabled).

Save the struct rdt_resource pointer that was used to initialize the
monitoring code and use that pointer instead of the hard-coded
RDT_RESOURCE_L3.

No functional change.

Signed-off-by: Tony Luck
---
 arch/x86/kernel/cpu/resctrl/monitor.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 77538abeb72a..d05bbd4f6b2d 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -30,6 +30,8 @@ struct rmid_entry {
 	struct list_head		list;
 };
 
+static struct rdt_resource *mon_resource;
+
 /**
  * @rmid_free_lru    A least recently used list of free RMIDs
  *     These RMIDs are guaranteed to have an occupancy less than the
@@ -251,7 +253,7 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
  */
 void __check_limbo(struct rdt_domain *d, bool force_free)
 {
-	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	struct rdt_resource *r = mon_resource;
 	struct rmid_entry *entry;
 	u32 crmid = 1, nrmid;
 	bool rmid_dirty;
@@ -316,7 +318,7 @@ int alloc_rmid(void)
 
 static void add_rmid_to_limbo(struct rmid_entry *entry)
 {
-	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	struct rdt_resource *r = mon_resource;
 	struct rdt_domain *d;
 	int cpu, err;
 	u64 val = 0;
@@ -633,7 +635,7 @@ void cqm_handle_limbo(struct work_struct *work)
 
 	mutex_lock(&rdtgroup_mutex);
 
-	r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	r = mon_resource;
 	d = container_of(work, struct rdt_domain, cqm_limbo.work);
 
 	__check_limbo(d, false);
@@ -669,7 +671,7 @@ void mbm_handle_overflow(struct work_struct *work)
 	if (!static_branch_likely(&rdt_mon_enable_key))
 		goto out_unlock;
 
-	r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	r = mon_resource;
 	d = container_of(work, struct rdt_domain, mbm_over.work);
 
 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
@@ -747,9 +749,11 @@ static struct mon_evt mbm_local_event = {
 /*
  * Initialize the event list for the resource.
 *
- * Note that MBM events are also part of RDT_RESOURCE_L3 resource
+ * Note that MBM events can either be part of RDT_RESOURCE_L3 resource
  * because as per the SDM the total and local memory bandwidth
- * are enumerated as part of L3 monitoring.
+ * are enumerated as part of L3 monitoring, or they may be per NUMA
+ * node on systems with sub-NUMA cluster enabled and are then in the
+ * RDT_RESOURCE_NODE resource.
  */
 static void l3_mon_evt_init(struct rdt_resource *r)
 {
@@ -761,6 +765,8 @@ static void l3_mon_evt_init(struct rdt_resource *r)
 		list_add_tail(&mbm_total_event.list, &r->evt_list);
 	if (is_mbm_local_enabled())
 		list_add_tail(&mbm_local_event.list, &r->evt_list);
+
+	mon_resource = r;
 }
 
 int rdt_get_mon_l3_config(struct rdt_resource *r)
-- 
2.39.1

From: Tony Luck
To: Fenghua Yu, Reinette Chatre, Peter Newman, Jonathan Corbet, x86@kernel.org
Cc: Shaopeng Tan, James Morse, Jamie Iles, Babu Moger, linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org, patches@lists.linux.dev, Tony Luck
Subject: [PATCH 3/7] x86/resctrl: Add a new node-scoped resource to rdt_resources_all[]
Date: Thu, 26 Jan 2023 10:41:53 -0800
Message-Id: <20230126184157.27626-4-tony.luck@intel.com>
In-Reply-To: <20230126184157.27626-1-tony.luck@intel.com>
References: <20230126184157.27626-1-tony.luck@intel.com>
text/plain; charset="utf-8" Add a placeholder in the array of struct rdt_hw_resource to be used for event monitoring of systems with Sub-NUMA Cluster enabled. Update get_domain_id() to handle SCOPE_NODE. Signed-off-by: Tony Luck Reviewed-By: Peter Newman Tested-By: Peter Newman --- arch/x86/kernel/cpu/resctrl/internal.h | 2 ++ arch/x86/kernel/cpu/resctrl/core.c | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/r= esctrl/internal.h index 15cea517efaa..39a62babd60b 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -409,12 +409,14 @@ enum resctrl_res_level { RDT_RESOURCE_L3, RDT_RESOURCE_L2, RDT_RESOURCE_MBA, + RDT_RESOURCE_NODE, =20 /* Must be the last */ RDT_NUM_RESOURCES, }; =20 enum resctrl_scope { + SCOPE_NODE, SCOPE_L2_CACHE =3D 2, SCOPE_L3_CACHE =3D 3 }; diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resct= rl/core.c index 6914232acf84..19be6fe42ef3 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -100,6 +100,16 @@ struct rdt_hw_resource rdt_resources_all[] =3D { .fflags =3D RFTYPE_RES_MB, }, }, + [RDT_RESOURCE_NODE] =3D + { + .r_resctrl =3D { + .rid =3D RDT_RESOURCE_NODE, + .name =3D "L3", + .scope =3D SCOPE_NODE, + .domains =3D domain_init(RDT_RESOURCE_NODE), + .fflags =3D RFTYPE_RES_MB, + }, + }, }; =20 /* @@ -464,6 +474,8 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct r= dt_hw_domain *hw_dom) =20 static int get_domain_id(int cpu, enum resctrl_scope scope) { + if (scope =3D=3D SCOPE_NODE) + return cpu_to_node(cpu); return get_cpu_cacheinfo_id(cpu, scope); } =20 --=20 2.39.1 From nobody Sun Sep 14 03:51:59 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id ABFEBC05027 for ; Thu, 26 Jan 2023 18:42:49 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232228AbjAZSms (ORCPT ); Thu, 26 Jan 2023 13:42:48 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:60882 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230482AbjAZSmd (ORCPT ); Thu, 26 Jan 2023 13:42:33 -0500 Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 7CA7845F74; Thu, 26 Jan 2023 10:42:32 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1674758552; x=1706294552; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=b/uxKduTxOxhRTdknRQRA39FqoLPxucgbMsljFn2K+0=; b=DdliaEVWjUrRs1Wc5IuIrZ5iHW/zadQS2UJ9SWe7e+NMUtdMIh5okzhV pgM70QlRwCTGbf39lsXn29r+gEK/kcpLUM1K/xa/5S9L6MTjdo208J2F4 5y0etbDfK2gZK1E0N2dyFEaOuGX6atJJD3vy+YmqwY7o1bi/mvj8KJ5Eo UjQNrgLukkPEL0VkDZZlcxS4NK4XRs2TI82O7drt63P40UCwJ4CS4nJT3 zf3fk9Xz9vZVWpIl8hl3Ywo/iBbvZPiw2OxwrI5K2Fq3Zr6CMmz02gFG8 kJcfwGWwiNBdWqIfnxP8Pg0phrZMaeTJVBf4CmqtWmWk0BN0K1I0cXeHO A==; X-IronPort-AV: E=McAfee;i="6500,9779,10602"; a="354203351" X-IronPort-AV: E=Sophos;i="5.97,249,1669104000"; d="scan'208";a="354203351" Received: from fmsmga005.fm.intel.com ([10.253.24.32]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Jan 2023 10:42:06 -0800 X-IronPort-AV: E=McAfee;i="6500,9779,10602"; a="991745449" X-IronPort-AV: 
E=Sophos;i="5.97,249,1669104000"; d="scan'208";a="991745449" Received: from agluck-desk3.sc.intel.com ([172.25.222.78]) by fmsmga005-auth.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Jan 2023 10:42:06 -0800 From: Tony Luck To: Fenghua Yu , Reinette Chatre , Peter Newman , Jonathan Corbet , x86@kernel.org Cc: Shaopeng Tan , James Morse , Jamie Iles , Babu Moger , linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org, patches@lists.linux.dev, Tony Luck Subject: [PATCH 4/7] x86/resctrl: Add code to setup monitoring at L3 or NODE scope. Date: Thu, 26 Jan 2023 10:41:54 -0800 Message-Id: <20230126184157.27626-5-tony.luck@intel.com> X-Mailer: git-send-email 2.39.1 In-Reply-To: <20230126184157.27626-1-tony.luck@intel.com> References: <20230126184157.27626-1-tony.luck@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" When Sub-NUMA cluster is enabled (snc_ways > 1) use the RDT_RESOURCE_NODE instead of RDT_RESOURCE_L3 for all monitoring operations. The mon_scale and num_rmid values from CPUID(0xf,0x1),(EBX,ECX) must be scaled down by the number of Sub-NUMA Clusters. A subsequent change will detect sub-NUMA cluster mode and set "snc_ways". For now set to one (meaning each L3 cache spans one node). Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/resctrl/internal.h | 2 ++ arch/x86/kernel/cpu/resctrl/core.c | 13 ++++++++++++- arch/x86/kernel/cpu/resctrl/monitor.c | 4 ++-- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 5 ++++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/r= esctrl/internal.h index 39a62babd60b..ad26d008dafa 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -405,6 +405,8 @@ DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); =20 extern struct dentry *debugfs_resctrl; =20 +extern int snc_ways; + enum resctrl_res_level { RDT_RESOURCE_L3, RDT_RESOURCE_L2, diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resct= rl/core.c index 19be6fe42ef3..53b2ab37af2f 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -48,6 +48,11 @@ int max_name_width, max_data_width; */ bool rdt_alloc_capable; =20 +/* + * How many Sub-Numa Cluster nodes share a single L3 cache + */ +int snc_ways =3D 1; + static void mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); @@ -786,7 +791,13 @@ static __init bool get_rdt_alloc_resources(void) =20 static __init bool get_rdt_mon_resources(void) { - struct rdt_resource *r =3D &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + struct rdt_resource *r; + + /* When SNC enabled, monitor functions at node instead of L3 cache scope = */ + if (snc_ways > 1) + r =3D &rdt_resources_all[RDT_RESOURCE_NODE].r_resctrl; + else + r =3D &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; =20 if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) rdt_mon_features |=3D (1 << QOS_L3_OCCUP_EVENT_ID); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/re= sctrl/monitor.c index d05bbd4f6b2d..3fc63aa68130 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -777,8 +777,8 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) int ret; =20 resctrl_rmid_realloc_limit =3D boot_cpu_data.x86_cache_size * 1024; - hw_res->mon_scale =3D boot_cpu_data.x86_cache_occ_scale; - r->num_rmid =3D 
boot_cpu_data.x86_cache_max_rmid + 1; + hw_res->mon_scale =3D boot_cpu_data.x86_cache_occ_scale / snc_ways; + r->num_rmid =3D (boot_cpu_data.x86_cache_max_rmid + 1) / snc_ways; hw_res->mbm_width =3D MBM_CNTR_WIDTH_BASE; =20 if (mbm_offset > 0 && mbm_offset <=3D MBM_CNTR_WIDTH_OFFSET_MAX) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/r= esctrl/rdtgroup.c index a6ba3080e5db..a0dc64a70d01 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2238,7 +2238,10 @@ static int rdt_get_tree(struct fs_context *fc) static_branch_enable_cpuslocked(&rdt_enable_key); =20 if (is_mbm_enabled()) { - r =3D &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + if (snc_ways > 1) + r =3D &rdt_resources_all[RDT_RESOURCE_NODE].r_resctrl; + else + r =3D &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; list_for_each_entry(dom, &r->domains, list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL); } --=20 2.39.1 From nobody Sun Sep 14 03:51:59 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 064E6C61DA3 for ; Thu, 26 Jan 2023 18:42:52 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232243AbjAZSmu (ORCPT ); Thu, 26 Jan 2023 13:42:50 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:60912 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231513AbjAZSmd (ORCPT ); Thu, 26 Jan 2023 13:42:33 -0500 Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 0F314474C7; Thu, 26 Jan 2023 10:42:33 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1674758553; x=1706294553; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=lazqj33+wcKOHpjgmXNXHNJZww2MNyt2EHBaLJXRYCw=; b=YZr0bbBfahBUfu/3SAMOQ6hI6BwMxQ8CQFasPDZ5HwLV8kesrv8s24Hl 0WtUe9oo8GJn8+c+IALbhD/LlfL/SU3gSzPTB5qofKctBLWJTrqFrUo5O ShanDrlviRiTEOeOGS5wHs2QY8SGYhwixZvyGox93/YAIQOSZJCJHH3wM 6od47+f5NrPNwX9WRpbqQrmPETE+U01LzrqGmGjNSnxIknaDJ4HNj07kX Majv+Ly4+zHupenxDshgdJYVAUU1db7mBqhns5iPiMUy8Hn4ii9X7/4H9 kygaLFTiKrRg32zbGrA3E4+K1P7LDjFoY0bwfxQ9t/LqnVDxSkRWnGg3k g==; X-IronPort-AV: E=McAfee;i="6500,9779,10602"; a="354203361" X-IronPort-AV: E=Sophos;i="5.97,249,1669104000"; d="scan'208";a="354203361" Received: from fmsmga005.fm.intel.com ([10.253.24.32]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Jan 2023 10:42:06 -0800 X-IronPort-AV: E=McAfee;i="6500,9779,10602"; a="991745452" X-IronPort-AV: E=Sophos;i="5.97,249,1669104000"; d="scan'208";a="991745452" Received: from agluck-desk3.sc.intel.com ([172.25.222.78]) by fmsmga005-auth.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Jan 2023 10:42:06 -0800 From: Tony Luck To: Fenghua Yu , Reinette Chatre , Peter Newman , Jonathan Corbet , x86@kernel.org Cc: Shaopeng Tan , James Morse , Jamie Iles , Babu Moger , linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org, patches@lists.linux.dev, Tony Luck Subject: [PATCH 5/7] x86/resctrl: Add a new "snc_ways" file to the monitoring info directory. 
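To put numbers on the scaling above (illustrative values only; the
400-RMID, two-node split mirrors the example given later in patch 7's
description), a small user-space sketch of the arithmetic:

	#include <stdio.h>

	int main(void)
	{
		int max_rmid  = 399;	/* CPUID(0xf,0x1).ECX: highest RMID (illustrative)    */
		int occ_scale = 64;	/* CPUID(0xf,0x1).EBX: bytes per count (illustrative) */
		int snc_ways  = 2;	/* two SNC nodes share each L3                        */

		printf("num_rmid per node:  %d\n", (max_rmid + 1) / snc_ways);	/* 200 */
		printf("mon_scale per node: %d\n", occ_scale / snc_ways);	/* 32  */
		return 0;
	}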
From: Tony Luck
To: Fenghua Yu, Reinette Chatre, Peter Newman, Jonathan Corbet, x86@kernel.org
Cc: Shaopeng Tan, James Morse, Jamie Iles, Babu Moger, linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org, patches@lists.linux.dev, Tony Luck
Subject: [PATCH 5/7] x86/resctrl: Add a new "snc_ways" file to the monitoring info directory
Date: Thu, 26 Jan 2023 10:41:55 -0800
Message-Id: <20230126184157.27626-6-tony.luck@intel.com>
In-Reply-To: <20230126184157.27626-1-tony.luck@intel.com>
References: <20230126184157.27626-1-tony.luck@intel.com>

Make it easy for the user to tell if Sub-NUMA Cluster is enabled by
providing an info/ file.

Signed-off-by: Tony Luck
---
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index a0dc64a70d01..392e7a08d083 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -997,6 +997,14 @@ static int rdt_num_rmids_show(struct kernfs_open_file *of,
 	return 0;
 }
 
+static int rdt_snc_ways_show(struct kernfs_open_file *of,
+			     struct seq_file *seq, void *v)
+{
+	seq_printf(seq, "%d\n", snc_ways);
+
+	return 0;
+}
+
 static int rdt_mon_features_show(struct kernfs_open_file *of,
 				 struct seq_file *seq, void *v)
 {
@@ -1451,6 +1459,13 @@ static struct rftype res_common_files[] = {
 		.seq_show	= rdt_num_rmids_show,
 		.fflags		= RF_MON_INFO,
 	},
+	{
+		.name		= "snc_ways",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_snc_ways_show,
+		.fflags		= RF_MON_INFO,
+	},
 	{
 		.name		= "cbm_mask",
 		.mode		= 0444,
-- 
2.39.1
From: Tony Luck
To: Fenghua Yu, Reinette Chatre, Peter Newman, Jonathan Corbet, x86@kernel.org
Cc: Shaopeng Tan, James Morse, Jamie Iles, Babu Moger, linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org, patches@lists.linux.dev, Tony Luck
Subject: [PATCH 6/7] x86/resctrl: Update documentation with Sub-NUMA cluster changes
Date: Thu, 26 Jan 2023 10:41:56 -0800
Message-Id: <20230126184157.27626-7-tony.luck@intel.com>
In-Reply-To: <20230126184157.27626-1-tony.luck@intel.com>
References: <20230126184157.27626-1-tony.luck@intel.com>

With Sub-NUMA Cluster mode enabled, the scope of monitoring resources
is per NUMA node instead of per L3 cache. Suffixes of directories with
"L3" in their name refer to Sub-NUMA nodes instead of L3 cache ids.

Signed-off-by: Tony Luck
---
 Documentation/x86/resctrl.rst | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/Documentation/x86/resctrl.rst b/Documentation/x86/resctrl.rst
index 71a531061e4e..9043a2d2f2d3 100644
--- a/Documentation/x86/resctrl.rst
+++ b/Documentation/x86/resctrl.rst
@@ -167,6 +167,11 @@ with the following files:
 	bytes) at which a previously used LLC_occupancy
 	counter can be considered for re-use.
 
+"snc_ways":
+	A value of "1" marks that SNC mode is disabled.
+	Values of "2" or "4" indicate how many NUMA
+	nodes share an L3 cache.
+
 Finally, in the top level of the "info" directory there is a file
 named "last_cmd_status". This is reset with every "command" issued
 via the file system (making new directories or writing to any of the
@@ -254,9 +259,13 @@ When control is enabled all CTRL_MON groups will also contain:
 When monitoring is enabled all MON groups will also contain:
 
 "mon_data":
-	This contains a set of files organized by L3 domain and by
-	RDT event. E.g. on a system with two L3 domains there will
-	be subdirectories "mon_L3_00" and "mon_L3_01". Each of these
+	This contains a set of files organized by L3 domain or by NUMA
+	node (depending on whether SNC mode is disabled or enabled
+	respectively) and by RDT event. E.g. on a system with SNC
+	mode disabled with two L3 domains there will be subdirectories
+	"mon_L3_00" and "mon_L3_01" the numerical suffix refers to the
+	L3 cache id. With SNC enabled the directory names are the same,
+	but the numerical suffix refers to the node id. Each of these
 	directories have one file per event (e.g. "llc_occupancy",
 	"mbm_total_bytes", and "mbm_local_bytes").
 	In a MON group these files provide a read out of the current
 	value of the event for
-- 
2.39.1

From: Tony Luck
To: Fenghua Yu, Reinette Chatre, Peter Newman, Jonathan Corbet, x86@kernel.org
Cc: Shaopeng Tan, James Morse, Jamie Iles, Babu Moger, linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org, patches@lists.linux.dev, Tony Luck
Subject: [PATCH 7/7] x86/resctrl: Determine if Sub-NUMA Cluster is enabled and initialize
Date: Thu, 26 Jan 2023 10:41:57 -0800
Message-Id: <20230126184157.27626-8-tony.luck@intel.com>
In-Reply-To: <20230126184157.27626-1-tony.luck@intel.com>
References: <20230126184157.27626-1-tony.luck@intel.com>

There isn't a simple hardware enumeration to indicate to software that
a system is running with Sub-NUMA Cluster enabled, so compare the
number of NUMA nodes with the number of L3 caches to calculate the
number of Sub-NUMA nodes per L3 cache.

When Sub-NUMA Cluster mode is enabled in BIOS setup, the RMID counters
are distributed equally between the SNC nodes within each socket. E.g.
if there are 400 RMID counters and the system is configured with two
SNC nodes per socket, then RMID counters 0..199 are used on SNC node 0
of the socket and RMID counters 200..399 on SNC node 1.

Handle this by initializing a per-CPU RMID offset value.
Use this offset to calculate the value written to the RMID field of the
IA32_PQR_ASSOC MSR during context switch, and also to the
IA32_QM_EVTSEL MSR when reading RMID event values.

N.B. this works well for well-behaved NUMA applications that access
memory predominantly from the local memory node. For applications that
access memory across multiple nodes, the user may need to read counters
for all SNC nodes on a socket and add the values to get the actual LLC
occupancy or memory bandwidth. Perhaps this isn't all that different
from applications that span multiple sockets in a legacy system.

Signed-off-by: Tony Luck
---
 arch/x86/include/asm/resctrl.h        |  4 ++-
 arch/x86/kernel/cpu/resctrl/core.c    | 43 +++++++++++++++++++++++++--
 arch/x86/kernel/cpu/resctrl/monitor.c |  2 +-
 3 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 52788f79786f..59b8afd8c53c 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -35,6 +35,8 @@ DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
 DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
 DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
 
+DECLARE_PER_CPU(int, rmid_offset);
+
 /*
  * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
  *
@@ -69,7 +71,7 @@ static void __resctrl_sched_in(void)
 	if (static_branch_likely(&rdt_mon_enable_key)) {
 		tmp = READ_ONCE(current->rmid);
 		if (tmp)
-			rmid = tmp;
+			rmid = tmp + this_cpu_read(rmid_offset);
 	}
 
 	if (closid != state->cur_closid || rmid != state->cur_rmid) {
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 53b2ab37af2f..0ff739375e3b 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -16,6 +16,7 @@
 
 #define pr_fmt(fmt)	"resctrl: " fmt
 
+#include
 #include
 #include
 #include
@@ -484,6 +485,13 @@ static int get_domain_id(int cpu, enum resctrl_scope scope)
 	return get_cpu_cacheinfo_id(cpu, scope);
 }
 
+DEFINE_PER_CPU(int, rmid_offset);
+
+static void set_per_cpu_rmid_offset(int cpu, struct rdt_resource *r)
+{
+	this_cpu_write(rmid_offset, (cpu_to_node(cpu) % snc_ways) * r->num_rmid);
+}
+
 /*
  * domain_add_cpu - Add a cpu to a resource's domain list.
 *
@@ -515,6 +523,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 		cpumask_set_cpu(cpu, &d->cpu_mask);
 		if (r->cache.arch_has_per_cpu_cfg)
 			rdt_domain_reconfigure_cdp(r);
+		if (r->mon_capable)
+			set_per_cpu_rmid_offset(cpu, r);
 		return;
 	}
 
@@ -533,9 +543,12 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 		return;
 	}
 
-	if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
-		domain_free(hw_dom);
-		return;
+	if (r->mon_capable) {
+		if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
+			domain_free(hw_dom);
+			return;
+		}
+		set_per_cpu_rmid_offset(cpu, r);
 	}
 
 	list_add_tail(&d->list, add_pos);
@@ -845,11 +858,35 @@ static __init bool get_rdt_resources(void)
 	return (rdt_mon_capable || rdt_alloc_capable);
 }
 
+static __init int find_snc_ways(void)
+{
+	unsigned long *node_caches;
+	int cpu, node, ret;
+
+	node_caches = kcalloc(BITS_TO_LONGS(nr_node_ids), sizeof(*node_caches), GFP_KERNEL);
+	if (!node_caches)
+		return 1;
+
+	cpus_read_lock();
+	for_each_node(node) {
+		cpu = cpumask_first(cpumask_of_node(node));
+		set_bit(get_cpu_cacheinfo_id(cpu, 3), node_caches);
+	}
+	cpus_read_unlock();
+
+	ret = nr_node_ids / bitmap_weight(node_caches, nr_node_ids);
+	kfree(node_caches);
+
+	return ret;
+}
+
 static __init void rdt_init_res_defs_intel(void)
 {
 	struct rdt_hw_resource *hw_res;
 	struct rdt_resource *r;
 
+	snc_ways = find_snc_ways();
+
 	for_each_rdt_resource(r) {
 		hw_res = resctrl_to_arch_res(r);
 
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 3fc63aa68130..bd5ec348d925 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -160,7 +160,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
 	 * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
 	 * are error bits.
 	 */
-	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid + this_cpu_read(rmid_offset));
 	rdmsrl(MSR_IA32_QM_CTR, msr_val);
 
 	if (msr_val & RMID_VAL_ERROR)
-- 
2.39.1
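Condensing the series: SNC support amounts to detecting how many NUMA
nodes share an L3 cache and then biasing every RMID the hardware sees
by a per-CPU offset. A rough sketch of those two steps using the names
from the patches above (read_rmid_count() is a simplified stand-in for
__rmid_read(); error handling, locking and the limbo/overflow plumbing
are omitted):

	static int snc_ways = 1;		/* SNC nodes per L3, from find_snc_ways() */
	static DEFINE_PER_CPU(int, rmid_offset);/* this CPU's RMID bias                   */

	static void set_per_cpu_rmid_offset(int cpu, struct rdt_resource *r)
	{
		/* node 0 uses RMIDs 0..num_rmid-1, node 1 the next block, and so on */
		this_cpu_write(rmid_offset, (cpu_to_node(cpu) % snc_ways) * r->num_rmid);
	}

	static u64 read_rmid_count(u32 rmid, enum resctrl_event_id eventid)
	{
		u64 msr_val;

		/* Hardware sees the node-local RMID; software keeps the logical one. */
		wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid + this_cpu_read(rmid_offset));
		rdmsrl(MSR_IA32_QM_CTR, msr_val);
		return msr_val;
	}

As the description notes, with SNC enabled a tool that wants socket-wide
occupancy or bandwidth has to sum the per-node mon_data values itself.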