From: Tony Luck <tony.luck@intel.com>
Subject: [PATCH v2 1/7] x86/resctrl: Refactor in preparation for node-scoped resources
Date: Wed, 21 Jun 2023 10:40:00 -0700
Message-Id: <20230621174006.42533-2-tony.luck@intel.com>
In-Reply-To: <20230621174006.42533-1-tony.luck@intel.com>

Sub-NUMA Cluster (SNC) systems provide monitoring resources at NUMA
node scope instead of L3 cache scope.

Rename the cache_level field in struct rdt_resource to the more
generic "scope", and add symbolic names and a helper function.

No functional change.
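As an illustration of the new "scope" concept (editorial sketch, not
part of the patch): on an assumed two-socket system with one L3 cache
per socket and two SNC nodes per socket, domain ids for a resource
work out as follows:

	scope = SCOPE_L3_CACHE (3)  ->  domains follow L3 cache ids: {0, 1}
	scope = SCOPE_NODE (added later in this series)
	                            ->  domains follow NUMA node ids: {0, 1, 2, 3}

The cache scope enum values are deliberately equal to the cacheinfo
levels, so cache-scoped values can be passed unchanged to
get_cpu_cacheinfo_id().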
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/resctrl.h                   |  4 ++--
 arch/x86/kernel/cpu/resctrl/internal.h    |  5 +++++
 arch/x86/kernel/cpu/resctrl/core.c        | 17 +++++++++++------
 arch/x86/kernel/cpu/resctrl/pseudo_lock.c |  2 +-
 arch/x86/kernel/cpu/resctrl/rdtgroup.c    |  2 +-
 5 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 8334eeacfec5..25051daa6655 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -150,7 +150,7 @@ struct resctrl_schema;
  * @alloc_capable:	Is allocation available on this machine
  * @mon_capable:	Is monitor feature available on this machine
  * @num_rmid:		Number of RMIDs available
- * @cache_level:	Which cache level defines scope of this resource
+ * @scope:		Scope of this resource (cache level or NUMA node)
  * @cache:		Cache allocation related data
  * @membw:		If the component has bandwidth controls, their properties.
  * @domains:		All domains for this resource
@@ -168,7 +168,7 @@ struct rdt_resource {
 	bool			alloc_capable;
 	bool			mon_capable;
 	int			num_rmid;
-	int			cache_level;
+	int			scope;
 	struct resctrl_cache	cache;
 	struct resctrl_membw	membw;
 	struct list_head	domains;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 85ceaf9a31ac..8275b8a74f7e 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -440,6 +440,11 @@ enum resctrl_res_level {
 	RDT_NUM_RESOURCES,
 };
 
+enum resctrl_scope {
+	SCOPE_L2_CACHE = 2,
+	SCOPE_L3_CACHE = 3
+};
+
 static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res)
 {
 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res);
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 030d3b409768..6571514752f3 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -65,7 +65,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
 		.r_resctrl = {
 			.rid			= RDT_RESOURCE_L3,
 			.name			= "L3",
-			.cache_level		= 3,
+			.scope			= SCOPE_L3_CACHE,
 			.domains		= domain_init(RDT_RESOURCE_L3),
 			.parse_ctrlval		= parse_cbm,
 			.format_str		= "%d=%0*x",
@@ -79,7 +79,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
 		.r_resctrl = {
 			.rid			= RDT_RESOURCE_L2,
 			.name			= "L2",
-			.cache_level		= 2,
+			.scope			= SCOPE_L2_CACHE,
 			.domains		= domain_init(RDT_RESOURCE_L2),
 			.parse_ctrlval		= parse_cbm,
 			.format_str		= "%d=%0*x",
@@ -93,7 +93,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
 		.r_resctrl = {
 			.rid			= RDT_RESOURCE_MBA,
 			.name			= "MB",
-			.cache_level		= 3,
+			.scope			= SCOPE_L3_CACHE,
 			.domains		= domain_init(RDT_RESOURCE_MBA),
 			.parse_ctrlval		= parse_bw,
 			.format_str		= "%d=%*u",
@@ -105,7 +105,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
 		.r_resctrl = {
 			.rid			= RDT_RESOURCE_SMBA,
 			.name			= "SMBA",
-			.cache_level		= 3,
+			.scope			= SCOPE_L3_CACHE,
 			.domains		= domain_init(RDT_RESOURCE_SMBA),
 			.parse_ctrlval		= parse_bw,
 			.format_str		= "%d=%*u",
@@ -487,6 +487,11 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
 	return 0;
 }
 
+static int get_domain_id(int cpu, enum resctrl_scope scope)
+{
+	return get_cpu_cacheinfo_id(cpu, scope);
+}
+
 /*
  * domain_add_cpu - Add a cpu to a resource's domain list.
  *
@@ -502,7 +507,7 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
  */
 static void domain_add_cpu(int cpu, struct rdt_resource *r)
 {
-	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
+	int id = get_domain_id(cpu, r->scope);
 	struct list_head *add_pos = NULL;
 	struct rdt_hw_domain *hw_dom;
 	struct rdt_domain *d;
@@ -552,7 +557,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 
 static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 {
-	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
+	int id = get_domain_id(cpu, r->scope);
 	struct rdt_hw_domain *hw_dom;
 	struct rdt_domain *d;
 
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 458cb7419502..42f124ffb968 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -297,7 +297,7 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
 	plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
 
 	for (i = 0; i < ci->num_leaves; i++) {
-		if (ci->info_list[i].level == plr->s->res->cache_level) {
+		if (ci->info_list[i].level == plr->s->res->scope) {
 			plr->line_size = ci->info_list[i].coherency_line_size;
 			return 0;
 		}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 725344048f85..418658f0a9ad 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1348,7 +1348,7 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
 	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
 	ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
 	for (i = 0; i < ci->num_leaves; i++) {
-		if (ci->info_list[i].level == r->cache_level) {
+		if (ci->info_list[i].level == r->scope) {
 			size = ci->info_list[i].size / r->cache.cbm_len * num_b;
 			break;
 		}
-- 
2.40.1
From: Tony Luck <tony.luck@intel.com>
Subject: [PATCH v2 2/7] x86/resctrl: Remove hard code of RDT_RESOURCE_L3 in monitor.c
Date: Wed, 21 Jun 2023 10:40:01 -0700
Message-Id: <20230621174006.42533-3-tony.luck@intel.com>
In-Reply-To: <20230621174006.42533-1-tony.luck@intel.com>

Monitoring may be done at L3 cache granularity (legacy) or at node
granularity (systems with Sub-NUMA Cluster enabled). Save the struct
rdt_resource pointer that was used to initialize the monitoring code
and use that value instead of the hard-coded RDT_RESOURCE_L3.

No functional change.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/resctrl/monitor.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index ded1fc7cb7cb..9be6ffdd01ae 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -30,6 +30,8 @@ struct rmid_entry {
 	struct list_head		list;
 };
 
+static struct rdt_resource *mon_resource;
+
 /**
  * @rmid_free_lru    A least recently used list of free RMIDs
  *     These RMIDs are guaranteed to have an occupancy less than the
@@ -268,7 +270,7 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
  */
 void __check_limbo(struct rdt_domain *d, bool force_free)
 {
-	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	struct rdt_resource *r = mon_resource;
 	struct rmid_entry *entry;
 	u32 crmid = 1, nrmid;
 	bool rmid_dirty;
@@ -333,7 +335,7 @@ int alloc_rmid(void)
 
 static void add_rmid_to_limbo(struct rmid_entry *entry)
 {
-	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	struct rdt_resource *r = mon_resource;
 	struct rdt_domain *d;
 	int cpu, err;
 	u64 val = 0;
@@ -645,7 +647,7 @@ void cqm_handle_limbo(struct work_struct *work)
 
 	mutex_lock(&rdtgroup_mutex);
 
-	r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	r = mon_resource;
 	d = container_of(work, struct rdt_domain, cqm_limbo.work);
 
 	__check_limbo(d, false);
@@ -681,7 +683,7 @@ void mbm_handle_overflow(struct work_struct *work)
 	if (!static_branch_likely(&rdt_mon_enable_key))
 		goto out_unlock;
 
-	r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	r = mon_resource;
 	d = container_of(work, struct rdt_domain, mbm_over.work);
 
 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
@@ -759,9 +761,9 @@ static struct mon_evt mbm_local_event = {
 /*
  * Initialize the event list for the resource.
  *
- * Note that MBM events are also part of RDT_RESOURCE_L3 resource
- * because as per the SDM the total and local memory bandwidth
- * are enumerated as part of L3 monitoring.
+ * Monitor events can either be part of the RDT_RESOURCE_L3 resource,
+ * or they may be per NUMA node on systems with Sub-NUMA Cluster
+ * enabled and are then in the RDT_RESOURCE_NODE resource.
  */
 static void l3_mon_evt_init(struct rdt_resource *r)
 {
@@ -773,6 +775,8 @@ static void l3_mon_evt_init(struct rdt_resource *r)
 		list_add_tail(&mbm_total_event.list, &r->evt_list);
 	if (is_mbm_local_enabled())
 		list_add_tail(&mbm_local_event.list, &r->evt_list);
+
+	mon_resource = r;
 }
 
 int __init rdt_get_mon_l3_config(struct rdt_resource *r)
-- 
2.40.1

From: Tony Luck <tony.luck@intel.com>
Subject: [PATCH v2 3/7] x86/resctrl: Add a new node-scoped resource to rdt_resources_all[]
Date: Wed, 21 Jun 2023 10:40:02 -0700
Message-Id: <20230621174006.42533-4-tony.luck@intel.com>
In-Reply-To: <20230621174006.42533-1-tony.luck@intel.com>
Add a placeholder in the array of struct rdt_hw_resource to be used
for event monitoring of systems with Sub-NUMA Cluster enabled.

Update get_domain_id() to handle SCOPE_NODE.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/resctrl/internal.h |  4 +++-
 arch/x86/kernel/cpu/resctrl/core.c     | 12 ++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 8275b8a74f7e..243017096ddf 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -435,6 +435,7 @@ enum resctrl_res_level {
 	RDT_RESOURCE_L2,
 	RDT_RESOURCE_MBA,
 	RDT_RESOURCE_SMBA,
+	RDT_RESOURCE_NODE,
 
 	/* Must be the last */
 	RDT_NUM_RESOURCES,
@@ -442,7 +443,8 @@ enum resctrl_res_level {
 
 enum resctrl_scope {
 	SCOPE_L2_CACHE = 2,
-	SCOPE_L3_CACHE = 3
+	SCOPE_L3_CACHE = 3,
+	SCOPE_NODE,
 };
 
 static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 6571514752f3..e4bd3072927c 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -112,6 +112,16 @@ struct rdt_hw_resource rdt_resources_all[] = {
 			.fflags			= RFTYPE_RES_MB,
 		},
 	},
+	[RDT_RESOURCE_NODE] =
+	{
+		.r_resctrl = {
+			.rid			= RDT_RESOURCE_NODE,
+			.name			= "L3",
+			.scope			= SCOPE_NODE,
+			.domains		= domain_init(RDT_RESOURCE_NODE),
+			.fflags			= 0,
+		},
+	},
 };
 
 /*
@@ -489,6 +499,8 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
 
 static int get_domain_id(int cpu, enum resctrl_scope scope)
 {
+	if (scope == SCOPE_NODE)
+		return cpu_to_node(cpu);
 	return get_cpu_cacheinfo_id(cpu, scope);
 }
 
-- 
2.40.1

From: Tony Luck <tony.luck@intel.com>
Subject: [PATCH v2 4/7] x86/resctrl: Add code to setup monitoring at L3 or NODE scope
Date: Wed, 21 Jun 2023 10:40:03 -0700
Message-Id: <20230621174006.42533-5-tony.luck@intel.com>
In-Reply-To: <20230621174006.42533-1-tony.luck@intel.com>

When Sub-NUMA Cluster mode is enabled (snc_ways > 1), use
RDT_RESOURCE_NODE instead of RDT_RESOURCE_L3 for all monitoring
operations.

The mon_scale and num_rmid values from CPUID(0xf,0x1) (EBX, ECX) must
be scaled down by the number of Sub-NUMA Cluster nodes.

A subsequent change will detect Sub-NUMA Cluster mode and set
"snc_ways". For now it is set to one (meaning each L3 cache spans one
node).
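A worked example of the scaling (numbers assumed for illustration; the
400-RMID case matches the example given with the final patch in this
series): on a system whose CPUID(0xf,0x1) reports 400 RMIDs and an
occupancy scale factor of 64, configuring two SNC nodes per L3 cache
gives:

	snc_ways  = 2
	num_rmid  = (max_rmid + 1) / snc_ways = 400 / 2 = 200
	mon_scale = occ_scale / snc_ways      = 64 / 2  = 32

so each node-scoped monitoring domain has 200 RMIDs available.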
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/resctrl/internal.h | 7 +++++++
 arch/x86/kernel/cpu/resctrl/core.c     | 7 ++++++-
 arch/x86/kernel/cpu/resctrl/monitor.c  | 4 ++--
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 2 +-
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 243017096ddf..38bac0062c82 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -430,6 +430,8 @@ DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
 
 extern struct dentry *debugfs_resctrl;
 
+extern int snc_ways;
+
 enum resctrl_res_level {
 	RDT_RESOURCE_L3,
 	RDT_RESOURCE_L2,
@@ -447,6 +449,11 @@ enum resctrl_scope {
 	SCOPE_NODE,
 };
 
+static inline int get_mbm_res_level(void)
+{
+	return snc_ways > 1 ? RDT_RESOURCE_NODE : RDT_RESOURCE_L3;
+}
+
 static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res)
 {
 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res);
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index e4bd3072927c..6fe9f87d4403 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -48,6 +48,11 @@ int max_name_width, max_data_width;
  */
 bool rdt_alloc_capable;
 
+/*
+ * How many Sub-NUMA Cluster nodes share a single L3 cache
+ */
+int snc_ways = 1;
+
 static void
 mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
 		struct rdt_resource *r);
@@ -831,7 +836,7 @@ static __init bool get_rdt_alloc_resources(void)
 
 static __init bool get_rdt_mon_resources(void)
 {
-	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	struct rdt_resource *r = &rdt_resources_all[get_mbm_res_level()].r_resctrl;
 
 	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
 		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 9be6ffdd01ae..da3f36212898 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -787,8 +787,8 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
 	int ret;
 
 	resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024;
-	hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale;
-	r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
+	hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_ways;
+	r->num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_ways;
 	hw_res->mbm_width = MBM_CNTR_WIDTH_BASE;
 
 	if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 418658f0a9ad..d037f3da9e55 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2524,7 +2524,7 @@ static int rdt_get_tree(struct fs_context *fc)
 	static_branch_enable_cpuslocked(&rdt_enable_key);
 
 	if (is_mbm_enabled()) {
-		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+		r = &rdt_resources_all[get_mbm_res_level()].r_resctrl;
 		list_for_each_entry(dom, &r->domains, list)
 			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
 	}
-- 
2.40.1
From: Tony Luck <tony.luck@intel.com>
Subject: [PATCH v2 5/7] x86/resctrl: Add package scoped resource
Date: Wed, 21 Jun 2023 10:40:04 -0700
Message-Id: <20230621174006.42533-6-tony.luck@intel.com>
In-Reply-To: <20230621174006.42533-1-tony.luck@intel.com>

Some Intel features require setting a package scoped model specific
register. Add a new resource that builds domains for each package.
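For orientation (editorial note, not part of the patch): the new
RDT_RESOURCE_PKG resource is neither alloc- nor mon-capable. Its
domains exist so that per-package actions, such as the SNC MSR writes
added by the final patch in this series, run as CPUs of each package
come online and go offline. Assuming a two-socket system:

	scope = SCOPE_PKG  ->  domains follow topology_physical_package_id(): {0, 1}

regardless of how many SNC nodes or L3 caches each package contains.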
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/resctrl.h                |  1 +
 arch/x86/kernel/cpu/resctrl/internal.h |  6 ++++++
 arch/x86/kernel/cpu/resctrl/core.c     | 23 +++++++++++++++++++----
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 25051daa6655..f504f6263fec 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -167,6 +167,7 @@ struct rdt_resource {
 	int			rid;
 	bool			alloc_capable;
 	bool			mon_capable;
+	bool			pkg_actions;
 	int			num_rmid;
 	int			scope;
 	struct resctrl_cache	cache;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 38bac0062c82..e51a5004be77 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -438,6 +438,7 @@ enum resctrl_res_level {
 	RDT_RESOURCE_MBA,
 	RDT_RESOURCE_SMBA,
 	RDT_RESOURCE_NODE,
+	RDT_RESOURCE_PKG,
 
 	/* Must be the last */
 	RDT_NUM_RESOURCES,
@@ -447,6 +448,7 @@ enum resctrl_scope {
 	SCOPE_L2_CACHE = 2,
 	SCOPE_L3_CACHE = 3,
 	SCOPE_NODE,
+	SCOPE_PKG,
 };
 
 static inline int get_mbm_res_level(void)
@@ -482,6 +484,10 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
 	for_each_rdt_resource(r)					      \
 		if (r->alloc_capable || r->mon_capable)
 
+#define for_each_domain_needed_rdt_resource(r)				      \
+	for_each_rdt_resource(r)					      \
+		if (r->alloc_capable || r->mon_capable || r->pkg_actions)
+
 #define for_each_alloc_capable_rdt_resource(r)				      \
 	for_each_rdt_resource(r)					      \
 		if (r->alloc_capable)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 6fe9f87d4403..af3be3c2db96 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -127,6 +127,16 @@ struct rdt_hw_resource rdt_resources_all[] = {
 			.fflags			= 0,
 		},
 	},
+	[RDT_RESOURCE_PKG] =
+	{
+		.r_resctrl = {
+			.rid			= RDT_RESOURCE_PKG,
+			.name			= "PKG",
+			.scope			= SCOPE_PKG,
+			.domains		= domain_init(RDT_RESOURCE_PKG),
+			.fflags			= 0,
+		},
+	},
 };
 
 /*
@@ -504,9 +514,14 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
 
 static int get_domain_id(int cpu, enum resctrl_scope scope)
 {
-	if (scope == SCOPE_NODE)
+	switch (scope) {
+	case SCOPE_NODE:
 		return cpu_to_node(cpu);
-	return get_cpu_cacheinfo_id(cpu, scope);
+	case SCOPE_PKG:
+		return topology_physical_package_id(cpu);
+	default:
+		return get_cpu_cacheinfo_id(cpu, scope);
+	}
 }
 
 /*
@@ -630,7 +645,7 @@ static int resctrl_online_cpu(unsigned int cpu)
 	struct rdt_resource *r;
 
 	mutex_lock(&rdtgroup_mutex);
-	for_each_capable_rdt_resource(r)
+	for_each_domain_needed_rdt_resource(r)
 		domain_add_cpu(cpu, r);
 	/* The cpu is set in default rdtgroup after online. */
 	cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
@@ -657,7 +672,7 @@ static int resctrl_offline_cpu(unsigned int cpu)
 	struct rdt_resource *r;
 
 	mutex_lock(&rdtgroup_mutex);
-	for_each_capable_rdt_resource(r)
+	for_each_domain_needed_rdt_resource(r)
 		domain_remove_cpu(cpu, r);
 	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
 		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
-- 
2.40.1

From: Tony Luck <tony.luck@intel.com>
Subject: [PATCH v2 6/7] x86/resctrl: Update documentation with Sub-NUMA Cluster changes
Date: Wed, 21 Jun 2023 10:40:05 -0700
Message-Id: <20230621174006.42533-7-tony.luck@intel.com>
In-Reply-To: <20230621174006.42533-1-tony.luck@intel.com>

With Sub-NUMA Cluster mode enabled, the scope of monitoring resources
is per-node instead of per-L3 cache. Suffixes of directories with "L3"
in their name refer to Sub-NUMA nodes instead of L3 cache ids.
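For illustration, a hypothetical listing (assuming a two-socket system
with two SNC nodes per socket; names follow the convention described
in the patch below):

	/sys/fs/resctrl/mon_data/mon_L3_00	# SNC node 0 (socket 0)
	/sys/fs/resctrl/mon_data/mon_L3_01	# SNC node 1 (socket 0)
	/sys/fs/resctrl/mon_data/mon_L3_02	# SNC node 2 (socket 1)
	/sys/fs/resctrl/mon_data/mon_L3_03	# SNC node 3 (socket 1)

With SNC disabled, the same system would show only "mon_L3_00" and
"mon_L3_01", with the suffix naming the L3 cache id.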
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 Documentation/arch/x86/resctrl.rst | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst
index cb05d90111b4..13fc9fa664fc 100644
--- a/Documentation/arch/x86/resctrl.rst
+++ b/Documentation/arch/x86/resctrl.rst
@@ -345,9 +345,13 @@ When control is enabled all CTRL_MON groups will also contain:
 When monitoring is enabled all MON groups will also contain:
 
 "mon_data":
-	This contains a set of files organized by L3 domain and by
-	RDT event. E.g. on a system with two L3 domains there will
-	be subdirectories "mon_L3_00" and "mon_L3_01". Each of these
+	This contains a set of files organized by L3 domain or by NUMA
+	node (depending on whether Sub-NUMA Cluster (SNC) mode is disabled
+	or enabled respectively) and by RDT event. E.g. on a system with
+	SNC mode disabled and two L3 domains there will be subdirectories
+	"mon_L3_00" and "mon_L3_01". The numerical suffix refers to the
+	L3 cache id. With SNC enabled the directory names are the same,
+	but the numerical suffix refers to the node id. Each of these
 	directories have one file per event (e.g. "llc_occupancy",
 	"mbm_total_bytes", and "mbm_local_bytes"). In a MON group these
 	files provide a read out of the current value of the event for
-- 
2.40.1

From: Tony Luck <tony.luck@intel.com>
Subject: [PATCH v2 7/7] x86/resctrl: Determine if Sub-NUMA Cluster is enabled and initialize
Date: Wed, 21 Jun 2023 10:40:06 -0700
Message-Id: <20230621174006.42533-8-tony.luck@intel.com>
In-Reply-To: <20230621174006.42533-1-tony.luck@intel.com>

There isn't a simple hardware enumeration to indicate to software that
a system is running with Sub-NUMA Cluster enabled. Compare the number
of NUMA nodes with the number of L3 caches to calculate the number of
Sub-NUMA nodes per L3 cache.

When Sub-NUMA Cluster mode is enabled in BIOS setup, the RMID counters
are distributed equally between the SNC nodes within each socket. E.g.
if there are 400 RMID counters and the system is configured with two
SNC nodes per socket, then RMID counters 0..199 are used on SNC node 0
of each socket and RMID counters 200..399 on SNC node 1.

A model specific MSR (0xca0) can change the configuration of the RMIDs
when SNC mode is enabled. The MSR controls the interpretation of the
RMID field in the IA32_PQR_ASSOC MSR so that the appropriate hardware
counters within the SNC node are updated.

Also initialize a per-cpu RMID offset value. Use this to calculate the
value to write to the IA32_QM_EVTSEL MSR when reading RMID event
values.

N.B. this works well for well-behaved NUMA applications that access
memory predominantly from the local memory node. For applications that
access memory across multiple nodes it may be necessary for the user
to read counters for all SNC nodes on a socket and add the values to
get the actual LLC occupancy or memory bandwidth. Perhaps this isn't
all that different from applications that span multiple sockets in a
legacy system.
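A worked example of the per-CPU offset (illustrative; the numbers
follow the 400-RMID example above): with snc_ways = 2 and num_rmid
scaled down to 200 per node,

	rmid_offset = (cpu_to_node(cpu) % snc_ways) * r->num_rmid
	CPU in node 0:  (0 % 2) * 200 = 0
	CPU in node 1:  (1 % 2) * 200 = 200

so reading logical RMID 5 on a CPU in node 1 programs IA32_QM_EVTSEL
with physical RMID 205, selecting the counter range assigned to that
SNC node.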
Signed-off-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Peter Newman
Tested-by: Peter Newman
---
 arch/x86/include/asm/resctrl.h        |  2 +
 arch/x86/kernel/cpu/resctrl/core.c    | 99 ++++++++++++++++++++++++++-
 arch/x86/kernel/cpu/resctrl/monitor.c |  2 +-
 3 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 255a78d9d906..f95e69bacc65 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -35,6 +35,8 @@ DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
 DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
 DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
 
+DECLARE_PER_CPU(int, rmid_offset);
+
 /*
  * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
  *
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index af3be3c2db96..869cfb46e8e4 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -16,11 +16,14 @@
 
 #define pr_fmt(fmt)	"resctrl: " fmt
 
+#include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/cacheinfo.h>
 #include <linux/cpuhotplug.h>
+#include <linux/mod_devicetable.h>
 
+#include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 #include <asm/resctrl.h>
 #include "internal.h"
@@ -524,6 +527,39 @@ static int get_domain_id(int cpu, enum resctrl_scope scope)
 	}
 }
 
+DEFINE_PER_CPU(int, rmid_offset);
+
+static void set_per_cpu_rmid_offset(int cpu, struct rdt_resource *r)
+{
+	this_cpu_write(rmid_offset, (cpu_to_node(cpu) % snc_ways) * r->num_rmid);
+}
+
+/*
+ * This MSR provides for configuration of RMIDs on Sub-NUMA Cluster
+ * systems.
+ * Bit0 = 1 (default) For legacy configuration
+ * Bit0 = 0 RMIDs are divided evenly between SNC nodes.
+ */
+#define MSR_RMID_SNC_CONFIG	0xCA0
+
+static void snc_add_pkg(void)
+{
+	u64 msrval;
+
+	rdmsrl(MSR_RMID_SNC_CONFIG, msrval);
+	msrval &= ~BIT_ULL(0);
+	wrmsrl(MSR_RMID_SNC_CONFIG, msrval);
+}
+
+static void snc_remove_pkg(void)
+{
+	u64 msrval;
+
+	rdmsrl(MSR_RMID_SNC_CONFIG, msrval);
+	msrval |= BIT_ULL(0);
+	wrmsrl(MSR_RMID_SNC_CONFIG, msrval);
+}
+
 /*
  * domain_add_cpu - Add a cpu to a resource's domain list.
  *
@@ -555,6 +591,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 		cpumask_set_cpu(cpu, &d->cpu_mask);
 		if (r->cache.arch_has_per_cpu_cfg)
 			rdt_domain_reconfigure_cdp(r);
+		if (r->mon_capable)
+			set_per_cpu_rmid_offset(cpu, r);
 		return;
 	}
 
@@ -573,11 +611,17 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 		return;
 	}
 
-	if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
-		domain_free(hw_dom);
-		return;
+	if (r->mon_capable) {
+		if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
+			domain_free(hw_dom);
+			return;
+		}
+		set_per_cpu_rmid_offset(cpu, r);
 	}
 
+	if (r->pkg_actions)
+		snc_add_pkg();
+
 	list_add_tail(&d->list, add_pos);
 
 	err = resctrl_online_domain(r, d);
@@ -613,6 +657,9 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 			d->plr->d = NULL;
 		domain_free(hw_dom);
 
+		if (r->pkg_actions)
+			snc_remove_pkg();
+
 		return;
 	}
 
@@ -899,11 +946,57 @@ static __init bool get_rdt_resources(void)
 	return (rdt_mon_capable || rdt_alloc_capable);
 }
 
+static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
+	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 0),
+	X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, 0),
+	{}
+};
+
+/*
+ * There isn't a simple enumeration bit to show whether SNC mode
+ * is enabled. Look at the ratio of number of NUMA nodes to the
+ * number of distinct L3 caches. Take care to skip memory-only nodes.
+ */
+static __init int find_snc_ways(void)
+{
+	unsigned long *node_caches;
+	int mem_only_nodes = 0;
+	int cpu, node, ret;
+
+	if (!x86_match_cpu(snc_cpu_ids))
+		return 1;
+
+	node_caches = kcalloc(BITS_TO_LONGS(nr_node_ids), sizeof(*node_caches), GFP_KERNEL);
+	if (!node_caches)
+		return 1;
+
+	cpus_read_lock();
+	for_each_node(node) {
+		cpu = cpumask_first(cpumask_of_node(node));
+		if (cpu < nr_cpu_ids)
+			set_bit(get_cpu_cacheinfo_id(cpu, 3), node_caches);
+		else
+			mem_only_nodes++;
+	}
+	cpus_read_unlock();
+
+	ret = (nr_node_ids - mem_only_nodes) / bitmap_weight(node_caches, nr_node_ids);
+	kfree(node_caches);
+
+	if (ret > 1)
+		rdt_resources_all[RDT_RESOURCE_PKG].r_resctrl.pkg_actions = true;
+
+	return ret;
+}
+
 static __init void rdt_init_res_defs_intel(void)
 {
 	struct rdt_hw_resource *hw_res;
 	struct rdt_resource *r;
 
+	snc_ways = find_snc_ways();
+
 	for_each_rdt_resource(r) {
 		hw_res = resctrl_to_arch_res(r);
 
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index da3f36212898..74db99d299e1 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -160,7 +160,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
 	 * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
 	 * are error bits.
 	 */
-	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid + this_cpu_read(rmid_offset));
 	rdmsrl(MSR_IA32_QM_CTR, msr_val);
 
 	if (msr_val & RMID_VAL_ERROR)
-- 
2.40.1