[RFC PATCH 05/13] pci: pcsc: control the cache via sysfs and kernel params

Evangelos Petrongonas posted 13 patches 2 months, 2 weeks ago
[RFC PATCH 05/13] pci: pcsc: control the cache via sysfs and kernel params
Posted by Evangelos Petrongonas 2 months, 2 weeks ago
Add kernel parameters and runtime control mechanisms for the PCSC.

A new kernel parameter 'pcsc_enabled' allows enabling or disabling
the cache at boot time. The parameter defaults to disabled.

A sysfs interface at /sys/bus/pci/pcsc/enabled provides:
- Read access to query current cache status (1=enabled, 0=disabled)
- Write access to dynamically enable/disable the cache at runtime

Signed-off-by: Evangelos Petrongonas <epetron@amazon.de>
---
 Documentation/ABI/testing/sysfs-bus-pci-pcsc  | 20 ++++
 .../admin-guide/kernel-parameters.txt         |  3 +
 drivers/pci/pcsc.c                            | 93 ++++++++++++++++++-
 3 files changed, 114 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-pci-pcsc

diff --git a/Documentation/ABI/testing/sysfs-bus-pci-pcsc b/Documentation/ABI/testing/sysfs-bus-pci-pcsc
new file mode 100644
index 000000000000..ee92bf087816
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-pcsc
@@ -0,0 +1,20 @@
+PCI Configuration Space Cache (PCSC)
+-------------------------------------
+
+The PCI Configuration Space Cache (PCSC) is a transparent caching layer
+that intercepts configuration space operations to reduce hardware access
+overhead. This subsystem addresses performance bottlenecks in PCI
+configuration space accesses, particularly in virtualization
+environments with high-density SR-IOV deployments where repeated
+enumeration of Virtual Functions creates substantial delays.
+
+What:			/sys/bus/pci/pcsc/enabled
+Date:			September 2025
+Contact:		Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+				PCI Configuration Space Cache (PCSC) is a subsystem that
+				caches accesses to the PCI configuration space of PCI
+				functions. When this file contains "1", the kernel
+				uses the cache; when it contains "0", the system
+				bypasses it. This setting can also be controlled at
+				boot time via the "pcsc_enabled" kernel parameter.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 747a55abf494..08c7a13f107c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5036,6 +5036,9 @@
 
 	pcmv=		[HW,PCMCIA] BadgePAD 4
 
+	pcsc_enabled=	[PCSC] Enable the use of the PCI Configuration Space
+			Cache (PCSC). Format: <bool>. Default: disabled.
+
 	pd_ignore_unused
 			[PM]
 			Keep all power-domains already enabled by bootloader on,
diff --git a/drivers/pci/pcsc.c b/drivers/pci/pcsc.c
index 343f8b03831a..44d842733230 100644
--- a/drivers/pci/pcsc.c
+++ b/drivers/pci/pcsc.c
@@ -16,13 +16,21 @@
 
 #include <linux/atomic.h>
 #include <linux/pcsc.h>
+#include <linux/sysfs.h>
+
+static bool pcsc_enabled;
+static int __init pcsc_enabled_setup(char *str)
+{
+	return kstrtobool(str, &pcsc_enabled) == 0;
+}
+__setup("pcsc_enabled=", pcsc_enabled_setup);
 
 static bool pcsc_initialised;
 static atomic_t num_nodes = ATOMIC_INIT(0);
 
 inline bool pcsc_is_initialised(void)
 {
-	return pcsc_initialised;
+	return pcsc_initialised && pcsc_enabled;
 }
 
 static int pcsc_add_bus(struct pci_bus *bus)
@@ -899,14 +907,95 @@ static struct notifier_block pcsc_bus_nb = {
 	.notifier_call = pcsc_bus_notify,
 };
 
+static ssize_t pcsc_enabled_show(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", pcsc_enabled);
+}
+
+static ssize_t pcsc_enabled_store(struct kobject *kobj,
+				  struct kobj_attribute *attr, const char *buf,
+				  size_t count)
+{
+	bool new_value;
+	int ret;
+
+	ret = kstrtobool(buf, &new_value);
+	if (ret < 0)
+		return ret;
+
+	pcsc_enabled = new_value;
+	return count;
+}
+
+static struct kobj_attribute pcsc_enabled_attribute =
+	__ATTR(enabled, 0644, pcsc_enabled_show, pcsc_enabled_store);
+
+static struct attribute *pcsc_attrs[] = {
+	&pcsc_enabled_attribute.attr,
+	NULL,
+};
+
+static struct attribute_group pcsc_attr_group = {
+	.attrs = pcsc_attrs,
+};
+
+static struct kobject *pcsc_kobj;
+
+static void pcsc_create_sysfs(void)
+{
+	struct kset *pci_bus_kset;
+	int ret;
+
+	if (pcsc_kobj)
+		return; /* Already created */
+
+	pci_bus_kset = bus_get_kset(&pci_bus_type);
+	if (!pci_bus_kset) {
+		/* PCI bus kset not ready yet, will be retried later */
+		return;
+	}
+
+	pcsc_kobj = kobject_create_and_add("pcsc", &pci_bus_kset->kobj);
+	if (!pcsc_kobj) {
+		pr_err("Failed to create sysfs kobject\n");
+		return;
+	}
+
+	ret = sysfs_create_group(pcsc_kobj, &pcsc_attr_group);
+	if (ret) {
+		pr_err("Failed to create sysfs group\n");
+		kobject_put(pcsc_kobj);
+		pcsc_kobj = NULL;
+		return;
+	}
+}
+
 static int __init pcsc_init(void)
 {
 	bus_register_notifier(&pci_bus_type, &pcsc_bus_nb);
 
+	/* Try to create sysfs entry, but don't fail if PCI bus isn't ready yet */
+	pcsc_create_sysfs();
+
 	pcsc_initialised = true;
-	pr_info("initialised\n");
+	pr_info("initialised (enabled=%d)\n", pcsc_enabled);
 
 	return 0;
 }
 
+/* Late initcall to retry sysfs creation if it failed during core_initcall */
+static int __init pcsc_sysfs_init(void)
+{
+	pcsc_create_sysfs();
+	return 0;
+}
+
 core_initcall(pcsc_init);
+
+/*
+ * The PCI subsystem is initialised later, therefore we need to add
+ * our sysfs entries later. This is done to avoid modifying the sysfs
+ * creation of the core pci driver.
+ */
+late_initcall(pcsc_sysfs_init);
-- 
2.47.3




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597
Re: [RFC PATCH 05/13] pci: pcsc: control the cache via sysfs and kernel params
Posted by Jonathan Cameron 2 months, 1 week ago
On Fri, 3 Oct 2025 09:00:41 +0000
Evangelos Petrongonas <epetron@amazon.de> wrote:

> Add kernel parameters and runtime control mechanisms for the PCSC
> 
> A new kernel parameter 'pcsc_enabled' allows enabling or disabling
> the cache at boot time. The parameter defaults to disabled.
> 
> A sysfs interface at /sys/bus/pci/pcsc/enabled provides:
> - Read access to query current cache status (1=enabled, 0=disabled)
> - Write access to dynamically enable/disable the cache at runtime
> 
> Signed-off-by: Evangelos Petrongonas <epetron@amazon.de>
> ---
>  Documentation/ABI/testing/sysfs-bus-pci-pcsc  | 20 ++++
>  .../admin-guide/kernel-parameters.txt         |  3 +
>  drivers/pci/pcsc.c                            | 93 ++++++++++++++++++-
>  3 files changed, 114 insertions(+), 2 deletions(-)
>  create mode 100644 Documentation/ABI/testing/sysfs-bus-pci-pcsc
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-pci-pcsc b/Documentation/ABI/testing/sysfs-bus-pci-pcsc
> new file mode 100644
> index 000000000000..ee92bf087816
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-bus-pci-pcsc
> @@ -0,0 +1,20 @@
> +PCI Configuration Space Cache (PCSC)
> +-------------------------------------
> +
> +The PCI Configuration Space Cache (PCSC) is a transparent caching layer
> +that intercepts configuration space operations to reduce hardware access
> +overhead. This subsystem addresses performance bottlenecks in PCI
> +configuration space accesses, particularly in virtualization
> +environments with high-density SR-IOV deployments where repeated
> +enumeration of Virtual Functions creates substantial delays.
> +
> +What:			/sys/bus/pci/pcsc/enabled
> +Date:			September 2025
> +Contact:		Linux PCI developers <linux-pci@vger.kernel.org>
> +Description:
> +				PCI Configuration Space Cache (PCSC) is a subsystem that
> +				caches accesses to the PCI configuration space of PCI
> +				functions. When this file contains the "1", the kernel
> +				is utilizing the cache, while when on "0" the
> +				system bypasses it. This setting can also be controlled
> +parameter.
indent issue on this last line.

Excellent to see someone remembering the ABI docs for once in an RFC!


> diff --git a/drivers/pci/pcsc.c b/drivers/pci/pcsc.c
> index 343f8b03831a..44d842733230 100644
> --- a/drivers/pci/pcsc.c
> +++ b/drivers/pci/pcsc.c
> +static struct kobj_attribute pcsc_enabled_attribute =
> +	__ATTR(enabled, 0644, pcsc_enabled_show, pcsc_enabled_store);
> +
> +static struct attribute *pcsc_attrs[] = {
> +	&pcsc_enabled_attribute.attr,
> +	NULL,
Trivial but no need for that trailing comma after the NULL terminator.
We don't want it to be easy to accidentally add something after that.

> +};
> +
> +static struct attribute_group pcsc_attr_group = {
> +	.attrs = pcsc_attrs,
> +};
> +
> +static struct kobject *pcsc_kobj;
> +
> +static void pcsc_create_sysfs(void)
> +{
> +	struct kset *pci_bus_kset;
> +	int ret;
> +
> +	if (pcsc_kobj)
> +		return; /* Already created */

Why do we need the kobject? Can't we make this a group on the
pci_bus_kset->kobj with a group name of pcsc?

(I see you have a comment on this next bit later in here)
Even better if we can arrange for it not to be added after that is
created but just be a group on it in the first place.
That is make it a group in bus_groups of the pci_bus_type alongside
the one with bus_attr_rescan.attr in it.

That should mean you don't need the two attempts to set it up that
you have currently.

> +
> +	pci_bus_kset = bus_get_kset(&pci_bus_type);
> +	if (!pci_bus_kset) {
> +		/* PCI bus kset not ready yet, will be retried later */
> +		return;
> +	}
> +

> +
> +/*
> + * The PCI subsystem is initialised later, therefore we need to add
> + * our sysfs entries later. This is done to avoid modifying the sysfs
> + * creation of the core pci driver.
Weighed against the complexity and races, I think I'd rather you did modify that.

> + */
> +late_initcall(pcsc_sysfs_init);