[PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC

Yushan Wang posted 3 patches 5 days, 22 hours ago
There is a newer version of this series
[PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Yushan Wang 5 days, 22 hours ago
The driver will create a file of `/dev/hisi_l3c` on init, mmap
operations to it will allocate a memory region that is guaranteed to be
placed in L3 cache.

The driver also provides unmap() to deallocated the locked memory.

The driver also provides an ioctl interface for user to get cache lock
information, such as lock restrictions and locked sizes.

Signed-off-by: Yushan Wang <wangyushan12@huawei.com>
---
 .../userspace-api/ioctl/ioctl-number.rst      |   1 +
 MAINTAINERS                                   |   6 +
 drivers/soc/hisilicon/Kconfig                 |  11 +
 drivers/soc/hisilicon/Makefile                |   2 +
 drivers/soc/hisilicon/hisi_soc_l3c.c          | 357 ++++++++++++++++++
 include/uapi/misc/hisi_l3c.h                  |  28 ++
 6 files changed, 405 insertions(+)
 create mode 100644 drivers/soc/hisilicon/hisi_soc_l3c.c
 create mode 100644 include/uapi/misc/hisi_l3c.h

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 7232b3544cec..439c5bcbfa94 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -387,6 +387,7 @@ Code  Seq#    Include File                                             Comments
                                                                        <mailto:linux-hyperv@vger.kernel.org>
 0xBA  00-0F  uapi/linux/liveupdate.h                                   Pasha Tatashin
                                                                        <mailto:pasha.tatashin@soleen.com>
+0xBB  all    uapi/misc/hisi_soc_cache.h                                HiSilicon SoC cache driver
 0xC0  00-0F  linux/usb/iowarrior.h
 0xCA  00-0F  uapi/misc/cxl.h                                           Dead since 6.15
 0xCA  10-2F  uapi/misc/ocxl.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 0efa8cc6775b..247df6e69c10 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11375,6 +11375,12 @@ F:	Documentation/ABI/testing/sysfs-devices-platform-kunpeng_hccs
 F:	drivers/soc/hisilicon/kunpeng_hccs.c
 F:	drivers/soc/hisilicon/kunpeng_hccs.h
 
+HISILICON SOC L3C DRIVER
+M:	Yushan Wang <wangyushan12@huawei.com>
+S:	Maintained
+F:	drivers/soc/hisilicon/hisi_soc_l3c.c
+F:	include/uapi/misc/hisi_l3c.h
+
 HISILICON LPC BUS DRIVER
 M:	Jay Fang <f.fangjian@huawei.com>
 S:	Maintained
diff --git a/drivers/soc/hisilicon/Kconfig b/drivers/soc/hisilicon/Kconfig
index 6d7c244d2e78..8f4202e2d8d9 100644
--- a/drivers/soc/hisilicon/Kconfig
+++ b/drivers/soc/hisilicon/Kconfig
@@ -21,4 +21,15 @@ config KUNPENG_HCCS
 	  health status and port information of HCCS, or reducing system
 	  power consumption on Kunpeng SoC.
 
+config HISI_SOC_L3C
+	bool "HiSilicon L3 Cache device driver"
+	depends on ACPI
+	depends on ARM64 || COMPILE_TEST
+	help
+	  This driver provides the functions to lock L3 cache entries from
+	  being evicted for better performance.
+
+	  This driver can be built as a module. If so, the module will be
+	  called hisi_soc_l3c.
+
 endmenu
diff --git a/drivers/soc/hisilicon/Makefile b/drivers/soc/hisilicon/Makefile
index 226e747e70d6..16ff2c73c4a5 100644
--- a/drivers/soc/hisilicon/Makefile
+++ b/drivers/soc/hisilicon/Makefile
@@ -1,2 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_KUNPENG_HCCS)	+= kunpeng_hccs.o
+
+obj-$(CONFIG_HISI_SOC_L3C)	+= hisi_soc_l3c.o
diff --git a/drivers/soc/hisilicon/hisi_soc_l3c.c b/drivers/soc/hisilicon/hisi_soc_l3c.c
new file mode 100644
index 000000000000..b6f6d5bdd4e5
--- /dev/null
+++ b/drivers/soc/hisilicon/hisi_soc_l3c.c
@@ -0,0 +1,357 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for HiSilicon L3 cache.
+ *
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd.
+ * Author: Yushan Wang <wangyushan12@huawei.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cleanup.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+
+#include <uapi/misc/hisi_l3c.h>
+
+#define to_hisi_l3c(p) container_of((p), struct hisi_l3c, comp)
+
+/**
+ * struct hisi_soc_comp - Struct of HiSilicon SoC cache components.
+ *
+ * @node: list node of hisi_soc_comp_list.
+ * @ops: possible operations a component may perform.
+ * @affinity_mask: cpus that associate with this component.
+ * @private: component specific data.
+ */
+struct hisi_soc_comp {
+	struct list_head node;
+	struct hisi_soc_comp_ops *ops;
+	cpumask_t affinity_mask;
+	void *private;
+};
+
+/**
+ * struct hisi_soc_comp_ops - Callbacks for SoC cache drivers to handle
+ *			      operation requests.
+ *
+ * @do_lock: lock certain region of L3 cache from being evicted.
+ * @poll_lock_done: check if the lock operation has succeeded.
+ * @do_unlock: unlock the locked region of L3 cache back to normal.
+ * @poll_unlock_done: check if the unlock operation has succeeded.
+	      operation requests.
+ *
+ * Operations are decoupled into two phases so that framework does not have
+ * to wait for one operation to finish before calling the next when multiple
+ * hardwares onboard.
+ *
+ * Implementers must implement the functions in pairs.  Implementation should
+ * return -EBUSY when:
+ * - insufficient resources are available to perform the operation.
+ * - previously raised operation is not finished.
+ * - new operations (do_lock(), do_unlock() etc.) to the same address
+ *   before corresponding done functions being called.
+ */
+struct hisi_soc_comp_ops {
+	int (*do_lock)(struct hisi_soc_comp *comp, phys_addr_t addr, size_t size);
+	int (*poll_lock_done)(struct hisi_soc_comp *comp, phys_addr_t addr, size_t size);
+	int (*do_unlock)(struct hisi_soc_comp *comp, phys_addr_t addr);
+	int (*poll_unlock_done)(struct hisi_soc_comp *comp, phys_addr_t addr);
+};
+
+struct hisi_l3c_lock_region {
+	/* physical address of the arena allocated for aligned address */
+	unsigned long arena_start;
+	/* VMA region of locked memory for future release */
+	unsigned long vm_start;
+	unsigned long vm_end;
+	phys_addr_t addr;
+	size_t size;
+	/* Return value of cache lock call */
+	int status;
+	int cpu;
+};
+
+struct hisi_soc_comp_list {
+	struct list_head node;
+	/* protects list of HiSilicon SoC cache components */
+	spinlock_t lock;
+};
+
+static struct hisi_soc_comp_list l3c_devs;
+
+static int hisi_l3c_lock(int cpu, phys_addr_t addr, size_t size)
+{
+	struct hisi_soc_comp *comp;
+	int ret;
+
+	guard(spinlock)(&l3c_devs.lock);
+
+	/* When there is no instance onboard, no locked memory is available. */
+	if (list_empty(&l3c_devs.node))
+		return -ENOMEM;
+
+	/* Lock need to be performed on each channel of associated L3 cache. */
+	list_for_each_entry(comp, &l3c_devs.node, node) {
+		if (!cpumask_test_cpu(cpu, &comp->affinity_mask))
+			continue;
+		ret = comp->ops->do_lock(comp, addr, size);
+		if (ret)
+			return ret;
+	}
+
+	list_for_each_entry(comp, &l3c_devs.node, node) {
+		if (!cpumask_test_cpu(cpu, &comp->affinity_mask))
+			continue;
+		ret = comp->ops->poll_lock_done(comp, addr, size);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hisi_l3c_unlock(int cpu, phys_addr_t addr)
+{
+	struct hisi_soc_comp *comp;
+	int ret;
+
+	guard(spinlock)(&l3c_devs.lock);
+
+	if (list_empty(&l3c_devs.node))
+		return -EINVAL;
+
+	/* Perform unlock on each channel of associated L3 cache. */
+	list_for_each_entry(comp, &l3c_devs.node, node) {
+		if (!cpumask_test_cpu(cpu, &comp->affinity_mask))
+			continue;
+		ret = comp->ops->do_unlock(comp, addr);
+		if (ret)
+			return ret;
+	}
+
+	list_for_each_entry(comp, &l3c_devs.node, node) {
+		if (!cpumask_test_cpu(cpu, &comp->affinity_mask))
+			continue;
+		ret = comp->ops->poll_unlock_done(comp, addr);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void hisi_soc_comp_add(struct hisi_soc_comp *comp)
+{
+	guard(spinlock)(&l3c_devs.lock);
+	list_add_tail(&comp->node, &l3c_devs.node);
+}
+
+/* Null @comp means to delete all instances. */
+static int hisi_soc_comp_del(struct hisi_soc_comp *comp)
+{
+	struct hisi_soc_comp *entry, *tmp;
+
+	guard(spinlock)(&l3c_devs.lock);
+	list_for_each_entry_safe(entry, tmp, &l3c_devs.node, node) {
+		if (comp && comp != entry)
+			continue;
+
+		list_del(&entry->node);
+
+		/* Only continue to delete nodes when @comp is NULL */
+		if (comp)
+			break;
+	}
+
+	return 0;
+}
+
+static void hisi_l3c_vm_open(struct vm_area_struct *vma)
+{
+	struct hisi_l3c_lock_region *clr = vma->vm_private_data;
+
+	/*
+	 * Only perform cache lock when the vma passed in is created in
+	 * hisi_l3c_mmap.
+	 */
+	if (clr->vm_start != vma->vm_start || clr->vm_end != vma->vm_end)
+		return;
+
+	clr->status = hisi_l3c_lock(clr->cpu, clr->addr, clr->size);
+}
+
+static void hisi_l3c_vm_close(struct vm_area_struct *vma)
+{
+	struct hisi_l3c_lock_region *clr = vma->vm_private_data;
+	int order = get_order(clr->size);
+
+	/*
+	 * Only perform cache unlock when the vma passed in is created
+	 * in hisi_l3c_mmap.
+	 */
+	if (clr->vm_start != vma->vm_start || clr->vm_end != vma->vm_end)
+		return;
+
+	hisi_l3c_unlock(clr->cpu, clr->addr);
+
+	free_contig_range(PHYS_PFN(clr->addr), 1 << order);
+	kfree(clr);
+	vma->vm_private_data = NULL;
+}
+
+/* mremap operation is not supported for HiSilicon SoC cache. */
+static int hisi_l3c_vm_mremap(struct vm_area_struct *vma)
+{
+	struct hisi_l3c_lock_region *clr = vma->vm_private_data;
+
+	/*
+	 * vma region size will be changed as requested by mremap despite the
+	 * callback failure in this function.  Thus, change the vma region
+	 * stored in clr according to the parameters to verify if the pages
+	 * should be freed when unmapping.
+	 */
+	clr->vm_end = clr->vm_start + (vma->vm_end - vma->vm_start);
+	pr_err("mremap for HiSilicon SoC locked cache is not supported\n");
+
+	return -EOPNOTSUPP;
+}
+
+static int hisi_l3c_may_split(struct vm_area_struct *area, unsigned long addr)
+{
+	pr_err("HiSilicon SoC locked cache may not be split.\n");
+	return -EINVAL;
+}
+
+static const struct vm_operations_struct hisi_l3c_vm_ops = {
+	.open = hisi_l3c_vm_open,
+	.close = hisi_l3c_vm_close,
+	.may_split = hisi_l3c_may_split,
+	.mremap = hisi_l3c_vm_mremap,
+};
+
+static int hisi_l3c_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	unsigned long size = vma->vm_end - vma->vm_start;
+	int order = get_order(size);
+	unsigned long addr;
+	struct page *pg;
+	int ret;
+
+	struct hisi_l3c_lock_region *clr __free(kfree) = kzalloc(sizeof(*clr), GFP_KERNEL);
+	if (!clr)
+		return -ENOMEM;
+
+	/* Continuous physical memory is required for L3 cache lock. */
+	pg = alloc_contig_pages(1 << order, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
+				cpu_to_node(smp_processor_id()), NULL);
+	if (!pg)
+		return -ENOMEM;
+
+	addr = page_to_phys(pg);
+	*clr = (struct hisi_l3c_lock_region) {
+		.addr = addr,
+		.size = size,
+		.cpu = smp_processor_id(),
+		/* vma should not be moved, store here for validation */
+		.vm_start = vma->vm_start,
+		.vm_end = vma->vm_end,
+	};
+
+	vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND);
+	vma->vm_ops = &hisi_l3c_vm_ops;
+	vma->vm_private_data = clr;
+
+	hisi_l3c_vm_ops.open(vma);
+	if (clr->status) {
+		ret = clr->status;
+		goto out_page;
+	}
+
+	ret = remap_pfn_range(vma, vma->vm_start, PFN_DOWN(addr), size,
+			      vma->vm_page_prot);
+	if (ret)
+		goto out_page;
+
+	/* Save clr from being freed when lock succeeds. */
+	vma->vm_private_data = no_free_ptr(clr);
+
+	return 0;
+
+out_page:
+	free_contig_range(PHYS_PFN(clr->addr), 1 << order);
+	return ret;
+}
+
+static int hisi_l3c_lock_restriction(unsigned long arg)
+{
+	void __user *uarg = (void __user *)arg;
+	int cpu = smp_processor_id();
+	struct hisi_soc_comp *comp;
+
+	if (list_empty(&l3c_devs.node))
+		return -ENODEV;
+
+	list_for_each_entry(comp, &l3c_devs.node, node) {
+		if (!cpumask_test_cpu(cpu, &comp->affinity_mask))
+			continue;
+
+		if (!comp->private)
+			return -ENOENT;
+
+		if (copy_to_user(uarg, comp->private, sizeof(struct hisi_l3c_lock_info)))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+static long hisi_l3c_ioctl(struct file *file, u32 cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case HISI_L3C_LOCK_INFO:
+		return hisi_l3c_lock_restriction(arg);
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct file_operations l3c_dev_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = hisi_l3c_ioctl,
+	.mmap = hisi_l3c_mmap,
+};
+
+static struct miscdevice l3c_miscdev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "hisi_l3c",
+	.fops = &l3c_dev_fops,
+	.mode = 0600,
+};
+
+static int __init hisi_l3c_init(void)
+{
+	spin_lock_init(&l3c_devs.lock);
+	INIT_LIST_HEAD(&l3c_devs.node);
+
+	return misc_register(&l3c_miscdev);
+}
+module_init(hisi_l3c_init);
+
+static void __exit hisi_l3c_exit(void)
+{
+	misc_deregister(&l3c_miscdev);
+	hisi_soc_comp_del(NULL);
+}
+module_exit(hisi_l3c_exit);
+
+MODULE_DESCRIPTION("Hisilicon L3 Cache Driver");
+MODULE_AUTHOR("Yushan Wang <wangyushan12@huawei.com>");
+MODULE_LICENSE("GPL");
diff --git a/include/uapi/misc/hisi_l3c.h b/include/uapi/misc/hisi_l3c.h
new file mode 100644
index 000000000000..6555be18aa1c
--- /dev/null
+++ b/include/uapi/misc/hisi_l3c.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */
+/* Copyright (c) 2024 HiSilicon Technologies Co., Ltd. */
+#ifndef _UAPI_HISI_SOC_L3C_H
+#define _UAPI_HISI_SOC_L3C_H
+
+#include <linux/types.h>
+
+/* HISI_L3C_INFO: cache lock info for HiSilicon SoC */
+#define HISI_L3C_LOCK_INFO	_IOW(0xBB, 1, unsigned long)
+
+/**
+ * struct hisi_l3c_info - User data for hisi cache operates.
+ * @lock_region_num: available locked memory region on a L3C instance
+ * @lock_size: available size to be locked of the L3C instance.
+ * @address_alignment: if the L3C lock requires locked region physical start
+ *		       address to be aligned with the memory region size.
+ * @max_lock_size: maximum locked memory size on a L3C instance.
+ * @min_lock_size: minimum locked memory size on a L3C instance.
+ */
+struct hisi_l3c_lock_info {
+	__u32 lock_region_num;
+	__u64 lock_size;
+	__u8 address_alignment;
+	__u64 max_lock_size;
+	__u64 min_lock_size;
+};
+
+#endif
-- 
2.33.0
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by kernel test robot 5 days, 12 hours ago
Hi Yushan,

kernel test robot noticed the following build errors:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.19-rc8]
[cannot apply to soc/for-next next-20260203]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Yushan-Wang/soc-cache-L3-cache-driver-for-HiSilicon-SoC/20260204-004656
base:   linus/master
patch link:    https://lore.kernel.org/r/20260203161843.649417-2-wangyushan12%40huawei.com
patch subject: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
config: loongarch-randconfig-r131-20260204 (https://download.01.org/0day-ci/archive/20260204/202602041006.7Hb46Sl8-lkp@intel.com/config)
compiler: clang version 22.0.0git (https://github.com/llvm/llvm-project 9b8addffa70cee5b2acc5454712d9cf78ce45710)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260204/202602041006.7Hb46Sl8-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202602041006.7Hb46Sl8-lkp@intel.com/

All errors (new ones prefixed by >>):

>> drivers/soc/hisilicon/hisi_soc_l3c.c:251:7: error: call to undeclared function 'alloc_contig_pages'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     251 |         pg = alloc_contig_pages(1 << order, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
         |              ^
>> drivers/soc/hisilicon/hisi_soc_l3c.c:251:5: error: incompatible integer to pointer conversion assigning to 'struct page *' from 'int' [-Wint-conversion]
     251 |         pg = alloc_contig_pages(1 << order, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
         |            ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     252 |                                 cpu_to_node(smp_processor_id()), NULL);
         |                                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2 errors generated.


vim +/alloc_contig_pages +251 drivers/soc/hisilicon/hisi_soc_l3c.c

   237	
   238	static int hisi_l3c_mmap(struct file *file, struct vm_area_struct *vma)
   239	{
   240		unsigned long size = vma->vm_end - vma->vm_start;
   241		int order = get_order(size);
   242		unsigned long addr;
   243		struct page *pg;
   244		int ret;
   245	
   246		struct hisi_l3c_lock_region *clr __free(kfree) = kzalloc(sizeof(*clr), GFP_KERNEL);
   247		if (!clr)
   248			return -ENOMEM;
   249	
   250		/* Continuous physical memory is required for L3 cache lock. */
 > 251		pg = alloc_contig_pages(1 << order, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
   252					cpu_to_node(smp_processor_id()), NULL);
   253		if (!pg)
   254			return -ENOMEM;
   255	
   256		addr = page_to_phys(pg);
   257		*clr = (struct hisi_l3c_lock_region) {
   258			.addr = addr,
   259			.size = size,
   260			.cpu = smp_processor_id(),
   261			/* vma should not be moved, store here for validation */
   262			.vm_start = vma->vm_start,
   263			.vm_end = vma->vm_end,
   264		};
   265	
   266		vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND);
   267		vma->vm_ops = &hisi_l3c_vm_ops;
   268		vma->vm_private_data = clr;
   269	
   270		hisi_l3c_vm_ops.open(vma);
   271		if (clr->status) {
   272			ret = clr->status;
   273			goto out_page;
   274		}
   275	
   276		ret = remap_pfn_range(vma, vma->vm_start, PFN_DOWN(addr), size,
   277				      vma->vm_page_prot);
   278		if (ret)
   279			goto out_page;
   280	
   281		/* Save clr from being freed when lock succeeds. */
   282		vma->vm_private_data = no_free_ptr(clr);
   283	
   284		return 0;
   285	
   286	out_page:
   287		free_contig_range(PHYS_PFN(clr->addr), 1 << order);
   288		return ret;
   289	}
   290	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Linus Walleij 5 days, 15 hours ago
Hi Yushan,

thanks for your patch!

On Tue, Feb 3, 2026 at 5:18 PM Yushan Wang <wangyushan12@huawei.com> wrote:
>
> The driver will create a file of `/dev/hisi_l3c` on init, mmap
> operations to it will allocate a memory region that is guaranteed to be
> placed in L3 cache.
>
> The driver also provides unmap() to deallocated the locked memory.
>
> The driver also provides an ioctl interface for user to get cache lock
> information, such as lock restrictions and locked sizes.
>
> Signed-off-by: Yushan Wang <wangyushan12@huawei.com>

The commit message does not say *why* you are doing this?

> +config HISI_SOC_L3C
> +       bool "HiSilicon L3 Cache device driver"
> +       depends on ACPI
> +       depends on ARM64 || COMPILE_TEST
> +       help
> +         This driver provides the functions to lock L3 cache entries from
> +         being evicted for better performance.

Here is the reason though.

Things like this need to be CC to linux-mm@vger.kernel.org.

I don't see why userspace would be so well informed as to make decisions
about what should be locked in the L3 cache and not?

I see the memory hierarchy as any other hardware: a resource that is
allocated and arbitrated by the kernel.

The MM subsytem knows which memory is most cache hot.
Especially when you use DAMON DAMOS, which has the sole
purpose of executing actions like that. Here is a good YouTube.
https://www.youtube.com/watch?v=xKJO4kLTHOI

Shouldn't the MM subsystem be in charge of determining, locking
down and freeing up hot regions in L3 cache?

This looks more like userspace is going to determine that but
how exactly? By running DAMON? Then it's better to keep the
whole mechanism in the kernel where it belongs and let the
MM subsystem adapt locked L3 cache to the usage patterns.

Yours,
Linus Walleij
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Jonathan Cameron 5 days, 1 hour ago
On Wed, 4 Feb 2026 01:10:01 +0100
Linus Walleij <linusw@kernel.org> wrote:

> Hi Yushan,
> 
> thanks for your patch!
> 
> On Tue, Feb 3, 2026 at 5:18 PM Yushan Wang <wangyushan12@huawei.com> wrote:
> >
> > The driver will create a file of `/dev/hisi_l3c` on init, mmap
> > operations to it will allocate a memory region that is guaranteed to be
> > placed in L3 cache.
> >
> > The driver also provides unmap() to deallocated the locked memory.
> >
> > The driver also provides an ioctl interface for user to get cache lock
> > information, such as lock restrictions and locked sizes.
> >
> > Signed-off-by: Yushan Wang <wangyushan12@huawei.com>  
> 
> The commit message does not say *why* you are doing this?
> 
> > +config HISI_SOC_L3C
> > +       bool "HiSilicon L3 Cache device driver"
> > +       depends on ACPI
> > +       depends on ARM64 || COMPILE_TEST
> > +       help
> > +         This driver provides the functions to lock L3 cache entries from
> > +         being evicted for better performance.  
> 
> Here is the reason though.
> 
> Things like this need to be CC to linux-mm@vger.kernel.org.
> 
> I don't see why userspace would be so well informed as to make decisions
> about what should be locked in the L3 cache and not?
> 
> I see the memory hierarchy as any other hardware: a resource that is
> allocated and arbitrated by the kernel.
> 
> The MM subsytem knows which memory is most cache hot.
> Especially when you use DAMON DAMOS, which has the sole
> purpose of executing actions like that. Here is a good YouTube.
> https://www.youtube.com/watch?v=xKJO4kLTHOI
Hi Linus,

This typically isn't about cache hot.  It it were, the data would
be in the cache without this. It's about ensuring something that would
otherwise unlikely to be there is in the cache.

Normally that's a latency critical region.  In general the kernel
has no chance of figuring out what those are ahead of time, only
userspace can know (based on profiling etc) that is per workload.
The first hit matters in these use cases and it's not something
the prefetchers can help with.

The only thing we could do if this was in kernel would be to
have userspace pass some hints and then let the kernel actually
kick off the process. That just boils down to using a different
interface to do what this driver is doing (and that's the conversaion
this series is trying to get going)  It's a finite resource
and you absolutely need userspace to be able to tell if it
got what it asked for or not.

Damon might be useful for that preanalysis though but it can't do
anything for the infrequent extremely latency sensitive accesses.
Normally this is fleet wide stuff based on intensive benchmarking
of a few nodes.  Same sort of approach as the original warehouse
scale computing paper on tuning zswap capacity across a fleet.
Its an extreme form of profile guided optimization (and not
currently automatic I think?). If we are putting code in this
locked region, the program has been carefully recompiled / linked
to group the critical parts so that we can use the minimum number
of these locked regions. Data is a little simpler.

It's kind of similar to resctl but at a sub process granularity.

> 
> Shouldn't the MM subsystem be in charge of determining, locking
> down and freeing up hot regions in L3 cache?
> 
> This looks more like userspace is going to determine that but
> how exactly? By running DAMON? Then it's better to keep the
> whole mechanism in the kernel where it belongs and let the
> MM subsystem adapt locked L3 cache to the usage patterns.

I haven't yet come up with any plausible scheme by which the MM
subsystem could do this.

I think what we need here Yushan, is more detail on end to end
use cases for this.  Some examples etc as clearer motivation.

Jonathan

> 
> Yours,
> Linus Walleij
> 
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by wangyushan 3 days, 5 hours ago
On 2/4/2026 9:40 PM, Jonathan Cameron wrote:
> On Wed, 4 Feb 2026 01:10:01 +0100
> Linus Walleij <linusw@kernel.org> wrote:
>
>> Shouldn't the MM subsystem be in charge of determining, locking
>> down and freeing up hot regions in L3 cache?
>>
>> This looks more like userspace is going to determine that but
>> how exactly? By running DAMON? Then it's better to keep the
>> whole mechanism in the kernel where it belongs and let the
>> MM subsystem adapt locked L3 cache to the usage patterns.
> I haven't yet come up with any plausible scheme by which the MM
> subsystem could do this.
>
> I think what we need here Yushan, is more detail on end to end
> use cases for this.  Some examples etc as clearer motivation.
>

Hi,

Let me try to explain the use case here.

The idea is similar to this article:
https://www.cl.cam.ac.uk/~rnw24/papers/201708-sigcomm-diskcryptnet.pdf

Suppose we have data on SSD that need to be transferred through network.
We have technologies like DDIO and IO stash to make data flow through
L3 cache instead of DDR to avoid the influence of DDR bandwidth.

But if something is to be done to the data instead of merely copying,
and cores needs to participate, we'd like to make data to climb a bit
higher up through the memory hierarchy and stay there before data
processing is done. That is, correct amount of data being fetched to
L3 cache, and consumed just in time, then free L3 for next batch.
It is more of a userspace defined pipeline that utilizes capability
provided by kernel, where cache locks are allocated and freed quickly
with batches.

In above use case, C2C latency is chosen to avoid DDR latency, precisely
which L3 cache to store the data is not required. (For this part maybe
including steering tag as the hint to choose the correct L3 is a smarter
way, like AMD SDCIAE).

Memory management is, in many way, independent to architecture and
vendors, we might not want to take hardware specific feature into
account when kernel makes decisions of, say, swapping a page or not,
but we can control the hardware resource to lean more on a process,
like resctl.

Thanks,
Yushan

Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Linus Walleij 3 days, 2 hours ago
On Fri, Feb 6, 2026 at 11:08 AM wangyushan <wangyushan12@huawei.com> wrote:

> Suppose we have data on SSD that need to be transferred through network.
> We have technologies like DDIO and IO stash to make data flow through
> L3 cache instead of DDR to avoid the influence of DDR bandwidth.
[https://www.cl.cam.ac.uk/~rnw24/papers/201708-sigcomm-diskcryptnet.pdf]

So as to decode, encrypt or run some AI training/inference stuff on the
data, I get it.

The paper immediately gives at hand a use case the Linux kernel
(not userspace) could use: lock down the code and constants used
by in-kernel cipher algorithms to reduce latency on encrypted disk
or networks.

[Added in Ard and Herbert who may be interested]

Which means that if this could actually be used for these "hard
kernels" in Linux the proper way to abstract this is to give the kernel
a generic interface to request L3 cacheline lockdown no matter
if that is employed by the kernel or userspace.

> But if something is to be done to the data instead of merely copying,
> and cores needs to participate,

When you say this, is it "CPU cores" or others cores such as DSPs or
GPU/NPUs you are thinking of, or any kind of data processing core
(all of them)?

This surely need to be abstracted in such a way that either of these
can use it, Arnd mentions dma-buf which is a way devices think about
data that the CPU cores doesn't necessarily (but may) touch,
and resctrl could very well integrate into that I think.

What I think is important is that the modeling in the kernel is consistent
and that l3 cache lockdown is something any part of the kernel
needing it can request.

Yours,
Linus Walleij
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Arnd Bergmann 3 days, 4 hours ago
On Fri, Feb 6, 2026, at 11:07, wangyushan wrote:
>
> Let me try to explain the use case here.
>
> The idea is similar to this article:
> https://www.cl.cam.ac.uk/~rnw24/papers/201708-sigcomm-diskcryptnet.pdf
>
> Suppose we have data on SSD that need to be transferred through network.
> We have technologies like DDIO and IO stash to make data flow through
> L3 cache instead of DDR to avoid the influence of DDR bandwidth.
>
> But if something is to be done to the data instead of merely copying,
> and cores needs to participate, we'd like to make data to climb a bit
> higher up through the memory hierarchy and stay there before data
> processing is done. That is, correct amount of data being fetched to
> L3 cache, and consumed just in time, then free L3 for next batch.
> It is more of a userspace defined pipeline that utilizes capability
> provided by kernel, where cache locks are allocated and freed quickly
> with batches.
>
> In above use case, C2C latency is chosen to avoid DDR latency, precisely
> which L3 cache to store the data is not required. (For this part maybe
> including steering tag as the hint to choose the correct L3 is a smarter
> way, like AMD SDCIAE).
>
> Memory management is, in many way, independent to architecture and
> vendors, we might not want to take hardware specific feature into
> account when kernel makes decisions of, say, swapping a page or not,
> but we can control the hardware resource to lean more on a process,
> like resctl.

Ah, so if the main purpose here is to access the memory from
devices, I wonder if this should be structured as a dma-buf
driver. This would still allow you to mmap() a character
device, but in addition allow passing the file descriptor
to driver interfaces that take a dmabuf instead of a user
memory pointer.

    Arnd
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Linus Walleij 4 days, 6 hours ago
Hi Jonathan,

thanks for stepping in, I'm trying to be healthy sceptical here...

What you and others need to do is to tell me if I'm being too
critical. But right now it feels like I need some more senior
MM developers to tell me to be a good boy and let this
hack patch slip before I shut up ;)

On Wed, Feb 4, 2026 at 2:40 PM Jonathan Cameron
<jonathan.cameron@huawei.com> wrote:

> > The MM subsytem knows which memory is most cache hot.
> > Especially when you use DAMON DAMOS, which has the sole
> > purpose of executing actions like that. Here is a good YouTube.
> > https://www.youtube.com/watch?v=xKJO4kLTHOI
>
> This typically isn't about cache hot.  It it were, the data would
> be in the cache without this. It's about ensuring something that would
> otherwise unlikely to be there is in the cache.

OK I get it.

> Normally that's a latency critical region.  In general the kernel
> has no chance of figuring out what those are ahead of time, only
> userspace can know (based on profiling etc) that is per workload.
(...)
> The only thing we could do if this was in kernel would be to
> have userspace pass some hints and then let the kernel actually
> kick off the process.
(...)
> and you absolutely need userspace to be able to tell if it
> got what it asked for or not.
(...)
> Its an extreme form of profile guided optimization (and not
> currently automatic I think?). If we are putting code in this
> locked region, the program has been carefully recompiled / linked
> to group the critical parts so that we can use the minimum number
> of these locked regions. Data is a little simpler.

OK so the argument is "only userspace knows what cache lines
are performance critical, and therefore this info must be passed
from userspace". Do I understand correctly?

What I'm worried about here is that "an extreme form of profile
guided optimization" is a bit handwavy. I would accept if it is
based on simulation or simply human know-how, such as
if a developer puts signal-processing algorithm kernels
there because they know it is going to be the hard kernel
of the process.

But does the developer know if that hard kernel is importantest
taken into account all other processes running on the system,
and what happens if several processes say they have
such hard kernels? Who will arbitrate? That is usually the
kernels job.

> I haven't yet come up with any plausible scheme by which the MM
> subsystem could do this.

I find it kind of worrying if userspace knows which lines are most
performance-critical but the kernel MM subsystem does not.

That strongly inidicates that if only userspace knows that, then
madvise() is the way to go. The MM might need and use this
information for other reasons than just locking down lines in
the L3 cache.

In my mind:

Userspace madvise -> Linux MM -> arch cache-line lockdown

So the MM needs to take the decision that this indication from
userspace is something that should result in asking the arch
to lock down these cache lines, as well as re-evaluate it if
new processes start sending the same madise() calls and we
run out in lock-downable cache lines.

L3 lock-downs is a finite resource after all, and it needs to be
arbitrated. Just OTOMH, maybe if several processes ask for this
simultaneously and we run out of lockdownable cache lines,
who wins? First come first served? The process with the highest
nice value or realtime priority? Etc.

I.e. the kernel MM needs to arbitrate any cache lockdown.

Bypassing the whole MM like this patch does is a hack designed
for one single process that the user "knows" is "importantest"
and will be the only process asking for cache lines to be locked
down.

And this isn't abstract and it does not scale. We can't do that.

That's the kind of resource management we expect from the
kernel.

MM might want to use that information for other things.

> I think what we need here Yushan, is more detail on end to end
> use cases for this.  Some examples etc as clearer motivation.

I agree.

Yours,
Linus Walleij
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Jonathan Cameron 4 days, 4 hours ago
On Thu, 5 Feb 2026 10:12:33 +0100
Linus Walleij <linusw@kernel.org> wrote:

> Hi Jonathan,
> 
> thanks for stepping in, I'm trying to be healthy sceptical here...
> 
> What you and others need to do is to tell me if I'm being too
> critical. But right now it feels like I need some more senior
> MM developers to tell me to be a good boy and let this
> hack patch slip before I shut up ;)

It's good to have these discussions as it makes us actually
explain what they want to do much more clearly!
wangyushan and I have both been taking about this for too long so
it's easy to miss that it's not been explained properly.

Note I was absolutely expecting a non trivial discussion on how to do
this and in particular how generic it should be.
 
+CC a various resctl / mpam related people.

> 
> On Wed, Feb 4, 2026 at 2:40 PM Jonathan Cameron
> <jonathan.cameron@huawei.com> wrote:
> 
> > > The MM subsytem knows which memory is most cache hot.
> > > Especially when you use DAMON DAMOS, which has the sole
> > > purpose of executing actions like that. Here is a good YouTube.
> > > https://www.youtube.com/watch?v=xKJO4kLTHOI  
> >
> > This typically isn't about cache hot.  It it were, the data would
> > be in the cache without this. It's about ensuring something that would
> > otherwise unlikely to be there is in the cache.  
> 
> OK I get it.
> 
> > Normally that's a latency critical region.  In general the kernel
> > has no chance of figuring out what those are ahead of time, only
> > userspace can know (based on profiling etc) that is per workload.  
> (...)
> > The only thing we could do if this was in kernel would be to
> > have userspace pass some hints and then let the kernel actually
> > kick off the process.  
> (...)
> > and you absolutely need userspace to be able to tell if it
> > got what it asked for or not.  
> (...)
> > Its an extreme form of profile guided optimization (and not
> > currently automatic I think?). If we are putting code in this
> > locked region, the program has been carefully recompiled / linked
> > to group the critical parts so that we can use the minimum number
> > of these locked regions. Data is a little simpler.  
> 
> OK so the argument is "only userspace knows what cache lines
> are performance critical, and therefore this info must be passed
> from userspace". Do I understand correctly?

Yes.

> 
> What I'm worried about here is that "an extreme form of profile
> guided optimization" is a bit handwavy. I would accept if it is
> based on simulation or simply human know-how, such as
> if a developer puts signal-processing algorithm kernels
> there because they know it is going to be the hard kernel
> of the process.

Those methods are part of what I'd consider profile guided optimization.
I wasn't meaning to only including the automatic methods.
For all the ways to tune this, you get lots of data from
simulation or real hardware and use that to understand what makes
sense to lock in cache.  The human involved is often going to guide
those simulations - but follow that with a lot of testing and
data gathering.

One existing user I'm aware did a lot of work to identify exactly
what they needed to pin. It's an appliance type situation where
they know exactly what the workloads are on that server. I'm not
sure how much more we can share on that customer use case / case study
beyond this vague description, so will leave it to Yushan to maybe
provide more info.

> 
> But does the developer know if that hard kernel is importantest
> taken into account all other processes running on the system,
> and what happens if several processes say they have
> such hard kernels? Who will arbitrate? That is usually the
> kernels job.

Take the closest example to this which is resctl (mpam on arm).
This actually has a feature that smells a bit like this.
Pseudo-cache locking. 

https://docs.kernel.org/filesystems/resctrl.html#cache-pseudo-locking

My understanding is that the semantics of that don't align perfectly
with what we have here.  Yushan can you add more on why we didn't
try to fit into that scheme?  Other than the obvious bit that more
general upstream support for the arch definitions of MPAM is a work in
progress and fitting vendor specific features on top will be tricky
for a while at least.  The hardware here is also independent of the
MPAM support.

Resctl puts the control on resource allocation into the hands of
userspace (in that case via cgroups etc as it's process level controls).
The cache lockdown is a weird because you have go through a dance of
creating a temporary setup, demand fetching the lines into cache and
then rely on various operations not occuring that might push them out
again.

Resctl provides many footguns and is (I believe) used by administrators
who are very careful in how they use it.  Note that there are some guards
in this new code to only allow locking a portion of the l3. We also rely
somewhat on the uarch and cache design to ensure it is safe to do this
type of locking (other than reducing perf of other tasks).
I'm dancing around uarch details here that I would need to go seek
agreement to share more on.

> 
> > I haven't yet come up with any plausible scheme by which the MM
> > subsystem could do this.  
> 
> I find it kind of worrying if userspace knows which lines are most
> performance-critical but the kernel MM subsystem does not.
> 
> That strongly inidicates that if only userspace knows that, then
> madvise() is the way to go. The MM might need and use this
> information for other reasons than just locking down lines in
> the L3 cache.

I agree that something like madvise() may well be more suitable.
We do need paths to know how many regions are left etc though so
it will need a few other bits of interface.

I'm also not sure what appetite there will be for an madvise()
for something that today we have no idea if anyone else actually
has hardware for.  If people do, then please shout and we can
look at how something like this can be generalized.

> 
> In my mind:
> 
> Userspace madvise -> Linux MM -> arch cache-line lockdown
> 
> So the MM needs to take the decision that this indication from
> userspace is something that should result in asking the arch
> to lock down these cache lines, as well as re-evaluate it if
> new processes start sending the same madise() calls and we
> run out in lock-downable cache lines.
> 
> L3 lock-downs is a finite resource after all, and it needs to be
> arbitrated. Just OTOMH, maybe if several processes ask for this
> simultaneously and we run out of lockdownable cache lines,
> who wins? First come first served? The process with the highest
> nice value or realtime priority? Etc.

My current thinking is first come first served with a path to
clearly tell an application it didn't get what it wanted.
Scheduling, priority etc being involved would all interfere
with the strong guarantees lock down provides. 

That's kind of why we ended up with a device type model as
it's common to have finite resources and just say no if
they have run out (accelerator queues etc).
It's up to the userspace code to know what to do if they
can't get what they asked for.


> 
> I.e. the kernel MM needs to arbitrate any cache lockdown.
> 
> Bypassing the whole MM like this patch does is a hack designed
> for one single process that the user "knows" is "importantest"
> and will be the only process asking for cache lines to be locked
> down.
> 
> And this isn't abstract and it does not scale. We can't do that.
> 
> That's the kind of resource management we expect from the
> kernel.

I'm with you in many ways on this, but there are other
things for which we absolutely do allocate from a finite resource
and don't let the kernel make decisions - typically because there is no
right way to arbitrate.  If we can invent a scheme for arbitration
for this then great, right now I can't envision anything other than
1st come 1st served being appropriate. Maybe there are other use cases
where other schemes work (e.g. if we ever figure out how to use this
as a form of live optimization)

> 
> MM might want to use that information for other things.

Absolutely, though I'm doubtful about trying to design a generic
way of conveying latency criticality without knowing more of those
use cases from the start.

Thanks,
Jonathan


> 
> > I think what we need here Yushan, is more detail on end to end
> > use cases for this.  Some examples etc as clearer motivation.  
> 
> I agree.
> 
> Yours,
> Linus Walleij
> 
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by wangyushan 3 days, 5 hours ago
On 2/5/2026 6:18 PM, Jonathan Cameron wrote:
> On Thu, 5 Feb 2026 10:12:33 +0100
> Linus Walleij <linusw@kernel.org> wrote:
>
>> But does the developer know if that hard kernel is importantest
>> taken into account all other processes running on the system,
>> and what happens if several processes say they have
>> such hard kernels? Who will arbitrate? That is usually the
>> kernels job.
>
> Take the closest example to this which is resctl (mpam on arm).
> This actually has a feature that smells a bit like this.
> Pseudo-cache locking.
>
> https://docs.kernel.org/filesystems/resctrl.html#cache-pseudo-locking
>
> My understanding is that the semantics of that don't align perfectly
> with what we have here.  Yushan can you add more on why we didn't
> try to fit into that scheme?  Other than the obvious bit that more
> general upstream support for the arch definitions of MPAM is a work in
> progress and fitting vendor specific features on top will be tricky
> for a while at least.  The hardware here is also independent of the
> MPAM support.

Intel cache pseudo lock requires help of IA32_PQR_ASSOC MSR, according
to [1], that register can save necessary information for processes acquired
cache pseudo locks, but Arm64 does not have the equivalent register.

[1]: https://www.intel.com/content/www/us/en/developer/articles/technical/cache-allocation-technology-usage-models.html

>
> Resctl puts the control on resource allocation into the hands of
> userspace (in that case via cgroups etc as it's process level controls).
> The cache lockdown is a weird because you have go through a dance of
> creating a temporary setup, demand fetching the lines into cache and
> then rely on various operations not occuring that might push them out
> again.
>
> Resctl provides many footguns and is (I believe) used by administrators
> who are very careful in how they use it.  Note that there are some guards
> in this new code to only allow locking a portion of the l3. We also rely
> somewhat on the uarch and cache design to ensure it is safe to do this
> type of locking (other than reducing perf of other tasks).
> I'm dancing around uarch details here that I would need to go seek
> agreement to share more on.
>
>>
>>> I haven't yet come up with any plausible scheme by which the MM
>>> subsystem could do this.
>>
>> I find it kind of worrying if userspace knows which lines are most
>> performance-critical but the kernel MM subsystem does not.
>>
>> That strongly inidicates that if only userspace knows that, then
>> madvise() is the way to go. The MM might need and use this
>> information for other reasons than just locking down lines in
>> the L3 cache.
>
> I agree that something like madvise() may well be more suitable.
> We do need paths to know how many regions are left etc though so
> it will need a few other bits of interface.
>
> I'm also not sure what appetite there will be for an madvise()
> for something that today we have no idea if anyone else actually
> has hardware for.  If people do, then please shout and we can
> look at how something like this can be generalized.

Currently madvise() "only operates on whole pages", maybe
madvise() will not be happy with the semantic change of
page / cacheline.

Cache size available for lock may be far less than the size
madvise() can handle. Though madvise() can always speculatively call
cache lock once appropriate and get back to original track if refused,
but that's a hack that need more deep discussion.

I think resctl is more suitable for this, as this serves the same
purpose as MPAM etc, to save QoS of a task, and the way to achieve it,
by tweaking hardware capability.

Thanks,
Yushan


Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Ben Horgan 2 days, 23 hours ago
Hi Yushan,

On 2/6/26 09:54, wangyushan wrote:
> 
> On 2/5/2026 6:18 PM, Jonathan Cameron wrote:
>> On Thu, 5 Feb 2026 10:12:33 +0100
>> Linus Walleij <linusw@kernel.org> wrote:
>>
>>> But does the developer know if that hard kernel is importantest
>>> taken into account all other processes running on the system,
>>> and what happens if several processes say they have
>>> such hard kernels? Who will arbitrate? That is usually the
>>> kernels job.
>>
>> Take the closest example to this which is resctl (mpam on arm).
>> This actually has a feature that smells a bit like this.
>> Pseudo-cache locking.
>>
>> https://docs.kernel.org/filesystems/resctrl.html#cache-pseudo-locking
>>
>> My understanding is that the semantics of that don't align perfectly
>> with what we have here.  Yushan can you add more on why we didn't
>> try to fit into that scheme?  Other than the obvious bit that more
>> general upstream support for the arch definitions of MPAM is a work in
>> progress and fitting vendor specific features on top will be tricky
>> for a while at least.  The hardware here is also independent of the
>> MPAM support.
> 
> Intel cache pseudo lock requires help of IA32_PQR_ASSOC MSR, according
> to [1], that register can save necessary information for processes acquired
> cache pseudo locks, but Arm64 does not have the equivalent register.

If you have MPAM, the per exception level MPAMx_ELy registers are
somewhat equivalent. They tell you which partid and pmg identifiers the
CPU is using and IA32_PQR_ASSOC tells you the closid and rmid which are
much the same thing. Is there a difference that stops being equivalent
in this scenario?

> 
> [1]: https://www.intel.com/content/www/us/en/developer/articles/technical/cache-allocation-technology-usage-models.html
> 
[...]

Thanks,

Ben

Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Ben Horgan 4 days ago
On 2/5/26 10:18, Jonathan Cameron wrote:
> On Thu, 5 Feb 2026 10:12:33 +0100
> Linus Walleij <linusw@kernel.org> wrote:
> 
>> Hi Jonathan,
>>
>> thanks for stepping in, I'm trying to be healthy sceptical here...
>>
>> What you and others need to do is to tell me if I'm being too
>> critical. But right now it feels like I need some more senior
>> MM developers to tell me to be a good boy and let this
>> hack patch slip before I shut up ;)
> 
> It's good to have these discussions as it makes us actually
> explain what they want to do much more clearly!
> wangyushan and I have both been taking about this for too long so
> it's easy to miss that it's not been explained properly.
> 
> Note I was absolutely expecting a non trivial discussion on how to do
> this and in particular how generic it should be.
>  
> +CC a various resctl / mpam related people.
[...]
> 
>>
>> But does the developer know if that hard kernel is importantest
>> taken into account all other processes running on the system,
>> and what happens if several processes say they have
>> such hard kernels? Who will arbitrate? That is usually the
>> kernels job.
> 
> Take the closest example to this which is resctl (mpam on arm).
> This actually has a feature that smells a bit like this.
> Pseudo-cache locking. 
> 
> https://docs.kernel.org/filesystems/resctrl.html#cache-pseudo-locking
> 
> My understanding is that the semantics of that don't align perfectly
> with what we have here.  Yushan can you add more on why we didn't
> try to fit into that scheme?  Other than the obvious bit that more
> general upstream support for the arch definitions of MPAM is a work in
> progress and fitting vendor specific features on top will be tricky
> for a while at least.  The hardware here is also independent of the
> MPAM support.
> 
> Resctl puts the control on resource allocation into the hands of
> userspace (in that case via cgroups etc as it's process level controls).
> The cache lockdown is a weird because you have go through a dance of
> creating a temporary setup, demand fetching the lines into cache and
> then rely on various operations not occuring that might push them out
> again.
> 
> Resctl provides many footguns and is (I believe) used by administrators
> who are very careful in how they use it.  Note that there are some guards
> in this new code to only allow locking a portion of the l3. We also rely
> somewhat on the uarch and cache design to ensure it is safe to do this
> type of locking (other than reducing perf of other tasks).
> I'm dancing around uarch details here that I would need to go seek
> agreement to share more on.
> 

Just wondering about the compatiblity of cache lockdown and
resctrl/mpam. If this is done outside resctrl then how would this
interact with the cache portion bitmaps used in resctrl/mpam? For
instance, how would a user know whether or not a resctrl/mpam cache
portion is unusable because it has been locked?
Thanks,

Ben
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Linus Walleij 4 days, 1 hour ago
On Thu, Feb 5, 2026 at 11:18 AM Jonathan Cameron
<jonathan.cameron@huawei.com> wrote:

> Take the closest example to this which is resctl (mpam on arm).
> This actually has a feature that smells a bit like this.
> Pseudo-cache locking.
>
> https://docs.kernel.org/filesystems/resctrl.html#cache-pseudo-locking

That was very interesting. And more than a little bit complex.
IIUC MPAM is mostly about requesting bandwidth to/from the
memory.

But maybe cache lockdown can build on top?

> I'm also not sure what appetite there will be for an madvise()
> for something that today we have no idea if anyone else actually
> has hardware for.  If people do, then please shout and we can
> look at how something like this can be generalized.

Cache lockdown is an ages old concept, I think others can do
it too, you are just the first to try to support it upstream.
Personally I'm all for this, as long as we can come up with
something generic for others to use as well. No custom device
+ ioctl stuff.

There are adjacent stuff that vendors are doing is about prefetch
and which I mentioned briefly:

Fujitus prefetch:
https://lore.kernel.org/linux-arm-kernel/20220607120530.2447112-1-tarumizu.kohei@fujitsu.com/

AmpereOne prefetch:
https://lore.kernel.org/linux-arm-kernel/20231122092855.4440-1-shijie@os.amperecomputing.com/

Maybe that is more related to MPAM actually. What it has in common
with cache lockdown is "significatly indicate memore areas of special
interest". But notice Will Deacons reply:
https://lore.kernel.org/linux-arm-kernel/ZV3omRGtVS9l-tKk@FVFF77S0Q05N/

 "We tend to shy away from micro-architecture specific optimisations in
 the arm64 kernel as they're pretty unmaintainable, hard to test properly,
 generally lead to bloat and add additional obstacles to updating our
 library routines."

> My current thinking is first come first served with a path to
> clearly tell an application it didn't get what it wanted.
> Scheduling, priority etc being involved would all interfere
> with the strong guarantees lock down provides.

That sounds more like mdemand() than madvise() doesn't it ;)

But surely an all-or-nothing ABI can be specified, and maybe
a please-if-you-can ABI as well.

> > MM might want to use that information for other things.
>
> Absolutely, though I'm doubtful about trying to design a generic
> way of conveying latency criticality without knowing more of those
> use cases from the start.

Well, abstracting is about boiling the world down to a few facts
that can be used for making general decisions.

But for one I suppose if someone locks down some cache lines
in L3 and then not actually use them much at long intervals
because of misc, I suppose it's not very nice if the kernel decide
to swap out the page with these cache lines in it, because that
would have adverse impact on the performace once it hits for example?

Or did someone think about that already? Is that avoided in the
current patch set? (Maybe a stupid question...)

Likewise I see that this code is keeping track of which CPU
the l3 cache line were locked from, but I don't see anything in
this code blocking task migration for whoever called this ABI
or am I wrong? What happens if the scheduler moves the
process to another CPU? Or is it implicit that this is nailed to
the current CPU? Then surely that need to be enforced?

I just get the overall feeling that this was just tested on a scenario
such as:

1. Boot
2. Run a process calling this code, hey it works
3. Terminate process

No sleeping and swapping under memory pressure etc happing.

Designing for the generic case and in a central part of the kernel
(inside MM not in drivers/soc...) would avoid such snags I think.

Yours,
Linus Walleij
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Jonathan Cameron 5 days, 1 hour ago
Fixed linux-mm address that got added a few emails back.

On Wed, 4 Feb 2026 13:40:20 +0000
Jonathan Cameron <jonathan.cameron@huawei.com> wrote:

> On Wed, 4 Feb 2026 01:10:01 +0100
> Linus Walleij <linusw@kernel.org> wrote:
> 
> > Hi Yushan,
> > 
> > thanks for your patch!
> > 
> > On Tue, Feb 3, 2026 at 5:18 PM Yushan Wang <wangyushan12@huawei.com> wrote:  
> > >
> > > The driver will create a file of `/dev/hisi_l3c` on init, mmap
> > > operations to it will allocate a memory region that is guaranteed to be
> > > placed in L3 cache.
> > >
> > > The driver also provides unmap() to deallocated the locked memory.
> > >
> > > The driver also provides an ioctl interface for user to get cache lock
> > > information, such as lock restrictions and locked sizes.
> > >
> > > Signed-off-by: Yushan Wang <wangyushan12@huawei.com>    
> > 
> > The commit message does not say *why* you are doing this?
> >   
> > > +config HISI_SOC_L3C
> > > +       bool "HiSilicon L3 Cache device driver"
> > > +       depends on ACPI
> > > +       depends on ARM64 || COMPILE_TEST
> > > +       help
> > > +         This driver provides the functions to lock L3 cache entries from
> > > +         being evicted for better performance.    
> > 
> > Here is the reason though.
> > 
> > Things like this need to be CC to linux-mm@vger.kernel.org.
> > 
> > I don't see why userspace would be so well informed as to make decisions
> > about what should be locked in the L3 cache and not?
> > 
> > I see the memory hierarchy as any other hardware: a resource that is
> > allocated and arbitrated by the kernel.
> > 
> > The MM subsytem knows which memory is most cache hot.
> > Especially when you use DAMON DAMOS, which has the sole
> > purpose of executing actions like that. Here is a good YouTube.
> > https://www.youtube.com/watch?v=xKJO4kLTHOI  
> Hi Linus,
> 
> This typically isn't about cache hot.  It it were, the data would
> be in the cache without this. It's about ensuring something that would
> otherwise unlikely to be there is in the cache.
> 
> Normally that's a latency critical region.  In general the kernel
> has no chance of figuring out what those are ahead of time, only
> userspace can know (based on profiling etc) that is per workload.
> The first hit matters in these use cases and it's not something
> the prefetchers can help with.
> 
> The only thing we could do if this was in kernel would be to
> have userspace pass some hints and then let the kernel actually
> kick off the process. That just boils down to using a different
> interface to do what this driver is doing (and that's the conversaion
> this series is trying to get going)  It's a finite resource
> and you absolutely need userspace to be able to tell if it
> got what it asked for or not.
> 
> Damon might be useful for that preanalysis though but it can't do
> anything for the infrequent extremely latency sensitive accesses.
> Normally this is fleet wide stuff based on intensive benchmarking
> of a few nodes.  Same sort of approach as the original warehouse
> scale computing paper on tuning zswap capacity across a fleet.
> Its an extreme form of profile guided optimization (and not
> currently automatic I think?). If we are putting code in this
> locked region, the program has been carefully recompiled / linked
> to group the critical parts so that we can use the minimum number
> of these locked regions. Data is a little simpler.
> 
> It's kind of similar to resctl but at a sub process granularity.
> 
> > 
> > Shouldn't the MM subsystem be in charge of determining, locking
> > down and freeing up hot regions in L3 cache?
> > 
> > This looks more like userspace is going to determine that but
> > how exactly? By running DAMON? Then it's better to keep the
> > whole mechanism in the kernel where it belongs and let the
> > MM subsystem adapt locked L3 cache to the usage patterns.  
> 
> I haven't yet come up with any plausible scheme by which the MM
> subsystem could do this.
> 
> I think what we need here Yushan, is more detail on end to end
> use cases for this.  Some examples etc as clearer motivation.
> 
> Jonathan
> 
> > 
> > Yours,
> > Linus Walleij
> >   
> 
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by SeongJae Park 4 days, 12 hours ago
On Wed, 4 Feb 2026 13:44:47 +0000 Jonathan Cameron <jonathan.cameron@huawei.com> wrote:

> 
> Fixed linux-mm address that got added a few emails back.
> 
> On Wed, 4 Feb 2026 13:40:20 +0000
> Jonathan Cameron <jonathan.cameron@huawei.com> wrote:
> 
> > On Wed, 4 Feb 2026 01:10:01 +0100
> > Linus Walleij <linusw@kernel.org> wrote:
> > 
> > > Hi Yushan,
> > > 
> > > thanks for your patch!
> > > 
> > > On Tue, Feb 3, 2026 at 5:18 PM Yushan Wang <wangyushan12@huawei.com> wrote:  
> > > >
> > > > The driver will create a file of `/dev/hisi_l3c` on init, mmap
> > > > operations to it will allocate a memory region that is guaranteed to be
> > > > placed in L3 cache.
> > > >
> > > > The driver also provides unmap() to deallocated the locked memory.
> > > >
> > > > The driver also provides an ioctl interface for user to get cache lock
> > > > information, such as lock restrictions and locked sizes.
> > > >
> > > > Signed-off-by: Yushan Wang <wangyushan12@huawei.com>    
> > > 
> > > The commit message does not say *why* you are doing this?
> > >   
> > > > +config HISI_SOC_L3C
> > > > +       bool "HiSilicon L3 Cache device driver"
> > > > +       depends on ACPI
> > > > +       depends on ARM64 || COMPILE_TEST
> > > > +       help
> > > > +         This driver provides the functions to lock L3 cache entries from
> > > > +         being evicted for better performance.    
> > > 
> > > Here is the reason though.
> > > 
> > > Things like this need to be CC to linux-mm@vger.kernel.org.
> > > 
> > > I don't see why userspace would be so well informed as to make decisions
> > > about what should be locked in the L3 cache and not?
> > > 
> > > I see the memory hierarchy as any other hardware: a resource that is
> > > allocated and arbitrated by the kernel.
> > > 
> > > The MM subsytem knows which memory is most cache hot.
> > > Especially when you use DAMON DAMOS, which has the sole
> > > purpose of executing actions like that. Here is a good YouTube.
> > > https://www.youtube.com/watch?v=xKJO4kLTHOI  

Thank you for Cc-ing me, Linus.

> > Hi Linus,
> > 
> > This typically isn't about cache hot.  It it were, the data would
> > be in the cache without this. It's about ensuring something that would
> > otherwise unlikely to be there is in the cache.
> > 
> > Normally that's a latency critical region.  In general the kernel
> > has no chance of figuring out what those are ahead of time, only
> > userspace can know (based on profiling etc) that is per workload.
> > The first hit matters in these use cases and it's not something
> > the prefetchers can help with.
> > 
> > The only thing we could do if this was in kernel would be to
> > have userspace pass some hints and then let the kernel actually
> > kick off the process. That just boils down to using a different
> > interface to do what this driver is doing (and that's the conversaion
> > this series is trying to get going)  It's a finite resource
> > and you absolutely need userspace to be able to tell if it
> > got what it asked for or not.

And thank you for clarifying, Jonathan.

> > 
> > Damon might be useful for that preanalysis though but it can't do
> > anything for the infrequent extremely latency sensitive accesses.

I also find no good idea to let DAMON help in this scenario.

If I have to make a brain storming idea off the top of my humble head, though.
Maybe we can ask DAMON to monitor address ranges that assumed to have the
latency sensitive data.  And further ask DAMOS to find sub regions of the area
that getting colder than desired, and make an access to cache lines of the sub
regions so that they can be in the cache for "most cases".

It is just a brain storming idea off the top of my head and probably not work
for your case, since...  It ain't work if there is no good way to know or
guarantee the address ranges for the latency sensitive data.  It ain't work for
extremely latency sensitive case, as DAMON is just a best effort.  It ain't
work with DAMON of today because DAMOS doesn't support such kind of
cache-granularity access generation action.  So, it sounds like not a good
idea.

Nonetheless, if you get any question for DAMON in future, please feel free to
reach out :)


Thanks,
SJ

[...]
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by wangyushan 5 days, 5 hours ago

On 2/4/2026 8:10 AM, Linus Walleij wrote:
> Hi Yushan,
>
> thanks for your patch!

Thanks for review!

> On Tue, Feb 3, 2026 at 5:18 PM Yushan Wang <wangyushan12@huawei.com> wrote:
>> The driver will create a file of `/dev/hisi_l3c` on init, mmap
>> operations to it will allocate a memory region that is guaranteed to be
>> placed in L3 cache.
>>
>> The driver also provides unmap() to deallocated the locked memory.
>>
>> The driver also provides an ioctl interface for user to get cache lock
>> information, such as lock restrictions and locked sizes.
>>
>> Signed-off-by: Yushan Wang <wangyushan12@huawei.com>
> The commit message does not say *why* you are doing this?
>> +config HISI_SOC_L3C
>> +       bool "HiSilicon L3 Cache device driver"
>> +       depends on ACPI
>> +       depends on ARM64 || COMPILE_TEST
>> +       help
>> +         This driver provides the functions to lock L3 cache entries from
>> +         being evicted for better performance.
> Here is the reason though.

Sorry, I will include this into the commit message.
> Things like this need to be CC to linux-mm@vger.kernel.org.
>
> I don't see why userspace would be so well informed as to make decisions
> about what should be locked in the L3 cache and not?

This question is actually: should it be kernel or user space
application to decide if a cache lock should be applied?

Maybe the ideal situation is that this capability should be reserved into kernel space as a vendor specific optimization option. With the lack of knowledge of memory interleave etc the best move of an application might be allocate cache lock as much as possible.
> I see the memory hierarchy as any other hardware: a resource that is
> allocated and arbitrated by the kernel.
>
> The MM subsytem knows which memory is most cache hot.
> Especially when you use DAMON DAMOS, which has the sole
> purpose of executing actions like that. Here is a good YouTube.
> https://www.youtube.com/watch?v=xKJO4kLTHOI
>
> Shouldn't the MM subsystem be in charge of determining, locking
> down and freeing up hot regions in L3 cache?
Thanks for the link, I will see if there's any chance this can
cooperate with DAMON.

Gaps still exists here because DAMON operate with pages, but cache
works with cachelines, though the cache lock here supports cache
lock size larger than a page.
> This looks more like userspace is going to determine that but
> how exactly? By running DAMON? Then it's better to keep the
> whole mechanism in the kernel where it belongs and let the
> MM subsystem adapt locked L3 cache to the usage patterns.
Currently the patchset simply trusts that user knows well what
he is doing, which might not be good enough.

I will try to see if this could work with DAMON or madvice()
maybe :)

> Yours,
> Linus Walleij

Thanks,
Yushan
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Linus Walleij 5 days, 5 hours ago
On Wed, Feb 4, 2026 at 10:53 AM wangyushan <wangyushan12@huawei.com> wrote:

> > I don't see why userspace would be so well informed as to make decisions
> > about what should be locked in the L3 cache and not?
>
> This question is actually: should it be kernel or user space
> application to decide if a cache lock should be applied?
>
> Maybe the ideal situation is that this capability should be reserved into kernel
> space as a vendor specific optimization option. With the lack of knowledge
> of memory interleave etc the best move of an application might be allocate
> cache lock as much as possible.

If it is a vendor-specific optimization that has no generic applicability
outside of this specific system, dependent on a specific userspace
that only exist on this system, what is the value for the generic
kernel to carry and maintain this code?

In that case maybe the code should be maintained outside of the
mainline kernel tree.

What we want to see as maintainers are things that are reusable
across several systems.

Integrating this with DAMOS in a generic way is what will help the
next silicon that comes down the road.

I have already seen similar things from Fujitsu (IIRC). We need this
mechanism to be kernel-driven and generic, not custom and
system-specific, least of all driven from userspace by sysfs.

Yours,
Linus Walleij
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Arnd Bergmann 5 days, 21 hours ago
On Tue, Feb 3, 2026, at 17:18, Yushan Wang wrote:
> The driver will create a file of `/dev/hisi_l3c` on init, mmap
> operations to it will allocate a memory region that is guaranteed to be
> placed in L3 cache.
>
> The driver also provides unmap() to deallocated the locked memory.
>
> The driver also provides an ioctl interface for user to get cache lock
> information, such as lock restrictions and locked sizes.
>
> Signed-off-by: Yushan Wang <wangyushan12@huawei.com>

Hi Yushan,

Thanks for your submission. Since we are in the last week of
the merge window, this is not going to be linux-7.0 material,
but I'll have a quick look for now.

>  .../userspace-api/ioctl/ioctl-number.rst      |   1 +
>  MAINTAINERS                                   |   6 +
>  drivers/soc/hisilicon/Kconfig                 |  11 +
>  drivers/soc/hisilicon/Makefile                |   2 +
>  drivers/soc/hisilicon/hisi_soc_l3c.c          | 357 ++++++++++++++++++
>  include/uapi/misc/hisi_l3c.h                  |  28 ++

I don't think this should be in drivers/soc/, since I want
to reserve that for internal drivers without a user visible
interface other than the soc_device information. (yes, there
are a few historic counterexamples)

I also don't think this should be a hilicon specific interface,
if possible. The functionality is not that unusual in the end.

We had similar concepts using the numactl system calls in the
part, but I don't think we should do that here because you may
need the numa interfaces for other purposes as well, and it
may be confusing to existing callers.

Having a generic madvise() based interface would be great,
not sure if hardware support for that is common enough for that.

> +	/* Continuous physical memory is required for L3 cache lock. */
> +	pg = alloc_contig_pages(1 << order, GFP_KERNEL | __GFP_NOWARN | 
> __GFP_ZERO,
> +				cpu_to_node(smp_processor_id()), NULL);

Since this is a user allocation, should that be GFP_USER instead
of GFP_KERNEL?

> +/* HISI_L3C_INFO: cache lock info for HiSilicon SoC */
> +#define HISI_L3C_LOCK_INFO	_IOW(0xBB, 1, unsigned long)

The specification here looks wrong, please see
Documentation/driver-api/ioctl.rst

I think for your implementation it should be

#define HISI_L3C_LOCK_INFO	_IOR(0xBB, 1, hisi_l3c_lock_info)

> +struct hisi_l3c_lock_info {
> +	__u32 lock_region_num;
> +	__u64 lock_size;
> +	__u8 address_alignment;
> +	__u64 max_lock_size;
> +	__u64 min_lock_size;
> +};

You are leaking kernel data because of the padding in this structure,
please rearrange the members to avoid padding.

It may be better to use a different interface instead of ioctl(),
possibly exporting global data in sysfs.

       Arnd
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Krzysztof Kozlowski 4 days, 5 hours ago
On 03/02/2026 18:19, Arnd Bergmann wrote:
> On Tue, Feb 3, 2026, at 17:18, Yushan Wang wrote:
>> The driver will create a file of `/dev/hisi_l3c` on init, mmap
>> operations to it will allocate a memory region that is guaranteed to be
>> placed in L3 cache.
>>
>> The driver also provides unmap() to deallocated the locked memory.
>>
>> The driver also provides an ioctl interface for user to get cache lock
>> information, such as lock restrictions and locked sizes.
>>
>> Signed-off-by: Yushan Wang <wangyushan12@huawei.com>
> 
> Hi Yushan,
> 
> Thanks for your submission. Since we are in the last week of
> the merge window, this is not going to be linux-7.0 material,
> but I'll have a quick look for now.


To be clear - this is a v3 but with removed previous history...

Previous version:
https://lore.kernel.org/all/20251217102357.1730573-2-wangyushan12@huawei.com/

Or even v4?

https://lore.kernel.org/all/20250122065803.3363926-2-wangyushan12@huawei.com/

Yushan, please start versioning your patches correctly. Use b4 or git
format-patch -vx

Otherwise, please explain us how can we compare it with `b4 diff` with
previous version?

Sending something AGAIN as v1 ignoring entire previous submission is
clear no go. Like you are trying till it succeeds. Negative review?
Let's try from v1 this time...

This is not correct and it should not be my task to find your previous
discussions and decipher this v1.


Best regards,
Krzysztof
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by wangyushan 4 days, 3 hours ago
On 2/5/2026 5:37 PM, Krzysztof Kozlowski wrote:
> On 03/02/2026 18:19, Arnd Bergmann wrote:
>> On Tue, Feb 3, 2026, at 17:18, Yushan Wang wrote:
>>> The driver will create a file of `/dev/hisi_l3c` on init, mmap
>>> operations to it will allocate a memory region that is guaranteed to be
>>> placed in L3 cache.
>>>
>>> The driver also provides unmap() to deallocated the locked memory.
>>>
>>> The driver also provides an ioctl interface for user to get cache lock
>>> information, such as lock restrictions and locked sizes.
>>>
>>> Signed-off-by: Yushan Wang <wangyushan12@huawei.com>
>>
>> Hi Yushan,
>>
>> Thanks for your submission. Since we are in the last week of
>> the merge window, this is not going to be linux-7.0 material,
>> but I'll have a quick look for now.
>
>
> To be clear - this is a v3 but with removed previous history...
>
> Previous version:
> https://lore.kernel.org/all/20251217102357.1730573-2-wangyushan12@huawei.com/
>
> Or even v4?
>
> https://lore.kernel.org/all/20250122065803.3363926-2-wangyushan12@huawei.com/
>
> Yushan, please start versioning your patches correctly. Use b4 or git
> format-patch -vx

Hi Krzysztof,

Sorry about the confusing versions, the complete history is as below:

Link to v1: https://lore.kernel.org/all/20250107132907.3521574-1-wangyushan12@huawei.com

Link to v2: https://lore.kernel.org/all/20250122065803.3363926-1-wangyushan12@huawei.com/

Link to RFC v1: https://lore.kernel.org/all/20251125080542.3721829-1-wangyushan12@huawei.com/

Link to RFC v2: https://lore.kernel.org/all/20251217102357.1730573-1-wangyushan12@huawei.com/

Link to v1 again (this message): https://lore.kernel.org/all/20260203161843.649417-1-wangyushan12@huawei.com/

>
> Otherwise, please explain us how can we compare it with `b4 diff` with
> previous version?
>
> Sending something AGAIN as v1 ignoring entire previous submission is
> clear no go. Like you are trying till it succeeds. Negative review?
> Let's try from v1 this time...
>
> This is not correct and it should not be my task to find your previous
> discussions and decipher this v1.

I did spin 2 versions to mainline as the actual v1, the thread was quiet.
Then I made a major refactor to it and sent it as RFC, the thread went
quiet again but some compile check issues popped up. I spinned 2
versions of RFC for the compile issues and removed RFC in this version
since no strong objection showed up.

Apologize that I broke the rules and any inconvenience caused by it.
As there's little discussion in previous patches, is it OK that we start
here as v1?

Anyway I will include the whole history in the coverletter to prevent
more confusion.

>
>
> Best regards,
> Krzysztof
>

Thanks,
Yushan


Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by Krzysztof Kozlowski 4 days, 3 hours ago
On 05/02/2026 12:19, wangyushan wrote:
> 
> On 2/5/2026 5:37 PM, Krzysztof Kozlowski wrote:
>> On 03/02/2026 18:19, Arnd Bergmann wrote:
>>> On Tue, Feb 3, 2026, at 17:18, Yushan Wang wrote:
>>>> The driver will create a file of `/dev/hisi_l3c` on init, mmap
>>>> operations to it will allocate a memory region that is guaranteed to be
>>>> placed in L3 cache.
>>>>
>>>> The driver also provides unmap() to deallocated the locked memory.
>>>>
>>>> The driver also provides an ioctl interface for user to get cache lock
>>>> information, such as lock restrictions and locked sizes.
>>>>
>>>> Signed-off-by: Yushan Wang <wangyushan12@huawei.com>
>>>
>>> Hi Yushan,
>>>
>>> Thanks for your submission. Since we are in the last week of
>>> the merge window, this is not going to be linux-7.0 material,
>>> but I'll have a quick look for now.
>>
>>
>> To be clear - this is a v3 but with removed previous history...
>>
>> Previous version:
>> https://lore.kernel.org/all/20251217102357.1730573-2-wangyushan12@huawei.com/
>>
>> Or even v4?
>>
>> https://lore.kernel.org/all/20250122065803.3363926-2-wangyushan12@huawei.com/
>>
>> Yushan, please start versioning your patches correctly. Use b4 or git
>> format-patch -vx
> 
> Hi Krzysztof,
> 
> Sorry about the confusing versions, the complete history is as below:
> 
> Link to v1: https://lore.kernel.org/all/20250107132907.3521574-1-wangyushan12@huawei.com
> 
> Link to v2: https://lore.kernel.org/all/20250122065803.3363926-1-wangyushan12@huawei.com/
> 
> Link to RFC v1: https://lore.kernel.org/all/20251125080542.3721829-1-wangyushan12@huawei.com/
> 
> Link to RFC v2: https://lore.kernel.org/all/20251217102357.1730573-1-wangyushan12@huawei.com/
> 
> Link to v1 again (this message): https://lore.kernel.org/all/20260203161843.649417-1-wangyushan12@huawei.com/
> 
>>
>> Otherwise, please explain us how can we compare it with `b4 diff` with
>> previous version?
>>
>> Sending something AGAIN as v1 ignoring entire previous submission is
>> clear no go. Like you are trying till it succeeds. Negative review?
>> Let's try from v1 this time...
>>
>> This is not correct and it should not be my task to find your previous
>> discussions and decipher this v1.
> 
> I did spin 2 versions to mainline as the actual v1, the thread was quiet.
> Then I made a major refactor to it and sent it as RFC, the thread went
> quiet again but some compile check issues popped up. I spinned 2
> versions of RFC for the compile issues and removed RFC in this version
> since no strong objection showed up.
> 
> Apologize that I broke the rules and any inconvenience caused by it.
> As there's little discussion in previous patches, is it OK that we start
> here as v1?

No, it is not okay. Your patchset continues and entire previous feedback
and history is important. Otherwise why would I review this if I can as
well ignore it and wait for next year you sending another v1?

Best regards,
Krzysztof
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by wangyushan 4 days, 3 hours ago
On 2/5/2026 7:23 PM, Krzysztof Kozlowski wrote:
> On 05/02/2026 12:19, wangyushan wrote:
>>
>> On 2/5/2026 5:37 PM, Krzysztof Kozlowski wrote:
>>> On 03/02/2026 18:19, Arnd Bergmann wrote:
>>>> On Tue, Feb 3, 2026, at 17:18, Yushan Wang wrote:
>>>>> The driver will create a file of `/dev/hisi_l3c` on init, mmap
>>>>> operations to it will allocate a memory region that is guaranteed to be
>>>>> placed in L3 cache.
>>>>>
>>>>> The driver also provides unmap() to deallocated the locked memory.
>>>>>
>>>>> The driver also provides an ioctl interface for user to get cache lock
>>>>> information, such as lock restrictions and locked sizes.
>>>>>
>>>>> Signed-off-by: Yushan Wang <wangyushan12@huawei.com>
>>>>
>>>> Hi Yushan,
>>>>
>>>> Thanks for your submission. Since we are in the last week of
>>>> the merge window, this is not going to be linux-7.0 material,
>>>> but I'll have a quick look for now.
>>>
>>>
>>> To be clear - this is a v3 but with removed previous history...
>>>
>>> Previous version:
>>> https://lore.kernel.org/all/20251217102357.1730573-2-wangyushan12@huawei.com/
>>>
>>> Or even v4?
>>>
>>> https://lore.kernel.org/all/20250122065803.3363926-2-wangyushan12@huawei.com/
>>>
>>> Yushan, please start versioning your patches correctly. Use b4 or git
>>> format-patch -vx
>>
>> Hi Krzysztof,
>>
>> Sorry about the confusing versions, the complete history is as below:
>>
>> Link to v1: https://lore.kernel.org/all/20250107132907.3521574-1-wangyushan12@huawei.com
>>
>> Link to v2: https://lore.kernel.org/all/20250122065803.3363926-1-wangyushan12@huawei.com/
>>
>> Link to RFC v1: https://lore.kernel.org/all/20251125080542.3721829-1-wangyushan12@huawei.com/
>>
>> Link to RFC v2: https://lore.kernel.org/all/20251217102357.1730573-1-wangyushan12@huawei.com/
>>
>> Link to v1 again (this message): https://lore.kernel.org/all/20260203161843.649417-1-wangyushan12@huawei.com/
>>
>>>
>>> Otherwise, please explain us how can we compare it with `b4 diff` with
>>> previous version?
>>>
>>> Sending something AGAIN as v1 ignoring entire previous submission is
>>> clear no go. Like you are trying till it succeeds. Negative review?
>>> Let's try from v1 this time...
>>>
>>> This is not correct and it should not be my task to find your previous
>>> discussions and decipher this v1.
>>
>> I did spin 2 versions to mainline as the actual v1, the thread was quiet.
>> Then I made a major refactor to it and sent it as RFC, the thread went
>> quiet again but some compile check issues popped up. I spinned 2
>> versions of RFC for the compile issues and removed RFC in this version
>> since no strong objection showed up.
>>
>> Apologize that I broke the rules and any inconvenience caused by it.
>> As there's little discussion in previous patches, is it OK that we start
>> here as v1?
>
> No, it is not okay. Your patchset continues and entire previous feedback
> and history is important. Otherwise why would I review this if I can as
> well ignore it and wait for next year you sending another v1?

Sorry, I will correct the version numbers in next versions, with whole
history and explanations about the mess.

Sincerely apologies, I won't hide the versions again.

Yushan
Re: [PATCH 1/3] soc cache: L3 cache driver for HiSilicon SoC
Posted by wangyushan 5 days, 5 hours ago

On 2/4/2026 1:19 AM, Arnd Bergmann wrote:
> On Tue, Feb 3, 2026, at 17:18, Yushan Wang wrote:
>> The driver will create a file of `/dev/hisi_l3c` on init, mmap
>> operations to it will allocate a memory region that is guaranteed to be
>> placed in L3 cache.
>>
>> The driver also provides unmap() to deallocated the locked memory.
>>
>> The driver also provides an ioctl interface for user to get cache lock
>> information, such as lock restrictions and locked sizes.
>>
>> Signed-off-by: Yushan Wang <wangyushan12@huawei.com>
> Hi Yushan,
>
> Thanks for your submission. Since we are in the last week of
> the merge window, this is not going to be linux-7.0 material,
> but I'll have a quick look for now.

Many thanks for review!
>>  .../userspace-api/ioctl/ioctl-number.rst      |   1 +
>>  MAINTAINERS                                   |   6 +
>>  drivers/soc/hisilicon/Kconfig                 |  11 +
>>  drivers/soc/hisilicon/Makefile                |   2 +
>>  drivers/soc/hisilicon/hisi_soc_l3c.c          | 357 ++++++++++++++++++
>>  include/uapi/misc/hisi_l3c.h                  |  28 ++
> I don't think this should be in drivers/soc/, since I want
> to reserve that for internal drivers without a user visible
> interface other than the soc_device information. (yes, there
> are a few historic counterexamples)
>
> I also don't think this should be a hilicon specific interface,
> if possible. The functionality is not that unusual in the end.

I hesitated about the directory as well, exporting a user aware
interface directly from driver may not be a good practice and I
am hoping for advice :)
The driver itself doesn't provide much more than a mere register configuration which is platform specific, what do you think about a kernel space interface somewhere else which implemented here?
> We had similar concepts using the numactl system calls in the
> part, but I don't think we should do that here because you may
> need the numa interfaces for other purposes as well, and it
> may be confusing to existing callers.
>
> Having a generic madvise() based interface would be great,
> not sure if hardware support for that is common enough for that.
As above, cache lock could be an option for optimization of numactl or madvise(), maybe a more generic interface in kernel space which is speculatively called by other infrastructure is better, and of course extra support of the infrastructures is needed. I can try to propose an interface and maybe an example of the caller in the next version.
>> +	/* Continuous physical memory is required for L3 cache lock. */
>> +	pg = alloc_contig_pages(1 << order, GFP_KERNEL | __GFP_NOWARN | 
>> __GFP_ZERO,
>> +				cpu_to_node(smp_processor_id()), NULL);
> Since this is a user allocation, should that be GFP_USER instead
> of GFP_KERNEL?

Yes, it should be GFP_USER in this version since it is used in userspace.
>> +/* HISI_L3C_INFO: cache lock info for HiSilicon SoC */
>> +#define HISI_L3C_LOCK_INFO	_IOW(0xBB, 1, unsigned long)
> The specification here looks wrong, please see
> Documentation/driver-api/ioctl.rst
>
> I think for your implementation it should be
>
> #define HISI_L3C_LOCK_INFO	_IOR(0xBB, 1, hisi_l3c_lock_info)

Sorry, I will correct that in the next version.
>> +struct hisi_l3c_lock_info {
>> +	__u32 lock_region_num;
>> +	__u64 lock_size;
>> +	__u8 address_alignment;
>> +	__u64 max_lock_size;
>> +	__u64 min_lock_size;
>> +};
> You are leaking kernel data because of the padding in this structure,
> please rearrange the members to avoid padding.
>
> It may be better to use a different interface instead of ioctl(),
> possibly exporting global data in sysfs.

Yes, information through sysfs should do the job.
I will fix that in the next version.

>        Arnd

Thanks,
Yushan