[PATCH v4 2/2] irq/irq-loongarch-ir: Add Redirect irqchip support

Tianyang Zhang posted 2 patches 4 months ago
Only 1 patch received!
[PATCH v4 2/2] irq/irq-loongarch-ir: Add Redirect irqchip support
Posted by Tianyang Zhang 4 months ago
The main function of the redirected interrupt controller is to manage the
redirected-interrupt table, which consists of many redirect entries. When
an MSI interrupt is requested, the driver creates a corresponding redirect
entry that describes the target CPU/vector number and the operating mode
of the interrupt. The redirect interrupt module has an independent cache,
and during interrupt routing it preferentially uses the redirect entries
that hit in this cache. The driver invalidates the cached copy of an entry
via a command queue.

Co-developed-by: Liupu Wang <wangliupu@loongson.cn>
Signed-off-by: Liupu Wang <wangliupu@loongson.cn>
Signed-off-by: Tianyang Zhang <zhangtianyang@loongson.cn>
---
 arch/loongarch/include/asm/cpu-features.h |   1 +
 arch/loongarch/include/asm/cpu.h          |   2 +
 arch/loongarch/include/asm/loongarch.h    |   6 +
 arch/loongarch/kernel/cpu-probe.c         |   2 +
 drivers/irqchip/Makefile                  |   2 +-
 drivers/irqchip/irq-loongarch-avec.c      |  25 +-
 drivers/irqchip/irq-loongarch-ir.c        | 562 ++++++++++++++++++++++
 drivers/irqchip/irq-loongson.h            |  12 +
 include/linux/cpuhotplug.h                |   1 +
 9 files changed, 599 insertions(+), 14 deletions(-)
 create mode 100644 drivers/irqchip/irq-loongarch-ir.c

diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h
index fc83bb32f9f0..03f7e93e81e0 100644
--- a/arch/loongarch/include/asm/cpu-features.h
+++ b/arch/loongarch/include/asm/cpu-features.h
@@ -68,5 +68,6 @@
 #define cpu_has_ptw		cpu_opt(LOONGARCH_CPU_PTW)
 #define cpu_has_lspw		cpu_opt(LOONGARCH_CPU_LSPW)
 #define cpu_has_avecint		cpu_opt(LOONGARCH_CPU_AVECINT)
+#define cpu_has_redirectint	cpu_opt(LOONGARCH_CPU_REDIRECTINT)
 
 #endif /* __ASM_CPU_FEATURES_H */
diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h
index 98cf4d7b4b0a..33cd96e569d8 100644
--- a/arch/loongarch/include/asm/cpu.h
+++ b/arch/loongarch/include/asm/cpu.h
@@ -102,6 +102,7 @@ enum cpu_type_enum {
 #define CPU_FEATURE_PTW			27	/* CPU has hardware page table walker */
 #define CPU_FEATURE_LSPW		28	/* CPU has LSPW (lddir/ldpte instructions) */
 #define CPU_FEATURE_AVECINT		29	/* CPU has AVEC interrupt */
+#define CPU_FEATURE_REDIRECTINT		30      /* CPU has interrupt remap */
 
 #define LOONGARCH_CPU_CPUCFG		BIT_ULL(CPU_FEATURE_CPUCFG)
 #define LOONGARCH_CPU_LAM		BIT_ULL(CPU_FEATURE_LAM)
@@ -133,5 +134,6 @@ enum cpu_type_enum {
 #define LOONGARCH_CPU_PTW		BIT_ULL(CPU_FEATURE_PTW)
 #define LOONGARCH_CPU_LSPW		BIT_ULL(CPU_FEATURE_LSPW)
 #define LOONGARCH_CPU_AVECINT		BIT_ULL(CPU_FEATURE_AVECINT)
+#define LOONGARCH_CPU_REDIRECTINT	BIT_ULL(CPU_FEATURE_REDIRECTINT)
 
 #endif /* _ASM_CPU_H */
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 52651aa0e583..95e06cb6831e 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -1130,6 +1130,7 @@
 #define  IOCSRF_FLATMODE		BIT_ULL(10)
 #define  IOCSRF_VM			BIT_ULL(11)
 #define  IOCSRF_AVEC			BIT_ULL(15)
+#define  IOCSRF_REDIRECTINT		BIT_ULL(16)
 
 #define LOONGARCH_IOCSR_VENDOR		0x10
 
@@ -1189,6 +1190,11 @@
 
 #define LOONGARCH_IOCSR_EXTIOI_NODEMAP_BASE	0x14a0
 #define LOONGARCH_IOCSR_EXTIOI_IPMAP_BASE	0x14c0
+#define LOONGARCH_IOCSR_REDIRECT_CFG		0x15e0
+#define LOONGARCH_IOCSR_REDIRECT_TBR		0x15e8  /* IRT BASE REG */
+#define LOONGARCH_IOCSR_REDIRECT_CQB		0x15f0  /* IRT CACHE QUEUE BASE */
+#define LOONGARCH_IOCSR_REDIRECT_CQH		0x15f8  /* IRT CACHE QUEUE HEAD, 32bit */
+#define LOONGARCH_IOCSR_REDIRECT_CQT		0x15fc  /* IRT CACHE QUEUE TAIL, 32bit */
 #define LOONGARCH_IOCSR_EXTIOI_EN_BASE		0x1600
 #define LOONGARCH_IOCSR_EXTIOI_BOUNCE_BASE	0x1680
 #define LOONGARCH_IOCSR_EXTIOI_ISR_BASE		0x1800
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index fedaa67cde41..543474fd1399 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -289,6 +289,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int
 		c->options |= LOONGARCH_CPU_EIODECODE;
 	if (config & IOCSRF_AVEC)
 		c->options |= LOONGARCH_CPU_AVECINT;
+	if (config & IOCSRF_REDIRECTINT)
+		c->options |= LOONGARCH_CPU_REDIRECTINT;
 	if (config & IOCSRF_VM)
 		c->options |= LOONGARCH_CPU_HYPERVISOR;
 }
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 365bcea9a61f..2bb8618f96d1 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -114,7 +114,7 @@ obj-$(CONFIG_LS1X_IRQ)			+= irq-ls1x.o
 obj-$(CONFIG_TI_SCI_INTR_IRQCHIP)	+= irq-ti-sci-intr.o
 obj-$(CONFIG_TI_SCI_INTA_IRQCHIP)	+= irq-ti-sci-inta.o
 obj-$(CONFIG_TI_PRUSS_INTC)		+= irq-pruss-intc.o
-obj-$(CONFIG_IRQ_LOONGARCH_CPU)		+= irq-loongarch-cpu.o irq-loongarch-avec.o
+obj-$(CONFIG_IRQ_LOONGARCH_CPU)		+= irq-loongarch-cpu.o irq-loongarch-avec.o irq-loongarch-ir.o
 obj-$(CONFIG_LOONGSON_LIOINTC)		+= irq-loongson-liointc.o
 obj-$(CONFIG_LOONGSON_EIOINTC)		+= irq-loongson-eiointc.o
 obj-$(CONFIG_LOONGSON_HTPIC)		+= irq-loongson-htpic.o
diff --git a/drivers/irqchip/irq-loongarch-avec.c b/drivers/irqchip/irq-loongarch-avec.c
index 80e55955a29f..4e114c8ca3d0 100644
--- a/drivers/irqchip/irq-loongarch-avec.c
+++ b/drivers/irqchip/irq-loongarch-avec.c
@@ -24,7 +24,6 @@
 #define VECTORS_PER_REG		64
 #define IRR_VECTOR_MASK		0xffUL
 #define IRR_INVALID_MASK	0x80000000UL
-#define AVEC_MSG_OFFSET		0x100000
 
 #ifdef CONFIG_SMP
 struct pending_list {
@@ -47,15 +46,6 @@ struct avecintc_chip {
 
 static struct avecintc_chip loongarch_avec;
 
-struct avecintc_data {
-	struct list_head	entry;
-	unsigned int		cpu;
-	unsigned int		vec;
-	unsigned int		prev_cpu;
-	unsigned int		prev_vec;
-	unsigned int		moving;
-};
-
 static inline void avecintc_enable(void)
 {
 	u64 value;
@@ -85,7 +75,7 @@ static inline void pending_list_init(int cpu)
 	INIT_LIST_HEAD(&plist->head);
 }
 
-static void avecintc_sync(struct avecintc_data *adata)
+void avecintc_sync(struct avecintc_data *adata)
 {
 	struct pending_list *plist;
 
@@ -109,7 +99,12 @@ static int avecintc_set_affinity(struct irq_data *data, const struct cpumask *de
 			return -EBUSY;
 
 		if (cpu_online(adata->cpu) && cpumask_test_cpu(adata->cpu, dest))
-			return 0;
+			/*
+			 * The new affinity configuration has taken effect,
+			 * and returning IRQ_SET_MASK_OK_DONE here indicates that no further
+			 * changes need to be made in the subsequent process
+			 */
+			return IRQ_SET_MASK_OK_DONE;
 
 		cpumask_and(&intersect_mask, dest, cpu_online_mask);
 
@@ -121,7 +116,8 @@ static int avecintc_set_affinity(struct irq_data *data, const struct cpumask *de
 		adata->cpu = cpu;
 		adata->vec = vector;
 		per_cpu_ptr(irq_map, adata->cpu)[adata->vec] = irq_data_to_desc(data);
-		avecintc_sync(adata);
+		if (!cpu_has_redirectint)
+			avecintc_sync(adata);
 	}
 
 	irq_data_update_effective_affinity(data, cpumask_of(cpu));
@@ -412,6 +408,9 @@ static int __init pch_msi_parse_madt(union acpi_subtable_headers *header,
 
 static inline int __init acpi_cascade_irqdomain_init(void)
 {
+	if (cpu_has_redirectint)
+		return redirect_acpi_init(loongarch_avec.domain);
+
 	return acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 1);
 }
 
diff --git a/drivers/irqchip/irq-loongarch-ir.c b/drivers/irqchip/irq-loongarch-ir.c
new file mode 100644
index 000000000000..ae42ec5028d2
--- /dev/null
+++ b/drivers/irqchip/irq-loongarch-ir.c
@@ -0,0 +1,562 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Loongson Technologies, Inc.
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+
+#include <asm/irq.h>
+#include <asm/loongarch.h>
+#include <asm/setup.h>
+#include <larchintrin.h>
+
+#include "irq-loongson.h"
+#include "irq-msi-lib.h"
+
+#define IRD_ENTRIES			65536
+
+/* redirect entry size 128bits */
+#define IRD_PAGE_ORDER			(20 - PAGE_SHIFT)
+
+/* irt cache invalid queue */
+#define	INVALID_QUEUE_SIZE		4096
+
+#define INVALID_QUEUE_PAGE_ORDER	(16 - PAGE_SHIFT)
+
+#define GPID_ADDR_MASK			0x3ffffffffffULL
+#define GPID_ADDR_SHIFT			6
+
+#define CQB_SIZE_SHIFT			0
+#define CQB_SIZE_MASK			0xf
+#define CQB_ADDR_SHIFT			12
+#define CQB_ADDR_MASK			(0xfffffffffULL)
+
+#define CFG_DISABLE_IDLE		2
+#define INVALID_INDEX			0
+
+#define MAX_IR_ENGINES			16
+
+struct irq_domain *redirect_domain;
+
+struct redirect_entry {
+	struct  {
+		__u64	valid	: 1,
+			res1	: 5,
+			gpid	: 42,
+			res2	: 8,
+			vector	: 8;
+	}	lo;
+	__u64	hi;
+};
+
+struct redirect_gpid {
+	u64	pir[4];      /* Pending interrupt requested */
+	u8	en	: 1, /* doorbell */
+		res0	: 7;
+	u8	irqnum;
+	u16	res1;
+	u32	dst;
+	u32	rsvd[6];
+} __aligned(64);
+
+struct redirect_table {
+	int			node;
+	struct redirect_entry	*table;
+	unsigned long		*bitmap;
+	unsigned int		nr_ird;
+	struct page		*page;
+	raw_spinlock_t		lock;
+};
+
+struct redirect_item {
+	int			index;
+	struct redirect_entry	*entry;
+	struct redirect_gpid	*gpid;
+	struct redirect_table	*table;
+};
+
+struct redirect_queue {
+	int		node;
+	u64		base;
+	u32		max_size;
+	int		head;
+	int		tail;
+	struct page	*page;
+	raw_spinlock_t	lock;
+};
+
+struct irde_desc {
+	struct	redirect_table	ird_table;
+	struct	redirect_queue	inv_queue;
+	int	finish;
+};
+
+struct irde_inv_cmd {
+	union {
+		__u64	cmd_info;
+		struct {
+			__u64	res1		: 4,
+				type		: 1,
+				need_notice	: 1,
+				pad		: 2,
+				index		: 16,
+				pad2		: 40;
+		}	index;
+	};
+	__u64		notice_addr;
+};
+
+static struct irde_desc irde_descs[MAX_IR_ENGINES];
+static phys_addr_t msi_base_addr;
+static phys_addr_t redirect_reg_base = 0x1fe00000;
+
+#define REDIRECT_REG_BASE(reg, node) \
+	(UNCACHE_BASE | redirect_reg_base | (u64)(node) << NODE_ADDRSPACE_SHIFT | (reg))
+#define	redirect_reg_queue_head(node)	REDIRECT_REG_BASE(LOONGARCH_IOCSR_REDIRECT_CQH, (node))
+#define	redirect_reg_queue_tail(node)	REDIRECT_REG_BASE(LOONGARCH_IOCSR_REDIRECT_CQT, (node))
+#define read_queue_head(node)		(*((u32 *)(redirect_reg_queue_head(node))))
+#define read_queue_tail(node)		(*((u32 *)(redirect_reg_queue_tail(node))))
+#define write_queue_tail(node, val)	(*((u32 *)(redirect_reg_queue_tail(node))) = (val))
+
+static inline bool invalid_queue_is_full(int node, u32 *tail)
+{
+	u32 head = read_queue_head(node);
+
+	*tail = read_queue_tail(node);
+
+	return head == ((*tail + 1) % INVALID_QUEUE_SIZE);
+}
+
+static void invalid_enqueue(struct redirect_queue *rqueue, struct irde_inv_cmd *cmd)
+{
+	struct irde_inv_cmd *inv_addr;
+	u32 tail;
+
+	guard(raw_spinlock_irqsave)(&rqueue->lock);
+
+	while (invalid_queue_is_full(rqueue->node, &tail))
+		cpu_relax();
+
+	inv_addr = (struct irde_inv_cmd *)(rqueue->base + tail * sizeof(struct irde_inv_cmd));
+	memcpy(inv_addr, cmd, sizeof(struct irde_inv_cmd));
+	tail = (tail + 1) % INVALID_QUEUE_SIZE;
+
+	/*
+	 * the barrier ensure that the previously written data is visible
+	 * before updating the tail register
+	 */
+	wmb();
+
+	write_queue_tail(rqueue->node, tail);
+}
+
+static void irde_invlid_entry_node(struct redirect_item *item)
+{
+	struct redirect_queue *rqueue;
+	struct irde_inv_cmd cmd;
+	volatile u64 raddr = 0;
+	int node = item->table->node;
+
+	rqueue = &(irde_descs[node].inv_queue);
+	cmd.cmd_info = 0;
+	cmd.index.type = INVALID_INDEX;
+	cmd.index.need_notice = 1;
+	cmd.index.index = item->index;
+	cmd.notice_addr = (u64)(__pa(&raddr));
+
+	invalid_enqueue(rqueue, &cmd);
+
+	while (!raddr)
+		cpu_relax();
+
+}
+
+static inline struct avecintc_data *irq_data_get_avec_data(struct irq_data *data)
+{
+	return data->parent_data->chip_data;
+}
+
+static int redirect_table_alloc(struct redirect_item *item, struct redirect_table *ird_table)
+{
+	int index;
+
+	guard(raw_spinlock_irqsave)(&ird_table->lock);
+
+	index = find_first_zero_bit(ird_table->bitmap, IRD_ENTRIES);
+	if (index >= IRD_ENTRIES) {
+		pr_err("No redirect entry to use\n");
+		return -ENOMEM;
+	}
+
+	__set_bit(index, ird_table->bitmap);
+
+	item->index = index;
+	item->entry = &ird_table->table[index];
+	item->table = ird_table;
+
+	return 0;
+}
+
+static void redirect_table_free(struct redirect_item *item)
+{
+	struct redirect_table *ird_table;
+	struct redirect_entry *entry;
+
+	ird_table = item->table;
+
+	entry = item->entry;
+	memset(entry, 0, sizeof(struct redirect_entry));
+
+	scoped_guard(raw_spinlock_irqsave, &ird_table->lock)
+		bitmap_release_region(ird_table->bitmap, item->index, 0);
+
+	kfree(item->gpid);
+
+	irde_invlid_entry_node(item);
+}
+
+static inline void redirect_domain_prepare_entry(struct redirect_item *item,
+						 struct avecintc_data *adata)
+{
+	struct redirect_entry *entry = item->entry;
+
+	item->gpid->en = 1;
+	item->gpid->irqnum = adata->vec;
+	item->gpid->dst = adata->cpu;
+
+	entry->lo.valid = 1;
+	entry->lo.gpid = ((long)item->gpid >> GPID_ADDR_SHIFT) & (GPID_ADDR_MASK);
+	entry->lo.vector = 0xff;
+}
+
+static int redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force)
+{
+	struct redirect_item *item = data->chip_data;
+	struct avecintc_data *adata;
+	int ret;
+
+	ret = irq_chip_set_affinity_parent(data, dest, force);
+	if (ret == IRQ_SET_MASK_OK_DONE) {
+		return ret;
+	} else if (ret) {
+		pr_err("IRDE:set_affinity error %d\n", ret);
+		return ret;
+	}
+
+	adata = irq_data_get_avec_data(data);
+	redirect_domain_prepare_entry(item, adata);
+	irde_invlid_entry_node(item);
+	avecintc_sync(adata);
+
+	return IRQ_SET_MASK_OK;
+}
+
+static void redirect_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
+{
+	struct redirect_item *item;
+
+	item = irq_data_get_irq_chip_data(d);
+	msg->address_lo = (msi_base_addr | 1 << 2 | ((item->index & 0xffff) << 4));
+	msg->address_hi = 0x0;
+	msg->data = 0x0;
+}
+
+static struct irq_chip loongarch_redirect_chip = {
+	.name			= "REDIRECT",
+	.irq_ack		= irq_chip_ack_parent,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_set_affinity	= redirect_set_affinity,
+	.irq_compose_msi_msg	= redirect_compose_msi_msg,
+};
+
+static void redirect_free_resources(struct irq_domain *domain, unsigned int virq,
+				    unsigned int nr_irqs)
+{
+	for (int i = 0; i < nr_irqs; i++) {
+		struct irq_data *irq_data;
+
+		irq_data = irq_domain_get_irq_data(domain, virq  + i);
+		if (irq_data && irq_data->chip_data) {
+			struct redirect_item *item;
+
+			item = irq_data->chip_data;
+			redirect_table_free(item);
+			kfree(item);
+		}
+	}
+}
+
+static int redirect_domain_alloc(struct irq_domain *domain, unsigned int virq,
+			unsigned int nr_irqs, void *arg)
+{
+	struct redirect_table *ird_table;
+	struct avecintc_data *avec_data;
+	struct irq_data *irq_data;
+	msi_alloc_info_t *info;
+	int ret, i, node;
+
+	info = (msi_alloc_info_t *)arg;
+	node = dev_to_node(info->desc->dev);
+	ird_table = &irde_descs[node].ird_table;
+
+	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < nr_irqs; i++) {
+		struct redirect_item *item;
+
+		item = kzalloc(sizeof(struct redirect_item), GFP_KERNEL);
+		if (!item) {
+			pr_err("Alloc redirect descriptor failed\n");
+			goto out_free_resources;
+		}
+
+		irq_data = irq_domain_get_irq_data(domain, virq + i);
+
+		avec_data = irq_data_get_avec_data(irq_data);
+		ret = redirect_table_alloc(item, ird_table);
+		if (ret) {
+			pr_err("Alloc redirect table entry failed\n");
+			goto out_free_resources;
+		}
+
+		item->gpid = kzalloc_node(sizeof(struct redirect_gpid), GFP_KERNEL, node);
+		if (!item->gpid) {
+			pr_err("Alloc redirect GPID failed\n");
+			goto out_free_resources;
+		}
+
+		irq_data->chip_data = item;
+		irq_data->chip = &loongarch_redirect_chip;
+		redirect_domain_prepare_entry(item, avec_data);
+	}
+	return 0;
+
+out_free_resources:
+	redirect_free_resources(domain, virq, nr_irqs);
+	irq_domain_free_irqs_common(domain, virq, nr_irqs);
+
+	return -ENOMEM;
+}
+
+static void redirect_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs)
+{
+	redirect_free_resources(domain, virq, nr_irqs);
+	return irq_domain_free_irqs_common(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops redirect_domain_ops = {
+	.alloc		= redirect_domain_alloc,
+	.free		= redirect_domain_free,
+	.select		= msi_lib_irq_domain_select,
+};
+
+static int redirect_queue_init(int node)
+{
+	struct redirect_queue *rqueue = &(irde_descs[node].inv_queue);
+	struct page *pages;
+
+	pages = alloc_pages_node(0, GFP_KERNEL | __GFP_ZERO, INVALID_QUEUE_PAGE_ORDER);
+	if (!pages) {
+		pr_err("Node [%d] Invalid Queue alloc pages failed!\n", node);
+		return -ENOMEM;
+	}
+
+	rqueue->page = pages;
+	rqueue->base = (u64)page_address(pages);
+	rqueue->max_size = INVALID_QUEUE_SIZE;
+	rqueue->head = 0;
+	rqueue->tail = 0;
+	rqueue->node = node;
+	raw_spin_lock_init(&rqueue->lock);
+
+	iocsr_write32(0, LOONGARCH_IOCSR_REDIRECT_CQH);
+	iocsr_write32(0, LOONGARCH_IOCSR_REDIRECT_CQT);
+	iocsr_write64(((rqueue->base & (CQB_ADDR_MASK << CQB_ADDR_SHIFT)) |
+				(CQB_SIZE_MASK << CQB_SIZE_SHIFT)), LOONGARCH_IOCSR_REDIRECT_CQB);
+	return 0;
+}
+
+static int redirect_table_init(int node)
+{
+	struct redirect_table *ird_table = &(irde_descs[node].ird_table);
+	unsigned long *bitmap;
+	struct page *pages;
+	int ret;
+
+	pages = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, IRD_PAGE_ORDER);
+	if (!pages) {
+		pr_err("Node [%d] redirect table alloc pages failed!\n", node);
+		return -ENOMEM;
+	}
+	ird_table->page = pages;
+	ird_table->table = page_address(pages);
+
+	bitmap = bitmap_zalloc(IRD_ENTRIES, GFP_KERNEL);
+	if (!bitmap) {
+		pr_err("Node [%d] redirect table bitmap alloc pages failed!\n", node);
+		ret = -ENOMEM;
+		goto free_pages;
+	}
+
+	ird_table->bitmap = bitmap;
+	ird_table->nr_ird = IRD_ENTRIES;
+	ird_table->node = node;
+
+	raw_spin_lock_init(&ird_table->lock);
+
+	if (redirect_queue_init(node)) {
+		ret = -EINVAL;
+		goto free_bitmap;
+	}
+
+	iocsr_write64(CFG_DISABLE_IDLE, LOONGARCH_IOCSR_REDIRECT_CFG);
+	iocsr_write64(__pa(ird_table->table), LOONGARCH_IOCSR_REDIRECT_TBR);
+
+	return 0;
+
+free_bitmap:
+	bitmap_free(bitmap);
+free_pages:
+	__free_pages(pages, IRD_PAGE_ORDER);
+	return ret;
+}
+
+static void redirect_table_fini(int node)
+{
+	struct redirect_table *ird_table = &(irde_descs[node].ird_table);
+	struct redirect_queue *rqueue = &(irde_descs[node].inv_queue);
+
+	if (ird_table->page) {
+		__free_pages(ird_table->page, IRD_PAGE_ORDER);
+		ird_table->table = NULL;
+		ird_table->page = NULL;
+	}
+
+	if (ird_table->bitmap) {
+		bitmap_free(ird_table->bitmap);
+		ird_table->bitmap = NULL;
+	}
+
+	if (rqueue->page) {
+		__free_pages(rqueue->page, INVALID_QUEUE_PAGE_ORDER);
+		rqueue->page = NULL;
+		rqueue->base = 0;
+	}
+
+	iocsr_write64(0, LOONGARCH_IOCSR_REDIRECT_CQB);
+	iocsr_write64(0, LOONGARCH_IOCSR_REDIRECT_TBR);
+}
+
+static int redirect_cpu_online(unsigned int cpu)
+{
+	int ret, node = cpu_to_node(cpu);
+
+	if (cpu != cpumask_first(cpumask_of_node(node)))
+		return 0;
+
+	if (irde_descs[node].finish)
+		return 0;
+
+	ret = redirect_table_init(node);
+	if (ret) {
+		redirect_table_fini(node);
+		return -EINVAL;
+	}
+
+	irde_descs[node].finish = 1;
+	return 0;
+}
+
+#ifdef	CONFIG_ACPI
+static int __init redirect_reg_base_init(void)
+{
+	acpi_status status;
+	uint64_t addr;
+
+	if (acpi_disabled)
+		return 0;
+
+	status = acpi_evaluate_integer(NULL, "\\_SB.NO00", NULL, &addr);
+	if (ACPI_FAILURE(status))
+		pr_info("redirect_iocsr_base used default 0x1fe00000\n");
+	else
+		redirect_reg_base = addr;
+
+	return 0;
+}
+subsys_initcall_sync(redirect_reg_base_init);
+
+static int __init pch_msi_parse_madt(union acpi_subtable_headers *header,
+		const unsigned long end)
+{
+	struct acpi_madt_msi_pic *pchmsi_entry = (struct acpi_madt_msi_pic *)header;
+
+	msi_base_addr = pchmsi_entry->msg_address - AVEC_MSG_OFFSET;
+
+	return pch_msi_acpi_init_avec(redirect_domain);
+}
+
+static int __init acpi_cascade_irqdomain_init(void)
+{
+	return acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 1);
+}
+
+int __init redirect_acpi_init(struct irq_domain *parent)
+{
+	struct fwnode_handle *fwnode;
+	struct irq_domain *domain;
+	int ret;
+
+	fwnode = irq_domain_alloc_named_fwnode("redirect");
+	if (!fwnode) {
+		pr_err("Unable to alloc redirect domain handle\n");
+		goto fail;
+	}
+
+	domain = irq_domain_create_hierarchy(parent, 0, IRD_ENTRIES, fwnode,
+					     &redirect_domain_ops, irde_descs);
+	if (!domain) {
+		pr_err("Unable to alloc redirect domain\n");
+		goto out_free_fwnode;
+	}
+
+	redirect_domain = domain;
+
+	ret = redirect_table_init(0);
+	if (ret)
+		goto out_free_table;
+
+	ret = acpi_cascade_irqdomain_init();
+	if (ret < 0) {
+		pr_err("Failed to cascade IRQ domain, ret=%d\n", ret);
+		goto out_free_table;
+	}
+
+	cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_REDIRECT_STARTING,
+				  "irqchip/loongarch/redirect:starting",
+				  redirect_cpu_online, NULL);
+
+	pr_info("loongarch irq redirect modules init succeeded\n");
+	return 0;
+
+out_free_table:
+	redirect_table_fini(0);
+	irq_domain_remove(redirect_domain);
+	redirect_domain = NULL;
+out_free_fwnode:
+	irq_domain_free_fwnode(fwnode);
+fail:
+	return -EINVAL;
+}
+#endif
diff --git a/drivers/irqchip/irq-loongson.h b/drivers/irqchip/irq-loongson.h
index 11fa138d1f44..05ad40ffb62b 100644
--- a/drivers/irqchip/irq-loongson.h
+++ b/drivers/irqchip/irq-loongson.h
@@ -5,6 +5,15 @@
 
 #ifndef _DRIVERS_IRQCHIP_IRQ_LOONGSON_H
 #define _DRIVERS_IRQCHIP_IRQ_LOONGSON_H
+#define AVEC_MSG_OFFSET		0x100000
+struct avecintc_data {
+	struct list_head        entry;
+	unsigned int            cpu;
+	unsigned int            vec;
+	unsigned int            prev_cpu;
+	unsigned int            prev_vec;
+	unsigned int            moving;
+};
 
 int find_pch_pic(u32 gsi);
 
@@ -24,4 +33,7 @@ int pch_msi_acpi_init(struct irq_domain *parent,
 					struct acpi_madt_msi_pic *acpi_pchmsi);
 int pch_msi_acpi_init_avec(struct irq_domain *parent);
 
+int redirect_acpi_init(struct irq_domain *parent);
+
+void avecintc_sync(struct avecintc_data *adata);
 #endif /* _DRIVERS_IRQCHIP_IRQ_LOONGSON_H */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 1987400000b4..6a4ff072db42 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -145,6 +145,7 @@ enum cpuhp_state {
 	CPUHP_AP_IRQ_MIPS_GIC_STARTING,
 	CPUHP_AP_IRQ_EIOINTC_STARTING,
 	CPUHP_AP_IRQ_AVECINTC_STARTING,
+	CPUHP_AP_IRQ_REDIRECT_STARTING,
 	CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
 	CPUHP_AP_IRQ_THEAD_ACLINT_SSWI_STARTING,
 	CPUHP_AP_IRQ_RISCV_IMSIC_STARTING,
-- 
2.41.0
Re: [PATCH v4 2/2] irq/irq-loongarch-ir: Add Redirect irqchip support
Posted by Thomas Gleixner 3 months, 4 weeks ago
On Tue, Jun 10 2025 at 19:42, Tianyang Zhang wrote:
>  		if (cpu_online(adata->cpu) && cpumask_test_cpu(adata->cpu, dest))
> -			return 0;
> +			/*
> +			 * The new affinity configuration has taken effect,
> +			 * and returning IRQ_SET_MASK_OK_DONE here indicates that no further
> +			 * changes need to be made in the subsequent process

This is not what IRQ_SET_MASK_OK_DONE is about. The documentation
clearly says:

 * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. Special code to
 *                        support stacked irqchips, which indicates skipping
 *                        all descendant irqchips.

It's not about further changes. It's about preventing invoking
set_affinity() callbacks down the hierarchy.
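
For illustration, a minimal sketch (not the actual core code) of the
stacked-chip pattern which consumes that return value from its parent:

	static int child_set_affinity(struct irq_data *data,
				      const struct cpumask *dest, bool force)
	{
		int ret = irq_chip_set_affinity_parent(data, dest, force);

		/*
		 * IRQ_SET_MASK_OK_DONE from the parent means: skip all
		 * descendant irqchips, i.e. do not program anything at
		 * this level.
		 */
		if (ret == IRQ_SET_MASK_OK_DONE || ret < 0)
			return ret;

		/* Program this level's routing entry here. */
		return IRQ_SET_MASK_OK;
	}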

And you still fail to explain why this change is correct for the
existing code. That explanation wants to be in the changelog of the
separate patch doing this change.

And then you can spare the comment, which is btw. also violating the
bracket rules in the tip maintainers documentation.


> +			 */
> +			return IRQ_SET_MASK_OK_DONE;
>  
>  		cpumask_and(&intersect_mask, dest, cpu_online_mask);
>  
> @@ -121,7 +116,8 @@ static int avecintc_set_affinity(struct irq_data *data, const struct cpumask *de
>  		adata->cpu = cpu;
>  		adata->vec = vector;
>  		per_cpu_ptr(irq_map, adata->cpu)[adata->vec] = irq_data_to_desc(data);
> -		avecintc_sync(adata);
> +		if (!cpu_has_redirectint)
> +			avecintc_sync(adata);
>  	}
>  
>  	irq_data_update_effective_affinity(data, cpumask_of(cpu));
> @@ -412,6 +408,9 @@ static int __init pch_msi_parse_madt(union acpi_subtable_headers *header,
>  
>  static inline int __init acpi_cascade_irqdomain_init(void)
>  {
> +	if (cpu_has_redirectint)
> +		return redirect_acpi_init(loongarch_avec.domain);
> +
>  	return acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 1);
>  }
>  
> diff --git a/drivers/irqchip/irq-loongarch-ir.c b/drivers/irqchip/irq-loongarch-ir.c
> new file mode 100644
> index 000000000000..ae42ec5028d2
> --- /dev/null
> +++ b/drivers/irqchip/irq-loongarch-ir.c
> @@ -0,0 +1,562 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2020 Loongson Technologies, Inc.
> + */
> +
> +#include <linux/cpuhotplug.h>
> +#include <linux/init.h>
> +#include <linux/interrupt.h>
> +#include <linux/kernel.h>
> +#include <linux/irq.h>
> +#include <linux/irqchip.h>
> +#include <linux/irqdomain.h>
> +#include <linux/spinlock.h>
> +#include <linux/msi.h>
> +
> +#include <asm/irq.h>
> +#include <asm/loongarch.h>
> +#include <asm/setup.h>
> +#include <larchintrin.h>
> +
> +#include "irq-loongson.h"
> +#include "irq-msi-lib.h"
> +
> +#define IRD_ENTRIES			65536
> +
> +/* redirect entry size 128bits */
> +#define IRD_PAGE_ORDER			(20 - PAGE_SHIFT)
> +
> +/* irt cache invalid queue */
> +#define	INVALID_QUEUE_SIZE		4096

Use SPACE after #define, not TAB. All over the place...

> +#define INVALID_QUEUE_PAGE_ORDER	(16 - PAGE_SHIFT)

I'm pretty sure that the above magic numbers have dependencies in some
way, right? So why is it not expressed in a way which makes this obvious? 

These magic numbers are just incomprehensible as they make the reader
guess what this is about. Here is my (probably not so) wild guess:

#define IRD_ENTRY_SIZE			16

#define IRD_ENTRIES			65536
#define IRD_PAGE_ORDER			get_order(IRD_ENTRIES * IRD_ENTRY_SIZE)

#define	INVALID_QUEUE_SIZE		4096
#define IRD_INVALID_QUEUE_PAGE_ORDER	get_order(INVALID_QUEUE_SIZE * IRD_ENTRY_SIZE)

No?

> +#define GPID_ADDR_MASK			0x3ffffffffffULL

GENMASK()

> +#define GPID_ADDR_SHIFT			6
> +
> +#define CQB_SIZE_SHIFT			0
> +#define CQB_SIZE_MASK			0xf
> +#define CQB_ADDR_SHIFT			12
> +#define CQB_ADDR_MASK			(0xfffffffffULL)

GENMASK()

> +#define CFG_DISABLE_IDLE		2
> +#define INVALID_INDEX			0
> +
> +#define MAX_IR_ENGINES			16


> +struct redirect_gpid {
> +	u64	pir[4];      /* Pending interrupt requested */
> +	u8	en	: 1, /* doorbell */

Use C++ style tail comments for this as documented. Do I really have
to point out every single item in the documentation explicitly, or can
you just read all of it and follow the guidelines?

> +struct irde_desc {
> +	struct	redirect_table	ird_table;
> +	struct	redirect_queue	inv_queue;
> +	int	finish;

Groan.

"Struct declarations should align the struct member names in a tabular fashion:

 struct bar_order {
        unsigned int    guest_id;
        int             ordered_item;
        struct menu     *menu;
 };"

It clearly says to align the struct member names, no?

Otherwise the example would be:

 struct bar_order {
        unsigned int    guest_id;
        int             ordered_item;
        struct 		menu     	*menu;
 };

which is unreadable garbage obviously.

> +};

> +static void invalid_enqueue(struct redirect_queue *rqueue, struct irde_inv_cmd *cmd)
> +{
> +	struct irde_inv_cmd *inv_addr;
> +	u32 tail;
> +
> +	guard(raw_spinlock_irqsave)(&rqueue->lock);
> +
> +	while (invalid_queue_is_full(rqueue->node, &tail))
> +		cpu_relax();
> +
> +	inv_addr = (struct irde_inv_cmd *)(rqueue->base + tail * sizeof(struct irde_inv_cmd));
> +	memcpy(inv_addr, cmd, sizeof(struct irde_inv_cmd));

Seriously?

struct redirect_queue {
	int				node;
        union {
 		u64			base;
                struct irde_inv_cmd	*cmds;
        };
	u32				max_size;
        ...
};

and then this becomes

    	memcpy(&rqueue->cmds[tail], cmd, sizeof(*cmd));

That's too comprehensible, right?

> +	tail = (tail + 1) % INVALID_QUEUE_SIZE;

Why is this before the barrier? The barrier does not do anything about
this and you can simplify this. See below.

> +	/*
> +	 * the barrier ensure that the previously written data is visible

This barrier ensures .....

> +	 * before updating the tail register

And as there is no rmb() counterpart you want to explain that this is
serializing against the hardware.

> +	 */
> +	wmb();
> +
> +	write_queue_tail(rqueue->node, tail);

	write_queue_tail(rqueue->node, (tail + 1) & INVALID_QUEUE_MASK);

No?

> +}
> +
> +static void irde_invlid_entry_node(struct redirect_item *item)
> +{
> +	struct redirect_queue *rqueue;
> +	struct irde_inv_cmd cmd;
> +	volatile u64 raddr = 0;

No. See Documentation/process/volatile-considered-harmful.rst

> +static void redirect_table_free(struct redirect_item *item)
> +{
> +	struct redirect_table *ird_table;
> +	struct redirect_entry *entry;
> +
> +	ird_table = item->table;
> +
> +	entry = item->entry;
> +	memset(entry, 0, sizeof(struct redirect_entry));

        memset(entry, 0, sizeof(*entry));

It's obvious why using sizeof(type) is a bad idea.

> +	scoped_guard(raw_spinlock_irqsave, &ird_table->lock)

raw_spinlock_irq as this code can never be invoked from an interrupt
disabled region.

> +		bitmap_release_region(ird_table->bitmap, item->index, 0);
> +
> +	kfree(item->gpid);
> +
> +	irde_invlid_entry_node(item);
> +}

> +static inline void redirect_domain_prepare_entry(struct redirect_item *item,
> +						 struct avecintc_data *adata)
> +{
> +	struct redirect_entry *entry = item->entry;
> +
> +	item->gpid->en = 1;
> +	item->gpid->irqnum = adata->vec;
> +	item->gpid->dst = adata->cpu;
> +
> +	entry->lo.valid = 1;
> +	entry->lo.gpid = ((long)item->gpid >> GPID_ADDR_SHIFT) & (GPID_ADDR_MASK);

Hardware addresses are type unsigned long and not long.

> +	entry->lo.vector = 0xff;
> +}

> +static void redirect_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
> +{
> +	struct redirect_item *item;
> +
> +	item = irq_data_get_irq_chip_data(d);

Just move the initialization into the declaration line.

> +	msg->address_lo = (msi_base_addr | 1 << 2 | ((item->index & 0xffff) << 4));
> +	msg->address_hi = 0x0;
> +	msg->data = 0x0;
> +}

> +static void redirect_free_resources(struct irq_domain *domain, unsigned int virq,
> +				    unsigned int nr_irqs)
> +{
> +	for (int i = 0; i < nr_irqs; i++) {
> +		struct irq_data *irq_data;
> +
> +		irq_data = irq_domain_get_irq_data(domain, virq  + i);

Ditto.

> +		if (irq_data && irq_data->chip_data) {
> +			struct redirect_item *item;
> +
> +			item = irq_data->chip_data;

Same and all over the place.

> +			redirect_table_free(item);
> +			kfree(item);
> +		}
> +	}
> +}
> +
> +static int redirect_domain_alloc(struct irq_domain *domain, unsigned int virq,
> +			unsigned int nr_irqs, void *arg)

I asked you before to align the arguments of the second line properly
and according to the documentation.

> +{
> +	struct redirect_table *ird_table;
> +	struct avecintc_data *avec_data;
> +	struct irq_data *irq_data;
> +	msi_alloc_info_t *info;
> +	int ret, i, node;
> +
> +	info = (msi_alloc_info_t *)arg;

What's that type cast for?

> +	node = dev_to_node(info->desc->dev);
> +	ird_table = &irde_descs[node].ird_table;
> +
> +	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
> +	if (ret < 0)
> +		return ret;
> +
> +	for (i = 0; i < nr_irqs; i++) {
> +		struct redirect_item *item;
> +
> +		item = kzalloc(sizeof(struct redirect_item), GFP_KERNEL);
> +		if (!item) {
> +			pr_err("Alloc redirect descriptor failed\n");
> +			goto out_free_resources;
> +		}
> +
> +		irq_data = irq_domain_get_irq_data(domain, virq + i);
> +
> +		avec_data = irq_data_get_avec_data(irq_data);

Neither irq_data nor avec_data are used here and only required way
down. Can you structure your code so it makes sense?

> +		ret = redirect_table_alloc(item, ird_table);
> +		if (ret) {
> +			pr_err("Alloc redirect table entry failed\n");
> +			goto out_free_resources;
> +		}
> +
> +		item->gpid = kzalloc_node(sizeof(struct redirect_gpid), GFP_KERNEL, node);
> +		if (!item->gpid) {
> +			pr_err("Alloc redirect GPID failed\n");
> +			goto out_free_resources;
> +		}

Why do you need this extra allocation here instead of simply embedding
gpid into item?

> +		irq_data->chip_data = item;
> +		irq_data->chip = &loongarch_redirect_chip;
> +		redirect_domain_prepare_entry(item, avec_data);
> +	}
> +	return 0;

> +	iocsr_write64(((rqueue->base & (CQB_ADDR_MASK << CQB_ADDR_SHIFT)) |
> +				(CQB_SIZE_MASK << CQB_SIZE_SHIFT)), LOONGARCH_IOCSR_REDIRECT_CQB);

Align second line properly.

> +	return 0;
> +}
> +
> +static int redirect_table_init(int node)
> +{
> +	struct redirect_table *ird_table = &(irde_descs[node].ird_table);

Remove the pointless brackets.

> +	unsigned long *bitmap;
> +	struct page *pages;
> +	int ret;
> +
> +	pages = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, IRD_PAGE_ORDER);
> +	if (!pages) {
> +		pr_err("Node [%d] redirect table alloc pages failed!\n", node);
> +		return -ENOMEM;
> +	}
> +	ird_table->page = pages;
> +	ird_table->table = page_address(pages);
> +
> +	bitmap = bitmap_zalloc(IRD_ENTRIES, GFP_KERNEL);
> +	if (!bitmap) {
> +		pr_err("Node [%d] redirect table bitmap alloc pages failed!\n", node);
> +		ret = -ENOMEM;
> +		goto free_pages;
> +	}
> +
> +	ird_table->bitmap = bitmap;
> +	ird_table->nr_ird = IRD_ENTRIES;
> +	ird_table->node = node;
> +
> +	raw_spin_lock_init(&ird_table->lock);
> +
> +	if (redirect_queue_init(node)) {
> +		ret = -EINVAL;
> +		goto free_bitmap;

So redirect_queue_init() returns -ENOMEM which is then converted to
-EINVAL here. That makes absolutely no sense at all.

Neither makes the 'ret' variable sense because all failures should
return -ENOMEM and therefore you can make redirect_queue_init() return
bool (true on success) and return -ENOMEM in the failure path.

No?

> +static void redirect_table_fini(int node)
> +{
> +	struct redirect_table *ird_table = &(irde_descs[node].ird_table);
> +	struct redirect_queue *rqueue = &(irde_descs[node].inv_queue);

No brackets. They have no value and just disturb reading.

> +static int redirect_cpu_online(unsigned int cpu)
> +{
> +	int ret, node = cpu_to_node(cpu);
> +
> +	if (cpu != cpumask_first(cpumask_of_node(node)))
> +		return 0;
> +
> +	if (irde_descs[node].finish)
> +		return 0;

What's this finish thing about?

Neither the CPU mask check nor this finish hack is required:

        if (irde_descs[node].pages)
        	return 0;

covers all of it, no?

> +	ret = redirect_table_init(node);
> +	if (ret) {
> +		redirect_table_fini(node);

What is this for? You already mopped up the mess in the failure path of
redirect_table_init(), so doing it again makes no sense.

Just get rid of the failure path in redirect_table_init() and let that
return a bool (true on success) and invoke redirect_table_fini() here

> +		return -EINVAL;

Seriously? The failure condition is -ENOMEM so why do you want to change
that?

> +	}
> +
> +	irde_descs[node].finish = 1;
> +	return 0;
> +}
> +
> +#ifdef	CONFIG_ACPI

What's the TAB for?

> +static int __init redirect_reg_base_init(void)
> +{
> +	acpi_status status;
> +	uint64_t addr;
> +
> +	if (acpi_disabled)
> +		return 0;
> +
> +	status = acpi_evaluate_integer(NULL, "\\_SB.NO00", NULL, &addr);
> +	if (ACPI_FAILURE(status))
> +		pr_info("redirect_iocsr_base used default 0x1fe00000\n");
> +	else
> +		redirect_reg_base = addr;
> +
> +	return 0;
> +}
> +subsys_initcall_sync(redirect_reg_base_init);
> +
> +static int __init pch_msi_parse_madt(union acpi_subtable_headers *header,
> +		const unsigned long end)

Sigh.

> +int __init redirect_acpi_init(struct irq_domain *parent)
> +{
> +	struct fwnode_handle *fwnode;
> +	struct irq_domain *domain;
> +	int ret;
> +
> +	fwnode = irq_domain_alloc_named_fwnode("redirect");
> +	if (!fwnode) {
> +		pr_err("Unable to alloc redirect domain handle\n");
> +		goto fail;
> +	}
> +
> +	domain = irq_domain_create_hierarchy(parent, 0, IRD_ENTRIES, fwnode,
> +					     &redirect_domain_ops, irde_descs);
> +	if (!domain) {
> +		pr_err("Unable to alloc redirect domain\n");
> +		goto out_free_fwnode;
> +	}
> +
> +	redirect_domain = domain;

What's the point of this local domain variable?

> +	ret = redirect_table_init(0);
> +	if (ret)
> +		goto out_free_table;
> +

Oh well...

Thanks,

        tglx
Re: [PATCH v4 2/2] irq/irq-loongarch-ir: Add Redirect irqchip support
Posted by Tianyang Zhang 3 months, 2 weeks ago
Hi, Thomas

On 2025/6/13 11:20 PM, Thomas Gleixner wrote:
> On Tue, Jun 10 2025 at 19:42, Tianyang Zhang wrote:
>>   		if (cpu_online(adata->cpu) && cpumask_test_cpu(adata->cpu, dest))
>> -			return 0;
>> +			/*
>> +			 * The new affinity configuration has taken effect,
>> +			 * and returning IRQ_SET_MASK_OK_DONE here indicates that no further
>> +			 * changes need to be made in the subsequent process
> This is not what IRQ_SET_MASK_OK_DONE is about. The documentation
> clearly says:
>
>   * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. Special code to
>   *                        support stacked irqchips, which indicates skipping
>   *                        all descendant irqchips.
>
> It's not about further changes. It's about preventing invoking
> set_affinity() callbacks down the hierarchy.
>
> And you still fail to explain why this change is correct for the
> existing code. That explanation wants to be in the changelog of the
> separate patch doing this change.
>
> And then you can spare the comment, which is btw. also violating the
> bracket rules in the tip maintainers documentation.

OK. This means the change involves adjustments to the code organization
and cannot simply be folded into one large patch; it requires a separate
patch with a clearer changelog and explanation.

If that's the case, I will make the necessary modifications as requested.
Thank you

>
>
>> +			 */
>> +			return IRQ_SET_MASK_OK_DONE;
>>   
>>   		cpumask_and(&intersect_mask, dest, cpu_online_mask);
>>   
>> @@ -121,7 +116,8 @@ static int avecintc_set_affinity(struct irq_data *data, const struct cpumask *de
>>   		adata->cpu = cpu;
>>   		adata->vec = vector;
>>   		per_cpu_ptr(irq_map, adata->cpu)[adata->vec] = irq_data_to_desc(data);
>> -		avecintc_sync(adata);
>> +		if (!cpu_has_redirectint)
>> +			avecintc_sync(adata);
>>   	}
>>   
>>   	irq_data_update_effective_affinity(data, cpumask_of(cpu));
>> @@ -412,6 +408,9 @@ static int __init pch_msi_parse_madt(union acpi_subtable_headers *header,
>>   
>>   static inline int __init acpi_cascade_irqdomain_init(void)
>>   {
>> +	if (cpu_has_redirectint)
>> +		return redirect_acpi_init(loongarch_avec.domain);
>> +
>>   	return acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 1);
>>   }
>>   
>> diff --git a/drivers/irqchip/irq-loongarch-ir.c b/drivers/irqchip/irq-loongarch-ir.c
>> new file mode 100644
>> index 000000000000..ae42ec5028d2
>> --- /dev/null
>> +++ b/drivers/irqchip/irq-loongarch-ir.c
>> @@ -0,0 +1,562 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2020 Loongson Technologies, Inc.
>> + */
>> +
>> +#include <linux/cpuhotplug.h>
>> +#include <linux/init.h>
>> +#include <linux/interrupt.h>
>> +#include <linux/kernel.h>
>> +#include <linux/irq.h>
>> +#include <linux/irqchip.h>
>> +#include <linux/irqdomain.h>
>> +#include <linux/spinlock.h>
>> +#include <linux/msi.h>
>> +
>> +#include <asm/irq.h>
>> +#include <asm/loongarch.h>
>> +#include <asm/setup.h>
>> +#include <larchintrin.h>
>> +
>> +#include "irq-loongson.h"
>> +#include "irq-msi-lib.h"
>> +
>> +#define IRD_ENTRIES			65536
>> +
>> +/* redirect entry size 128bits */
>> +#define IRD_PAGE_ORDER			(20 - PAGE_SHIFT)
>> +
>> +/* irt cache invalid queue */
>> +#define	INVALID_QUEUE_SIZE		4096
> Use SPACE after #define, not TAB. All over the place...
OK, I got it
>
>> +#define INVALID_QUEUE_PAGE_ORDER	(16 - PAGE_SHIFT)
> I'm pretty sure that the above magic numbers have dependencies in some
> way, right? So why is it not expressed in a way which makes this obvious?
>
> These magic numbers are just incomprehensible as they make the reader
> guess what this is about. Here is my (probably not so) wild guess:
>
> #define IRD_ENTRY_SIZE			16
>
> #define IRD_ENTRIES			65536
> #define IRD_PAGE_ORDER			get_order(IRD_ENTRIES * IRD_ENTRY_SIZE)
>
> #define	INVALID_QUEUE_SIZE		4096
> #define IRD_INVALID_QUEUE_PAGE_ORDER	get_order(INVALID_QUEUE_SIZE * IRD_ENTRY_SIZE)
>
> No?
Yes, this is a better way, and I will pay attention to improving the
readability of the code in the future
>
> GENMASK()
Ok, I got it, thanks
>
>> +#define CFG_DISABLE_IDLE		2
>> +#define INVALID_INDEX			0
>> +
>> +#define MAX_IR_ENGINES			16
>
>> +struct redirect_gpid {
>> +	u64	pir[4];      /* Pending interrupt requested */
>> +	u8	en	: 1, /* doorbell */
> Use C++ style tail comments for this as documented. Do I really have
> to point out every single item in the documentation explicitly, or can
> you just read all of it and follow the guidelines?

Okay, this style was copied from some existing code as a reference....

I will rework all the code style issues according to the documentation

>
>> +struct irde_desc {
>> +	struct	redirect_table	ird_table;
>> +	struct	redirect_queue	inv_queue;
>> +	int	finish;
> Groan.
>
> "Struct declarations should align the struct member names in a tabular fashion:
>
>   struct bar_order {
>          unsigned int    guest_id;
>          int             ordered_item;
>          struct menu     *menu;
>   };"
>
> It clearly says to align the struct member names, no?
>
> Otherwise the example would be:
>
>   struct bar_order {
>          unsigned int    guest_id;
>          int             ordered_item;
>          struct 		menu     	*menu;
>   };
>
> which is unreadable garbage obviously.
Ok, I will adjust this and check for other similar issues
>
>> +static void invalid_enqueue(struct redirect_queue *rqueue, struct irde_inv_cmd *cmd)
>> +{
>> +	struct irde_inv_cmd *inv_addr;
>> +	u32 tail;
>> +
>> +	guard(raw_spinlock_irqsave)(&rqueue->lock);
>> +
>> +	while (invalid_queue_is_full(rqueue->node, &tail))
>> +		cpu_relax();
>> +
>> +	inv_addr = (struct irde_inv_cmd *)(rqueue->base + tail * sizeof(struct irde_inv_cmd));
>> +	memcpy(inv_addr, cmd, sizeof(struct irde_inv_cmd));
> Seriously?
>
> struct redirect_queue {
> 	int				node;
>          union {
>   		u64			base;
>                  struct irde_inv_cmd	*cmds;
>          };
> 	u32				max_size;
>          ...
> };
>
> and then this becomes
>
>      	memcpy(&rqueue->cmds[tail], cmd, sizeof(*cmd));
>
> That's too comprehensible, right?
Yes, this is a more readable writing style, thank you
>
>> +	tail = (tail + 1) % INVALID_QUEUE_SIZE;
> Why is this before the barrier? The barrier does not do anything about
> this and you can simplify this. See below.
>
> And as there is no rmb() counterpart you want to explain that this is
> serializing against the hardware.
>> +	 */
>> +	wmb();
>> +
>> +	write_queue_tail(rqueue->node, tail);
> 	write_queue_tail(rqueue->node, (tail + 1) & INVALID_QUEUE_MASK);
>
> No?

The reason for coding it this way is that during testing it was found that
a barrier is needed between the update of the temporary variable 'tail' and
the register write in 'write_queue_tail'; otherwise write_queue_tail will
sporadically fail to observe the correct value.

We will test the method above and, if possible, make the changes as
requested, or otherwise improve the readability of the code.

thanks

>
>> +}
>> +
>> +static void irde_invlid_entry_node(struct redirect_item *item)
>> +{
>> +	struct redirect_queue *rqueue;
>> +	struct irde_inv_cmd cmd;
>> +	volatile u64 raddr = 0;
> No. See Documentation/process/volatile-considered-harmful.rst
Ok, I will follow the convention here
>
>> +static void redirect_table_free(struct redirect_item *item)
>> +{
>> +	struct redirect_table *ird_table;
>> +	struct redirect_entry *entry;
>> +
>> +	ird_table = item->table;
>> +
>> +	entry = item->entry;
>> +	memset(entry, 0, sizeof(struct redirect_entry));
>          memset(entry, 0, sizeof(*entry));
>
> It's obvious why using sizeof(type) is a bad idea.
Ok, I got it
>
>> +	scoped_guard(raw_spinlock_irqsave, &ird_table->lock)
> raw_spinlock_irq as this code can never be invoked from an interrupt
> disabled region.
Ok, I will make the necessary corrections here
>
>> +		bitmap_release_region(ird_table->bitmap, item->index, 0);
>> +
>> +	kfree(item->gpid);
>> +
>> +	irde_invlid_entry_node(item);
>> +}
>> +static inline void redirect_domain_prepare_entry(struct redirect_item *item,
>> +						 struct avecintc_data *adata)
>> +{
>> +	struct redirect_entry *entry = item->entry;
>> +
>> +	item->gpid->en = 1;
>> +	item->gpid->irqnum = adata->vec;
>> +	item->gpid->dst = adata->cpu;
>> +
>> +	entry->lo.valid = 1;
>> +	entry->lo.gpid = ((long)item->gpid >> GPID_ADDR_SHIFT) & (GPID_ADDR_MASK);
> Hardware addresses are type unsigned long and not long.
Ok, I got it
>
>> +	entry->lo.vector = 0xff;
>> +}
>> +static void redirect_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
>> +{
>> +	struct redirect_item *item;
>> +
>> +	item = irq_data_get_irq_chip_data(d);
> Just move the initialization into the declaration line.
Ok, I got it
>
>> +		if (irq_data && irq_data->chip_data) {
>> +			struct redirect_item *item;
>> +
>> +			item = irq_data->chip_data;
> Same and all over the place.
>
>> +			redirect_table_free(item);
>> +			kfree(item);
>> +		}
>> +	}
>> +}
>> +
>> +static int redirect_domain_alloc(struct irq_domain *domain, unsigned int virq,
>> +			unsigned int nr_irqs, void *arg)
> I asked you before to align the arguments of the second line properly
> and according to documentation..

Sorry, I have rechecked the modification history and found that this was
fixed in the first draft version. However, the fix was lost across several
test-environment migrations in between.

I will adjust my code organization process

>
>> +{
>> +	struct redirect_table *ird_table;
>> +	struct avecintc_data *avec_data;
>> +	struct irq_data *irq_data;
>> +	msi_alloc_info_t *info;
>> +	int ret, i, node;
>> +
>> +	info = (msi_alloc_info_t *)arg;
> What's that type cast for?
Emm...... an oversight during the code modification process
>
>> +	node = dev_to_node(info->desc->dev);
>> +	ird_table = &irde_descs[node].ird_table;
>> +
>> +	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	for (i = 0; i < nr_irqs; i++) {
>> +		struct redirect_item *item;
>> +
>> +		item = kzalloc(sizeof(struct redirect_item), GFP_KERNEL);
>> +		if (!item) {
>> +			pr_err("Alloc redirect descriptor failed\n");
>> +			goto out_free_resources;
>> +		}
>> +
>> +		irq_data = irq_domain_get_irq_data(domain, virq + i);
>> +
>> +		avec_data = irq_data_get_avec_data(irq_data);
> Neither irq_data nor avec_data are used here and only required way
> down. Can you structure your code so it makes sense?
Ok, I will follow your advice
>
>> +		ret = redirect_table_alloc(item, ird_table);
>> +		if (ret) {
>> +			pr_err("Alloc redirect table entry failed\n");
>> +			goto out_free_resources;
>> +		}
>> +
>> +		item->gpid = kzalloc_node(sizeof(struct redirect_gpid), GFP_KERNEL, node);
>> +		if (!item->gpid) {
>> +			pr_err("Alloc redirect GPID failed\n");
>> +			goto out_free_resources;
>> +		}
> Why do you need this extra allocation here instead of simply embedding
> gpid into item?
Ok, I got it, thanks
>> +		irq_data->chip_data = item;
>> +		irq_data->chip = &loongarch_redirect_chip;
>> +		redirect_domain_prepare_entry(item, avec_data);
>> +	}
>> +	return 0;
>> +	iocsr_write64(((rqueue->base & (CQB_ADDR_MASK << CQB_ADDR_SHIFT)) |
>> +				(CQB_SIZE_MASK << CQB_SIZE_SHIFT)), LOONGARCH_IOCSR_REDIRECT_CQB);
> Align second line properly.
Ok, I got it, thanks
>
>> +	return 0;
>> +}
>> +
>> +static int redirect_table_init(int node)
>> +{
>> +	struct redirect_table *ird_table = &(irde_descs[node].ird_table);
> Remove the pointless brackets.
Ok, I got it, thanks
>
>> +	unsigned long *bitmap;
>> +	struct page *pages;
>> +	int ret;
>> +
>> +	pages = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, IRD_PAGE_ORDER);
>> +	if (!pages) {
>> +		pr_err("Node [%d] redirect table alloc pages failed!\n", node);
>> +		return -ENOMEM;
>> +	}
>> +	ird_table->page = pages;
>> +	ird_table->table = page_address(pages);
>> +
>> +	bitmap = bitmap_zalloc(IRD_ENTRIES, GFP_KERNEL);
>> +	if (!bitmap) {
>> +		pr_err("Node [%d] redirect table bitmap alloc pages failed!\n", node);
>> +		ret = -ENOMEM;
>> +		goto free_pages;
>> +	}
>> +
>> +	ird_table->bitmap = bitmap;
>> +	ird_table->nr_ird = IRD_ENTRIES;
>> +	ird_table->node = node;
>> +
>> +	raw_spin_lock_init(&ird_table->lock);
>> +
>> +	if (redirect_queue_init(node)) {
>> +		ret = -EINVAL;
>> +		goto free_bitmap;
> So redirect_queue_init() returns -ENOMEM which is then converted to
> -EINVAL here. That makes absolutely no sense at all.
>
> Neither makes the 'ret' variable sense because all failures should
> return -ENOMEM and therefore you can make redirect_queue_init() return
> bool (true on success) and return -ENOMEM in the failure path.
>
> No?
Yes, it is a more reasonable way to do this
>> +static void redirect_table_fini(int node)
>> +{
>> +	struct redirect_table *ird_table = &(irde_descs[node].ird_table);
>> +	struct redirect_queue *rqueue = &(irde_descs[node].inv_queue);
> No brackets. They have no value and just disturb reading.
Ok, I got it, thanks
>
>> +static int redirect_cpu_online(unsigned int cpu)
>> +{
>> +	int ret, node = cpu_to_node(cpu);
>> +
>> +	if (cpu != cpumask_first(cpumask_of_node(node)))
>> +		return 0;
>> +
>> +	if (irde_descs[node].finish)
>> +		return 0;
> What's this finish thing about?
>
> Neither the CPU mask check nor this finish hack is required:
>
>          if (irde_descs[node].pages)
>          	return 0;
>
> covers all of it, no?
Ok, I got it, thanks
>
>> +	ret = redirect_table_init(node);
>> +	if (ret) {
>> +		redirect_table_fini(node);
> What is this for? You already mopped up the mess in the failure path of
> redirect_table_init(), so doing it again makes no sense.
>
> Just get rid of the failure path in redirect_table_init() and let that
> return a bool (true on success) and invoke redirect_table_fini() here
There may have been some logical confusion here during version iteration,
and I will reorganize this part of the code, thanks
>
>> +		return -EINVAL;
> Seriously? The failure condition is -ENOMEM so why do you want to change
> that?
Ok, returning ret is enough there, thanks
>> +	}
>> +
>> +	irde_descs[node].finish = 1;
>> +	return 0;
>> +}
>> +
>> +#ifdef	CONFIG_ACPI
> What's the TAB for?
Ok. I got it, thanks
>> +int __init redirect_acpi_init(struct irq_domain *parent)
>> +{
>> +	struct fwnode_handle *fwnode;
>> +	struct irq_domain *domain;
>> +	int ret;
>> +
>> +	fwnode = irq_domain_alloc_named_fwnode("redirect");
>> +	if (!fwnode) {
>> +		pr_err("Unable to alloc redirect domain handle\n");
>> +		goto fail;
>> +	}
>> +
>> +	domain = irq_domain_create_hierarchy(parent, 0, IRD_ENTRIES, fwnode,
>> +					     &redirect_domain_ops, irde_descs);
>> +	if (!domain) {
>> +		pr_err("Unable to alloc redirect domain\n");
>> +		goto out_free_fwnode;
>> +	}
>> +
>> +	redirect_domain = domain;
> What's the point of this local domain variable?
This is indeed meaningless, thanks
>
>> +	ret = redirect_table_init(0);
>> +	if (ret)
>> +		goto out_free_table;
>> +
> Oh well...
There may have been some logical confusion here during version iteration,
and I will reorganize this part of the code, thanks
>
> Thanks,
>
>          tglx

Thanks

Tianyang

Re: [PATCH v4 2/2] irq/irq-loongarch-ir: Add Redirect irqchip support
Posted by Thomas Gleixner 3 months, 2 weeks ago
On Mon, Jun 23 2025 at 10:45, Tianyang Zhang wrote:
> On 2025/6/13 11:20 PM, Thomas Gleixner wrote:
>> On Tue, Jun 10 2025 at 19:42, Tianyang Zhang wrote:
>>
>>> +	tail = (tail + 1) % INVALID_QUEUE_SIZE;
>> Why is this before the barrier? The barrier does not do anything about
>> this and you can simplify this. See below.
>>
>> And as there is no rmb() counterpart you want to explain that this is
>> serializing against the hardware.
>>> +	 */
>>> +	wmb();
>>> +
>>> +	write_queue_tail(rqueue->node, tail);
>> 	write_queue_tail(rqueue->node, (tail + 1) & INVALID_QUEUE_MASK);
>>
>> No?
>
> The reason for coding it this way is that during testing it was found
> that a barrier is needed between the update of the temporary variable
> 'tail' and the register write in 'write_queue_tail'; otherwise
> write_queue_tail will sporadically fail to observe the correct value.

How so?

tail is the software managed part of the ringbuffer which is shared with
the hardware, right?

So even if the compiler would be allowed to reevaluate tail after the
barrier (it is NOT), then tail would still contain the same value as
before, no?

The wmb() is required to ensure that the hardware can observe the full
write of the command _before_ it can observe the update to the tail
index.

Anything else is voodoo.

Thanks,

        tglx
Re: [PATCH v4 2/2] irq/irq-loongarch-ir: Add Redirect irqchip support
Posted by Tianyang Zhang 3 months, 2 weeks ago
Hi, Thomas

On 2025/6/23 4:27 PM, Thomas Gleixner wrote:
> On Mon, Jun 23 2025 at 10:45, Tianyang Zhang wrote:
>> On 2025/6/13 11:20 PM, Thomas Gleixner wrote:
>>> On Tue, Jun 10 2025 at 19:42, Tianyang Zhang wrote:
>>>
>>>> +	tail = (tail + 1) % INVALID_QUEUE_SIZE;
>>> Why is this before the barrier? The barrier does not do anything about
>>> this and you can simplify this. See below.
>>>
>>> And as there is no rmb() counterpart you want to explain that this is
>>> serializing against the hardware.
>>>> +	 */
>>>> +	wmb();
>>>> +
>>>> +	write_queue_tail(rqueue->node, tail);
>>> 	write_queue_tail(rqueue->node, (tail + 1) & INVALID_QUEUE_MASK);
>>>
>>> No?
>> The reason for coding it this way is that during testing it was found
>> that a barrier is needed between the update of the temporary variable
>> 'tail' and the register write in 'write_queue_tail'; otherwise
>> write_queue_tail will sporadically fail to observe the correct value.
> How so?
>
> tail is the software managed part of the ringbuffer which is shared with
> the hardware, right?
>
> So even if the compiler would be allowed to reevaluate tail after the
> barrier (it is NOT), then tail would still contain the same value as
> before, no?
>
> The wmb() is required to ensure that the hardware can observe the full
> write of the command _before_ it can observe the update to the tail
> index.
>
> Anything else is voodoo.
>
> Thanks,
>
>          tglx

In my previous understanding, the tail value is actually part of the 'full
write of the command'.

We must ensure that tail is updated to the correct value first, and then
write this value into the register (perhaps by adding a wmb in
write_queue_tail).

In other words, this was originally meant to prevent the register write
instruction from being executed out of order, before tail is updated.

The above is just my personal understanding

Thanks

Tianyang


Re: [PATCH v4 2/2] irq/irq-loongarch-ir: Add Redirect irqchip support
Posted by Thomas Gleixner 3 months, 2 weeks ago
On Mon, Jun 23 2025 at 17:33, Tianyang Zhang wrote:
> On 2025/6/23 4:27 PM, Thomas Gleixner wrote:
>> tail is the software managed part of the ringbuffer which is shared with
>> the hardware, right?
>>
>> So even if the compiler would be allowed to reevaluate tail after the
>> barrier (it is NOT), then tail would still contain the same value as
>> before, no?
>>
>> The wmb() is required to ensure that the hardware can observe the full
>> write of the command _before_ it can observe the update to the tail
>> index.
>>
>> Anything else is voodoo.
>>
>> Thanks,
>>
>>          tglx
>
> In my previous understanding, the tail value is actually part of the 'full
> write of the command'.

Of course. The hardware observes the tail value. If it is not updated
then the command is not executed. But these are two distinct things:

The invalidate command is written to a location in the command buffer
which is determined by tail:

	inv_addr = (struct irde_inv_cmd *)(rqueue->base + tail * sizeof(struct irde_inv_cmd));
	memcpy(inv_addr, cmd, sizeof(struct irde_inv_cmd));

rqueue::base points to an array of invalidate commands. The array
size is INVALID_QUEUE_SIZE. tail is the position in the array to which
the software writes the next command. tail is software managed and
written to a completely different location via write_queue_tail(...):

static phys_addr_t redirect_reg_base = 0x1fe00000;

#define REDIRECT_REG_BASE(reg, node) \
	(UNCACHE_BASE | redirect_reg_base | (u64)(node) << NODE_ADDRSPACE_SHIFT | (reg))
#define	redirect_reg_queue_tail(node)	REDIRECT_REG_BASE(LOONGARCH_IOCSR_REDIRECT_CQT, (node))
#define read_queue_tail(node)		(*((u32 *)(redirect_reg_queue_tail(node))))
#define write_queue_tail(node, val)	(*((u32 *)(redirect_reg_queue_tail(node))) = (val))

The hardware maintains the head index. It's the last command index the
hardware processed. When the hardware observes that head != tail then it
processes the next entry and after completion it updates head with the
next index. This repeats until head == tail.
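
Conceptually, the consumer side amounts to something like this (an
illustrative C-like model only; the real consumer is the invalidation
hardware, and process_command() is a placeholder, not a function from
the patch):

	/*
	 * Model of the hardware consumer - not kernel code. 'queue' is
	 * the command array at rqueue::base, 'head' is the hardware
	 * maintained index, 'tail' the software maintained one.
	 */
	while (head != tail) {
		struct irde_inv_cmd *cmd = &queue[head];

		process_command(cmd);	/* execute the invalidate */
		/* Publish progress so software can poll for completion */
		head = (head + 1) % INVALID_QUEUE_SIZE;
	}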

> We must ensure that tail is updated to the correct value first, and
> then write this value into the register (perhaps by adding a wmb() in
> write_queue_tail).

No. The local variable 'tail' is purely local to the CPU and the
invalidation hardware does not even know that it exists.

There are two things which are relevant to the hardware:

   1) command is written to the hardware visible array at index of tail

   2) hardware visible tail memory (register) is updated to tail + 1

The memory barrier is required to prevent that #2 is written to the
hardware _before_ #1 completed and is fully visible to the hardware.
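
Schematically, the write side therefore needs to be this sequence (a
sketch reusing the names from your patch; INVALID_QUEUE_MASK is assumed
to be INVALID_QUEUE_SIZE - 1):

	/* #1: write the command into the hardware visible array */
	inv_addr = (struct irde_inv_cmd *)(rqueue->base + tail * sizeof(struct irde_inv_cmd));
	memcpy(inv_addr, cmd, sizeof(struct irde_inv_cmd));

	/* Make #1 observable to the hardware before #2 */
	wmb();

	/* #2: publish the incremented tail index to the hardware register */
	write_queue_tail(rqueue->node, (tail + 1) & INVALID_QUEUE_MASK);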

> In other words, the barrier is there to prevent the register write
> instruction from being executed out of order, before tail is updated.

No. The barrier is solely for the above #1 vs. #2 ordering.

There is a difference between program flow ordering and memory ordering.

The hardware _CANNOT_ execute the write _before_ the value it writes is
computed. That's enforced by program flow order.

So it's always guaranteed that the CPU executes

   tail + 1

_before_ executing the write to the register because that's a program
order dependency.

If that would not be guaranteed, then your CPU would have more serious
problems than this piece of code. It simply would not even survive the
first five instructions in the boot loader.

But what's not guaranteed is that consecutive writes become visible to
other parts of the system (other CPUs, the invalidation hardware, ....)
in the same consecutive order.

To ensure that ordering you need a WMB(). If you would have CPU to CPU
communication then you would need a RMB() on the reader side to ensure
that the command is not read before the tail. In your case the hardware
side takes care of that.
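
For that hypothetical CPU to CPU case the canonical pairing would look
like this (sketch only; 'data' and 'ready' are made-up names, not from
the patch):

	/* Writer side (CPU A) */
	WRITE_ONCE(data, value);	/* store the payload */
	smp_wmb();			/* order payload before flag */
	WRITE_ONCE(ready, 1);		/* publish */

	/* Reader side (CPU B) */
	if (READ_ONCE(ready)) {
		smp_rmb();		/* order flag read before payload read */
		value = READ_ONCE(data);
	}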

> The above is just my personal understanding.

Which might need some slight adjustments :)

Thanks,

        tglx
Re: [PATCH v4 2/2] irq/irq-loongarch-ir:Add Redirect irqchip support
Posted by Tianyang Zhang 3 months, 2 weeks ago
Hi, Thomas

On 2025/6/24 at 04:05, Thomas Gleixner wrote:
> On Mon, Jun 23 2025 at 17:33, Tianyang Zhang wrote:
>> On 2025/6/23 at 16:27, Thomas Gleixner wrote:
>>> tail is the software managed part of the ringbuffer which is shared with
>>> the hardware, right?
>>>
>>> So even if the compiler would be allowed to reevaluate tail after the
>>> barrier (it is NOT), then tail would still contain the same value as
>>> before, no?
>>>
>>> The wmb() is required to ensure that the hardware can observe the full
>>> write of the command _before_ it can observe the update to the tail
>>> index.
>>>
>>> Anything else is voodoo.
>>>
>>> Thanks,
>>>
>>>           tglx
>> In my previous understanding, tail's value is actually part of the
>> 'full write of the command'.
> Of course. The hardware observes the tail value. If it is not updated
> then the command is not executed. But these are two distinct things:
>
> The invalidate command is written to a location in the command buffer
> which is determined by tail:
>
> 	inv_addr = (struct irde_inv_cmd *)(rqueue->base + tail * sizeof(struct irde_inv_cmd));
> 	memcpy(inv_addr, cmd, sizeof(struct irde_inv_cmd));
>
> rqueue::base points to an array of invalidate commands. The array
> size is INVALID_QUEUE_SIZE. tail is the position in the array to which
> the software writes the next command. tail is software managed and
> written to a completely different location via write_queue_tail(...):
>
> static phys_addr_t redirect_reg_base = 0x1fe00000;
>
> #define REDIRECT_REG_BASE(reg, node) \
> 	(UNCACHE_BASE | redirect_reg_base | (u64)(node) << NODE_ADDRSPACE_SHIFT | (reg))
> #define	redirect_reg_queue_tail(node)	REDIRECT_REG_BASE(LOONGARCH_IOCSR_REDIRECT_CQT, (node))
> #define read_queue_tail(node)		(*((u32 *)(redirect_reg_queue_tail(node))))
> #define write_queue_tail(node, val)	(*((u32 *)(redirect_reg_queue_tail(node))) = (val))
>
> The hardware maintains the head index. It's the last command index the
> hardware processed. When the hardware observes that head != tail then it
> processes the next entry and after completion it updates head with the
> next index. This repeats until head == tail.
>
>> We must ensure that tail is updated to the correct value first, and
>> then write this value into the register (perhaps by adding a wmb() in
>> write_queue_tail).
> No. The local variable 'tail' is purely local to the CPU and the
> invalidation hardware does not even know that it exists.
>
> There are two things which are relevant to the hardware:
>
>     1) command is written to the hardware visible array at index of tail
>
>     2) hardware visible tail memory (register) is updated to tail + 1
>
> The memory barrier is required to prevent that #2 is written to the
> hardware _before_ #1 completed and is fully visible to the hardware.
>
>> In other words, the barrier is there to prevent the register write
>> instruction from being executed out of order, before tail is updated.
> No. The barrier is solely for the above #1 vs. #2 ordering.
>
> There is a difference between program flow ordering and memory ordering.
>
> The hardware _CANNOT_ execute the write _before_ the value it writes is
> computed. That's enforced by program flow order.
>
> So it's always guaranteed that the CPU executes
>
>     tail + 1
>
> _before_ executing the write to the register because that's a program
> order dependency.
>
> If that would not be guaranteed, then your CPU would have more serious
> problems than this piece of code. It simply would not even survive the
> first five instructions in the boot loader.
>
> But what's not guaranteed is that consecutive writes become visible to
> other parts of the system (other CPUs, the invalidation hardware, ....)
> in the same consecutive order.
>
> To ensure that ordering you need a WMB(). If you would have CPU to CPU
> communication then you would need a RMB() on the reader side to ensure
> that the command is not read before the tail. In your case the hardware
> side takes care of that.
>
>> The above is just my personal understanding.
> Which might need some slight adjustments :)
>
> Thanks,
>
>          tglx

Hmm... it seems I confused program-flow ordering and memory ordering;
my understanding of the previous issue may not have been right.

Your reply has taught me a great deal; I truly appreciate it.

Thank you very much,

Tianyang
