[PATCH] eeprom: at25: convert to spi-mem API

A. Sverdlin posted 1 patch 7 months, 1 week ago
drivers/misc/eeprom/Kconfig |   1 +
drivers/misc/eeprom/at25.c  | 321 ++++++++++++++++++------------------
2 files changed, 162 insertions(+), 160 deletions(-)
[PATCH] eeprom: at25: convert to spi-mem API
Posted by A. Sverdlin 7 months, 1 week ago
From: Alexander Sverdlin <alexander.sverdlin@siemens.com>

Replace the RAW SPI accesses with spi-mem API. The latter will fall back to
RAW SPI accesses if spi-mem callbacks are not implemented by a controller
driver.

Notable advantages:
- read function now allocates a bounce buffer for SPI DMA compatibility,
  similar to write function;
- the driver can now be used in conjunction with SPI controller drivers
  providing spi-mem API only, e.g. spi-nxp-fspi.
- during the initial probe the driver polls busy/ready status bit for 25ms
  instead of giving up instantly and hoping that the FW didn't write the
  EEPROM

Notes:
- mutex_lock() has been dropped from fm25_aux_read() because the latter is
  only being called in probe phase and therefore cannot race with
  at25_ee_read() or at25_ee_write()

Quick 4KB block size test with CY15B102Q 256KB F-RAM over spi_omap2_mcspi
driver (no spi-mem ops provided, fallback to raw SPI inside spi-mem):

OP	| throughput, KB/s	| change
--------+-----------------------+-------
write	| 1717.847 -> 1656.684	| -3.6%
read	| 1115.868 -> 1059.367	| -5.1%

The lower throughtput probably comes from the 3 messages per SPI transfer
inside spi-mem instead of hand-crafted 2 messages per transfer in the
former at25 code. However, if the raw SPI access is not preserved, then
the driver doesn't grow from the lines-of-code perspective and subjectively
could be considered even a bit simpler.

Higher performance impact on the read operation could be explained by the
newly introduced bounce buffer in read operation. I didn't find any
explanation or guarantee, why would a bounce buffer be not needed on the
read side, so I assume it's a pure luck that nobody read EEPROM into
some variable on stack on an architecture where kernel stack would be
not DMA-able.

Cc: Michael Walle <mwalle@kernel.org>
Cc: Hui Wang <hui.wang@canonical.com>
Link: https://lore.kernel.org/all/28ab8b72afee1af59b628f7389f0d7f5@kernel.org/
Signed-off-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>
---
 drivers/misc/eeprom/Kconfig |   1 +
 drivers/misc/eeprom/at25.c  | 321 ++++++++++++++++++------------------
 2 files changed, 162 insertions(+), 160 deletions(-)

diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig
index cb1c4b8e7fd37..0bef5b93bd6dc 100644
--- a/drivers/misc/eeprom/Kconfig
+++ b/drivers/misc/eeprom/Kconfig
@@ -37,6 +37,7 @@ config EEPROM_AT25
 	depends on SPI && SYSFS
 	select NVMEM
 	select NVMEM_SYSFS
+	select SPI_MEM
 	help
 	  Enable this driver to get read/write support to most SPI EEPROMs
 	  and Cypress FRAMs,
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index 595ceb9a71261..20611320e7152 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -7,8 +7,10 @@
  */
 
 #include <linux/bits.h>
+#include <linux/cleanup.h>
 #include <linux/delay.h>
 #include <linux/device.h>
+#include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/property.h>
@@ -17,6 +19,7 @@
 
 #include <linux/spi/eeprom.h>
 #include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
 
 #include <linux/nvmem-provider.h>
 
@@ -35,13 +38,12 @@
 
 struct at25_data {
 	struct spi_eeprom	chip;
-	struct spi_device	*spi;
+	struct spi_mem		*spimem;
 	struct mutex		lock;
 	unsigned		addrlen;
 	struct nvmem_config	nvmem_config;
 	struct nvmem_device	*nvmem;
 	u8 sernum[FM25_SN_LEN];
-	u8 command[EE_MAXADDRLEN + 1];
 };
 
 #define	AT25_WREN	0x06		/* latch the write enable */
@@ -74,20 +76,29 @@ struct at25_data {
 
 #define	io_limit	PAGE_SIZE	/* bytes */
 
+/* Handle the address MSB as part of instruction byte */
+static u8 at25_instr(struct at25_data *at25, u8 instr, unsigned int off)
+{
+	if (!(at25->chip.flags & EE_INSTR_BIT3_IS_ADDR))
+		return instr;
+	if (off < BIT(at25->addrlen * 8))
+		return instr;
+	return instr | AT25_INSTR_BIT3;
+}
+
 static int at25_ee_read(void *priv, unsigned int offset,
 			void *val, size_t count)
 {
+	u8 *bounce __free(kfree) = kmalloc(min(count, io_limit), GFP_KERNEL);
 	struct at25_data *at25 = priv;
 	char *buf = val;
-	size_t max_chunk = spi_max_transfer_size(at25->spi);
 	unsigned int msg_offset = offset;
 	size_t bytes_left = count;
 	size_t segment;
-	u8			*cp;
-	ssize_t			status;
-	struct spi_transfer	t[2];
-	struct spi_message	m;
-	u8			instr;
+	int status;
+
+	if (!bounce)
+		return -ENOMEM;
 
 	if (unlikely(offset >= at25->chip.byte_len))
 		return -EINVAL;
@@ -97,87 +108,67 @@ static int at25_ee_read(void *priv, unsigned int offset,
 		return -EINVAL;
 
 	do {
-		segment = min(bytes_left, max_chunk);
-		cp = at25->command;
+		struct spi_mem_op op;
 
-		instr = AT25_READ;
-		if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
-			if (msg_offset >= BIT(at25->addrlen * 8))
-				instr |= AT25_INSTR_BIT3;
+		segment = min(bytes_left, io_limit);
 
-		mutex_lock(&at25->lock);
+		op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(at25_instr(at25, AT25_READ,
+									     msg_offset), 1),
+						   SPI_MEM_OP_ADDR(at25->addrlen, msg_offset, 1),
+						   SPI_MEM_OP_NO_DUMMY,
+						   SPI_MEM_OP_DATA_IN(segment, bounce, 1));
 
-		*cp++ = instr;
-
-		/* 8/16/24-bit address is written MSB first */
-		switch (at25->addrlen) {
-		default:	/* case 3 */
-			*cp++ = msg_offset >> 16;
-			fallthrough;
-		case 2:
-			*cp++ = msg_offset >> 8;
-			fallthrough;
-		case 1:
-		case 0:	/* can't happen: for better code generation */
-			*cp++ = msg_offset >> 0;
-		}
-
-		spi_message_init(&m);
-		memset(t, 0, sizeof(t));
-
-		t[0].tx_buf = at25->command;
-		t[0].len = at25->addrlen + 1;
-		spi_message_add_tail(&t[0], &m);
-
-		t[1].rx_buf = buf;
-		t[1].len = segment;
-		spi_message_add_tail(&t[1], &m);
-
-		status = spi_sync(at25->spi, &m);
+		status = spi_mem_adjust_op_size(at25->spimem, &op);
+		if (status)
+			return status;
+		segment = op.data.nbytes;
 
+		mutex_lock(&at25->lock);
+		status = spi_mem_exec_op(at25->spimem, &op);
 		mutex_unlock(&at25->lock);
-
 		if (status)
 			return status;
+		memcpy(buf, bounce, segment);
 
 		msg_offset += segment;
 		buf += segment;
 		bytes_left -= segment;
 	} while (bytes_left > 0);
 
-	dev_dbg(&at25->spi->dev, "read %zu bytes at %d\n",
+	dev_dbg(&at25->spimem->spi->dev, "read %zu bytes at %d\n",
 		count, offset);
 	return 0;
 }
 
-/* Read extra registers as ID or serial number */
+/*
+ * Read extra registers as ID or serial number
+ *
+ * Allow for the callers to provide @buf on stack (not necessary DMA-capable)
+ * by allocating a bounce buffer internally.
+ */
 static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command,
 			 int len)
 {
+	u8 *bounce __free(kfree) = kmalloc(len, GFP_KERNEL);
+	struct spi_mem_op op;
 	int status;
-	struct spi_transfer t[2];
-	struct spi_message m;
-
-	spi_message_init(&m);
-	memset(t, 0, sizeof(t));
 
-	t[0].tx_buf = at25->command;
-	t[0].len = 1;
-	spi_message_add_tail(&t[0], &m);
-
-	t[1].rx_buf = buf;
-	t[1].len = len;
-	spi_message_add_tail(&t[1], &m);
+	if (!bounce)
+		return -ENOMEM;
 
-	mutex_lock(&at25->lock);
+	op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(command, 1),
+					   SPI_MEM_OP_NO_ADDR,
+					   SPI_MEM_OP_NO_DUMMY,
+					   SPI_MEM_OP_DATA_IN(len, bounce, 1));
 
-	at25->command[0] = command;
+	status = spi_mem_exec_op(at25->spimem, &op);
+	dev_dbg(&at25->spimem->spi->dev, "read %d aux bytes --> %d\n", len, status);
+	if (status)
+		return status;
 
-	status = spi_sync(at25->spi, &m);
-	dev_dbg(&at25->spi->dev, "read %d aux bytes --> %d\n", len, status);
+	memcpy(buf, bounce, len);
 
-	mutex_unlock(&at25->lock);
-	return status;
+	return 0;
 }
 
 static ssize_t sernum_show(struct device *dev, struct device_attribute *attr, char *buf)
@@ -195,14 +186,47 @@ static struct attribute *sernum_attrs[] = {
 };
 ATTRIBUTE_GROUPS(sernum);
 
+/*
+ * Poll Read Status Register with timeout
+ *
+ * Return:
+ * 0, if the chip is ready
+ * [positive] Status Register value as-is, if the chip is busy
+ * [negative] error code in case of read failure
+ */
+static int at25_wait_ready(struct at25_data *at25)
+{
+	u8 *bounce __free(kfree) = kmalloc(1, GFP_KERNEL);
+	struct spi_mem_op op;
+	int status;
+
+	if (!bounce)
+		return -ENOMEM;
+
+	op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(AT25_RDSR, 1),
+					   SPI_MEM_OP_NO_ADDR,
+					   SPI_MEM_OP_NO_DUMMY,
+					   SPI_MEM_OP_DATA_IN(1, bounce, 1));
+
+	read_poll_timeout(spi_mem_exec_op, status,
+			  status || !(bounce[0] & AT25_SR_nRDY), false,
+			  USEC_PER_MSEC, USEC_PER_MSEC * EE_TIMEOUT,
+			  at25->spimem, &op);
+	if (status < 0)
+		return status;
+	if (!(bounce[0] & AT25_SR_nRDY))
+		return 0;
+
+	return bounce[0];
+}
+
 static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
 {
+	u8 *bounce __free(kfree) = kmalloc(min(count, io_limit), GFP_KERNEL);
 	struct at25_data *at25 = priv;
-	size_t maxsz = spi_max_transfer_size(at25->spi);
 	const char *buf = val;
-	int			status = 0;
-	unsigned		buf_size;
-	u8			*bounce;
+	unsigned int buf_size;
+	int status;
 
 	if (unlikely(off >= at25->chip.byte_len))
 		return -EFBIG;
@@ -211,11 +235,8 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
 	if (unlikely(!count))
 		return -EINVAL;
 
-	/* Temp buffer starts with command and address */
 	buf_size = at25->chip.page_size;
-	if (buf_size > io_limit)
-		buf_size = io_limit;
-	bounce = kmalloc(buf_size + at25->addrlen + 1, GFP_KERNEL);
+
 	if (!bounce)
 		return -ENOMEM;
 
@@ -223,85 +244,64 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
 	 * For write, rollover is within the page ... so we write at
 	 * most one page, then manually roll over to the next page.
 	 */
-	mutex_lock(&at25->lock);
+	guard(mutex)(&at25->lock);
 	do {
-		unsigned long	timeout, retries;
-		unsigned	segment;
-		unsigned	offset = off;
-		u8		*cp = bounce;
-		int		sr;
-		u8		instr;
-
-		*cp = AT25_WREN;
-		status = spi_write(at25->spi, cp, 1);
-		if (status < 0) {
-			dev_dbg(&at25->spi->dev, "WREN --> %d\n", status);
-			break;
-		}
-
-		instr = AT25_WRITE;
-		if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
-			if (offset >= BIT(at25->addrlen * 8))
-				instr |= AT25_INSTR_BIT3;
-		*cp++ = instr;
+		struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(AT25_WREN, 1),
+						  SPI_MEM_OP_NO_ADDR,
+						  SPI_MEM_OP_NO_DUMMY,
+						  SPI_MEM_OP_NO_DATA);
+		unsigned int segment;
 
-		/* 8/16/24-bit address is written MSB first */
-		switch (at25->addrlen) {
-		default:	/* case 3 */
-			*cp++ = offset >> 16;
-			fallthrough;
-		case 2:
-			*cp++ = offset >> 8;
-			fallthrough;
-		case 1:
-		case 0:	/* can't happen: for better code generation */
-			*cp++ = offset >> 0;
+		status = spi_mem_exec_op(at25->spimem, &op);
+		if (status < 0) {
+			dev_dbg(&at25->spimem->spi->dev, "WREN --> %d\n", status);
+			return status;
 		}
 
 		/* Write as much of a page as we can */
-		segment = buf_size - (offset % buf_size);
+		segment = buf_size - (off % buf_size);
 		if (segment > count)
 			segment = count;
-		if (segment > maxsz)
-			segment = maxsz;
-		memcpy(cp, buf, segment);
-		status = spi_write(at25->spi, bounce,
-				segment + at25->addrlen + 1);
-		dev_dbg(&at25->spi->dev, "write %u bytes at %u --> %d\n",
-			segment, offset, status);
-		if (status < 0)
-			break;
+		if (segment > io_limit)
+			segment = io_limit;
+
+		op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(at25_instr(at25, AT25_WRITE, off),
+								  1),
+						   SPI_MEM_OP_ADDR(at25->addrlen, off, 1),
+						   SPI_MEM_OP_NO_DUMMY,
+						   SPI_MEM_OP_DATA_OUT(segment, bounce, 1));
+
+		status = spi_mem_adjust_op_size(at25->spimem, &op);
+		if (status)
+			return status;
+		segment = op.data.nbytes;
+
+		memcpy(bounce, buf, segment);
+
+		status = spi_mem_exec_op(at25->spimem, &op);
+		dev_dbg(&at25->spimem->spi->dev, "write %u bytes at %u --> %d\n",
+			segment, off, status);
+		if (status)
+			return status;
 
 		/*
 		 * REVISIT this should detect (or prevent) failed writes
 		 * to read-only sections of the EEPROM...
 		 */
 
-		/* Wait for non-busy status */
-		timeout = jiffies + msecs_to_jiffies(EE_TIMEOUT);
-		retries = 0;
-		do {
-
-			sr = spi_w8r8(at25->spi, AT25_RDSR);
-			if (sr < 0 || (sr & AT25_SR_nRDY)) {
-				dev_dbg(&at25->spi->dev,
-					"rdsr --> %d (%02x)\n", sr, sr);
-				/* at HZ=100, this is sloooow */
-				msleep(1);
-				continue;
-			}
-			if (!(sr & AT25_SR_nRDY))
-				break;
-		} while (retries++ < 3 || time_before_eq(jiffies, timeout));
-
-		if ((sr < 0) || (sr & AT25_SR_nRDY)) {
-			dev_err(&at25->spi->dev,
+		status = at25_wait_ready(at25);
+		if (status < 0) {
+			dev_err_probe(&at25->spimem->spi->dev, status,
+				      "Read Status Redister command failed\n");
+			return status;
+		}
+		if (status) {
+			dev_dbg(&at25->spimem->spi->dev,
+				"Status %02x\n", status);
+			dev_err(&at25->spimem->spi->dev,
 				"write %u bytes offset %u, timeout after %u msecs\n",
-				segment, offset,
-				jiffies_to_msecs(jiffies -
-					(timeout - EE_TIMEOUT)));
-			status = -ETIMEDOUT;
-			break;
+				segment, off, EE_TIMEOUT);
+			return -ETIMEDOUT;
 		}
 
 		off += segment;
@@ -310,9 +310,6 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
 
 	} while (count > 0);
 
-	mutex_unlock(&at25->lock);
-
-	kfree(bounce);
 	return status;
 }
 
@@ -429,31 +426,33 @@ static const struct spi_device_id at25_spi_ids[] = {
 };
 MODULE_DEVICE_TABLE(spi, at25_spi_ids);
 
-static int at25_probe(struct spi_device *spi)
+static int at25_probe(struct spi_mem *mem)
 {
-	struct at25_data	*at25 = NULL;
-	int			err;
-	int			sr;
+	struct spi_device *spi = mem->spi;
 	struct spi_eeprom *pdata;
+	struct at25_data *at25;
 	bool is_fram;
+	int err;
+
+	at25 = devm_kzalloc(&spi->dev, sizeof(*at25), GFP_KERNEL);
+	if (!at25)
+		return -ENOMEM;
+
+	at25->spimem = mem;
 
 	/*
 	 * Ping the chip ... the status register is pretty portable,
-	 * unlike probing manufacturer IDs. We do expect that system
-	 * firmware didn't write it in the past few milliseconds!
+	 * unlike probing manufacturer IDs.
 	 */
-	sr = spi_w8r8(spi, AT25_RDSR);
-	if (sr < 0 || sr & AT25_SR_nRDY) {
-		dev_dbg(&spi->dev, "rdsr --> %d (%02x)\n", sr, sr);
+	err = at25_wait_ready(at25);
+	if (err < 0)
+		return dev_err_probe(&spi->dev, err, "Read Status Register command failed\n");
+	if (err) {
+		dev_err(&spi->dev, "Not ready (%02x)\n", err);
 		return -ENXIO;
 	}
 
-	at25 = devm_kzalloc(&spi->dev, sizeof(*at25), GFP_KERNEL);
-	if (!at25)
-		return -ENOMEM;
-
 	mutex_init(&at25->lock);
-	at25->spi = spi;
 	spi_set_drvdata(spi, at25);
 
 	is_fram = fwnode_device_is_compatible(dev_fwnode(&spi->dev), "cypress,fm25");
@@ -514,17 +513,19 @@ static int at25_probe(struct spi_device *spi)
 
 /*-------------------------------------------------------------------------*/
 
-static struct spi_driver at25_driver = {
-	.driver = {
-		.name		= "at25",
-		.of_match_table = at25_of_match,
-		.dev_groups	= sernum_groups,
+static struct spi_mem_driver at25_driver = {
+	.spidrv = {
+		.driver = {
+			.name		= "at25",
+			.of_match_table = at25_of_match,
+			.dev_groups	= sernum_groups,
+		},
+		.id_table	= at25_spi_ids,
 	},
 	.probe		= at25_probe,
-	.id_table	= at25_spi_ids,
 };
 
-module_spi_driver(at25_driver);
+module_spi_mem_driver(at25_driver);
 
 MODULE_DESCRIPTION("Driver for most SPI EEPROMs");
 MODULE_AUTHOR("David Brownell");
-- 
2.50.0
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Christophe Leroy 3 months ago
Hi,

Le 03/07/2025 à 00:28, A. Sverdlin a écrit :
> From: Alexander Sverdlin <alexander.sverdlin@siemens.com>
> 
> Replace the RAW SPI accesses with spi-mem API. The latter will fall back to
> RAW SPI accesses if spi-mem callbacks are not implemented by a controller
> driver.

With this patch (kernel v6.17.1) our powerpc boards are totally 
unstable, we get multiple random Oops due to bad memory accesses.

With this commit reverted the board is stable again.

The SPI driver is:

CONFIG_SPI=y
CONFIG_SPI_MASTER=y
CONFIG_SPI_MEM=y
CONFIG_SPI_FSL_LIB=y
CONFIG_SPI_FSL_CPM=y
CONFIG_SPI_FSL_SPI=y

How can we further investigate the issue ?

Thanks
Christophe

> 
> Notable advantages:
> - read function now allocates a bounce buffer for SPI DMA compatibility,
>    similar to write function;
> - the driver can now be used in conjunction with SPI controller drivers
>    providing spi-mem API only, e.g. spi-nxp-fspi.
> - during the initial probe the driver polls busy/ready status bit for 25ms
>    instead of giving up instantly and hoping that the FW didn't write the
>    EEPROM
> 
> Notes:
> - mutex_lock() has been dropped from fm25_aux_read() because the latter is
>    only being called in probe phase and therefore cannot race with
>    at25_ee_read() or at25_ee_write()
> 
> Quick 4KB block size test with CY15B102Q 256KB F-RAM over spi_omap2_mcspi
> driver (no spi-mem ops provided, fallback to raw SPI inside spi-mem):
> 
> OP	| throughput, KB/s	| change
> --------+-----------------------+-------
> write	| 1717.847 -> 1656.684	| -3.6%
> read	| 1115.868 -> 1059.367	| -5.1%
> 
> The lower throughtput probably comes from the 3 messages per SPI transfer
> inside spi-mem instead of hand-crafted 2 messages per transfer in the
> former at25 code. However, if the raw SPI access is not preserved, then
> the driver doesn't grow from the lines-of-code perspective and subjectively
> could be considered even a bit simpler.
> 
> Higher performance impact on the read operation could be explained by the
> newly introduced bounce buffer in read operation. I didn't find any
> explanation or guarantee, why would a bounce buffer be not needed on the
> read side, so I assume it's a pure luck that nobody read EEPROM into
> some variable on stack on an architecture where kernel stack would be
> not DMA-able.
> 
> Cc: Michael Walle <mwalle@kernel.org>
> Cc: Hui Wang <hui.wang@canonical.com>
> Link: https://lore.kernel.org/all/28ab8b72afee1af59b628f7389f0d7f5@kernel.org/
> Signed-off-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>
> ---
>   drivers/misc/eeprom/Kconfig |   1 +
>   drivers/misc/eeprom/at25.c  | 321 ++++++++++++++++++------------------
>   2 files changed, 162 insertions(+), 160 deletions(-)
> 
> diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig
> index cb1c4b8e7fd37..0bef5b93bd6dc 100644
> --- a/drivers/misc/eeprom/Kconfig
> +++ b/drivers/misc/eeprom/Kconfig
> @@ -37,6 +37,7 @@ config EEPROM_AT25
>   	depends on SPI && SYSFS
>   	select NVMEM
>   	select NVMEM_SYSFS
> +	select SPI_MEM
>   	help
>   	  Enable this driver to get read/write support to most SPI EEPROMs
>   	  and Cypress FRAMs,
> diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
> index 595ceb9a71261..20611320e7152 100644
> --- a/drivers/misc/eeprom/at25.c
> +++ b/drivers/misc/eeprom/at25.c
> @@ -7,8 +7,10 @@
>    */
>   
>   #include <linux/bits.h>
> +#include <linux/cleanup.h>
>   #include <linux/delay.h>
>   #include <linux/device.h>
> +#include <linux/iopoll.h>
>   #include <linux/kernel.h>
>   #include <linux/module.h>
>   #include <linux/property.h>
> @@ -17,6 +19,7 @@
>   
>   #include <linux/spi/eeprom.h>
>   #include <linux/spi/spi.h>
> +#include <linux/spi/spi-mem.h>
>   
>   #include <linux/nvmem-provider.h>
>   
> @@ -35,13 +38,12 @@
>   
>   struct at25_data {
>   	struct spi_eeprom	chip;
> -	struct spi_device	*spi;
> +	struct spi_mem		*spimem;
>   	struct mutex		lock;
>   	unsigned		addrlen;
>   	struct nvmem_config	nvmem_config;
>   	struct nvmem_device	*nvmem;
>   	u8 sernum[FM25_SN_LEN];
> -	u8 command[EE_MAXADDRLEN + 1];
>   };
>   
>   #define	AT25_WREN	0x06		/* latch the write enable */
> @@ -74,20 +76,29 @@ struct at25_data {
>   
>   #define	io_limit	PAGE_SIZE	/* bytes */
>   
> +/* Handle the address MSB as part of instruction byte */
> +static u8 at25_instr(struct at25_data *at25, u8 instr, unsigned int off)
> +{
> +	if (!(at25->chip.flags & EE_INSTR_BIT3_IS_ADDR))
> +		return instr;
> +	if (off < BIT(at25->addrlen * 8))
> +		return instr;
> +	return instr | AT25_INSTR_BIT3;
> +}
> +
>   static int at25_ee_read(void *priv, unsigned int offset,
>   			void *val, size_t count)
>   {
> +	u8 *bounce __free(kfree) = kmalloc(min(count, io_limit), GFP_KERNEL);
>   	struct at25_data *at25 = priv;
>   	char *buf = val;
> -	size_t max_chunk = spi_max_transfer_size(at25->spi);
>   	unsigned int msg_offset = offset;
>   	size_t bytes_left = count;
>   	size_t segment;
> -	u8			*cp;
> -	ssize_t			status;
> -	struct spi_transfer	t[2];
> -	struct spi_message	m;
> -	u8			instr;
> +	int status;
> +
> +	if (!bounce)
> +		return -ENOMEM;
>   
>   	if (unlikely(offset >= at25->chip.byte_len))
>   		return -EINVAL;
> @@ -97,87 +108,67 @@ static int at25_ee_read(void *priv, unsigned int offset,
>   		return -EINVAL;
>   
>   	do {
> -		segment = min(bytes_left, max_chunk);
> -		cp = at25->command;
> +		struct spi_mem_op op;
>   
> -		instr = AT25_READ;
> -		if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
> -			if (msg_offset >= BIT(at25->addrlen * 8))
> -				instr |= AT25_INSTR_BIT3;
> +		segment = min(bytes_left, io_limit);
>   
> -		mutex_lock(&at25->lock);
> +		op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(at25_instr(at25, AT25_READ,
> +									     msg_offset), 1),
> +						   SPI_MEM_OP_ADDR(at25->addrlen, msg_offset, 1),
> +						   SPI_MEM_OP_NO_DUMMY,
> +						   SPI_MEM_OP_DATA_IN(segment, bounce, 1));
>   
> -		*cp++ = instr;
> -
> -		/* 8/16/24-bit address is written MSB first */
> -		switch (at25->addrlen) {
> -		default:	/* case 3 */
> -			*cp++ = msg_offset >> 16;
> -			fallthrough;
> -		case 2:
> -			*cp++ = msg_offset >> 8;
> -			fallthrough;
> -		case 1:
> -		case 0:	/* can't happen: for better code generation */
> -			*cp++ = msg_offset >> 0;
> -		}
> -
> -		spi_message_init(&m);
> -		memset(t, 0, sizeof(t));
> -
> -		t[0].tx_buf = at25->command;
> -		t[0].len = at25->addrlen + 1;
> -		spi_message_add_tail(&t[0], &m);
> -
> -		t[1].rx_buf = buf;
> -		t[1].len = segment;
> -		spi_message_add_tail(&t[1], &m);
> -
> -		status = spi_sync(at25->spi, &m);
> +		status = spi_mem_adjust_op_size(at25->spimem, &op);
> +		if (status)
> +			return status;
> +		segment = op.data.nbytes;
>   
> +		mutex_lock(&at25->lock);
> +		status = spi_mem_exec_op(at25->spimem, &op);
>   		mutex_unlock(&at25->lock);
> -
>   		if (status)
>   			return status;
> +		memcpy(buf, bounce, segment);
>   
>   		msg_offset += segment;
>   		buf += segment;
>   		bytes_left -= segment;
>   	} while (bytes_left > 0);
>   
> -	dev_dbg(&at25->spi->dev, "read %zu bytes at %d\n",
> +	dev_dbg(&at25->spimem->spi->dev, "read %zu bytes at %d\n",
>   		count, offset);
>   	return 0;
>   }
>   
> -/* Read extra registers as ID or serial number */
> +/*
> + * Read extra registers as ID or serial number
> + *
> + * Allow for the callers to provide @buf on stack (not necessary DMA-capable)
> + * by allocating a bounce buffer internally.
> + */
>   static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command,
>   			 int len)
>   {
> +	u8 *bounce __free(kfree) = kmalloc(len, GFP_KERNEL);
> +	struct spi_mem_op op;
>   	int status;
> -	struct spi_transfer t[2];
> -	struct spi_message m;
> -
> -	spi_message_init(&m);
> -	memset(t, 0, sizeof(t));
>   
> -	t[0].tx_buf = at25->command;
> -	t[0].len = 1;
> -	spi_message_add_tail(&t[0], &m);
> -
> -	t[1].rx_buf = buf;
> -	t[1].len = len;
> -	spi_message_add_tail(&t[1], &m);
> +	if (!bounce)
> +		return -ENOMEM;
>   
> -	mutex_lock(&at25->lock);
> +	op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(command, 1),
> +					   SPI_MEM_OP_NO_ADDR,
> +					   SPI_MEM_OP_NO_DUMMY,
> +					   SPI_MEM_OP_DATA_IN(len, bounce, 1));
>   
> -	at25->command[0] = command;
> +	status = spi_mem_exec_op(at25->spimem, &op);
> +	dev_dbg(&at25->spimem->spi->dev, "read %d aux bytes --> %d\n", len, status);
> +	if (status)
> +		return status;
>   
> -	status = spi_sync(at25->spi, &m);
> -	dev_dbg(&at25->spi->dev, "read %d aux bytes --> %d\n", len, status);
> +	memcpy(buf, bounce, len);
>   
> -	mutex_unlock(&at25->lock);
> -	return status;
> +	return 0;
>   }
>   
>   static ssize_t sernum_show(struct device *dev, struct device_attribute *attr, char *buf)
> @@ -195,14 +186,47 @@ static struct attribute *sernum_attrs[] = {
>   };
>   ATTRIBUTE_GROUPS(sernum);
>   
> +/*
> + * Poll Read Status Register with timeout
> + *
> + * Return:
> + * 0, if the chip is ready
> + * [positive] Status Register value as-is, if the chip is busy
> + * [negative] error code in case of read failure
> + */
> +static int at25_wait_ready(struct at25_data *at25)
> +{
> +	u8 *bounce __free(kfree) = kmalloc(1, GFP_KERNEL);
> +	struct spi_mem_op op;
> +	int status;
> +
> +	if (!bounce)
> +		return -ENOMEM;
> +
> +	op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(AT25_RDSR, 1),
> +					   SPI_MEM_OP_NO_ADDR,
> +					   SPI_MEM_OP_NO_DUMMY,
> +					   SPI_MEM_OP_DATA_IN(1, bounce, 1));
> +
> +	read_poll_timeout(spi_mem_exec_op, status,
> +			  status || !(bounce[0] & AT25_SR_nRDY), false,
> +			  USEC_PER_MSEC, USEC_PER_MSEC * EE_TIMEOUT,
> +			  at25->spimem, &op);
> +	if (status < 0)
> +		return status;
> +	if (!(bounce[0] & AT25_SR_nRDY))
> +		return 0;
> +
> +	return bounce[0];
> +}
> +
>   static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
>   {
> +	u8 *bounce __free(kfree) = kmalloc(min(count, io_limit), GFP_KERNEL);
>   	struct at25_data *at25 = priv;
> -	size_t maxsz = spi_max_transfer_size(at25->spi);
>   	const char *buf = val;
> -	int			status = 0;
> -	unsigned		buf_size;
> -	u8			*bounce;
> +	unsigned int buf_size;
> +	int status;
>   
>   	if (unlikely(off >= at25->chip.byte_len))
>   		return -EFBIG;
> @@ -211,11 +235,8 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
>   	if (unlikely(!count))
>   		return -EINVAL;
>   
> -	/* Temp buffer starts with command and address */
>   	buf_size = at25->chip.page_size;
> -	if (buf_size > io_limit)
> -		buf_size = io_limit;
> -	bounce = kmalloc(buf_size + at25->addrlen + 1, GFP_KERNEL);
> +
>   	if (!bounce)
>   		return -ENOMEM;
>   
> @@ -223,85 +244,64 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
>   	 * For write, rollover is within the page ... so we write at
>   	 * most one page, then manually roll over to the next page.
>   	 */
> -	mutex_lock(&at25->lock);
> +	guard(mutex)(&at25->lock);
>   	do {
> -		unsigned long	timeout, retries;
> -		unsigned	segment;
> -		unsigned	offset = off;
> -		u8		*cp = bounce;
> -		int		sr;
> -		u8		instr;
> -
> -		*cp = AT25_WREN;
> -		status = spi_write(at25->spi, cp, 1);
> -		if (status < 0) {
> -			dev_dbg(&at25->spi->dev, "WREN --> %d\n", status);
> -			break;
> -		}
> -
> -		instr = AT25_WRITE;
> -		if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
> -			if (offset >= BIT(at25->addrlen * 8))
> -				instr |= AT25_INSTR_BIT3;
> -		*cp++ = instr;
> +		struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(AT25_WREN, 1),
> +						  SPI_MEM_OP_NO_ADDR,
> +						  SPI_MEM_OP_NO_DUMMY,
> +						  SPI_MEM_OP_NO_DATA);
> +		unsigned int segment;
>   
> -		/* 8/16/24-bit address is written MSB first */
> -		switch (at25->addrlen) {
> -		default:	/* case 3 */
> -			*cp++ = offset >> 16;
> -			fallthrough;
> -		case 2:
> -			*cp++ = offset >> 8;
> -			fallthrough;
> -		case 1:
> -		case 0:	/* can't happen: for better code generation */
> -			*cp++ = offset >> 0;
> +		status = spi_mem_exec_op(at25->spimem, &op);
> +		if (status < 0) {
> +			dev_dbg(&at25->spimem->spi->dev, "WREN --> %d\n", status);
> +			return status;
>   		}
>   
>   		/* Write as much of a page as we can */
> -		segment = buf_size - (offset % buf_size);
> +		segment = buf_size - (off % buf_size);
>   		if (segment > count)
>   			segment = count;
> -		if (segment > maxsz)
> -			segment = maxsz;
> -		memcpy(cp, buf, segment);
> -		status = spi_write(at25->spi, bounce,
> -				segment + at25->addrlen + 1);
> -		dev_dbg(&at25->spi->dev, "write %u bytes at %u --> %d\n",
> -			segment, offset, status);
> -		if (status < 0)
> -			break;
> +		if (segment > io_limit)
> +			segment = io_limit;
> +
> +		op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(at25_instr(at25, AT25_WRITE, off),
> +								  1),
> +						   SPI_MEM_OP_ADDR(at25->addrlen, off, 1),
> +						   SPI_MEM_OP_NO_DUMMY,
> +						   SPI_MEM_OP_DATA_OUT(segment, bounce, 1));
> +
> +		status = spi_mem_adjust_op_size(at25->spimem, &op);
> +		if (status)
> +			return status;
> +		segment = op.data.nbytes;
> +
> +		memcpy(bounce, buf, segment);
> +
> +		status = spi_mem_exec_op(at25->spimem, &op);
> +		dev_dbg(&at25->spimem->spi->dev, "write %u bytes at %u --> %d\n",
> +			segment, off, status);
> +		if (status)
> +			return status;
>   
>   		/*
>   		 * REVISIT this should detect (or prevent) failed writes
>   		 * to read-only sections of the EEPROM...
>   		 */
>   
> -		/* Wait for non-busy status */
> -		timeout = jiffies + msecs_to_jiffies(EE_TIMEOUT);
> -		retries = 0;
> -		do {
> -
> -			sr = spi_w8r8(at25->spi, AT25_RDSR);
> -			if (sr < 0 || (sr & AT25_SR_nRDY)) {
> -				dev_dbg(&at25->spi->dev,
> -					"rdsr --> %d (%02x)\n", sr, sr);
> -				/* at HZ=100, this is sloooow */
> -				msleep(1);
> -				continue;
> -			}
> -			if (!(sr & AT25_SR_nRDY))
> -				break;
> -		} while (retries++ < 3 || time_before_eq(jiffies, timeout));
> -
> -		if ((sr < 0) || (sr & AT25_SR_nRDY)) {
> -			dev_err(&at25->spi->dev,
> +		status = at25_wait_ready(at25);
> +		if (status < 0) {
> +			dev_err_probe(&at25->spimem->spi->dev, status,
> +				      "Read Status Redister command failed\n");
> +			return status;
> +		}
> +		if (status) {
> +			dev_dbg(&at25->spimem->spi->dev,
> +				"Status %02x\n", status);
> +			dev_err(&at25->spimem->spi->dev,
>   				"write %u bytes offset %u, timeout after %u msecs\n",
> -				segment, offset,
> -				jiffies_to_msecs(jiffies -
> -					(timeout - EE_TIMEOUT)));
> -			status = -ETIMEDOUT;
> -			break;
> +				segment, off, EE_TIMEOUT);
> +			return -ETIMEDOUT;
>   		}
>   
>   		off += segment;
> @@ -310,9 +310,6 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
>   
>   	} while (count > 0);
>   
> -	mutex_unlock(&at25->lock);
> -
> -	kfree(bounce);
>   	return status;
>   }
>   
> @@ -429,31 +426,33 @@ static const struct spi_device_id at25_spi_ids[] = {
>   };
>   MODULE_DEVICE_TABLE(spi, at25_spi_ids);
>   
> -static int at25_probe(struct spi_device *spi)
> +static int at25_probe(struct spi_mem *mem)
>   {
> -	struct at25_data	*at25 = NULL;
> -	int			err;
> -	int			sr;
> +	struct spi_device *spi = mem->spi;
>   	struct spi_eeprom *pdata;
> +	struct at25_data *at25;
>   	bool is_fram;
> +	int err;
> +
> +	at25 = devm_kzalloc(&spi->dev, sizeof(*at25), GFP_KERNEL);
> +	if (!at25)
> +		return -ENOMEM;
> +
> +	at25->spimem = mem;
>   
>   	/*
>   	 * Ping the chip ... the status register is pretty portable,
> -	 * unlike probing manufacturer IDs. We do expect that system
> -	 * firmware didn't write it in the past few milliseconds!
> +	 * unlike probing manufacturer IDs.
>   	 */
> -	sr = spi_w8r8(spi, AT25_RDSR);
> -	if (sr < 0 || sr & AT25_SR_nRDY) {
> -		dev_dbg(&spi->dev, "rdsr --> %d (%02x)\n", sr, sr);
> +	err = at25_wait_ready(at25);
> +	if (err < 0)
> +		return dev_err_probe(&spi->dev, err, "Read Status Register command failed\n");
> +	if (err) {
> +		dev_err(&spi->dev, "Not ready (%02x)\n", err);
>   		return -ENXIO;
>   	}
>   
> -	at25 = devm_kzalloc(&spi->dev, sizeof(*at25), GFP_KERNEL);
> -	if (!at25)
> -		return -ENOMEM;
> -
>   	mutex_init(&at25->lock);
> -	at25->spi = spi;
>   	spi_set_drvdata(spi, at25);
>   
>   	is_fram = fwnode_device_is_compatible(dev_fwnode(&spi->dev), "cypress,fm25");
> @@ -514,17 +513,19 @@ static int at25_probe(struct spi_device *spi)
>   
>   /*-------------------------------------------------------------------------*/
>   
> -static struct spi_driver at25_driver = {
> -	.driver = {
> -		.name		= "at25",
> -		.of_match_table = at25_of_match,
> -		.dev_groups	= sernum_groups,
> +static struct spi_mem_driver at25_driver = {
> +	.spidrv = {
> +		.driver = {
> +			.name		= "at25",
> +			.of_match_table = at25_of_match,
> +			.dev_groups	= sernum_groups,
> +		},
> +		.id_table	= at25_spi_ids,
>   	},
>   	.probe		= at25_probe,
> -	.id_table	= at25_spi_ids,
>   };
>   
> -module_spi_driver(at25_driver);
> +module_spi_mem_driver(at25_driver);
>   
>   MODULE_DESCRIPTION("Driver for most SPI EEPROMs");
>   MODULE_AUTHOR("David Brownell");

Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Greg Kroah-Hartman 3 months ago
On Mon, Nov 03, 2025 at 05:33:34PM +0100, Christophe Leroy wrote:
> Hi,
> 
> Le 03/07/2025 à 00:28, A. Sverdlin a écrit :
> > From: Alexander Sverdlin <alexander.sverdlin@siemens.com>
> > 
> > Replace the RAW SPI accesses with spi-mem API. The latter will fall back to
> > RAW SPI accesses if spi-mem callbacks are not implemented by a controller
> > driver.
> 
> With this patch (kernel v6.17.1) our powerpc boards are totally unstable, we
> get multiple random Oops due to bad memory accesses.
> 
> With this commit reverted the board is stable again.
> 
> The SPI driver is:
> 
> CONFIG_SPI=y
> CONFIG_SPI_MASTER=y
> CONFIG_SPI_MEM=y
> CONFIG_SPI_FSL_LIB=y
> CONFIG_SPI_FSL_CPM=y
> CONFIG_SPI_FSL_SPI=y
> 
> How can we further investigate the issue ?

We can revert it until it comes back working properly.  Can you send a
revert so that I can apply it?

thanks,

greg k-h
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Sverdlin, Alexander 3 months ago
Hi Greg,

On Wed, 2025-11-05 at 08:15 +0900, Greg Kroah-Hartman wrote:
> > > Replace the RAW SPI accesses with spi-mem API. The latter will fall back to
> > > RAW SPI accesses if spi-mem callbacks are not implemented by a controller
> > > driver.
> > 
> > With this patch (kernel v6.17.1) our powerpc boards are totally unstable, we
> > get multiple random Oops due to bad memory accesses.
> > 
> > With this commit reverted the board is stable again.
> > 
> > The SPI driver is:
> > 
> > CONFIG_SPI=y
> > CONFIG_SPI_MASTER=y
> > CONFIG_SPI_MEM=y
> > CONFIG_SPI_FSL_LIB=y
> > CONFIG_SPI_FSL_CPM=y
> > CONFIG_SPI_FSL_SPI=y
> > 
> > How can we further investigate the issue ?
> 
> We can revert it until it comes back working properly.  Can you send a
> revert so that I can apply it?

what is known for sure is that it triggers a bug in SPI_FSL_CPM [1],
which probably justifies a revert in -stable. But there are no indications the
patch in question misbehaves itself as of now. I'm going to KASAN it on all the
HW I can get my hands on this week.

[1] https://lore.kernel.org/all/764858d5-5633-4aeb-aabe-52f9eb1eb530@csgroup.eu/

-- 
Alexander Sverdlin
Siemens AG
www.siemens.com
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Sverdlin, Alexander 3 months ago
Hi Greg, Christophe,

On Wed, 2025-11-05 at 08:20 +0100, Alexander Sverdlin wrote:
> > > > Replace the RAW SPI accesses with spi-mem API. The latter will fall back to
> > > > RAW SPI accesses if spi-mem callbacks are not implemented by a controller
> > > > driver.
> > > 
> > > With this patch (kernel v6.17.1) our powerpc boards are totally unstable, we
> > > get multiple random Oops due to bad memory accesses.
> > > 
> > > With this commit reverted the board is stable again.
> > > 
> > > The SPI driver is:
> > > 
> > > CONFIG_SPI=y
> > > CONFIG_SPI_MASTER=y
> > > CONFIG_SPI_MEM=y
> > > CONFIG_SPI_FSL_LIB=y
> > > CONFIG_SPI_FSL_CPM=y
> > > CONFIG_SPI_FSL_SPI=y
> > > 
> > > How can we further investigate the issue ?
> > 
> > We can revert it until it comes back working properly.  Can you send a
> > revert so that I can apply it?
> 
> what is known for sure is that it triggers a bug in SPI_FSL_CPM [1],
> which probably justifies a revert in -stable. But there are no indications the
> patch in question misbehaves itself as of now. I'm going to KASAN it on all the
> HW I can get my hands on this week.
> 
> [1] https://lore.kernel.org/all/764858d5-5633-4aeb-aabe-52f9eb1eb530@csgroup.eu/

just letting you know that I stress-tested the at25 driver with KASAN on two ARM64
platforms, TI AM62 and i.MX8QXP, in the latter case it's even spi-nxp-fspi driver
only providing spi-mem API, while the TI SoC goes over normal SPI. Up to now it
went smoothly.

Christophe, while I'm trying to get my hands on a PPC32 HW similar to yours, would
you be able to test with CONFIG_DMA_API_DEBUG=y? The fact the KASAN doesn't detect
anything after the fix to spi-fsl-cpm you've mentioned makes me think the corruption
is external to CPU core? Will you post you fix to fsl-cpm code?

-- 
Alexander Sverdlin
Siemens AG
www.siemens.com
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Christophe Leroy 3 months ago
Hi Alexander,

Le 07/11/2025 à 12:49, Sverdlin, Alexander a écrit :
> Hi Greg, Christophe,
> 
> On Wed, 2025-11-05 at 08:20 +0100, Alexander Sverdlin wrote:
>>>>> Replace the RAW SPI accesses with spi-mem API. The latter will fall back to
>>>>> RAW SPI accesses if spi-mem callbacks are not implemented by a controller
>>>>> driver.
>>>>
>>>> With this patch (kernel v6.17.1) our powerpc boards are totally unstable, we
>>>> get multiple random Oops due to bad memory accesses.
>>>>
>>>> With this commit reverted the board is stable again.
>>>>
>>>> The SPI driver is:
>>>>
>>>> CONFIG_SPI=y
>>>> CONFIG_SPI_MASTER=y
>>>> CONFIG_SPI_MEM=y
>>>> CONFIG_SPI_FSL_LIB=y
>>>> CONFIG_SPI_FSL_CPM=y
>>>> CONFIG_SPI_FSL_SPI=y
>>>>
>>>> How can we further investigate the issue ?
>>>
>>> We can revert it until it comes back working properly.  Can you send a
>>> revert so that I can apply it?
>>
>> what is known for sure is that it triggers a bug in SPI_FSL_CPM [1],
>> which probably justifies a revert in -stable. But there are no indications the
>> patch in question misbehaves itself as of now. I'm going to KASAN it on all the
>> HW I can get my hands on this week.
>>
>> [1] https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.kernel.org%2Fall%2F764858d5-5633-4aeb-aabe-52f9eb1eb530%40csgroup.eu%2F&data=05%7C02%7Cchristophe.leroy%40csgroup.eu%7Cf9772eb8a7ac440e64ef08de1df3c518%7C8b87af7d86474dc78df45f69a2011bb5%7C0%7C0%7C638981129997670721%7CUnknown%7CTWFpbGZsb3d8eyJFbXB0eU1hcGkiOnRydWUsIlYiOiIwLjAuMDAwMCIsIlAiOiJXaW4zMiIsIkFOIjoiTWFpbCIsIldUIjoyfQ%3D%3D%7C0%7C%7C%7C&sdata=VnQ2%2B063DdxLpc0vMopRRqGGHXt6zgqUV%2FITU19Zg0s%3D&reserved=0
> 
> just letting you know that I stress-tested the at25 driver with KASAN on two ARM64
> platforms, TI AM62 and i.MX8QXP, in the latter case it's even spi-nxp-fspi driver
> only providing spi-mem API, while the TI SoC goes over normal SPI. Up to now it
> went smoothly.
> 
> Christophe, while I'm trying to get my hands on a PPC32 HW similar to yours, would
> you be able to test with CONFIG_DMA_API_DEBUG=y? The fact the KASAN doesn't detect
> anything after the fix to spi-fsl-cpm you've mentioned makes me think the corruption
> is external to CPU core? Will you post you fix to fsl-cpm code?
> 


I'm now back from travelling and have been able to dig more into the 
problem.

So it seems the (only) problem is due to the buffer overrun when t->len 
is over 256 and odd. And it is only on CPM1 (mpc8xx), you won't see it 
on CPM2 (mpx82xx)

It looks like when this overrun is properly fixed the board is stable 
again. The collegue of mine who tried to fix the KASAN report used AI, 
leading to a stupid fix: it just truncated the transfer to the below 
even length, which fixed the KASAN report but created more problems.

The problem was introduced by commit fc96ec826bce ("spi: fsl-cpm: Use 16 
bit mode for large transfers with even size") which fails to check the 
length is even before switching to 16 bits per word. The right fix is:

diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index 2f2082652a1a2..81d5c3f4eb6fe 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -335,7 +335,7 @@ static int fsl_spi_prepare_message(struct 
spi_controller *ctlr,
  			if (t->bits_per_word == 16 || t->bits_per_word == 32)
  				t->bits_per_word = 8; /* pretend its 8 bits */
  			if (t->bits_per_word == 8 && t->len >= 256 &&
-			    (mpc8xxx_spi->flags & SPI_CPM1))
+			    (t->len & 1) == 0 && (mpc8xxx_spi->flags & SPI_CPM1))
  				t->bits_per_word = 16;
  		}
  	}

Now I'm trying to understand why the problem surfaced with commit 
8ad6249c51d0 ("eeprom: at25: convert to spi-mem API")

Christophe
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Christophe Leroy 3 months ago
Hi Again,

Le 08/11/2025 à 11:05, Christophe Leroy a écrit :
> Hi Alexander,
> 
> Now I'm trying to understand why the problem surfaced with commit 
> 8ad6249c51d0 ("eeprom: at25: convert to spi-mem API")
> 

The reason why it was not a problem before was that the transfer was 
done into of->prealloc_buf (fs/kernfs/file.c) which is a kmalloc() with 
size (PAGE_SIZE + 1).

Following the rework of at25 it now goes into the bounce buffer which is 
allocated with the exact size of the transfer.

Why do we need an intermediate bounce buffer now, why can't 
of->prealloc_buf be used directly as before ?

Christophe
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Sverdlin, Alexander 3 months ago
Hi Christophe,

On Sat, 2025-11-08 at 12:14 +0100, Christophe Leroy wrote:
> > Now I'm trying to understand why the problem surfaced with commit 
> > 8ad6249c51d0 ("eeprom: at25: convert to spi-mem API")
> > 
> 
> The reason why it was not a problem before was that the transfer was 
> done into of->prealloc_buf (fs/kernfs/file.c) which is a kmalloc() with 
> size (PAGE_SIZE + 1).
> 
> Following the rework of at25 it now goes into the bounce buffer which is 
> allocated with the exact size of the transfer.
> 
> Why do we need an intermediate bounce buffer now, why can't 
> of->prealloc_buf be used directly as before ?

userspace access is only one part of the story, the other is NVMEM
kernel-internal API, like nvmem_cell_read*() and I suppose there is
no requirement for a destination buffer to be DMA-able.

-- 
Alexander Sverdlin
Siemens AG
www.siemens.com
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Christophe Leroy 3 months ago

Le 08/11/2025 à 12:41, Sverdlin, Alexander a écrit :
> Hi Christophe,
> 
> On Sat, 2025-11-08 at 12:14 +0100, Christophe Leroy wrote:
>>> Now I'm trying to understand why the problem surfaced with commit
>>> 8ad6249c51d0 ("eeprom: at25: convert to spi-mem API")
>>>
>>
>> The reason why it was not a problem before was that the transfer was
>> done into of->prealloc_buf (fs/kernfs/file.c) which is a kmalloc() with
>> size (PAGE_SIZE + 1).
>>
>> Following the rework of at25 it now goes into the bounce buffer which is
>> allocated with the exact size of the transfer.
>>
>> Why do we need an intermediate bounce buffer now, why can't
>> of->prealloc_buf be used directly as before ?
> 
> userspace access is only one part of the story, the other is NVMEM
> kernel-internal API, like nvmem_cell_read*() and I suppose there is
> no requirement for a destination buffer to be DMA-able.
> 

As far as I can see nvmem_cell_read*() allocates a kmalloc() bounce 
buffer already:

	buf = kzalloc(max_t(size_t, entry->raw_len, entry->bytes), GFP_KERNEL);
	if (!buf)
		return ERR_PTR(-ENOMEM);

	rc = __nvmem_cell_read(nvmem, cell->entry, buf, len, cell->id, 
cell->index);
	if (rc) {
		kfree(buf);
		return ERR_PTR(rc);
	}

	return buf;

Christophe
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Sverdlin, Alexander 2 months, 4 weeks ago
Hi Christophe,

On Sat, 2025-11-08 at 17:24 +0100, Christophe Leroy wrote:
> > > The reason why it was not a problem before was that the transfer was
> > > done into of->prealloc_buf (fs/kernfs/file.c) which is a kmalloc() with
> > > size (PAGE_SIZE + 1).
> > > 
> > > Following the rework of at25 it now goes into the bounce buffer which is
> > > allocated with the exact size of the transfer.
> > > 
> > > Why do we need an intermediate bounce buffer now, why can't
> > > of->prealloc_buf be used directly as before ?
> > 
> > userspace access is only one part of the story, the other is NVMEM
> > kernel-internal API, like nvmem_cell_read*() and I suppose there is
> > no requirement for a destination buffer to be DMA-able.
> > 
> 
> As far as I can see nvmem_cell_read*() allocates a kmalloc() bounce 
> buffer already:

sorry, I referenced wrong family of functions, but...

> 	buf = kzalloc(max_t(size_t, entry->raw_len, entry->bytes), GFP_KERNEL);

[]

There are zero-copy call chains as well in nvmem core:

nvmem->reg_read() <= __nvmem_reg_read() <= nvmem_reg_read() <= __nvmem_cell_read() <= nvmem_device_cell_read() (exported symbol)
nvmem->reg_read() <= __nvmem_reg_read() <= nvmem_reg_read() <= nvmem_device_read() (exported)

Documentation/driver-api/nvmem.rst doesn't impose any DMA-related requirements
on a destination buffer. Seems that we are not allowed to drop the bounce buffer
in at25 driver as of now.

-- 
Alexander Sverdlin
Siemens AG
www.siemens.com
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Arnd Bergmann 3 months ago
On Fri, Nov 7, 2025, at 12:49, Sverdlin, Alexander wrote:

> Christophe, while I'm trying to get my hands on a PPC32 HW similar to 
> yours, would
> you be able to test with CONFIG_DMA_API_DEBUG=y? The fact the KASAN 
> doesn't detect
> anything after the fix to spi-fsl-cpm you've mentioned makes me think 
> the corruption
> is external to CPU core? Will you post you fix to fsl-cpm code?

I had a look over the patch and don't didn't see anything extra
suspicious, but I wonder if this is possibly a problem with a DMA
to stack, as Christophe mentioned this showing up on a system with
VMAP_STACK=y.

If for some reason the original driver used to bounce the data while
the current version attempts a DMA, that may lead to arbitrary
data corruption from the invalid virt_to_phys(vmalloc_pointer)
conversion.

The opposite might be true as well: if the 'val' pointer passed
into the read/write functions was previously detected as
needing a bounce buffer down the stack (spi core or spi
master driver) but now the added 'bounce' allocation gets
passed directly into a dma engine, that may also fail if the
GFP_KERNEL allocation is not sufficient for the range or
alignment requirements of the DMA master.

Christophe, do you know the CPM DMA has any restrictions
there, e.g. needing a GFP_DMA allocation?

     Arnd
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Sverdlin, Alexander 3 months ago
Hi Arnd,

On Fri, 2025-11-07 at 14:08 +0100, Arnd Bergmann wrote:
> > Christophe, while I'm trying to get my hands on a PPC32 HW similar to 
> > yours, would
> > you be able to test with CONFIG_DMA_API_DEBUG=y? The fact the KASAN 
> > doesn't detect
> > anything after the fix to spi-fsl-cpm you've mentioned makes me think 
> > the corruption
> > is external to CPU core? Will you post you fix to fsl-cpm code?
> 
> I had a look over the patch and don't didn't see anything extra
> suspicious, but I wonder if this is possibly a problem with a DMA
> to stack, as Christophe mentioned this showing up on a system with
> VMAP_STACK=y.
> 
> If for some reason the original driver used to bounce the data while
> the current version attempts a DMA, that may lead to arbitrary
> data corruption from the invalid virt_to_phys(vmalloc_pointer)
> conversion.

I've thought about this as well, but in my patch I actually add
and additional bounce buffer, comparing to the original code:

https://lore.kernel.org/all/d5be177d-505d-4d72-9d18-913e69c23ea8@app.fastmail.com/

> The opposite might be true as well: if the 'val' pointer passed
> into the read/write functions was previously detected as
> needing a bounce buffer down the stack (spi core or spi
> master driver) but now the added 'bounce' allocation gets
> passed directly into a dma engine, that may also fail if the
> GFP_KERNEL allocation is not sufficient for the range or
> alignment requirements of the DMA master.
> 
> Christophe, do you know the CPM DMA has any restrictions
> there, e.g. needing a GFP_DMA allocation?

-- 
Alexander Sverdlin
Siemens AG
www.siemens.com
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Sverdlin, Alexander 3 months ago
Hi Christophe,

On Mon, 2025-11-03 at 17:33 +0100, Christophe Leroy wrote:
> > Replace the RAW SPI accesses with spi-mem API. The latter will fall back to
> > RAW SPI accesses if spi-mem callbacks are not implemented by a controller
> > driver.
> 
> With this patch (kernel v6.17.1) our powerpc boards are totally 
> unstable, we get multiple random Oops due to bad memory accesses.
> 
> With this commit reverted the board is stable again.
> 
> The SPI driver is:
> 
> CONFIG_SPI=y
> CONFIG_SPI_MASTER=y
> CONFIG_SPI_MEM=y
> CONFIG_SPI_FSL_LIB=y
> CONFIG_SPI_FSL_CPM=y
> CONFIG_SPI_FSL_SPI=y
> 
> How can we further investigate the issue ?

could you share these "random Oops"?

Looks like spi-fsl-spi doesn't support spi-mem interface (similar to spi-fsl-lpspi
we use the patch with), so spi-mem falls back to the regular SPI. From this standpoint
it's not that much different from the situation before patch.

But let's look into the splats.

-- 
Alexander Sverdlin
Siemens AG
www.siemens.com
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Christophe Leroy 3 months ago

Le 03/11/2025 à 20:12, Sverdlin, Alexander a écrit :
> [Vous ne recevez pas souvent de courriers de alexander.sverdlin@siemens.com. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
> 
> Hi Christophe,
> 
> On Mon, 2025-11-03 at 17:33 +0100, Christophe Leroy wrote:
>>> Replace the RAW SPI accesses with spi-mem API. The latter will fall back to
>>> RAW SPI accesses if spi-mem callbacks are not implemented by a controller
>>> driver.
>>
>> With this patch (kernel v6.17.1) our powerpc boards are totally
>> unstable, we get multiple random Oops due to bad memory accesses.
>>
>> With this commit reverted the board is stable again.
>>
>> The SPI driver is:
>>
>> CONFIG_SPI=y
>> CONFIG_SPI_MASTER=y
>> CONFIG_SPI_MEM=y
>> CONFIG_SPI_FSL_LIB=y
>> CONFIG_SPI_FSL_CPM=y
>> CONFIG_SPI_FSL_SPI=y
>>
>> How can we further investigate the issue ?
> 
> could you share these "random Oops"?

Sure. At the first place they look unrelated. Something is likely 
writing in the weed.

> 
> Looks like spi-fsl-spi doesn't support spi-mem interface (similar to spi-fsl-lpspi
> we use the patch with), so spi-mem falls back to the regular SPI. From this standpoint
> it's not that much different from the situation before patch.
> 
> But let's look into the splats.

First one:

[   27.112241] Kernel attempted to read user page (7f) - exploit 
attempt? (uid: 0)
[   27.119739] BUG: Kernel NULL pointer dereference on read at 0x0000007f
[   27.126181] Faulting instruction address: 0xc01af5fc
[   27.131093] Oops: Kernel access of bad area, sig: 11 [#2]
[   27.136422] BE PAGE_SIZE=16K  CMPC885
[   27.143594] CPU: 0 UID: 0 PID: 64 Comm: rcS Tainted: G      D W 
     6.17.1-knld-3.16.4rc3-git8ac3a4-g568c147ca0f7 #17 PREEMPT
[   27.155385] Tainted: [D]=DIE, [W]=WARN
[   27.159056] Hardware name: MCR3000_2G 8xx 0x500000 CMPC885
[   27.164479] NIP:  c01af5fc LR: c08c3698 CTR: 00000000
[   27.169481] REGS: ca173c00 TRAP: 0300   Tainted: G      D W 
  (6.17.1-knld-3.16.4rc3-git8ac3a4-g568c147ca0f7)
[   27.180054] MSR:  00009032 <EE,ME,IR,DR,RI>  CR: 95003599  XER: a000bf00
[   27.187024] DAR: 0000007f DSISR: c0000000
[   27.187024] GPR00: c08c3d1c ca173cc0 c32b4c80 c2004400 00000cc0 
00000cc0 00000100 000039db
[   27.187024] GPR08: 000039da 00000080 c31ea000 00000004 35003599 
100d815e c329a1c0 c11b0000
[   27.187024] GPR16: c312ac30 c11659ec c11d7f98 c11b01b4 c3284260 
c107b85c c312abc0 c1165450
[   27.187024] GPR24: c11d7f08 c11f0000 00000cc0 ffffffff c08c3698 
00000cc0 ffffffff c2004400
[   27.226870] NIP [c01af5fc] kmem_cache_alloc_noprof+0x54/0x21c
[   27.232551] LR [c08c3698] mas_dup_build+0x154/0x75c
[   27.237372] Call Trace:
[   27.239781] [ca173cc0] [00000001] 0x1 (unreliable)
[   27.244514] [ca173ce0] [00000000] 0x0
[   27.248129] [ca173d20] [c08c3d1c] __mt_dup+0x7c/0xf8
[   27.253034] [ca173d90] [c0188b14] dup_mmap+0xc8/0x690
[   27.258026] [ca173df0] [c001f8a4] copy_process+0xcd4/0x148c
[   27.263534] [ca173e70] [c0020188] kernel_clone+0xa4/0x3e8
[   27.268869] [ca173eb0] [c0020820] sys_clone+0x78/0x9c
[   27.273861] [ca173f20] [c000ddcc] system_call_exception+0x8c/0x160
[   27.279971] [ca173f30] [c00110a8] ret_from_syscall+0x0/0x28
[   27.285479] ---- interrupt: c00 at 0xfca2a6c
[   27.289696] NIP:  0fca2a6c LR: 0fca7c40 CTR: 0fc6f51c
[   27.294701] REGS: ca173f40 TRAP: 0c00   Tainted: G      D W 
  (6.17.1-knld-3.16.4rc3-git8ac3a4-g568c147ca0f7)
[   27.305272] MSR:  0000d032 <EE,PR,ME,IR,DR,RI>  CR: 25005590  XER: 
a000bf00
[   27.312587]
[   27.312587] GPR00: 00000078 7fc30fb0 7795b520 01200011 00000000 
00000000 00000000 77954088
[   27.312587] GPR08: 0000d032 100d4010 100cdd2d 0fc6f51c 59005998 
100d815e c0004a68 00000000
[   27.312587] GPR16: 00000000 7fc467dc 1001041c 10010000 100103ec 
10005984 00000000 00000000
[   27.312587] GPR24: ffffffff 00000000 100d58a4 100d58a4 100d5340 
00000000 0fde378c 00000000
[   27.349679] NIP [0fca2a6c] 0xfca2a6c
[   27.353207] LR [0fca7c40] 0xfca7c40
[   27.356649] ---- interrupt: c00
[   27.359753] Code: 7c9d2378 418201ec 813f0000 81090004 83c90000 
81290008 2c1e0000 4182004c 2c090000 38e80001 41820040 813f001c 
<7d3e482e> 7ca000a6 7c5113a6 815f0000
[   27.375418] ---[ end trace 0000000000000000 ]---

Second one:

[   25.086900] Disabling lock debugging due to kernel taint
[   25.086966] Machine check in kernel mode.
[   25.086999] Caused by (from SRR1=d032): Data access error at address 
7f9aeb94
[   25.087136] Oops: Machine check, sig: 7 [#1]
[   25.107454] BE PAGE_SIZE=16K  CMPC885
[   25.114628] CPU: 0 UID: 0 PID: 260 Comm: syslogd Tainted: G   M    W 
          6.17.1-knld-3.16.4rc3-git8ac3a4-g568c147ca0f7 #17 PREEMPT
[   25.126850] Tainted: [M]=MACHINE_CHECK, [W]=WARN
[   25.131380] Hardware name: MCR3000_2G 8xx 0x500000 CMPC885
[   25.136804] NIP:  0fcdbb30 LR: 0fde378c CTR: 00000000
[   25.141805] REGS: ca22bf40 TRAP: 0200   Tainted: G   M    W 
  (6.17.1-knld-3.16.4rc3-git8ac3a4-g568c147ca0f7)
[   25.152378] MSR:  0000d032 <EE,PR,ME,IR,DR,RI>  CR: 99003398  XER: 
a000bf00
[   25.159693] DAR: 7f9aeb94 DSISR: 00000001
[   25.159693] GPR00: 00000000 7f9aeb70 77d07520 0000004e 0fde378c 
29003398 77d004cc 0fcdbb14
[   25.159693] GPR08: 0000d032 fffff000 00000000 00000000 59003398 
100d815e 7fa52560 100d0000
[   25.159693] GPR16: 100d0000 00000000 ffffffff 00000059 100d0000 
11a681f7 11a683f8 11a681f8
[   25.159693] GPR24: 100d0000 100b4559 100d0178 00000058 11a681f8 
00000058 0fde378c 0000004e
[   25.199453] NIP [0fcdbb30] 0xfcdbb30
[   25.202982] LR [0fde378c] 0xfde378c
[   25.206427] Call Trace:
[   25.208845] ---[ end trace 0000000000000000 ]---

Third one:

[   25.295400] BUG: Bad page map in process syslogd  pte:ffffffff 
pmd:031c0041
[   25.302212] addr:7f98c000 vm_flags:00100173 anon_vma:c3278c30 
mapping:00000000 index:1fff7
[   25.310682] file:(null) fault:0x0 mmap:0x0 mmap_prepare: 0x0 
read_folio:0x0
[   25.317750] CPU: 0 UID: 0 PID: 260 Comm: syslogd Tainted: G   M  D W 
          6.17.1-knld-3.16.4rc3-git8ac3a4-g568c147ca0f7 #17 PREEMPT
[   25.329963] Tainted: [M]=MACHINE_CHECK, [D]=DIE, [W]=WARN
[   25.335258] Hardware name: MCR3000_2G 8xx 0x500000 CMPC885
[   25.340684] Call Trace:
[   25.343090] [ca22bc50] [c08b4d48] dump_stack+0x78/0x94 (unreliable)
[   25.349373] [ca22bc60] [c017b2c0] print_bad_pte.isra.0+0x134/0x240
[   25.355483] [ca22bcb0] [c017d374] vm_normal_page+0xc0/0xd0
[   25.360905] [ca22bcc0] [c017df94] zap_pte_range+0x1ec/0xaf4
[   25.366413] [ca22bd70] [c017f194] unmap_page_range+0xfc/0x144
[   25.372093] [ca22bda0] [c017f2d0] unmap_vmas+0x70/0x134
[   25.377256] [ca22bde0] [c0188244] exit_mmap+0xbc/0x3d0
[   25.382334] [ca22be80] [c001db84] mmput+0x4c/0x12c
[   25.387067] [ca22be90] [c0024b54] do_exit+0x20c/0x954
[   25.392059] [ca22bed0] [c0025324] make_task_dead+0x88/0x164
[   25.397567] [ca22bee0] [c000a6bc] die+0x204/0x20c
[   25.402214] [ca22bf10] [c000b4b0] machine_check_exception+0x110/0x264
[   25.408583] [ca22bf30] [c00031f4] MachineCheck_virt+0x100/0x104
[   25.414434] ---- interrupt: 200 at 0xfcdbb30
[   25.418651] NIP:  0fcdbb30 LR: 0fde378c CTR: 00000000
[   25.423657] REGS: ca22bf40 TRAP: 0200   Tainted: G   M  D W 
  (6.17.1-knld-3.16.4rc3-git8ac3a4-g568c147ca0f7)
[   25.434228] MSR:  0000d032 <EE,PR,ME,IR,DR,RI>  CR: 99003398  XER: 
a000bf00
[   25.441543] DAR: 7f9aeb94 DSISR: 00000001
[   25.441543] GPR00: 00000000 7f9aeb70 77d07520 0000004e 0fde378c 
29003398 77d004cc 0fcdbb14
[   25.441543] GPR08: 0000d032 fffff000 00000000 00000000 59003398 
100d815e 7fa52560 100d0000
[   25.441543] GPR16: 100d0000 00000000 ffffffff 00000059 100d0000 
11a681f7 11a683f8 11a681f8
[   25.441543] GPR24: 100d0000 100b4559 100d0178 00000058 11a681f8 
00000058 0fde378c 0000004e
[   25.481302] NIP [0fcdbb30] 0xfcdbb30
[   25.484831] LR [0fde378c] 0xfde378c
[   25.488273] ---- interrupt: 200
[   25.694562] BUG: Bad page map in process syslogd  pte:ffffffff 
pmd:031c0041
[   25.701361] addr:7f990000 vm_flags:00100173 anon_vma:c3278c30 
mapping:00000000 index:1fff8

Fourth one:

[   29.366796] Kernel attempted to read user page (24) - exploit 
attempt? (uid: 0)
[   29.373925] BUG: Kernel NULL pointer dereference on read at 0x00000024
[   29.380367] Faulting instruction address: 0xc08cbd0c
[   29.385279] Oops: Kernel access of bad area, sig: 11 [#1]
[   29.390607] BE PAGE_SIZE=16K  CMPC885
[   29.397780] CPU: 0 UID: 0 PID: 415 Comm: rm Tainted: G        W 
     6.17.1-knld-3.16.4rc3-git8ac3a4-g568c147ca0f7 #17 PREEMPT
[   29.409562] Tainted: [W]=WARN
[   29.412467] Hardware name: MCR3000_2G 8xx 0x500000 CMPC885
[   29.417890] NIP:  c08cbd0c LR: c08cbcf8 CTR: c08cbc44
[   29.422892] REGS: ca2f3c80 TRAP: 0300   Tainted: G        W 
  (6.17.1-knld-3.16.4rc3-git8ac3a4-g568c147ca0f7)
[   29.433465] MSR:  00009032 <EE,ME,IR,DR,RI>  CR: 99005999  XER: a000bf00
[   29.440435] DAR: 00000024 DSISR: c0000000
[   29.440435] GPR00: c08ce480 ca2f3d40 c31c8640 00000000 c31f0800 
ffffffff c31f0880 ffffffff
[   29.440435] GPR08: 00000000 00000000 00000000 ffffffff 0000001f 
100d815e 7fab0cc0 100d0000
[   29.440435] GPR16: 100d0000 00000000 100d442c 100d4430 c334a740 
00100000 c31f0880 00000000
[   29.440435] GPR24: 00000000 00000001 00000004 ff074600 00000009 
00000024 ca2f3df8 0e0a2324
[   29.480281] NIP [c08cbd0c] mas_wr_store_entry+0x508/0xa90
[   29.485618] LR [c08cbcf8] mas_wr_store_entry+0x4f4/0xa90
[   29.490869] Call Trace:
[   29.493278] [ca2f3d40] [000000ca] 0xca (unreliable)
[   29.498097] [ca2f3d80] [c08ce480] mas_erase+0x63c/0x6c4
[   29.503261] [ca2f3df0] [c08ce568] mtree_erase+0x60/0x100
[   29.508510] [ca2f3e40] [c02093f4] simple_offset_remove+0x24/0x40
[   29.514449] [ca2f3e50] [c0158a10] shmem_unlink+0x4c/0x108
[   29.519784] [ca2f3ea0] [c01daa98] vfs_unlink+0xc4/0x344
[   29.524948] [ca2f3ec0] [c01df0d8] do_unlinkat+0x288/0x334
[   29.530284] [ca2f3f20] [c000ddcc] system_call_exception+0x8c/0x160
[   29.536394] [ca2f3f30] [c00110a8] ret_from_syscall+0x0/0x28
[   29.541902] ---- interrupt: c00 at 0xfcdc8c4
[   29.546118] NIP:  0fcdc8c4 LR: 10098e84 CTR: 0fcda734
[   29.551124] REGS: ca2f3f40 TRAP: 0c00   Tainted: G        W 
  (6.17.1-knld-3.16.4rc3-git8ac3a4-g568c147ca0f7)
[   29.561695] MSR:  0000d032 <EE,PR,ME,IR,DR,RI>  CR: 29005590  XER: 
a000bf00
[   29.569010]
[   29.569010] GPR00: 0000000a 7fab0b80 77d3f520 100d4414 10098eb8 
29005590 ca2f3ebc 0fccf170
[   29.569010] GPR08: 0000d032 00000074 005b5d4e 0fcda734 00000003 
100d815e 7fab0cc0 100d0000
[   29.569010] GPR16: 100d0000 00000000 100d442c 100d4430 ffffffff 
00000001 00000000 00000000
[   29.569010] GPR24: 00000000 00000000 00000002 ffffff4d 100b7abc 
100d4434 0fde378c 100d4414
[   29.606102] NIP [0fcdc8c4] 0xfcdc8c4
[   29.609630] LR [10098e84] 0x10098e84
[   29.613158] ---- interrupt: c00
[   29.616261] Code: 57e9063a 418203c0 7d29e430 7d3c4b78 553d103a 
7fc3f378 4bff40c5 2c030003 39200000 4082000c 57ff002e 393f00a8 
<7d29e82e> 7c09d800 418202a4 7f65db78
[   29.631927] ---[ end trace 0000000000000000 ]---
[   29.636486]
[   29.637947] note: rm[415] exited with irqs disabled

Christophe

Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Sverdlin, Alexander 3 months ago
Hi Christophe,

On Mon, 2025-11-03 at 22:46 +0100, Christophe Leroy wrote:
> > > > Replace the RAW SPI accesses with spi-mem API. The latter will fall back to
> > > > RAW SPI accesses if spi-mem callbacks are not implemented by a controller
> > > > driver.
> > > 
> > > With this patch (kernel v6.17.1) our powerpc boards are totally
> > > unstable, we get multiple random Oops due to bad memory accesses.
> > > 
> > > With this commit reverted the board is stable again.
> > > 
> > > The SPI driver is:
> > > 
> > > CONFIG_SPI=y
> > > CONFIG_SPI_MASTER=y
> > > CONFIG_SPI_MEM=y
> > > CONFIG_SPI_FSL_LIB=y
> > > CONFIG_SPI_FSL_CPM=y
> > > CONFIG_SPI_FSL_SPI=y
> > > 
> > > How can we further investigate the issue ?
> > 
> > could you share these "random Oops"?
> 
> Sure. At the first place they look unrelated. Something is likely 

indeed, no obvious connection... Do you have a chance to run KASAN?

> writing in the weed.

-- 
Alexander Sverdlin
Siemens AG
www.siemens.com
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Christophe Leroy 3 months ago

Le 03/11/2025 à 23:29, Sverdlin, Alexander a écrit :
> [Vous ne recevez pas souvent de courriers de alexander.sverdlin@siemens.com. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
> 
> Hi Christophe,
> 
> On Mon, 2025-11-03 at 22:46 +0100, Christophe Leroy wrote:
>>>>> Replace the RAW SPI accesses with spi-mem API. The latter will fall back to
>>>>> RAW SPI accesses if spi-mem callbacks are not implemented by a controller
>>>>> driver.
>>>>
>>>> With this patch (kernel v6.17.1) our powerpc boards are totally
>>>> unstable, we get multiple random Oops due to bad memory accesses.
>>>>
>>>> With this commit reverted the board is stable again.
>>>>
>>>> The SPI driver is:
>>>>
>>>> CONFIG_SPI=y
>>>> CONFIG_SPI_MASTER=y
>>>> CONFIG_SPI_MEM=y
>>>> CONFIG_SPI_FSL_LIB=y
>>>> CONFIG_SPI_FSL_CPM=y
>>>> CONFIG_SPI_FSL_SPI=y
>>>>
>>>> How can we further investigate the issue ?
>>>
>>> could you share these "random Oops"?
>>
>> Sure. At the first place they look unrelated. Something is likely
> 
> indeed, no obvious connection... Do you have a chance to run KASAN?

I ran KASAN, it found an overrun in the loop in 
fsl_spi_cpm_bufs_complete() when t->len is odd, but once fixed the 
random problem is still there.

Christophe
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Sverdlin, Alexander 3 months ago
Hi Christophe,

On Mon, 2025-11-03 at 23:36 +0100, Christophe Leroy wrote:
> > > > > > Replace the RAW SPI accesses with spi-mem API. The latter will fall back to
> > > > > > RAW SPI accesses if spi-mem callbacks are not implemented by a controller
> > > > > > driver.
> > > > > 
> > > > > With this patch (kernel v6.17.1) our powerpc boards are totally
> > > > > unstable, we get multiple random Oops due to bad memory accesses.
> > > > > 
> > > > > With this commit reverted the board is stable again.
> > > > > 
> > > > > The SPI driver is:
> > > > > 
> > > > > CONFIG_SPI=y
> > > > > CONFIG_SPI_MASTER=y
> > > > > CONFIG_SPI_MEM=y
> > > > > CONFIG_SPI_FSL_LIB=y
> > > > > CONFIG_SPI_FSL_CPM=y
> > > > > CONFIG_SPI_FSL_SPI=y
> > > > > 
> > > > > How can we further investigate the issue ?
> > > > 
> > > > could you share these "random Oops"?
> > > 
> > > Sure. At the first place they look unrelated. Something is likely
> > 
> > indeed, no obvious connection... Do you have a chance to run KASAN?
> 
> I ran KASAN, it found an overrun in the loop in 
> fsl_spi_cpm_bufs_complete() when t->len is odd, but once fixed the 
> random problem is still there.

thanks for the quick reply!
That sounds promising already!

Do you have a chance to run with CONFIG_DEBUG_STACKOVERFLOW=y ?

-- 
Alexander Sverdlin
Siemens AG
www.siemens.com
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Christophe Leroy 3 months ago
Hi Alexander,

Le 04/11/2025 à 14:13, Sverdlin, Alexander a écrit :
> Hi Christophe,
> 
> On Mon, 2025-11-03 at 23:36 +0100, Christophe Leroy wrote:
>>>>>>> Replace the RAW SPI accesses with spi-mem API. The latter will fall back to
>>>>>>> RAW SPI accesses if spi-mem callbacks are not implemented by a controller
>>>>>>> driver.
>>>>>>
>>>>>> With this patch (kernel v6.17.1) our powerpc boards are totally
>>>>>> unstable, we get multiple random Oops due to bad memory accesses.
>>>>>>
>>>>>> With this commit reverted the board is stable again.
>>>>>>
>>>>>> The SPI driver is:
>>>>>>
>>>>>> CONFIG_SPI=y
>>>>>> CONFIG_SPI_MASTER=y
>>>>>> CONFIG_SPI_MEM=y
>>>>>> CONFIG_SPI_FSL_LIB=y
>>>>>> CONFIG_SPI_FSL_CPM=y
>>>>>> CONFIG_SPI_FSL_SPI=y
>>>>>>
>>>>>> How can we further investigate the issue ?
>>>>>
>>>>> could you share these "random Oops"?
>>>>
>>>> Sure. At the first place they look unrelated. Something is likely
>>>
>>> indeed, no obvious connection... Do you have a chance to run KASAN?
>>
>> I ran KASAN, it found an overrun in the loop in
>> fsl_spi_cpm_bufs_complete() when t->len is odd, but once fixed the
>> random problem is still there.
> 
> thanks for the quick reply!
> That sounds promising already!
> 
> Do you have a chance to run with CONFIG_DEBUG_STACKOVERFLOW=y ?
> 

Unfortunately I won't be able to do any additionnal test until Monday 
next week as I'm travelling at the moment.

However, our kernel is built with CONFIG_VMAP_STACK, so a stack overflow 
would have been caught by the early stack overflow detection, see commit 
3978eb78517c ("powerpc/32: Add early stack overflow detection with VMAP 
stack.") for details.

Christophe
Re: [PATCH] eeprom: at25: convert to spi-mem API
Posted by Arnd Bergmann 7 months, 1 week ago
On Thu, Jul 3, 2025, at 00:28, A. Sverdlin wrote:
>  static int at25_ee_read(void *priv, unsigned int offset,
>  			void *val, size_t count)
>  {
> +	u8 *bounce __free(kfree) = kmalloc(min(count, io_limit), GFP_KERNEL);
>  	struct at25_data *at25 = priv;
>  	char *buf = val;

I see nothing wrong with your patch, but the added bounce buffer
reminds me or a general problem with such buffers in the SPI
layer (and a couple of other places like it).

The problem is that kmalloc() does not take into account the
DMA mask of the device, which can have two suboptimal outcomes:

- on builds without SWIOTLB/IOMMU and an SPI host that has a DMA
  mask smaller than RAM, dma_map_sg() fails down the line,
  so either the transfer will fail or fall back to MMIO mode

- when SWIOTLB is available, dma_map_sg() will succeed but
  require another copy into a second bounce buffer.

There are various drivers that work around the problem by using
GFP_DMA instead of GFP_KERNEL. This should be reliable on all
platforms, but means that the allocation comes from a potentially
really small pool and is more likely to fail. Ideally I think we
should not do that any more at all but find another way to
allocate bounce buffers for SPI transfers. The two ideas I had
were:

a) and a generic interface to ask for a buffer that can be used
   by an SPI bus driver for efficient transfers, with the SPI
   core code making an informed decision on using either kmalloc()
   or dma_alloc_noncoherent() based on the size of the transfer
   and the DMA mask.

b) push down the bouncing into the SPI core, so you can just
   pass buffers from anywhere (stack, vmalloc, ...) and
   ask for the lower parts of the stack to copy these into
   an appropriate buffer if necessary. For the spi mem API
   I suppose that would require assigning a flag in
   spi_mem_op->data.

    Arnd