[PATCH v2] mtd: nand: brcmnand: fix mtd corrected bits stat

David Regan posted 1 patch 8 months ago
There is a newer version of this series
drivers/mtd/nand/raw/brcmnand/brcmnand.c | 50 ++++++++++++++++++------
1 file changed, 39 insertions(+), 11 deletions(-)
[PATCH v2] mtd: nand: brcmnand: fix mtd corrected bits stat
Posted by David Regan 8 months ago
Currently we attempt to get the amount of flipped bits from a hardware
location which is reset on every subpage. Instead obtain total flipped
bits stat from hardware accumulator. In addition identify the correct
maximum subpage corrected bits.

Signed-off-by: David Regan <dregan@broadcom.com>
Reviewed-by: William Zhang <william.zhang@broadcom.com>
---
 v2: Add >= v4 NAND controller support as requested by Jonas.
     mtd->ecc_stats.corrected accumulates instead of set to total.
     Remove DMA specific flipped bits count.
---
 drivers/mtd/nand/raw/brcmnand/brcmnand.c | 50 ++++++++++++++++++------
 1 file changed, 39 insertions(+), 11 deletions(-)

diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 62bdda3be92f..c3ed3dcf0871 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -359,6 +359,7 @@ enum brcmnand_reg {
 	BRCMNAND_CORR_THRESHOLD_EXT,
 	BRCMNAND_UNCORR_COUNT,
 	BRCMNAND_CORR_COUNT,
+	BRCMNAND_READ_ERROR_COUNT,
 	BRCMNAND_CORR_EXT_ADDR,
 	BRCMNAND_CORR_ADDR,
 	BRCMNAND_UNCORR_EXT_ADDR,
@@ -389,6 +390,7 @@ static const u16 brcmnand_regs_v21[] = {
 	[BRCMNAND_CORR_THRESHOLD_EXT]	=     0,
 	[BRCMNAND_UNCORR_COUNT]		=     0,
 	[BRCMNAND_CORR_COUNT]		=     0,
+	[BRCMNAND_READ_ERROR_COUNT]	=     0,
 	[BRCMNAND_CORR_EXT_ADDR]	=  0x60,
 	[BRCMNAND_CORR_ADDR]		=  0x64,
 	[BRCMNAND_UNCORR_EXT_ADDR]	=  0x68,
@@ -419,6 +421,7 @@ static const u16 brcmnand_regs_v33[] = {
 	[BRCMNAND_CORR_THRESHOLD_EXT]	=     0,
 	[BRCMNAND_UNCORR_COUNT]		=     0,
 	[BRCMNAND_CORR_COUNT]		=     0,
+	[BRCMNAND_READ_ERROR_COUNT]	=  0x80,
 	[BRCMNAND_CORR_EXT_ADDR]	=  0x70,
 	[BRCMNAND_CORR_ADDR]		=  0x74,
 	[BRCMNAND_UNCORR_EXT_ADDR]	=  0x78,
@@ -449,6 +452,7 @@ static const u16 brcmnand_regs_v50[] = {
 	[BRCMNAND_CORR_THRESHOLD_EXT]	=     0,
 	[BRCMNAND_UNCORR_COUNT]		=     0,
 	[BRCMNAND_CORR_COUNT]		=     0,
+	[BRCMNAND_READ_ERROR_COUNT]	=  0x80,
 	[BRCMNAND_CORR_EXT_ADDR]	=  0x70,
 	[BRCMNAND_CORR_ADDR]		=  0x74,
 	[BRCMNAND_UNCORR_EXT_ADDR]	=  0x78,
@@ -479,6 +483,7 @@ static const u16 brcmnand_regs_v60[] = {
 	[BRCMNAND_CORR_THRESHOLD_EXT]	=  0xc4,
 	[BRCMNAND_UNCORR_COUNT]		=  0xfc,
 	[BRCMNAND_CORR_COUNT]		= 0x100,
+	[BRCMNAND_READ_ERROR_COUNT]	= 0x104,
 	[BRCMNAND_CORR_EXT_ADDR]	= 0x10c,
 	[BRCMNAND_CORR_ADDR]		= 0x110,
 	[BRCMNAND_UNCORR_EXT_ADDR]	= 0x114,
@@ -509,6 +514,7 @@ static const u16 brcmnand_regs_v71[] = {
 	[BRCMNAND_CORR_THRESHOLD_EXT]	=  0xe0,
 	[BRCMNAND_UNCORR_COUNT]		=  0xfc,
 	[BRCMNAND_CORR_COUNT]		= 0x100,
+	[BRCMNAND_READ_ERROR_COUNT]	= 0x104,
 	[BRCMNAND_CORR_EXT_ADDR]	= 0x10c,
 	[BRCMNAND_CORR_ADDR]		= 0x110,
 	[BRCMNAND_UNCORR_EXT_ADDR]	= 0x114,
@@ -539,6 +545,7 @@ static const u16 brcmnand_regs_v72[] = {
 	[BRCMNAND_CORR_THRESHOLD_EXT]	=  0xe0,
 	[BRCMNAND_UNCORR_COUNT]		=  0xfc,
 	[BRCMNAND_CORR_COUNT]		= 0x100,
+	[BRCMNAND_READ_ERROR_COUNT]	= 0x104,
 	[BRCMNAND_CORR_EXT_ADDR]	= 0x10c,
 	[BRCMNAND_CORR_ADDR]		= 0x110,
 	[BRCMNAND_UNCORR_EXT_ADDR]	= 0x114,
@@ -966,6 +973,13 @@ static inline u32 brcmnand_count_corrected(struct brcmnand_controller *ctrl)
 	return brcmnand_read_reg(ctrl, BRCMNAND_CORR_COUNT);
 }
 
+static inline u32 brcmnand_corr_total(struct brcmnand_controller *ctrl)
+{
+	if (ctrl->nand_version < 0x400)
+		return 0;
+	return brcmnand_read_reg(ctrl, BRCMNAND_READ_ERROR_COUNT);
+}
+
 static void brcmnand_wr_corr_thresh(struct brcmnand_host *host, u8 val)
 {
 	struct brcmnand_controller *ctrl = host->ctrl;
@@ -2066,12 +2080,15 @@ static int brcmnand_dma_trans(struct brcmnand_host *host, u64 addr, u32 *buf,
  */
 static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
 				u64 addr, unsigned int trans, u32 *buf,
-				u8 *oob, u64 *err_addr)
+				u8 *oob, u64 *err_addr, unsigned int *corr)
 {
 	struct brcmnand_host *host = nand_get_controller_data(chip);
 	struct brcmnand_controller *ctrl = host->ctrl;
 	int i, ret = 0;
 
+	if (corr)
+		*corr = 0;
+
 	brcmnand_clear_ecc_addr(ctrl);
 
 	for (i = 0; i < trans; i++, addr += FC_BYTES) {
@@ -2099,13 +2116,16 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
 
 			if (*err_addr)
 				ret = -EBADMSG;
-		}
+			else {
+				*err_addr = brcmnand_get_correcc_addr(ctrl);
 
-		if (!ret) {
-			*err_addr = brcmnand_get_correcc_addr(ctrl);
+				if (*err_addr) {
+					ret = -EUCLEAN;
 
-			if (*err_addr)
-				ret = -EUCLEAN;
+					if (corr && brcmnand_count_corrected(ctrl) > *corr)
+						*corr = brcmnand_count_corrected(ctrl);
+				}
+			}
 		}
 	}
 
@@ -2173,6 +2193,8 @@ static int brcmnand_read(struct mtd_info *mtd, struct nand_chip *chip,
 	int err;
 	bool retry = true;
 	bool edu_err = false;
+	unsigned int corrected = 0; /* max corrected bits per subpage */
+	unsigned int prev_tot = brcmnand_corr_total(ctrl);
 
 	dev_dbg(ctrl->dev, "read %llx -> %p\n", (unsigned long long)addr, buf);
 
@@ -2200,9 +2222,11 @@ static int brcmnand_read(struct mtd_info *mtd, struct nand_chip *chip,
 			memset(oob, 0x99, mtd->oobsize);
 
 		err = brcmnand_read_by_pio(mtd, chip, addr, trans, buf,
-					       oob, &err_addr);
+					   oob, &err_addr, &corrected);
 	}
 
+	mtd->ecc_stats.corrected += brcmnand_corr_total(ctrl) - prev_tot;
+
 	if (mtd_is_eccerr(err)) {
 		/*
 		 * On controller version and 7.0, 7.1 , DMA read after a
@@ -2240,16 +2264,20 @@ static int brcmnand_read(struct mtd_info *mtd, struct nand_chip *chip,
 	}
 
 	if (mtd_is_bitflip(err)) {
-		unsigned int corrected = brcmnand_count_corrected(ctrl);
-
 		/* in case of EDU correctable error we read again using PIO */
 		if (edu_err)
 			err = brcmnand_read_by_pio(mtd, chip, addr, trans, buf,
-						   oob, &err_addr);
+						   oob, &err_addr, &corrected);
 
 		dev_dbg(ctrl->dev, "corrected error at 0x%llx\n",
 			(unsigned long long)err_addr);
-		mtd->ecc_stats.corrected += corrected;
+		/*
+		 * if flipped bits accumulator is not supported but we detected
+		 * a correction, increase stat by 1 to match previous behavior.
+		 */
+		if (brcmnand_corr_total(ctrl) == prev_tot)
+			mtd->ecc_stats.corrected++;
+
 		/* Always exceed the software-imposed threshold */
 		return max(mtd->bitflip_threshold, corrected);
 	}
-- 
2.43.5
Re: [PATCH v2] mtd: nand: brcmnand: fix mtd corrected bits stat
Posted by Miquel Raynal 8 months ago
On 06/06/2025 at 09:57:03 -07, David Regan <dregan@broadcom.com> wrote:

> Currently we attempt to get the amount of flipped bits from a hardware
> location which is reset on every subpage. Instead obtain total flipped
> bits stat from hardware accumulator. In addition identify the correct
> maximum subpage corrected bits.
>
> Signed-off-by: David Regan <dregan@broadcom.com>
> Reviewed-by: William Zhang <william.zhang@broadcom.com>
> ---

Hello,

Can you please give the output of nandbiterrs -i /dev/mtdX?

>  v2: Add >= v4 NAND controller support as requested by Jonas.
>      mtd->ecc_stats.corrected accumulates instead of set to total.
>      Remove DMA specific flipped bits count.

The changelog does not mention the fact that you return the maximum
number of corrected bitflips as I requested, and the diff does not show
a straightforward implementation of that. It is very important to get
this right.

If we take the following example of a page with 4 ECC steps, if we get
respectively: 0, 2, 3, 0 bitflips per step, the returned value shall be
3.

To be very certain that this is correct, you can use the nandflipbit
tool from the mtd-utils test suite. You can manually insert bitflips in
various areas of a page and then read the page again with ECC enabled
and see how many bit errors are reported.

Thanks,
Miquèl
Re: [PATCH v2] mtd: nand: brcmnand: fix mtd corrected bits stat
Posted by David Regan 8 months ago
Hi Miquèl,

On Mon, Jun 9, 2025 at 2:20 AM Miquel Raynal <miquel.raynal@bootlin.com> wrote:
>
> On 06/06/2025 at 09:57:03 -07, David Regan <dregan@broadcom.com> wrote:
>
> > Currently we attempt to get the amount of flipped bits from a hardware
> > location which is reset on every subpage. Instead obtain total flipped
> > bits stat from hardware accumulator. In addition identify the correct
> > maximum subpage corrected bits.
> >
> > Signed-off-by: David Regan <dregan@broadcom.com>
> > Reviewed-by: William Zhang <william.zhang@broadcom.com>
> > ---
>
> Hello,
>
> Can you please give the output of nandbiterrs -i /dev/mtdX?

I'm not familiar with nandbiterrs but here's the results from
mtd_nandbiterrs.ko on my NAND set to BCH8:

# insmod mtd_nandbiterrs.ko dev=0
[  676.097190]
[  676.098760] ==================================================
[  676.104609] mtd_nandbiterrs: MTD device: 0
[  676.108732] mtd_nandbiterrs: MTD device size 2097152,
eraseblock=262144, page=4096, oob=216
[  676.117089] mtd_nandbiterrs: Device uses 1 subpages of 4096 bytes
[  676.123188] mtd_nandbiterrs: Using page=0, offset=0, eraseblock=0
[  676.130863] mtd_nandbiterrs: incremental biterrors test
[  676.136154] mtd_nandbiterrs: write_page
[  676.140761] mtd_nandbiterrs: rewrite page
[  676.145473] mtd_nandbiterrs: read_page
[  676.149621] mtd_nandbiterrs: verify_page
[  676.153625] mtd_nandbiterrs: Successfully corrected 0 bit errors per subpage
[  676.160678] mtd_nandbiterrs: Inserted biterror @ 0/5
[  676.165647] mtd_nandbiterrs: rewrite page
[  676.170363] mtd_nandbiterrs: read_page
[  676.174508] mtd_nandbiterrs: Read reported 1 corrected bit errors
[  676.180606] mtd_nandbiterrs: verify_page
[  676.184609] mtd_nandbiterrs: Successfully corrected 1 bit errors per subpage
[  676.191662] mtd_nandbiterrs: Inserted biterror @ 0/2
[  676.196631] mtd_nandbiterrs: rewrite page
[  676.201342] mtd_nandbiterrs: read_page
[  676.205487] mtd_nandbiterrs: Read reported 2 corrected bit errors
[  676.211586] mtd_nandbiterrs: verify_page
[  676.215588] mtd_nandbiterrs: Successfully corrected 2 bit errors per subpage
[  676.222641] mtd_nandbiterrs: Inserted biterror @ 0/0
[  676.227608] mtd_nandbiterrs: rewrite page
[  676.228356] mtd_nandbiterrs: read_page
[  676.228749] mtd_nandbiterrs: Read reported 3 corrected bit errors
[  676.228751] mtd_nandbiterrs: verify_page
[  676.228829] mtd_nandbiterrs: Successfully corrected 3 bit errors per subpage
[  676.228831] mtd_nandbiterrs: Inserted biterror @ 1/7
[  676.228833] mtd_nandbiterrs: rewrite page
[  676.229530] mtd_nandbiterrs: read_page
[  676.229922] mtd_nandbiterrs: Read reported 4 corrected bit errors
[  676.229924] mtd_nandbiterrs: verify_page
[  676.230001] mtd_nandbiterrs: Successfully corrected 4 bit errors per subpage
[  676.230003] mtd_nandbiterrs: Inserted biterror @ 1/5
[  676.230005] mtd_nandbiterrs: rewrite page
[  676.294177] mtd_nandbiterrs: read_page
[  676.298337] mtd_nandbiterrs: Read reported 5 corrected bit errors
[  676.304436] mtd_nandbiterrs: verify_page
[  676.308441] mtd_nandbiterrs: Successfully corrected 5 bit errors per subpage
[  676.315494] mtd_nandbiterrs: Inserted biterror @ 1/2
[  676.320464] mtd_nandbiterrs: rewrite page
[  676.325174] mtd_nandbiterrs: read_page
[  676.329327] mtd_nandbiterrs: Read reported 6 corrected bit errors
[  676.335426] mtd_nandbiterrs: verify_page
[  676.339429] mtd_nandbiterrs: Successfully corrected 6 bit errors per subpage
[  676.346483] mtd_nandbiterrs: Inserted biterror @ 1/0
[  676.351452] mtd_nandbiterrs: rewrite page
[  676.356162] mtd_nandbiterrs: read_page
[  676.360308] mtd_nandbiterrs: Read reported 7 corrected bit errors
[  676.366407] mtd_nandbiterrs: verify_page
[  676.370409] mtd_nandbiterrs: Successfully corrected 7 bit errors per subpage
[  676.377462] mtd_nandbiterrs: Inserted biterror @ 2/6
[  676.382432] mtd_nandbiterrs: rewrite page
[  676.387142] mtd_nandbiterrs: read_page
[  676.391287] mtd_nandbiterrs: Read reported 8 corrected bit errors
[  676.397385] mtd_nandbiterrs: verify_page
[  676.401388] mtd_nandbiterrs: Successfully corrected 8 bit errors per subpage
[  676.408441] mtd_nandbiterrs: Inserted biterror @ 2/5
[  676.413411] mtd_nandbiterrs: rewrite page
[  676.418122] mtd_nandbiterrs: read_page
[  676.422267] mtd_nandbiterrs: verify_page
[  676.426194] mtd_nandbiterrs: Error: page offset 0, expected 25, got 00
[  676.432727] mtd_nandbiterrs: Error: page offset 1, expected a5, got 00
[  676.439260] mtd_nandbiterrs: Error: page offset 2, expected 65, got 05
[  676.445868] mtd_nandbiterrs: ECC failure, read data is incorrect
despite read success
[  676.474929]
[  676.476425] ==================================================
[  676.482264] mtd_nandbiterrs: MTD device: 0
[  676.486367] mtd_nandbiterrs: MTD device size 2097152,
eraseblock=262144, page=4096, oob=216
[  676.494721] mtd_nandbiterrs: Device uses 1 subpages of 4096 bytes
[  676.494724] mtd_nandbiterrs: Using page=0, offset=0, eraseblock=0
[  676.496298] mtd_nandbiterrs: incremental biterrors test
[  676.496361] mtd_nandbiterrs: write_page
[  676.497123] mtd_nandbiterrs: rewrite page
[  676.497820] mtd_nandbiterrs: read_page
[  676.498210] mtd_nandbiterrs: verify_page
[  676.498287] mtd_nandbiterrs: Successfully corrected 0 bit errors per subpage
[  676.498289] mtd_nandbiterrs: Inserted biterror @ 0/5
[  676.498291] mtd_nandbiterrs: rewrite page
[  676.547860] mtd_nandbiterrs: read_page
[  676.552005] mtd_nandbiterrs: Read reported 1 corrected bit errors
[  676.558104] mtd_nandbiterrs: verify_page
[  676.562107] mtd_nandbiterrs: Successfully corrected 1 bit errors per subpage
[  676.569160] mtd_nandbiterrs: Inserted biterror @ 0/2
[  676.574130] mtd_nandbiterrs: rewrite page
[  676.578842] mtd_nandbiterrs: read_page
[  676.582987] mtd_nandbiterrs: Read reported 2 corrected bit errors
[  676.589085] mtd_nandbiterrs: verify_page
[  676.593088] mtd_nandbiterrs: Successfully corrected 2 bit errors per subpage
[  676.600141] mtd_nandbiterrs: Inserted biterror @ 0/0
[  676.605111] mtd_nandbiterrs: rewrite page
[  676.609821] mtd_nandbiterrs: read_page
[  676.613967] mtd_nandbiterrs: Read reported 3 corrected bit errors
[  676.620065] mtd_nandbiterrs: verify_page
[  676.624068] mtd_nandbiterrs: Successfully corrected 3 bit errors per subpage
[  676.631122] mtd_nandbiterrs: Inserted biterror @ 1/7
[  676.636091] mtd_nandbiterrs: rewrite page
[  676.640802] mtd_nandbiterrs: read_page
[  676.644947] mtd_nandbiterrs: Read reported 4 corrected bit errors
[  676.651045] mtd_nandbiterrs: verify_page
[  676.655048] mtd_nandbiterrs: Successfully corrected 4 bit errors per subpage
[  676.662100] mtd_nandbiterrs: Inserted biterror @ 1/5
[  676.667070] mtd_nandbiterrs: rewrite page
[  676.671780] mtd_nandbiterrs: read_page
[  676.675925] mtd_nandbiterrs: Read reported 5 corrected bit errors
[  676.682025] mtd_nandbiterrs: verify_page
[  676.686028] mtd_nandbiterrs: Successfully corrected 5 bit errors per subpage
[  676.693082] mtd_nandbiterrs: Inserted biterror @ 1/2
[  676.698051] mtd_nandbiterrs: rewrite page
[  676.702762] mtd_nandbiterrs: read_page
[  676.706907] mtd_nandbiterrs: Read reported 6 corrected bit errors
[  676.713005] mtd_nandbiterrs: verify_page
[  676.717008] mtd_nandbiterrs: Successfully corrected 6 bit errors per subpage
[  676.724076] mtd_nandbiterrs: Inserted biterror @ 1/0
[  676.729047] mtd_nandbiterrs: rewrite page
[  676.733758] mtd_nandbiterrs: read_page
[  676.737904] mtd_nandbiterrs: Read reported 7 corrected bit errors
[  676.744003] mtd_nandbiterrs: verify_page
[  676.748006] mtd_nandbiterrs: Successfully corrected 7 bit errors per subpage
[  676.755059] mtd_nandbiterrs: Inserted biterror @ 2/6
[  676.760029] mtd_nandbiterrs: rewrite page
[  676.764739] mtd_nandbiterrs: read_page
[  676.768884] mtd_nandbiterrs: Read reported 8 corrected bit errors
[  676.774982] mtd_nandbiterrs: verify_page
[  676.778986] mtd_nandbiterrs: Successfully corrected 8 bit errors per subpage
[  676.786039] mtd_nandbiterrs: Inserted biterror @ 2/5
[  676.791009] mtd_nandbiterrs: rewrite page
[  676.795719] mtd_nandbiterrs: read_page
[  676.799864] mtd_nandbiterrs: verify_page
[  676.803791] mtd_nandbiterrs: Error: page offset 0, expected 25, got 00
[  676.810324] mtd_nandbiterrs: Error: page offset 1, expected a5, got 00
[  676.816857] mtd_nandbiterrs: Error: page offset 2, expected 65, got 05
[  676.823463] mtd_nandbiterrs: ECC failure, read data is incorrect
despite read success

>
> >  v2: Add >= v4 NAND controller support as requested by Jonas.
> >      mtd->ecc_stats.corrected accumulates instead of set to total.
> >      Remove DMA specific flipped bits count.
>
> The changelog does not mention the fact that you return the maximum
> number of corrected bitflips as I requested, and the diff does not show
> a straightforward implementation of that. It is very important to get
> this right.

I'm a little unclear on what sort of verbiage you would like, I mentioned
in the original and v2 patches summary: "In addition identify the correct
maximum subpage corrected bits", would you like me to maybe change
it to something such as "In addition this change fixes the maximum
number of bitflips from all the subpages" ?

>
> If we take the following example of a page with 4 ECC steps, if we get
> respectively: 0, 2, 3, 0 bitflips per step, the returned value shall be
> 3.
>
> To be very certain that this is correct, you can use the nandflipbit
> tool from the mtd-utils test suite. You can manually insert bitflips in
> various areas of a page and then read the page again with ECC enabled
> and see how many bit errors are reported.

I'm not aware if there is a tool which reads the maximum returned ECC
bits directly, however during my testing I have put in debug code which
will hopefully make things more clear which shows the maximum
amount of ECC chunk corrected bits being returned. We will return the
maximum subpage corrected bits from brcmnand_read only when we
cross a threshold (in this case 6 of a possible 8 bits) because this will
trigger an EUCLEAN at a higher level. Here is my debug messages
highlighting the values (flipped bits in each subpage: 0, 2, 3, 0, 0, 6, 7, 0):

nandflipbits /dev/mtd2 0@512 1@513 2@1024 3@1025 4@1026
nandflipbits /dev/mtd2 1@2560 2@2561 3@2562 4@2563 5@2564 6@2565
nandflipbits /dev/mtd2 1@3072 2@3073 3@3074 4@3075 5@3076 6@3077 7@3078

cat /sys/class/mtd/mtd2/corrected_bits
0

# cat /dev/mtd2 > /dev/null
[  394.422696] subpage 0, bitflips detected 0, max subpage bitflips detected 0
[  394.422764] subpage 1, bitflips detected 2, max subpage bitflips detected 0
[  394.422818] subpage 2, bitflips detected 3, max subpage bitflips detected 0
[  394.422869] subpage 3, bitflips detected 0, max subpage bitflips detected 0
[  394.422919] subpage 4, bitflips detected 0, max subpage bitflips detected 0
[  394.422974] subpage 5, bitflips detected 6, max subpage bitflips detected 6
[  394.423028] subpage 6, bitflips detected 7, max subpage bitflips detected 7
[  394.423079] subpage 7, bitflips detected 0, max subpage bitflips detected 7
[  394.423085] bitflip threshold exceeded, returning 7 bitflips
[  394.423161] subpage 0, bitflips detected 0, max subpage bitflips detected 0
[  394.423212] subpage 1, bitflips detected 0, max subpage bitflips detected 0
[  394.423263] subpage 2, bitflips detected 0, max subpage bitflips detected 0
[  394.423313] subpage 3, bitflips detected 0, max subpage bitflips detected 0
[  394.423363] subpage 4, bitflips detected 0, max subpage bitflips detected 0
[  394.423414] subpage 5, bitflips detected 0, max subpage bitflips detected 0
[  394.423464] subpage 6, bitflips detected 0, max subpage bitflips detected 0
[  394.423514] subpage 7, bitflips detected 0, max subpage bitflips detected 0
...

cat /sys/class/mtd/mtd2/corrected_bits
18

>
> Thanks,
> Miquèl

Thanks!

-Dave
Re: [PATCH v2] mtd: nand: brcmnand: fix mtd corrected bits stat
Posted by Miquel Raynal 7 months, 3 weeks ago
Hello David,

> I'm not familiar with nandbiterrs but here's the results from
> mtd_nandbiterrs.ko on my NAND set to BCH8:
>
> # insmod mtd_nandbiterrs.ko dev=0
> [  676.097190]
> [  676.098760] ==================================================
> [  676.104609] mtd_nandbiterrs: MTD device: 0
> [  676.108732] mtd_nandbiterrs: MTD device size 2097152,
> eraseblock=262144, page=4096, oob=216
> [  676.117089] mtd_nandbiterrs: Device uses 1 subpages of 4096 bytes
> [  676.123188] mtd_nandbiterrs: Using page=0, offset=0, eraseblock=0
> [  676.130863] mtd_nandbiterrs: incremental biterrors test
> [  676.136154] mtd_nandbiterrs: write_page
> [  676.140761] mtd_nandbiterrs: rewrite page
> [  676.145473] mtd_nandbiterrs: read_page
> [  676.149621] mtd_nandbiterrs: verify_page
> [  676.153625] mtd_nandbiterrs: Successfully corrected 0 bit errors per subpage
> [  676.160678] mtd_nandbiterrs: Inserted biterror @ 0/5
> [  676.165647] mtd_nandbiterrs: rewrite page
> [  676.170363] mtd_nandbiterrs: read_page
> [  676.174508] mtd_nandbiterrs: Read reported 1 corrected bit errors
> [  676.180606] mtd_nandbiterrs: verify_page
> [  676.184609] mtd_nandbiterrs: Successfully corrected 1 bit errors per subpage
> [  676.191662] mtd_nandbiterrs: Inserted biterror @ 0/2
> [  676.196631] mtd_nandbiterrs: rewrite page
> [  676.201342] mtd_nandbiterrs: read_page
> [  676.205487] mtd_nandbiterrs: Read reported 2 corrected bit errors
> [  676.211586] mtd_nandbiterrs: verify_page
> [  676.215588] mtd_nandbiterrs: Successfully corrected 2 bit errors per subpage
> [  676.222641] mtd_nandbiterrs: Inserted biterror @ 0/0
> [  676.227608] mtd_nandbiterrs: rewrite page
> [  676.228356] mtd_nandbiterrs: read_page
> [  676.228749] mtd_nandbiterrs: Read reported 3 corrected bit errors
> [  676.228751] mtd_nandbiterrs: verify_page
> [  676.228829] mtd_nandbiterrs: Successfully corrected 3 bit errors per subpage
> [  676.228831] mtd_nandbiterrs: Inserted biterror @ 1/7
> [  676.228833] mtd_nandbiterrs: rewrite page
> [  676.229530] mtd_nandbiterrs: read_page
> [  676.229922] mtd_nandbiterrs: Read reported 4 corrected bit errors
> [  676.229924] mtd_nandbiterrs: verify_page
> [  676.230001] mtd_nandbiterrs: Successfully corrected 4 bit errors per subpage
> [  676.230003] mtd_nandbiterrs: Inserted biterror @ 1/5
> [  676.230005] mtd_nandbiterrs: rewrite page
> [  676.294177] mtd_nandbiterrs: read_page
> [  676.298337] mtd_nandbiterrs: Read reported 5 corrected bit errors
> [  676.304436] mtd_nandbiterrs: verify_page
> [  676.308441] mtd_nandbiterrs: Successfully corrected 5 bit errors per subpage
> [  676.315494] mtd_nandbiterrs: Inserted biterror @ 1/2
> [  676.320464] mtd_nandbiterrs: rewrite page
> [  676.325174] mtd_nandbiterrs: read_page
> [  676.329327] mtd_nandbiterrs: Read reported 6 corrected bit errors
> [  676.335426] mtd_nandbiterrs: verify_page
> [  676.339429] mtd_nandbiterrs: Successfully corrected 6 bit errors per subpage
> [  676.346483] mtd_nandbiterrs: Inserted biterror @ 1/0
> [  676.351452] mtd_nandbiterrs: rewrite page
> [  676.356162] mtd_nandbiterrs: read_page
> [  676.360308] mtd_nandbiterrs: Read reported 7 corrected bit errors
> [  676.366407] mtd_nandbiterrs: verify_page
> [  676.370409] mtd_nandbiterrs: Successfully corrected 7 bit errors per subpage
> [  676.377462] mtd_nandbiterrs: Inserted biterror @ 2/6
> [  676.382432] mtd_nandbiterrs: rewrite page
> [  676.387142] mtd_nandbiterrs: read_page
> [  676.391287] mtd_nandbiterrs: Read reported 8 corrected bit errors
> [  676.397385] mtd_nandbiterrs: verify_page
> [  676.401388] mtd_nandbiterrs: Successfully corrected 8 bit errors
> per subpage

So far the reporting looks good (and the nandflipbits output looks
correct as well).

> [  676.408441] mtd_nandbiterrs: Inserted biterror @ 2/5
> [  676.413411] mtd_nandbiterrs: rewrite page
> [  676.418122] mtd_nandbiterrs: read_page
> [  676.422267] mtd_nandbiterrs: verify_page
> [  676.426194] mtd_nandbiterrs: Error: page offset 0, expected 25, got 00
> [  676.432727] mtd_nandbiterrs: Error: page offset 1, expected a5, got 00
> [  676.439260] mtd_nandbiterrs: Error: page offset 2, expected 65, got 05
> [  676.445868] mtd_nandbiterrs: ECC failure, read data is incorrect
> despite read success

Here however there is something wrong. We do expect a read failure,
instead of returning wrong data. There is still a problem, I do not know
if this problem was there before though, but this must be fixed.

Hello Florian, if you have time, I'd welcome you opinion on this patch :)

Cheers,
Miquèl
Re: [PATCH v2] mtd: nand: brcmnand: fix mtd corrected bits stat
Posted by David Regan 7 months, 2 weeks ago
Hi Miquèl,

On Wed, Jun 18, 2025 at 2:07 AM Miquel Raynal <miquel.raynal@bootlin.com> wrote:
>
> Hello David,
>
...
> Here however there is something wrong. We do expect a read failure,
> instead of returning wrong data. There is still a problem, I do not know
> if this problem was there before though, but this must be fixed.

This was user error as I picked the wrong partition to test. There's a
hardware limitation in which the first ECC chunk in page zero of block
zero is ECC protected however the hardware flag to signify an ECC
problem does not trigger (as can be seen by the result.) This
limitation only affects page 0 of block 0 where it is typically a
bootloader raw partition that is not under the management of the Linux
file system, and in addition we have extra checksum/signature
verification and loader redundancies to account for this limitation.

Here is the result of the nandbiterrs test using a different partition:

# insmod mtd_nandbiterrs.ko dev=2
[   30.670312]
[   30.671853] ==================================================
[   30.677713] mtd_nandbiterrs: MTD device: 2
[   30.681818] mtd_nandbiterrs: MTD device size 8388608,
eraseblock=262144, page=4096, oob=216
[   30.690175] mtd_nandbiterrs: Device uses 1 subpages of 4096 bytes
[   30.696273] mtd_nandbiterrs: Using page=0, offset=0, eraseblock=0
[   30.703951] mtd_nandbiterrs: incremental biterrors test
[   30.709242] mtd_nandbiterrs: write_page
[   30.713851] mtd_nandbiterrs: rewrite page
[   30.718562] mtd_nandbiterrs: read_page
[   30.722708] mtd_nandbiterrs: verify_page
[   30.726711] mtd_nandbiterrs: Successfully corrected 0 bit errors per subpage
[   30.733764] mtd_nandbiterrs: Inserted biterror @ 0/5
[   30.738733] mtd_nandbiterrs: rewrite page
[   30.743443] mtd_nandbiterrs: read_page
[   30.747588] mtd_nandbiterrs: Read reported 1 corrected bit errors
[   30.753686] mtd_nandbiterrs: verify_page
[   30.757688] mtd_nandbiterrs: Successfully corrected 1 bit errors per subpage
[   30.764741] mtd_nandbiterrs: Inserted biterror @ 0/2
[   30.769710] mtd_nandbiterrs: rewrite page
[   30.774475] mtd_nandbiterrs: read_page
[   30.778620] mtd_nandbiterrs: Read reported 2 corrected bit errors
[   30.784718] mtd_nandbiterrs: verify_page
[   30.788721] mtd_nandbiterrs: Successfully corrected 2 bit errors per subpage
[   30.795773] mtd_nandbiterrs: Inserted biterror @ 0/0
[   30.800743] mtd_nandbiterrs: rewrite page
[   30.805502] mtd_nandbiterrs: read_page
[   30.809647] mtd_nandbiterrs: Read reported 3 corrected bit errors
[   30.815745] mtd_nandbiterrs: verify_page
[   30.819748] mtd_nandbiterrs: Successfully corrected 3 bit errors per subpage
[   30.826801] mtd_nandbiterrs: Inserted biterror @ 1/7
[   30.831770] mtd_nandbiterrs: rewrite page
[   30.836480] mtd_nandbiterrs: read_page
[   30.840625] mtd_nandbiterrs: Read reported 4 corrected bit errors
[   30.846723] mtd_nandbiterrs: verify_page
[   30.850725] mtd_nandbiterrs: Successfully corrected 4 bit errors per subpage
[   30.857777] mtd_nandbiterrs: Inserted biterror @ 1/5
[   30.862746] mtd_nandbiterrs: rewrite page
[   30.867457] mtd_nandbiterrs: read_page
[   30.871602] mtd_nandbiterrs: Read reported 5 corrected bit errors
[   30.877700] mtd_nandbiterrs: verify_page
[   30.881702] mtd_nandbiterrs: Successfully corrected 5 bit errors per subpage
[   30.888754] mtd_nandbiterrs: Inserted biterror @ 1/2
[   30.893722] mtd_nandbiterrs: rewrite page
[   30.898433] mtd_nandbiterrs: read_page
[   30.902581] mtd_nandbiterrs: Read reported 6 corrected bit errors
[   30.908679] mtd_nandbiterrs: verify_page
[   30.912681] mtd_nandbiterrs: Successfully corrected 6 bit errors per subpage
[   30.919734] mtd_nandbiterrs: Inserted biterror @ 1/0
[   30.924704] mtd_nandbiterrs: rewrite page
[   30.929412] mtd_nandbiterrs: read_page
[   30.929808] mtd_nandbiterrs: Read reported 7 corrected bit errors
[   30.929810] mtd_nandbiterrs: verify_page
[   30.929887] mtd_nandbiterrs: Successfully corrected 7 bit errors per subpage
[   30.929889] mtd_nandbiterrs: Inserted biterror @ 2/6
[   30.929891] mtd_nandbiterrs: rewrite page
[   30.960226] mtd_nandbiterrs: read_page
[   30.964374] mtd_nandbiterrs: Read reported 8 corrected bit errors
[   30.970476] mtd_nandbiterrs: verify_page
[   30.974480] mtd_nandbiterrs: Successfully corrected 8 bit errors per subpage
[   30.981533] mtd_nandbiterrs: Inserted biterror @ 2/5
[   30.986528] mtd_nandbiterrs: rewrite page
[   30.991289] mtd_nandbiterrs: read_page
[   30.996228] bcmbca_nand ff801800.nand-controller: uncorrectable
error at 0x1f700000
[   31.003892] mtd_nandbiterrs: error: read failed at 0x0
[   31.009035] mtd_nandbiterrs: After 9 biterrors per subpage, read
reported error -74
[   31.018263] mtd_nandbiterrs: finished successfully.
[   31.023146] ==================================================
[   31.051035]
[   31.052530] ==================================================
[   31.058369] mtd_nandbiterrs: MTD device: 2
[   31.062475] mtd_nandbiterrs: MTD device size 8388608,
eraseblock=262144, page=4096, oob=216
[   31.070832] mtd_nandbiterrs: Device uses 1 subpages of 4096 bytes
[   31.076931] mtd_nandbiterrs: Using page=0, offset=0, eraseblock=0
[   31.083751] mtd_nandbiterrs: incremental biterrors test
[   31.089041] mtd_nandbiterrs: write_page
[   31.093643] mtd_nandbiterrs: rewrite page
[   31.098354] mtd_nandbiterrs: read_page
[   31.102496] mtd_nandbiterrs: verify_page
[   31.106498] mtd_nandbiterrs: Successfully corrected 0 bit errors per subpage
[   31.113549] mtd_nandbiterrs: Inserted biterror @ 0/5
[   31.113552] mtd_nandbiterrs: rewrite page
[   31.114250] mtd_nandbiterrs: read_page
[   31.127333] mtd_nandbiterrs: Read reported 1 corrected bit errors
[   31.133431] mtd_nandbiterrs: verify_page
[   31.137435] mtd_nandbiterrs: Successfully corrected 1 bit errors per subpage
[   31.144509] mtd_nandbiterrs: Inserted biterror @ 0/2
[   31.149480] mtd_nandbiterrs: rewrite page
[   31.154240] mtd_nandbiterrs: read_page
[   31.158385] mtd_nandbiterrs: Read reported 2 corrected bit errors
[   31.164483] mtd_nandbiterrs: verify_page
[   31.168486] mtd_nandbiterrs: Successfully corrected 2 bit errors per subpage
[   31.175552] mtd_nandbiterrs: Inserted biterror @ 0/0
[   31.180522] mtd_nandbiterrs: rewrite page
[   31.185282] mtd_nandbiterrs: read_page
[   31.189428] mtd_nandbiterrs: Read reported 3 corrected bit errors
[   31.195524] mtd_nandbiterrs: verify_page
[   31.195602] mtd_nandbiterrs: Successfully corrected 3 bit errors per subpage
[   31.195604] mtd_nandbiterrs: Inserted biterror @ 1/7
[   31.195606] mtd_nandbiterrs: rewrite page
[   31.196304] mtd_nandbiterrs: read_page
[   31.196696] mtd_nandbiterrs: Read reported 4 corrected bit errors
[   31.196698] mtd_nandbiterrs: verify_page
[   31.196776] mtd_nandbiterrs: Successfully corrected 4 bit errors per subpage
[   31.196778] mtd_nandbiterrs: Inserted biterror @ 1/5
[   31.196780] mtd_nandbiterrs: rewrite page
[   31.197477] mtd_nandbiterrs: read_page
[   31.197869] mtd_nandbiterrs: Read reported 5 corrected bit errors
[   31.197871] mtd_nandbiterrs: verify_page
[   31.197948] mtd_nandbiterrs: Successfully corrected 5 bit errors per subpage
[   31.197950] mtd_nandbiterrs: Inserted biterror @ 1/2
[   31.197952] mtd_nandbiterrs: rewrite page
[   31.278059] mtd_nandbiterrs: read_page
[   31.282207] mtd_nandbiterrs: Read reported 6 corrected bit errors
[   31.288323] mtd_nandbiterrs: verify_page
[   31.292327] mtd_nandbiterrs: Successfully corrected 6 bit errors per subpage
[   31.299380] mtd_nandbiterrs: Inserted biterror @ 1/0
[   31.304349] mtd_nandbiterrs: rewrite page
[   31.309059] mtd_nandbiterrs: read_page
[   31.313207] mtd_nandbiterrs: Read reported 7 corrected bit errors
[   31.319306] mtd_nandbiterrs: verify_page
[   31.323308] mtd_nandbiterrs: Successfully corrected 7 bit errors per subpage
[   31.330360] mtd_nandbiterrs: Inserted biterror @ 2/6
[   31.335335] mtd_nandbiterrs: rewrite page
[   31.340045] mtd_nandbiterrs: read_page
[   31.344194] mtd_nandbiterrs: Read reported 8 corrected bit errors
[   31.350291] mtd_nandbiterrs: verify_page
[   31.354294] mtd_nandbiterrs: Successfully corrected 8 bit errors per subpage
[   31.361347] mtd_nandbiterrs: Inserted biterror @ 2/5
[   31.366316] mtd_nandbiterrs: rewrite page
[   31.371076] mtd_nandbiterrs: read_page
[   31.376011] bcmbca_nand ff801800.nand-controller: uncorrectable
error at 0x1f700000
[   31.383674] mtd_nandbiterrs: error: read failed at 0x0
[   31.388818] mtd_nandbiterrs: After 9 biterrors per subpage, read
reported error -74
[   31.398046] mtd_nandbiterrs: finished successfully.
[   31.402928] ==================================================

> Cheers,
> Miquèl

Thanks!

-Dave
Re: [PATCH v2] mtd: nand: brcmnand: fix mtd corrected bits stat
Posted by David Regan 7 months, 3 weeks ago
Hi   Miquèl,

On Wed, Jun 18, 2025 at 2:07 AM Miquel Raynal <miquel.raynal@bootlin.com> wrote:
>
> Hello David

...

> So far the reporting looks good (and the nandflipbits output looks
> correct as well).
>
> > [  676.408441] mtd_nandbiterrs: Inserted biterror @ 2/5
> > [  676.413411] mtd_nandbiterrs: rewrite page
> > [  676.418122] mtd_nandbiterrs: read_page
> > [  676.422267] mtd_nandbiterrs: verify_page
> > [  676.426194] mtd_nandbiterrs: Error: page offset 0, expected 25, got 00
> > [  676.432727] mtd_nandbiterrs: Error: page offset 1, expected a5, got 00
> > [  676.439260] mtd_nandbiterrs: Error: page offset 2, expected 65, got 05
> > [  676.445868] mtd_nandbiterrs: ECC failure, read data is incorrect
> > despite read success
>
> Here however there is something wrong. We do expect a read failure,
> instead of returning wrong data. There is still a problem, I do not know
> if this problem was there before though, but this must be fixed.
>

Thank you for highlighting this issue, I will investigate.

>
> Hello Florian, if you have time, I'd welcome you opinion on this patch :)
>
> Cheers,
> Miquèl

Thanks!

-Dave
Re: [PATCH v2] mtd: nand: brcmnand: fix mtd corrected bits stat
Posted by Florian Fainelli 7 months, 3 weeks ago
On 6/18/25 02:07, Miquel Raynal wrote:
> Hello David,
> 
>> I'm not familiar with nandbiterrs but here's the results from
>> mtd_nandbiterrs.ko on my NAND set to BCH8:
>>
>> # insmod mtd_nandbiterrs.ko dev=0
>> [  676.097190]
>> [  676.098760] ==================================================
>> [  676.104609] mtd_nandbiterrs: MTD device: 0
>> [  676.108732] mtd_nandbiterrs: MTD device size 2097152,
>> eraseblock=262144, page=4096, oob=216
>> [  676.117089] mtd_nandbiterrs: Device uses 1 subpages of 4096 bytes
>> [  676.123188] mtd_nandbiterrs: Using page=0, offset=0, eraseblock=0
>> [  676.130863] mtd_nandbiterrs: incremental biterrors test
>> [  676.136154] mtd_nandbiterrs: write_page
>> [  676.140761] mtd_nandbiterrs: rewrite page
>> [  676.145473] mtd_nandbiterrs: read_page
>> [  676.149621] mtd_nandbiterrs: verify_page
>> [  676.153625] mtd_nandbiterrs: Successfully corrected 0 bit errors per subpage
>> [  676.160678] mtd_nandbiterrs: Inserted biterror @ 0/5
>> [  676.165647] mtd_nandbiterrs: rewrite page
>> [  676.170363] mtd_nandbiterrs: read_page
>> [  676.174508] mtd_nandbiterrs: Read reported 1 corrected bit errors
>> [  676.180606] mtd_nandbiterrs: verify_page
>> [  676.184609] mtd_nandbiterrs: Successfully corrected 1 bit errors per subpage
>> [  676.191662] mtd_nandbiterrs: Inserted biterror @ 0/2
>> [  676.196631] mtd_nandbiterrs: rewrite page
>> [  676.201342] mtd_nandbiterrs: read_page
>> [  676.205487] mtd_nandbiterrs: Read reported 2 corrected bit errors
>> [  676.211586] mtd_nandbiterrs: verify_page
>> [  676.215588] mtd_nandbiterrs: Successfully corrected 2 bit errors per subpage
>> [  676.222641] mtd_nandbiterrs: Inserted biterror @ 0/0
>> [  676.227608] mtd_nandbiterrs: rewrite page
>> [  676.228356] mtd_nandbiterrs: read_page
>> [  676.228749] mtd_nandbiterrs: Read reported 3 corrected bit errors
>> [  676.228751] mtd_nandbiterrs: verify_page
>> [  676.228829] mtd_nandbiterrs: Successfully corrected 3 bit errors per subpage
>> [  676.228831] mtd_nandbiterrs: Inserted biterror @ 1/7
>> [  676.228833] mtd_nandbiterrs: rewrite page
>> [  676.229530] mtd_nandbiterrs: read_page
>> [  676.229922] mtd_nandbiterrs: Read reported 4 corrected bit errors
>> [  676.229924] mtd_nandbiterrs: verify_page
>> [  676.230001] mtd_nandbiterrs: Successfully corrected 4 bit errors per subpage
>> [  676.230003] mtd_nandbiterrs: Inserted biterror @ 1/5
>> [  676.230005] mtd_nandbiterrs: rewrite page
>> [  676.294177] mtd_nandbiterrs: read_page
>> [  676.298337] mtd_nandbiterrs: Read reported 5 corrected bit errors
>> [  676.304436] mtd_nandbiterrs: verify_page
>> [  676.308441] mtd_nandbiterrs: Successfully corrected 5 bit errors per subpage
>> [  676.315494] mtd_nandbiterrs: Inserted biterror @ 1/2
>> [  676.320464] mtd_nandbiterrs: rewrite page
>> [  676.325174] mtd_nandbiterrs: read_page
>> [  676.329327] mtd_nandbiterrs: Read reported 6 corrected bit errors
>> [  676.335426] mtd_nandbiterrs: verify_page
>> [  676.339429] mtd_nandbiterrs: Successfully corrected 6 bit errors per subpage
>> [  676.346483] mtd_nandbiterrs: Inserted biterror @ 1/0
>> [  676.351452] mtd_nandbiterrs: rewrite page
>> [  676.356162] mtd_nandbiterrs: read_page
>> [  676.360308] mtd_nandbiterrs: Read reported 7 corrected bit errors
>> [  676.366407] mtd_nandbiterrs: verify_page
>> [  676.370409] mtd_nandbiterrs: Successfully corrected 7 bit errors per subpage
>> [  676.377462] mtd_nandbiterrs: Inserted biterror @ 2/6
>> [  676.382432] mtd_nandbiterrs: rewrite page
>> [  676.387142] mtd_nandbiterrs: read_page
>> [  676.391287] mtd_nandbiterrs: Read reported 8 corrected bit errors
>> [  676.397385] mtd_nandbiterrs: verify_page
>> [  676.401388] mtd_nandbiterrs: Successfully corrected 8 bit errors
>> per subpage
> 
> So far the reporting looks good (and the nandflipbits output looks
> correct as well).
> 
>> [  676.408441] mtd_nandbiterrs: Inserted biterror @ 2/5
>> [  676.413411] mtd_nandbiterrs: rewrite page
>> [  676.418122] mtd_nandbiterrs: read_page
>> [  676.422267] mtd_nandbiterrs: verify_page
>> [  676.426194] mtd_nandbiterrs: Error: page offset 0, expected 25, got 00
>> [  676.432727] mtd_nandbiterrs: Error: page offset 1, expected a5, got 00
>> [  676.439260] mtd_nandbiterrs: Error: page offset 2, expected 65, got 05
>> [  676.445868] mtd_nandbiterrs: ECC failure, read data is incorrect
>> despite read success
> 
> Here however there is something wrong. We do expect a read failure,
> instead of returning wrong data. There is still a problem, I do not know
> if this problem was there before though, but this must be fixed.
> 
> Hello Florian, if you have time, I'd welcome you opinion on this patch :)

I don't think I am entitled any opinion on this patch other than 
confirming the register offset locations. Both William and David have 
far more experience with this NAND controller and devices that I do.
-- 
Florian