drivers/mtd/nand/raw/brcmnand/brcmnand.c | 50 ++++++++++++++++++------ 1 file changed, 39 insertions(+), 11 deletions(-)
Currently we attempt to get the amount of flipped bits from a hardware
location which is reset on every subpage. Instead obtain total flipped
bits stat from hardware accumulator. In addition identify the correct
maximum subpage corrected bits.
Signed-off-by: David Regan <dregan@broadcom.com>
Reviewed-by: William Zhang <william.zhang@broadcom.com>
---
v2: Add >= v4 NAND controller support as requested by Jonas.
mtd->ecc_stats.corrected accumulates instead of set to total.
Remove DMA specific flipped bits count.
---
drivers/mtd/nand/raw/brcmnand/brcmnand.c | 50 ++++++++++++++++++------
1 file changed, 39 insertions(+), 11 deletions(-)
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 62bdda3be92f..c3ed3dcf0871 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -359,6 +359,7 @@ enum brcmnand_reg {
BRCMNAND_CORR_THRESHOLD_EXT,
BRCMNAND_UNCORR_COUNT,
BRCMNAND_CORR_COUNT,
+ BRCMNAND_READ_ERROR_COUNT,
BRCMNAND_CORR_EXT_ADDR,
BRCMNAND_CORR_ADDR,
BRCMNAND_UNCORR_EXT_ADDR,
@@ -389,6 +390,7 @@ static const u16 brcmnand_regs_v21[] = {
[BRCMNAND_CORR_THRESHOLD_EXT] = 0,
[BRCMNAND_UNCORR_COUNT] = 0,
[BRCMNAND_CORR_COUNT] = 0,
+ [BRCMNAND_READ_ERROR_COUNT] = 0,
[BRCMNAND_CORR_EXT_ADDR] = 0x60,
[BRCMNAND_CORR_ADDR] = 0x64,
[BRCMNAND_UNCORR_EXT_ADDR] = 0x68,
@@ -419,6 +421,7 @@ static const u16 brcmnand_regs_v33[] = {
[BRCMNAND_CORR_THRESHOLD_EXT] = 0,
[BRCMNAND_UNCORR_COUNT] = 0,
[BRCMNAND_CORR_COUNT] = 0,
+ [BRCMNAND_READ_ERROR_COUNT] = 0x80,
[BRCMNAND_CORR_EXT_ADDR] = 0x70,
[BRCMNAND_CORR_ADDR] = 0x74,
[BRCMNAND_UNCORR_EXT_ADDR] = 0x78,
@@ -449,6 +452,7 @@ static const u16 brcmnand_regs_v50[] = {
[BRCMNAND_CORR_THRESHOLD_EXT] = 0,
[BRCMNAND_UNCORR_COUNT] = 0,
[BRCMNAND_CORR_COUNT] = 0,
+ [BRCMNAND_READ_ERROR_COUNT] = 0x80,
[BRCMNAND_CORR_EXT_ADDR] = 0x70,
[BRCMNAND_CORR_ADDR] = 0x74,
[BRCMNAND_UNCORR_EXT_ADDR] = 0x78,
@@ -479,6 +483,7 @@ static const u16 brcmnand_regs_v60[] = {
[BRCMNAND_CORR_THRESHOLD_EXT] = 0xc4,
[BRCMNAND_UNCORR_COUNT] = 0xfc,
[BRCMNAND_CORR_COUNT] = 0x100,
+ [BRCMNAND_READ_ERROR_COUNT] = 0x104,
[BRCMNAND_CORR_EXT_ADDR] = 0x10c,
[BRCMNAND_CORR_ADDR] = 0x110,
[BRCMNAND_UNCORR_EXT_ADDR] = 0x114,
@@ -509,6 +514,7 @@ static const u16 brcmnand_regs_v71[] = {
[BRCMNAND_CORR_THRESHOLD_EXT] = 0xe0,
[BRCMNAND_UNCORR_COUNT] = 0xfc,
[BRCMNAND_CORR_COUNT] = 0x100,
+ [BRCMNAND_READ_ERROR_COUNT] = 0x104,
[BRCMNAND_CORR_EXT_ADDR] = 0x10c,
[BRCMNAND_CORR_ADDR] = 0x110,
[BRCMNAND_UNCORR_EXT_ADDR] = 0x114,
@@ -539,6 +545,7 @@ static const u16 brcmnand_regs_v72[] = {
[BRCMNAND_CORR_THRESHOLD_EXT] = 0xe0,
[BRCMNAND_UNCORR_COUNT] = 0xfc,
[BRCMNAND_CORR_COUNT] = 0x100,
+ [BRCMNAND_READ_ERROR_COUNT] = 0x104,
[BRCMNAND_CORR_EXT_ADDR] = 0x10c,
[BRCMNAND_CORR_ADDR] = 0x110,
[BRCMNAND_UNCORR_EXT_ADDR] = 0x114,
@@ -966,6 +973,13 @@ static inline u32 brcmnand_count_corrected(struct brcmnand_controller *ctrl)
return brcmnand_read_reg(ctrl, BRCMNAND_CORR_COUNT);
}
+static inline u32 brcmnand_corr_total(struct brcmnand_controller *ctrl)
+{
+ if (ctrl->nand_version < 0x400)
+ return 0;
+ return brcmnand_read_reg(ctrl, BRCMNAND_READ_ERROR_COUNT);
+}
+
static void brcmnand_wr_corr_thresh(struct brcmnand_host *host, u8 val)
{
struct brcmnand_controller *ctrl = host->ctrl;
@@ -2066,12 +2080,15 @@ static int brcmnand_dma_trans(struct brcmnand_host *host, u64 addr, u32 *buf,
*/
static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
u64 addr, unsigned int trans, u32 *buf,
- u8 *oob, u64 *err_addr)
+ u8 *oob, u64 *err_addr, unsigned int *corr)
{
struct brcmnand_host *host = nand_get_controller_data(chip);
struct brcmnand_controller *ctrl = host->ctrl;
int i, ret = 0;
+ if (corr)
+ *corr = 0;
+
brcmnand_clear_ecc_addr(ctrl);
for (i = 0; i < trans; i++, addr += FC_BYTES) {
@@ -2099,13 +2116,16 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
if (*err_addr)
ret = -EBADMSG;
- }
+ else {
+ *err_addr = brcmnand_get_correcc_addr(ctrl);
- if (!ret) {
- *err_addr = brcmnand_get_correcc_addr(ctrl);
+ if (*err_addr) {
+ ret = -EUCLEAN;
- if (*err_addr)
- ret = -EUCLEAN;
+ if (corr && brcmnand_count_corrected(ctrl) > *corr)
+ *corr = brcmnand_count_corrected(ctrl);
+ }
+ }
}
}
@@ -2173,6 +2193,8 @@ static int brcmnand_read(struct mtd_info *mtd, struct nand_chip *chip,
int err;
bool retry = true;
bool edu_err = false;
+ unsigned int corrected = 0; /* max corrected bits per subpage */
+ unsigned int prev_tot = brcmnand_corr_total(ctrl);
dev_dbg(ctrl->dev, "read %llx -> %p\n", (unsigned long long)addr, buf);
@@ -2200,9 +2222,11 @@ static int brcmnand_read(struct mtd_info *mtd, struct nand_chip *chip,
memset(oob, 0x99, mtd->oobsize);
err = brcmnand_read_by_pio(mtd, chip, addr, trans, buf,
- oob, &err_addr);
+ oob, &err_addr, &corrected);
}
+ mtd->ecc_stats.corrected += brcmnand_corr_total(ctrl) - prev_tot;
+
if (mtd_is_eccerr(err)) {
/*
* On controller version and 7.0, 7.1 , DMA read after a
@@ -2240,16 +2264,20 @@ static int brcmnand_read(struct mtd_info *mtd, struct nand_chip *chip,
}
if (mtd_is_bitflip(err)) {
- unsigned int corrected = brcmnand_count_corrected(ctrl);
-
/* in case of EDU correctable error we read again using PIO */
if (edu_err)
err = brcmnand_read_by_pio(mtd, chip, addr, trans, buf,
- oob, &err_addr);
+ oob, &err_addr, &corrected);
dev_dbg(ctrl->dev, "corrected error at 0x%llx\n",
(unsigned long long)err_addr);
- mtd->ecc_stats.corrected += corrected;
+ /*
+ * if flipped bits accumulator is not supported but we detected
+ * a correction, increase stat by 1 to match previous behavior.
+ */
+ if (brcmnand_corr_total(ctrl) == prev_tot)
+ mtd->ecc_stats.corrected++;
+
/* Always exceed the software-imposed threshold */
return max(mtd->bitflip_threshold, corrected);
}
--
2.43.5
On 06/06/2025 at 09:57:03 -07, David Regan <dregan@broadcom.com> wrote: > Currently we attempt to get the amount of flipped bits from a hardware > location which is reset on every subpage. Instead obtain total flipped > bits stat from hardware accumulator. In addition identify the correct > maximum subpage corrected bits. > > Signed-off-by: David Regan <dregan@broadcom.com> > Reviewed-by: William Zhang <william.zhang@broadcom.com> > --- Hello, Can you please give the output of nandbiterrs -i /dev/mtdX? > v2: Add >= v4 NAND controller support as requested by Jonas. > mtd->ecc_stats.corrected accumulates instead of set to total. > Remove DMA specific flipped bits count. The changelog does not mention the fact that you return the maximum number of corrected bitflips as I requested, and the diff does not show a straightforward implementation of that. It is very important to get this right. If we take the following example of a page with 4 ECC steps, if we get respectively: 0, 2, 3, 0 bitflips per step, the returned value shall be 3. To be very certain that this is correct, you can use the nandflipbit tool from the mtd-utils test suite. You can manually insert bitflips in various areas of a page and then read the page again with ECC enabled and see how many bit errors are reported. Thanks, Miquèl
Hi Miquèl, On Mon, Jun 9, 2025 at 2:20 AM Miquel Raynal <miquel.raynal@bootlin.com> wrote: > > On 06/06/2025 at 09:57:03 -07, David Regan <dregan@broadcom.com> wrote: > > > Currently we attempt to get the amount of flipped bits from a hardware > > location which is reset on every subpage. Instead obtain total flipped > > bits stat from hardware accumulator. In addition identify the correct > > maximum subpage corrected bits. > > > > Signed-off-by: David Regan <dregan@broadcom.com> > > Reviewed-by: William Zhang <william.zhang@broadcom.com> > > --- > > Hello, > > Can you please give the output of nandbiterrs -i /dev/mtdX? I'm not familiar with nandbiterrs but here's the results from mtd_nandbiterrs.ko on my NAND set to BCH8: # insmod mtd_nandbiterrs.ko dev=0 [ 676.097190] [ 676.098760] ================================================== [ 676.104609] mtd_nandbiterrs: MTD device: 0 [ 676.108732] mtd_nandbiterrs: MTD device size 2097152, eraseblock=262144, page=4096, oob=216 [ 676.117089] mtd_nandbiterrs: Device uses 1 subpages of 4096 bytes [ 676.123188] mtd_nandbiterrs: Using page=0, offset=0, eraseblock=0 [ 676.130863] mtd_nandbiterrs: incremental biterrors test [ 676.136154] mtd_nandbiterrs: write_page [ 676.140761] mtd_nandbiterrs: rewrite page [ 676.145473] mtd_nandbiterrs: read_page [ 676.149621] mtd_nandbiterrs: verify_page [ 676.153625] mtd_nandbiterrs: Successfully corrected 0 bit errors per subpage [ 676.160678] mtd_nandbiterrs: Inserted biterror @ 0/5 [ 676.165647] mtd_nandbiterrs: rewrite page [ 676.170363] mtd_nandbiterrs: read_page [ 676.174508] mtd_nandbiterrs: Read reported 1 corrected bit errors [ 676.180606] mtd_nandbiterrs: verify_page [ 676.184609] mtd_nandbiterrs: Successfully corrected 1 bit errors per subpage [ 676.191662] mtd_nandbiterrs: Inserted biterror @ 0/2 [ 676.196631] mtd_nandbiterrs: rewrite page [ 676.201342] mtd_nandbiterrs: read_page [ 676.205487] mtd_nandbiterrs: Read reported 2 corrected bit errors [ 676.211586] mtd_nandbiterrs: verify_page [ 676.215588] mtd_nandbiterrs: Successfully corrected 2 bit errors per subpage [ 676.222641] mtd_nandbiterrs: Inserted biterror @ 0/0 [ 676.227608] mtd_nandbiterrs: rewrite page [ 676.228356] mtd_nandbiterrs: read_page [ 676.228749] mtd_nandbiterrs: Read reported 3 corrected bit errors [ 676.228751] mtd_nandbiterrs: verify_page [ 676.228829] mtd_nandbiterrs: Successfully corrected 3 bit errors per subpage [ 676.228831] mtd_nandbiterrs: Inserted biterror @ 1/7 [ 676.228833] mtd_nandbiterrs: rewrite page [ 676.229530] mtd_nandbiterrs: read_page [ 676.229922] mtd_nandbiterrs: Read reported 4 corrected bit errors [ 676.229924] mtd_nandbiterrs: verify_page [ 676.230001] mtd_nandbiterrs: Successfully corrected 4 bit errors per subpage [ 676.230003] mtd_nandbiterrs: Inserted biterror @ 1/5 [ 676.230005] mtd_nandbiterrs: rewrite page [ 676.294177] mtd_nandbiterrs: read_page [ 676.298337] mtd_nandbiterrs: Read reported 5 corrected bit errors [ 676.304436] mtd_nandbiterrs: verify_page [ 676.308441] mtd_nandbiterrs: Successfully corrected 5 bit errors per subpage [ 676.315494] mtd_nandbiterrs: Inserted biterror @ 1/2 [ 676.320464] mtd_nandbiterrs: rewrite page [ 676.325174] mtd_nandbiterrs: read_page [ 676.329327] mtd_nandbiterrs: Read reported 6 corrected bit errors [ 676.335426] mtd_nandbiterrs: verify_page [ 676.339429] mtd_nandbiterrs: Successfully corrected 6 bit errors per subpage [ 676.346483] mtd_nandbiterrs: Inserted biterror @ 1/0 [ 676.351452] mtd_nandbiterrs: rewrite page [ 676.356162] mtd_nandbiterrs: read_page [ 676.360308] mtd_nandbiterrs: Read reported 7 corrected bit errors [ 676.366407] mtd_nandbiterrs: verify_page [ 676.370409] mtd_nandbiterrs: Successfully corrected 7 bit errors per subpage [ 676.377462] mtd_nandbiterrs: Inserted biterror @ 2/6 [ 676.382432] mtd_nandbiterrs: rewrite page [ 676.387142] mtd_nandbiterrs: read_page [ 676.391287] mtd_nandbiterrs: Read reported 8 corrected bit errors [ 676.397385] mtd_nandbiterrs: verify_page [ 676.401388] mtd_nandbiterrs: Successfully corrected 8 bit errors per subpage [ 676.408441] mtd_nandbiterrs: Inserted biterror @ 2/5 [ 676.413411] mtd_nandbiterrs: rewrite page [ 676.418122] mtd_nandbiterrs: read_page [ 676.422267] mtd_nandbiterrs: verify_page [ 676.426194] mtd_nandbiterrs: Error: page offset 0, expected 25, got 00 [ 676.432727] mtd_nandbiterrs: Error: page offset 1, expected a5, got 00 [ 676.439260] mtd_nandbiterrs: Error: page offset 2, expected 65, got 05 [ 676.445868] mtd_nandbiterrs: ECC failure, read data is incorrect despite read success [ 676.474929] [ 676.476425] ================================================== [ 676.482264] mtd_nandbiterrs: MTD device: 0 [ 676.486367] mtd_nandbiterrs: MTD device size 2097152, eraseblock=262144, page=4096, oob=216 [ 676.494721] mtd_nandbiterrs: Device uses 1 subpages of 4096 bytes [ 676.494724] mtd_nandbiterrs: Using page=0, offset=0, eraseblock=0 [ 676.496298] mtd_nandbiterrs: incremental biterrors test [ 676.496361] mtd_nandbiterrs: write_page [ 676.497123] mtd_nandbiterrs: rewrite page [ 676.497820] mtd_nandbiterrs: read_page [ 676.498210] mtd_nandbiterrs: verify_page [ 676.498287] mtd_nandbiterrs: Successfully corrected 0 bit errors per subpage [ 676.498289] mtd_nandbiterrs: Inserted biterror @ 0/5 [ 676.498291] mtd_nandbiterrs: rewrite page [ 676.547860] mtd_nandbiterrs: read_page [ 676.552005] mtd_nandbiterrs: Read reported 1 corrected bit errors [ 676.558104] mtd_nandbiterrs: verify_page [ 676.562107] mtd_nandbiterrs: Successfully corrected 1 bit errors per subpage [ 676.569160] mtd_nandbiterrs: Inserted biterror @ 0/2 [ 676.574130] mtd_nandbiterrs: rewrite page [ 676.578842] mtd_nandbiterrs: read_page [ 676.582987] mtd_nandbiterrs: Read reported 2 corrected bit errors [ 676.589085] mtd_nandbiterrs: verify_page [ 676.593088] mtd_nandbiterrs: Successfully corrected 2 bit errors per subpage [ 676.600141] mtd_nandbiterrs: Inserted biterror @ 0/0 [ 676.605111] mtd_nandbiterrs: rewrite page [ 676.609821] mtd_nandbiterrs: read_page [ 676.613967] mtd_nandbiterrs: Read reported 3 corrected bit errors [ 676.620065] mtd_nandbiterrs: verify_page [ 676.624068] mtd_nandbiterrs: Successfully corrected 3 bit errors per subpage [ 676.631122] mtd_nandbiterrs: Inserted biterror @ 1/7 [ 676.636091] mtd_nandbiterrs: rewrite page [ 676.640802] mtd_nandbiterrs: read_page [ 676.644947] mtd_nandbiterrs: Read reported 4 corrected bit errors [ 676.651045] mtd_nandbiterrs: verify_page [ 676.655048] mtd_nandbiterrs: Successfully corrected 4 bit errors per subpage [ 676.662100] mtd_nandbiterrs: Inserted biterror @ 1/5 [ 676.667070] mtd_nandbiterrs: rewrite page [ 676.671780] mtd_nandbiterrs: read_page [ 676.675925] mtd_nandbiterrs: Read reported 5 corrected bit errors [ 676.682025] mtd_nandbiterrs: verify_page [ 676.686028] mtd_nandbiterrs: Successfully corrected 5 bit errors per subpage [ 676.693082] mtd_nandbiterrs: Inserted biterror @ 1/2 [ 676.698051] mtd_nandbiterrs: rewrite page [ 676.702762] mtd_nandbiterrs: read_page [ 676.706907] mtd_nandbiterrs: Read reported 6 corrected bit errors [ 676.713005] mtd_nandbiterrs: verify_page [ 676.717008] mtd_nandbiterrs: Successfully corrected 6 bit errors per subpage [ 676.724076] mtd_nandbiterrs: Inserted biterror @ 1/0 [ 676.729047] mtd_nandbiterrs: rewrite page [ 676.733758] mtd_nandbiterrs: read_page [ 676.737904] mtd_nandbiterrs: Read reported 7 corrected bit errors [ 676.744003] mtd_nandbiterrs: verify_page [ 676.748006] mtd_nandbiterrs: Successfully corrected 7 bit errors per subpage [ 676.755059] mtd_nandbiterrs: Inserted biterror @ 2/6 [ 676.760029] mtd_nandbiterrs: rewrite page [ 676.764739] mtd_nandbiterrs: read_page [ 676.768884] mtd_nandbiterrs: Read reported 8 corrected bit errors [ 676.774982] mtd_nandbiterrs: verify_page [ 676.778986] mtd_nandbiterrs: Successfully corrected 8 bit errors per subpage [ 676.786039] mtd_nandbiterrs: Inserted biterror @ 2/5 [ 676.791009] mtd_nandbiterrs: rewrite page [ 676.795719] mtd_nandbiterrs: read_page [ 676.799864] mtd_nandbiterrs: verify_page [ 676.803791] mtd_nandbiterrs: Error: page offset 0, expected 25, got 00 [ 676.810324] mtd_nandbiterrs: Error: page offset 1, expected a5, got 00 [ 676.816857] mtd_nandbiterrs: Error: page offset 2, expected 65, got 05 [ 676.823463] mtd_nandbiterrs: ECC failure, read data is incorrect despite read success > > > v2: Add >= v4 NAND controller support as requested by Jonas. > > mtd->ecc_stats.corrected accumulates instead of set to total. > > Remove DMA specific flipped bits count. > > The changelog does not mention the fact that you return the maximum > number of corrected bitflips as I requested, and the diff does not show > a straightforward implementation of that. It is very important to get > this right. I'm a little unclear on what sort of verbiage you would like, I mentioned in the original and v2 patches summary: "In addition identify the correct maximum subpage corrected bits", would you like me to maybe change it to something such as "In addition this change fixes the maximum number of bitflips from all the subpages" ? > > If we take the following example of a page with 4 ECC steps, if we get > respectively: 0, 2, 3, 0 bitflips per step, the returned value shall be > 3. > > To be very certain that this is correct, you can use the nandflipbit > tool from the mtd-utils test suite. You can manually insert bitflips in > various areas of a page and then read the page again with ECC enabled > and see how many bit errors are reported. I'm not aware if there is a tool which reads the maximum returned ECC bits directly, however during my testing I have put in debug code which will hopefully make things more clear which shows the maximum amount of ECC chunk corrected bits being returned. We will return the maximum subpage corrected bits from brcmnand_read only when we cross a threshold (in this case 6 of a possible 8 bits) because this will trigger an EUCLEAN at a higher level. Here is my debug messages highlighting the values (flipped bits in each subpage: 0, 2, 3, 0, 0, 6, 7, 0): nandflipbits /dev/mtd2 0@512 1@513 2@1024 3@1025 4@1026 nandflipbits /dev/mtd2 1@2560 2@2561 3@2562 4@2563 5@2564 6@2565 nandflipbits /dev/mtd2 1@3072 2@3073 3@3074 4@3075 5@3076 6@3077 7@3078 cat /sys/class/mtd/mtd2/corrected_bits 0 # cat /dev/mtd2 > /dev/null [ 394.422696] subpage 0, bitflips detected 0, max subpage bitflips detected 0 [ 394.422764] subpage 1, bitflips detected 2, max subpage bitflips detected 0 [ 394.422818] subpage 2, bitflips detected 3, max subpage bitflips detected 0 [ 394.422869] subpage 3, bitflips detected 0, max subpage bitflips detected 0 [ 394.422919] subpage 4, bitflips detected 0, max subpage bitflips detected 0 [ 394.422974] subpage 5, bitflips detected 6, max subpage bitflips detected 6 [ 394.423028] subpage 6, bitflips detected 7, max subpage bitflips detected 7 [ 394.423079] subpage 7, bitflips detected 0, max subpage bitflips detected 7 [ 394.423085] bitflip threshold exceeded, returning 7 bitflips [ 394.423161] subpage 0, bitflips detected 0, max subpage bitflips detected 0 [ 394.423212] subpage 1, bitflips detected 0, max subpage bitflips detected 0 [ 394.423263] subpage 2, bitflips detected 0, max subpage bitflips detected 0 [ 394.423313] subpage 3, bitflips detected 0, max subpage bitflips detected 0 [ 394.423363] subpage 4, bitflips detected 0, max subpage bitflips detected 0 [ 394.423414] subpage 5, bitflips detected 0, max subpage bitflips detected 0 [ 394.423464] subpage 6, bitflips detected 0, max subpage bitflips detected 0 [ 394.423514] subpage 7, bitflips detected 0, max subpage bitflips detected 0 ... cat /sys/class/mtd/mtd2/corrected_bits 18 > > Thanks, > Miquèl Thanks! -Dave
Hello David, > I'm not familiar with nandbiterrs but here's the results from > mtd_nandbiterrs.ko on my NAND set to BCH8: > > # insmod mtd_nandbiterrs.ko dev=0 > [ 676.097190] > [ 676.098760] ================================================== > [ 676.104609] mtd_nandbiterrs: MTD device: 0 > [ 676.108732] mtd_nandbiterrs: MTD device size 2097152, > eraseblock=262144, page=4096, oob=216 > [ 676.117089] mtd_nandbiterrs: Device uses 1 subpages of 4096 bytes > [ 676.123188] mtd_nandbiterrs: Using page=0, offset=0, eraseblock=0 > [ 676.130863] mtd_nandbiterrs: incremental biterrors test > [ 676.136154] mtd_nandbiterrs: write_page > [ 676.140761] mtd_nandbiterrs: rewrite page > [ 676.145473] mtd_nandbiterrs: read_page > [ 676.149621] mtd_nandbiterrs: verify_page > [ 676.153625] mtd_nandbiterrs: Successfully corrected 0 bit errors per subpage > [ 676.160678] mtd_nandbiterrs: Inserted biterror @ 0/5 > [ 676.165647] mtd_nandbiterrs: rewrite page > [ 676.170363] mtd_nandbiterrs: read_page > [ 676.174508] mtd_nandbiterrs: Read reported 1 corrected bit errors > [ 676.180606] mtd_nandbiterrs: verify_page > [ 676.184609] mtd_nandbiterrs: Successfully corrected 1 bit errors per subpage > [ 676.191662] mtd_nandbiterrs: Inserted biterror @ 0/2 > [ 676.196631] mtd_nandbiterrs: rewrite page > [ 676.201342] mtd_nandbiterrs: read_page > [ 676.205487] mtd_nandbiterrs: Read reported 2 corrected bit errors > [ 676.211586] mtd_nandbiterrs: verify_page > [ 676.215588] mtd_nandbiterrs: Successfully corrected 2 bit errors per subpage > [ 676.222641] mtd_nandbiterrs: Inserted biterror @ 0/0 > [ 676.227608] mtd_nandbiterrs: rewrite page > [ 676.228356] mtd_nandbiterrs: read_page > [ 676.228749] mtd_nandbiterrs: Read reported 3 corrected bit errors > [ 676.228751] mtd_nandbiterrs: verify_page > [ 676.228829] mtd_nandbiterrs: Successfully corrected 3 bit errors per subpage > [ 676.228831] mtd_nandbiterrs: Inserted biterror @ 1/7 > [ 676.228833] mtd_nandbiterrs: rewrite page > [ 676.229530] mtd_nandbiterrs: read_page > [ 676.229922] mtd_nandbiterrs: Read reported 4 corrected bit errors > [ 676.229924] mtd_nandbiterrs: verify_page > [ 676.230001] mtd_nandbiterrs: Successfully corrected 4 bit errors per subpage > [ 676.230003] mtd_nandbiterrs: Inserted biterror @ 1/5 > [ 676.230005] mtd_nandbiterrs: rewrite page > [ 676.294177] mtd_nandbiterrs: read_page > [ 676.298337] mtd_nandbiterrs: Read reported 5 corrected bit errors > [ 676.304436] mtd_nandbiterrs: verify_page > [ 676.308441] mtd_nandbiterrs: Successfully corrected 5 bit errors per subpage > [ 676.315494] mtd_nandbiterrs: Inserted biterror @ 1/2 > [ 676.320464] mtd_nandbiterrs: rewrite page > [ 676.325174] mtd_nandbiterrs: read_page > [ 676.329327] mtd_nandbiterrs: Read reported 6 corrected bit errors > [ 676.335426] mtd_nandbiterrs: verify_page > [ 676.339429] mtd_nandbiterrs: Successfully corrected 6 bit errors per subpage > [ 676.346483] mtd_nandbiterrs: Inserted biterror @ 1/0 > [ 676.351452] mtd_nandbiterrs: rewrite page > [ 676.356162] mtd_nandbiterrs: read_page > [ 676.360308] mtd_nandbiterrs: Read reported 7 corrected bit errors > [ 676.366407] mtd_nandbiterrs: verify_page > [ 676.370409] mtd_nandbiterrs: Successfully corrected 7 bit errors per subpage > [ 676.377462] mtd_nandbiterrs: Inserted biterror @ 2/6 > [ 676.382432] mtd_nandbiterrs: rewrite page > [ 676.387142] mtd_nandbiterrs: read_page > [ 676.391287] mtd_nandbiterrs: Read reported 8 corrected bit errors > [ 676.397385] mtd_nandbiterrs: verify_page > [ 676.401388] mtd_nandbiterrs: Successfully corrected 8 bit errors > per subpage So far the reporting looks good (and the nandflipbits output looks correct as well). > [ 676.408441] mtd_nandbiterrs: Inserted biterror @ 2/5 > [ 676.413411] mtd_nandbiterrs: rewrite page > [ 676.418122] mtd_nandbiterrs: read_page > [ 676.422267] mtd_nandbiterrs: verify_page > [ 676.426194] mtd_nandbiterrs: Error: page offset 0, expected 25, got 00 > [ 676.432727] mtd_nandbiterrs: Error: page offset 1, expected a5, got 00 > [ 676.439260] mtd_nandbiterrs: Error: page offset 2, expected 65, got 05 > [ 676.445868] mtd_nandbiterrs: ECC failure, read data is incorrect > despite read success Here however there is something wrong. We do expect a read failure, instead of returning wrong data. There is still a problem, I do not know if this problem was there before though, but this must be fixed. Hello Florian, if you have time, I'd welcome you opinion on this patch :) Cheers, Miquèl
Hi Miquèl, On Wed, Jun 18, 2025 at 2:07 AM Miquel Raynal <miquel.raynal@bootlin.com> wrote: > > Hello David, > ... > Here however there is something wrong. We do expect a read failure, > instead of returning wrong data. There is still a problem, I do not know > if this problem was there before though, but this must be fixed. This was user error as I picked the wrong partition to test. There's a hardware limitation in which the first ECC chunk in page zero of block zero is ECC protected however the hardware flag to signify an ECC problem does not trigger (as can be seen by the result.) This limitation only affects page 0 of block 0 where it is typically a bootloader raw partition that is not under the management of the Linux file system, and in addition we have extra checksum/signature verification and loader redundancies to account for this limitation. Here is the result of the nandbiterrs test using a different partition: # insmod mtd_nandbiterrs.ko dev=2 [ 30.670312] [ 30.671853] ================================================== [ 30.677713] mtd_nandbiterrs: MTD device: 2 [ 30.681818] mtd_nandbiterrs: MTD device size 8388608, eraseblock=262144, page=4096, oob=216 [ 30.690175] mtd_nandbiterrs: Device uses 1 subpages of 4096 bytes [ 30.696273] mtd_nandbiterrs: Using page=0, offset=0, eraseblock=0 [ 30.703951] mtd_nandbiterrs: incremental biterrors test [ 30.709242] mtd_nandbiterrs: write_page [ 30.713851] mtd_nandbiterrs: rewrite page [ 30.718562] mtd_nandbiterrs: read_page [ 30.722708] mtd_nandbiterrs: verify_page [ 30.726711] mtd_nandbiterrs: Successfully corrected 0 bit errors per subpage [ 30.733764] mtd_nandbiterrs: Inserted biterror @ 0/5 [ 30.738733] mtd_nandbiterrs: rewrite page [ 30.743443] mtd_nandbiterrs: read_page [ 30.747588] mtd_nandbiterrs: Read reported 1 corrected bit errors [ 30.753686] mtd_nandbiterrs: verify_page [ 30.757688] mtd_nandbiterrs: Successfully corrected 1 bit errors per subpage [ 30.764741] mtd_nandbiterrs: Inserted biterror @ 0/2 [ 30.769710] mtd_nandbiterrs: rewrite page [ 30.774475] mtd_nandbiterrs: read_page [ 30.778620] mtd_nandbiterrs: Read reported 2 corrected bit errors [ 30.784718] mtd_nandbiterrs: verify_page [ 30.788721] mtd_nandbiterrs: Successfully corrected 2 bit errors per subpage [ 30.795773] mtd_nandbiterrs: Inserted biterror @ 0/0 [ 30.800743] mtd_nandbiterrs: rewrite page [ 30.805502] mtd_nandbiterrs: read_page [ 30.809647] mtd_nandbiterrs: Read reported 3 corrected bit errors [ 30.815745] mtd_nandbiterrs: verify_page [ 30.819748] mtd_nandbiterrs: Successfully corrected 3 bit errors per subpage [ 30.826801] mtd_nandbiterrs: Inserted biterror @ 1/7 [ 30.831770] mtd_nandbiterrs: rewrite page [ 30.836480] mtd_nandbiterrs: read_page [ 30.840625] mtd_nandbiterrs: Read reported 4 corrected bit errors [ 30.846723] mtd_nandbiterrs: verify_page [ 30.850725] mtd_nandbiterrs: Successfully corrected 4 bit errors per subpage [ 30.857777] mtd_nandbiterrs: Inserted biterror @ 1/5 [ 30.862746] mtd_nandbiterrs: rewrite page [ 30.867457] mtd_nandbiterrs: read_page [ 30.871602] mtd_nandbiterrs: Read reported 5 corrected bit errors [ 30.877700] mtd_nandbiterrs: verify_page [ 30.881702] mtd_nandbiterrs: Successfully corrected 5 bit errors per subpage [ 30.888754] mtd_nandbiterrs: Inserted biterror @ 1/2 [ 30.893722] mtd_nandbiterrs: rewrite page [ 30.898433] mtd_nandbiterrs: read_page [ 30.902581] mtd_nandbiterrs: Read reported 6 corrected bit errors [ 30.908679] mtd_nandbiterrs: verify_page [ 30.912681] mtd_nandbiterrs: Successfully corrected 6 bit errors per subpage [ 30.919734] mtd_nandbiterrs: Inserted biterror @ 1/0 [ 30.924704] mtd_nandbiterrs: rewrite page [ 30.929412] mtd_nandbiterrs: read_page [ 30.929808] mtd_nandbiterrs: Read reported 7 corrected bit errors [ 30.929810] mtd_nandbiterrs: verify_page [ 30.929887] mtd_nandbiterrs: Successfully corrected 7 bit errors per subpage [ 30.929889] mtd_nandbiterrs: Inserted biterror @ 2/6 [ 30.929891] mtd_nandbiterrs: rewrite page [ 30.960226] mtd_nandbiterrs: read_page [ 30.964374] mtd_nandbiterrs: Read reported 8 corrected bit errors [ 30.970476] mtd_nandbiterrs: verify_page [ 30.974480] mtd_nandbiterrs: Successfully corrected 8 bit errors per subpage [ 30.981533] mtd_nandbiterrs: Inserted biterror @ 2/5 [ 30.986528] mtd_nandbiterrs: rewrite page [ 30.991289] mtd_nandbiterrs: read_page [ 30.996228] bcmbca_nand ff801800.nand-controller: uncorrectable error at 0x1f700000 [ 31.003892] mtd_nandbiterrs: error: read failed at 0x0 [ 31.009035] mtd_nandbiterrs: After 9 biterrors per subpage, read reported error -74 [ 31.018263] mtd_nandbiterrs: finished successfully. [ 31.023146] ================================================== [ 31.051035] [ 31.052530] ================================================== [ 31.058369] mtd_nandbiterrs: MTD device: 2 [ 31.062475] mtd_nandbiterrs: MTD device size 8388608, eraseblock=262144, page=4096, oob=216 [ 31.070832] mtd_nandbiterrs: Device uses 1 subpages of 4096 bytes [ 31.076931] mtd_nandbiterrs: Using page=0, offset=0, eraseblock=0 [ 31.083751] mtd_nandbiterrs: incremental biterrors test [ 31.089041] mtd_nandbiterrs: write_page [ 31.093643] mtd_nandbiterrs: rewrite page [ 31.098354] mtd_nandbiterrs: read_page [ 31.102496] mtd_nandbiterrs: verify_page [ 31.106498] mtd_nandbiterrs: Successfully corrected 0 bit errors per subpage [ 31.113549] mtd_nandbiterrs: Inserted biterror @ 0/5 [ 31.113552] mtd_nandbiterrs: rewrite page [ 31.114250] mtd_nandbiterrs: read_page [ 31.127333] mtd_nandbiterrs: Read reported 1 corrected bit errors [ 31.133431] mtd_nandbiterrs: verify_page [ 31.137435] mtd_nandbiterrs: Successfully corrected 1 bit errors per subpage [ 31.144509] mtd_nandbiterrs: Inserted biterror @ 0/2 [ 31.149480] mtd_nandbiterrs: rewrite page [ 31.154240] mtd_nandbiterrs: read_page [ 31.158385] mtd_nandbiterrs: Read reported 2 corrected bit errors [ 31.164483] mtd_nandbiterrs: verify_page [ 31.168486] mtd_nandbiterrs: Successfully corrected 2 bit errors per subpage [ 31.175552] mtd_nandbiterrs: Inserted biterror @ 0/0 [ 31.180522] mtd_nandbiterrs: rewrite page [ 31.185282] mtd_nandbiterrs: read_page [ 31.189428] mtd_nandbiterrs: Read reported 3 corrected bit errors [ 31.195524] mtd_nandbiterrs: verify_page [ 31.195602] mtd_nandbiterrs: Successfully corrected 3 bit errors per subpage [ 31.195604] mtd_nandbiterrs: Inserted biterror @ 1/7 [ 31.195606] mtd_nandbiterrs: rewrite page [ 31.196304] mtd_nandbiterrs: read_page [ 31.196696] mtd_nandbiterrs: Read reported 4 corrected bit errors [ 31.196698] mtd_nandbiterrs: verify_page [ 31.196776] mtd_nandbiterrs: Successfully corrected 4 bit errors per subpage [ 31.196778] mtd_nandbiterrs: Inserted biterror @ 1/5 [ 31.196780] mtd_nandbiterrs: rewrite page [ 31.197477] mtd_nandbiterrs: read_page [ 31.197869] mtd_nandbiterrs: Read reported 5 corrected bit errors [ 31.197871] mtd_nandbiterrs: verify_page [ 31.197948] mtd_nandbiterrs: Successfully corrected 5 bit errors per subpage [ 31.197950] mtd_nandbiterrs: Inserted biterror @ 1/2 [ 31.197952] mtd_nandbiterrs: rewrite page [ 31.278059] mtd_nandbiterrs: read_page [ 31.282207] mtd_nandbiterrs: Read reported 6 corrected bit errors [ 31.288323] mtd_nandbiterrs: verify_page [ 31.292327] mtd_nandbiterrs: Successfully corrected 6 bit errors per subpage [ 31.299380] mtd_nandbiterrs: Inserted biterror @ 1/0 [ 31.304349] mtd_nandbiterrs: rewrite page [ 31.309059] mtd_nandbiterrs: read_page [ 31.313207] mtd_nandbiterrs: Read reported 7 corrected bit errors [ 31.319306] mtd_nandbiterrs: verify_page [ 31.323308] mtd_nandbiterrs: Successfully corrected 7 bit errors per subpage [ 31.330360] mtd_nandbiterrs: Inserted biterror @ 2/6 [ 31.335335] mtd_nandbiterrs: rewrite page [ 31.340045] mtd_nandbiterrs: read_page [ 31.344194] mtd_nandbiterrs: Read reported 8 corrected bit errors [ 31.350291] mtd_nandbiterrs: verify_page [ 31.354294] mtd_nandbiterrs: Successfully corrected 8 bit errors per subpage [ 31.361347] mtd_nandbiterrs: Inserted biterror @ 2/5 [ 31.366316] mtd_nandbiterrs: rewrite page [ 31.371076] mtd_nandbiterrs: read_page [ 31.376011] bcmbca_nand ff801800.nand-controller: uncorrectable error at 0x1f700000 [ 31.383674] mtd_nandbiterrs: error: read failed at 0x0 [ 31.388818] mtd_nandbiterrs: After 9 biterrors per subpage, read reported error -74 [ 31.398046] mtd_nandbiterrs: finished successfully. [ 31.402928] ================================================== > Cheers, > Miquèl Thanks! -Dave
Hi Miquèl, On Wed, Jun 18, 2025 at 2:07 AM Miquel Raynal <miquel.raynal@bootlin.com> wrote: > > Hello David ... > So far the reporting looks good (and the nandflipbits output looks > correct as well). > > > [ 676.408441] mtd_nandbiterrs: Inserted biterror @ 2/5 > > [ 676.413411] mtd_nandbiterrs: rewrite page > > [ 676.418122] mtd_nandbiterrs: read_page > > [ 676.422267] mtd_nandbiterrs: verify_page > > [ 676.426194] mtd_nandbiterrs: Error: page offset 0, expected 25, got 00 > > [ 676.432727] mtd_nandbiterrs: Error: page offset 1, expected a5, got 00 > > [ 676.439260] mtd_nandbiterrs: Error: page offset 2, expected 65, got 05 > > [ 676.445868] mtd_nandbiterrs: ECC failure, read data is incorrect > > despite read success > > Here however there is something wrong. We do expect a read failure, > instead of returning wrong data. There is still a problem, I do not know > if this problem was there before though, but this must be fixed. > Thank you for highlighting this issue, I will investigate. > > Hello Florian, if you have time, I'd welcome you opinion on this patch :) > > Cheers, > Miquèl Thanks! -Dave
On 6/18/25 02:07, Miquel Raynal wrote: > Hello David, > >> I'm not familiar with nandbiterrs but here's the results from >> mtd_nandbiterrs.ko on my NAND set to BCH8: >> >> # insmod mtd_nandbiterrs.ko dev=0 >> [ 676.097190] >> [ 676.098760] ================================================== >> [ 676.104609] mtd_nandbiterrs: MTD device: 0 >> [ 676.108732] mtd_nandbiterrs: MTD device size 2097152, >> eraseblock=262144, page=4096, oob=216 >> [ 676.117089] mtd_nandbiterrs: Device uses 1 subpages of 4096 bytes >> [ 676.123188] mtd_nandbiterrs: Using page=0, offset=0, eraseblock=0 >> [ 676.130863] mtd_nandbiterrs: incremental biterrors test >> [ 676.136154] mtd_nandbiterrs: write_page >> [ 676.140761] mtd_nandbiterrs: rewrite page >> [ 676.145473] mtd_nandbiterrs: read_page >> [ 676.149621] mtd_nandbiterrs: verify_page >> [ 676.153625] mtd_nandbiterrs: Successfully corrected 0 bit errors per subpage >> [ 676.160678] mtd_nandbiterrs: Inserted biterror @ 0/5 >> [ 676.165647] mtd_nandbiterrs: rewrite page >> [ 676.170363] mtd_nandbiterrs: read_page >> [ 676.174508] mtd_nandbiterrs: Read reported 1 corrected bit errors >> [ 676.180606] mtd_nandbiterrs: verify_page >> [ 676.184609] mtd_nandbiterrs: Successfully corrected 1 bit errors per subpage >> [ 676.191662] mtd_nandbiterrs: Inserted biterror @ 0/2 >> [ 676.196631] mtd_nandbiterrs: rewrite page >> [ 676.201342] mtd_nandbiterrs: read_page >> [ 676.205487] mtd_nandbiterrs: Read reported 2 corrected bit errors >> [ 676.211586] mtd_nandbiterrs: verify_page >> [ 676.215588] mtd_nandbiterrs: Successfully corrected 2 bit errors per subpage >> [ 676.222641] mtd_nandbiterrs: Inserted biterror @ 0/0 >> [ 676.227608] mtd_nandbiterrs: rewrite page >> [ 676.228356] mtd_nandbiterrs: read_page >> [ 676.228749] mtd_nandbiterrs: Read reported 3 corrected bit errors >> [ 676.228751] mtd_nandbiterrs: verify_page >> [ 676.228829] mtd_nandbiterrs: Successfully corrected 3 bit errors per subpage >> [ 676.228831] mtd_nandbiterrs: Inserted biterror @ 1/7 >> [ 676.228833] mtd_nandbiterrs: rewrite page >> [ 676.229530] mtd_nandbiterrs: read_page >> [ 676.229922] mtd_nandbiterrs: Read reported 4 corrected bit errors >> [ 676.229924] mtd_nandbiterrs: verify_page >> [ 676.230001] mtd_nandbiterrs: Successfully corrected 4 bit errors per subpage >> [ 676.230003] mtd_nandbiterrs: Inserted biterror @ 1/5 >> [ 676.230005] mtd_nandbiterrs: rewrite page >> [ 676.294177] mtd_nandbiterrs: read_page >> [ 676.298337] mtd_nandbiterrs: Read reported 5 corrected bit errors >> [ 676.304436] mtd_nandbiterrs: verify_page >> [ 676.308441] mtd_nandbiterrs: Successfully corrected 5 bit errors per subpage >> [ 676.315494] mtd_nandbiterrs: Inserted biterror @ 1/2 >> [ 676.320464] mtd_nandbiterrs: rewrite page >> [ 676.325174] mtd_nandbiterrs: read_page >> [ 676.329327] mtd_nandbiterrs: Read reported 6 corrected bit errors >> [ 676.335426] mtd_nandbiterrs: verify_page >> [ 676.339429] mtd_nandbiterrs: Successfully corrected 6 bit errors per subpage >> [ 676.346483] mtd_nandbiterrs: Inserted biterror @ 1/0 >> [ 676.351452] mtd_nandbiterrs: rewrite page >> [ 676.356162] mtd_nandbiterrs: read_page >> [ 676.360308] mtd_nandbiterrs: Read reported 7 corrected bit errors >> [ 676.366407] mtd_nandbiterrs: verify_page >> [ 676.370409] mtd_nandbiterrs: Successfully corrected 7 bit errors per subpage >> [ 676.377462] mtd_nandbiterrs: Inserted biterror @ 2/6 >> [ 676.382432] mtd_nandbiterrs: rewrite page >> [ 676.387142] mtd_nandbiterrs: read_page >> [ 676.391287] mtd_nandbiterrs: Read reported 8 corrected bit errors >> [ 676.397385] mtd_nandbiterrs: verify_page >> [ 676.401388] mtd_nandbiterrs: Successfully corrected 8 bit errors >> per subpage > > So far the reporting looks good (and the nandflipbits output looks > correct as well). > >> [ 676.408441] mtd_nandbiterrs: Inserted biterror @ 2/5 >> [ 676.413411] mtd_nandbiterrs: rewrite page >> [ 676.418122] mtd_nandbiterrs: read_page >> [ 676.422267] mtd_nandbiterrs: verify_page >> [ 676.426194] mtd_nandbiterrs: Error: page offset 0, expected 25, got 00 >> [ 676.432727] mtd_nandbiterrs: Error: page offset 1, expected a5, got 00 >> [ 676.439260] mtd_nandbiterrs: Error: page offset 2, expected 65, got 05 >> [ 676.445868] mtd_nandbiterrs: ECC failure, read data is incorrect >> despite read success > > Here however there is something wrong. We do expect a read failure, > instead of returning wrong data. There is still a problem, I do not know > if this problem was there before though, but this must be fixed. > > Hello Florian, if you have time, I'd welcome you opinion on this patch :) I don't think I am entitled any opinion on this patch other than confirming the register offset locations. Both William and David have far more experience with this NAND controller and devices that I do. -- Florian
© 2016 - 2026 Red Hat, Inc.