The setup_interface() operation isn't implemented. It forces the driver
to use the ONFI mode 0, though it could use more optimal modes.
Implement the setup_interface() operation. It uses the
aemif_set_cs_timings() function from the AEMIF driver to update the
chip select timings. The calculation of the register's contents is
directly extracted from §20.3.2.3 of the DaVinci TRM [1]
These timings are previously set by the AEMIF driver itself from
device-tree properties. Therefore, IMHO, failing to update them in the
setup_interface() isn't critical, which is why 0 is returned even when
timings aren't updated.
MAX_TH_PS and MAX_TSU_PS are the worst case timings based on the
Keystone2 and DaVinci datasheets.
[1] : https://www.ti.com/lit/ug/spruh77c/spruh77c.pdf
Signed-off-by: Bastien Curutchet <bastien.curutchet@bootlin.com>
---
drivers/mtd/nand/raw/davinci_nand.c | 78 +++++++++++++++++++++++++++++
1 file changed, 78 insertions(+)
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index 563045c7ce08..2d0c564c8d17 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -14,6 +14,7 @@
#include <linux/err.h>
#include <linux/iopoll.h>
#include <linux/kernel.h>
+#include <linux/memory/ti-aemif.h>
#include <linux/module.h>
#include <linux/mtd/rawnand.h>
#include <linux/mtd/partitions.h>
@@ -44,6 +45,9 @@
#define MASK_ALE 0x08
#define MASK_CLE 0x10
+#define MAX_TSU_PS 3000 /* Input setup time in ps */
+#define MAX_TH_PS 1600 /* Input hold time in ps */
+
struct davinci_nand_pdata {
uint32_t mask_ale;
uint32_t mask_cle;
@@ -120,6 +124,7 @@ struct davinci_nand_info {
uint32_t core_chipsel;
struct clk *clk;
+ struct aemif_device *aemif;
};
static DEFINE_SPINLOCK(davinci_nand_lock);
@@ -767,9 +772,81 @@ static int davinci_nand_exec_op(struct nand_chip *chip,
return 0;
}
+#define TO_CYCLES(ps, period_ns) (DIV_ROUND_UP((ps) / 1000, (period_ns)))
+
+static int davinci_nand_setup_interface(struct nand_chip *chip, int chipnr,
+ const struct nand_interface_config *conf)
+{
+ struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip));
+ const struct nand_sdr_timings *sdr;
+ struct aemif_cs_timings timings;
+ s32 cfg, min, cyc_ns;
+
+ cyc_ns = 1000000000 / clk_get_rate(info->clk);
+
+ sdr = nand_get_sdr_timings(conf);
+ if (IS_ERR(sdr))
+ return PTR_ERR(sdr);
+
+ cfg = TO_CYCLES(sdr->tCLR_min, cyc_ns) - 1;
+ timings.rsetup = cfg > 0 ? cfg : 0;
+
+ cfg = max_t(s32, TO_CYCLES(sdr->tREA_max + MAX_TSU_PS, cyc_ns),
+ TO_CYCLES(sdr->tRP_min, cyc_ns)) - 1;
+ timings.rstrobe = cfg > 0 ? cfg : 0;
+
+ min = TO_CYCLES(sdr->tCEA_max + MAX_TSU_PS, cyc_ns) - 2;
+ while ((s32)(timings.rsetup + timings.rstrobe) < min)
+ timings.rstrobe++;
+
+ cfg = TO_CYCLES((s32)(MAX_TH_PS - sdr->tCHZ_max), cyc_ns) - 1;
+ timings.rhold = cfg > 0 ? cfg : 0;
+
+ min = TO_CYCLES(sdr->tRC_min, cyc_ns) - 3;
+ while ((s32)(timings.rsetup + timings.rstrobe + timings.rhold) < min)
+ timings.rhold++;
+
+ cfg = TO_CYCLES((s32)(sdr->tRHZ_max - (timings.rhold + 1) * cyc_ns * 1000), cyc_ns);
+ cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCHZ_max, cyc_ns)) - 1;
+ timings.ta = cfg > 0 ? cfg : 0;
+
+ cfg = TO_CYCLES(sdr->tWP_min, cyc_ns) - 1;
+ timings.wstrobe = cfg > 0 ? cfg : 0;
+
+ cfg = max_t(s32, TO_CYCLES(sdr->tCLS_min, cyc_ns), TO_CYCLES(sdr->tALS_min, cyc_ns));
+ cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCS_min, cyc_ns)) - 1;
+ timings.wsetup = cfg > 0 ? cfg : 0;
+
+ min = TO_CYCLES(sdr->tDS_min, cyc_ns) - 2;
+ while ((s32)(timings.wsetup + timings.wstrobe) < min)
+ timings.wstrobe++;
+
+ cfg = max_t(s32, TO_CYCLES(sdr->tCLH_min, cyc_ns), TO_CYCLES(sdr->tALH_min, cyc_ns));
+ cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCH_min, cyc_ns));
+ cfg = max_t(s32, cfg, TO_CYCLES(sdr->tDH_min, cyc_ns)) - 1;
+ timings.whold = cfg > 0 ? cfg : 0;
+
+ min = TO_CYCLES(sdr->tWC_min, cyc_ns) - 2;
+ while ((s32)(timings.wsetup + timings.wstrobe + timings.whold) < min)
+ timings.whold++;
+
+ dev_dbg(&info->pdev->dev, "RSETUP %x RSTROBE %x RHOLD %x\n",
+ timings.rsetup, timings.rstrobe, timings.rhold);
+ dev_dbg(&info->pdev->dev, "TA %x\n", timings.ta);
+ dev_dbg(&info->pdev->dev, "WSETUP %x WSTROBE %x WHOLD %x\n",
+ timings.wsetup, timings.wstrobe, timings.whold);
+
+ if (aemif_set_cs_timings(info->aemif, info->core_chipsel, &timings) < 0)
+ dev_info(&info->pdev->dev,
+ "Failed to dynamically update the CS timings, keep them unchanged");
+
+ return 0;
+}
+
static const struct nand_controller_ops davinci_nand_controller_ops = {
.attach_chip = davinci_nand_attach_chip,
.exec_op = davinci_nand_exec_op,
+ .setup_interface = davinci_nand_setup_interface,
};
static int nand_davinci_probe(struct platform_device *pdev)
@@ -832,6 +909,7 @@ static int nand_davinci_probe(struct platform_device *pdev)
info->pdev = pdev;
info->base = base;
info->vaddr = vaddr;
+ info->aemif = dev_get_drvdata(pdev->dev.parent);
mtd = nand_to_mtd(&info->chip);
mtd->dev.parent = &pdev->dev;
--
2.47.0
Hi Bastien, On 06/11/2024 at 09:55:07 +01, Bastien Curutchet <bastien.curutchet@bootlin.com> wrote: > The setup_interface() operation isn't implemented. It forces the driver > to use the ONFI mode 0, though it could use more optimal modes. > > Implement the setup_interface() operation. It uses the > aemif_set_cs_timings() function from the AEMIF driver to update the > chip select timings. The calculation of the register's contents is > directly extracted from §20.3.2.3 of the DaVinci TRM [1] > > These timings are previously set by the AEMIF driver itself from > device-tree properties. Therefore, IMHO, failing to update them in the > setup_interface() isn't critical, which is why 0 is returned even when > timings aren't updated. Did you experience failures? Because I'd be more conservative and error out loudly when something is wrong. In general it is a safest approach on the long term. Here maybe you have good reasons to do differently, in this case I am all ears. > MAX_TH_PS and MAX_TSU_PS are the worst case timings based on the > Keystone2 and DaVinci datasheets. > > [1] : https://www.ti.com/lit/ug/spruh77c/spruh77c.pdf > > Signed-off-by: Bastien Curutchet <bastien.curutchet@bootlin.com> > --- > drivers/mtd/nand/raw/davinci_nand.c | 78 +++++++++++++++++++++++++++++ > 1 file changed, 78 insertions(+) > > diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c > index 563045c7ce08..2d0c564c8d17 100644 > --- a/drivers/mtd/nand/raw/davinci_nand.c > +++ b/drivers/mtd/nand/raw/davinci_nand.c > @@ -14,6 +14,7 @@ > #include <linux/err.h> > #include <linux/iopoll.h> > #include <linux/kernel.h> > +#include <linux/memory/ti-aemif.h> > #include <linux/module.h> > #include <linux/mtd/rawnand.h> > #include <linux/mtd/partitions.h> > @@ -44,6 +45,9 @@ > #define MASK_ALE 0x08 > #define MASK_CLE 0x10 > > +#define MAX_TSU_PS 3000 /* Input setup time in ps */ > +#define MAX_TH_PS 1600 /* Input hold time in ps */ > + > struct davinci_nand_pdata { > uint32_t mask_ale; > uint32_t mask_cle; > @@ -120,6 +124,7 @@ struct davinci_nand_info { > uint32_t core_chipsel; > > struct clk *clk; > + struct aemif_device *aemif; > }; > > static DEFINE_SPINLOCK(davinci_nand_lock); > @@ -767,9 +772,81 @@ static int davinci_nand_exec_op(struct nand_chip *chip, > return 0; > } > > +#define TO_CYCLES(ps, period_ns) (DIV_ROUND_UP((ps) / 1000, (period_ns))) > + > +static int davinci_nand_setup_interface(struct nand_chip *chip, int chipnr, > + const struct nand_interface_config *conf) > +{ > + struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip)); > + const struct nand_sdr_timings *sdr; > + struct aemif_cs_timings timings; > + s32 cfg, min, cyc_ns; > + > + cyc_ns = 1000000000 / clk_get_rate(info->clk); > + > + sdr = nand_get_sdr_timings(conf); > + if (IS_ERR(sdr)) > + return PTR_ERR(sdr); > + > + cfg = TO_CYCLES(sdr->tCLR_min, cyc_ns) - 1; > + timings.rsetup = cfg > 0 ? cfg : 0; > + > + cfg = max_t(s32, TO_CYCLES(sdr->tREA_max + MAX_TSU_PS, cyc_ns), > + TO_CYCLES(sdr->tRP_min, cyc_ns)) - 1; > + timings.rstrobe = cfg > 0 ? cfg : 0; > + > + min = TO_CYCLES(sdr->tCEA_max + MAX_TSU_PS, cyc_ns) - 2; > + while ((s32)(timings.rsetup + timings.rstrobe) < min) > + timings.rstrobe++; I see a lot of while loops which just stop if you reach a min/max, I believe a slightly more robust approach should be attempted, and returning errors when these limits are crossed would be probably beneficial on the long term? > + > + cfg = TO_CYCLES((s32)(MAX_TH_PS - sdr->tCHZ_max), cyc_ns) - 1; > + timings.rhold = cfg > 0 ? cfg : 0; > + > + min = TO_CYCLES(sdr->tRC_min, cyc_ns) - 3; > + while ((s32)(timings.rsetup + timings.rstrobe + timings.rhold) < min) > + timings.rhold++; > + > + cfg = TO_CYCLES((s32)(sdr->tRHZ_max - (timings.rhold + 1) * cyc_ns * 1000), cyc_ns); > + cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCHZ_max, cyc_ns)) - 1; > + timings.ta = cfg > 0 ? cfg : 0; > + > + cfg = TO_CYCLES(sdr->tWP_min, cyc_ns) - 1; > + timings.wstrobe = cfg > 0 ? cfg : 0; > + > + cfg = max_t(s32, TO_CYCLES(sdr->tCLS_min, cyc_ns), TO_CYCLES(sdr->tALS_min, cyc_ns)); > + cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCS_min, cyc_ns)) - 1; > + timings.wsetup = cfg > 0 ? cfg : 0; > + > + min = TO_CYCLES(sdr->tDS_min, cyc_ns) - 2; > + while ((s32)(timings.wsetup + timings.wstrobe) < min) > + timings.wstrobe++; > + > + cfg = max_t(s32, TO_CYCLES(sdr->tCLH_min, cyc_ns), TO_CYCLES(sdr->tALH_min, cyc_ns)); > + cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCH_min, cyc_ns)); > + cfg = max_t(s32, cfg, TO_CYCLES(sdr->tDH_min, cyc_ns)) - 1; > + timings.whold = cfg > 0 ? cfg : 0; > + > + min = TO_CYCLES(sdr->tWC_min, cyc_ns) - 2; > + while ((s32)(timings.wsetup + timings.wstrobe + timings.whold) < min) > + timings.whold++; > + > + dev_dbg(&info->pdev->dev, "RSETUP %x RSTROBE %x RHOLD %x\n", > + timings.rsetup, timings.rstrobe, timings.rhold); > + dev_dbg(&info->pdev->dev, "TA %x\n", timings.ta); > + dev_dbg(&info->pdev->dev, "WSETUP %x WSTROBE %x WHOLD %x\n", > + timings.wsetup, timings.wstrobe, timings.whold); > + Here you probably want to exit in the NAND_DATA_IFACE_CHECK_ONLY case. > + if (aemif_set_cs_timings(info->aemif, info->core_chipsel, &timings) < 0) > + dev_info(&info->pdev->dev, > + "Failed to dynamically update the CS timings, keep them unchanged"); > + > + return 0; > +} > + Thanks, Miquèl
Hi Miquèl, On 11/11/24 8:32 PM, Miquel Raynal wrote: > Hi Bastien, > > On 06/11/2024 at 09:55:07 +01, Bastien Curutchet <bastien.curutchet@bootlin.com> wrote: > >> The setup_interface() operation isn't implemented. It forces the driver >> to use the ONFI mode 0, though it could use more optimal modes. >> >> Implement the setup_interface() operation. It uses the >> aemif_set_cs_timings() function from the AEMIF driver to update the >> chip select timings. The calculation of the register's contents is >> directly extracted from §20.3.2.3 of the DaVinci TRM [1] >> >> These timings are previously set by the AEMIF driver itself from >> device-tree properties. Therefore, IMHO, failing to update them in the >> setup_interface() isn't critical, which is why 0 is returned even when >> timings aren't updated. > > Did you experience failures? Because I'd be more conservative and error > out loudly when something is wrong. In general it is a safest approach > on the long term. Here maybe you have good reasons to do differently, in > this case I am all ears. > The DaVinci's configuration isn't very wide so if its reference clock rate is too high, mode 0 timings can be too slow to fit while higher modes will fit. So returning -EINVAL here will cause the probe to fail with 'Failed to configure data interface to SDR timing mode 0' while higher modes would have worked. >> MAX_TH_PS and MAX_TSU_PS are the worst case timings based on the >> Keystone2 and DaVinci datasheets. >> >> [1] : https://www.ti.com/lit/ug/spruh77c/spruh77c.pdf >> >> Signed-off-by: Bastien Curutchet <bastien.curutchet@bootlin.com> >> --- >> drivers/mtd/nand/raw/davinci_nand.c | 78 +++++++++++++++++++++++++++++ >> 1 file changed, 78 insertions(+) >> >> diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c >> index 563045c7ce08..2d0c564c8d17 100644 >> --- a/drivers/mtd/nand/raw/davinci_nand.c >> +++ b/drivers/mtd/nand/raw/davinci_nand.c >> @@ -14,6 +14,7 @@ >> #include <linux/err.h> >> #include <linux/iopoll.h> >> #include <linux/kernel.h> >> +#include <linux/memory/ti-aemif.h> >> #include <linux/module.h> >> #include <linux/mtd/rawnand.h> >> #include <linux/mtd/partitions.h> >> @@ -44,6 +45,9 @@ >> #define MASK_ALE 0x08 >> #define MASK_CLE 0x10 >> >> +#define MAX_TSU_PS 3000 /* Input setup time in ps */ >> +#define MAX_TH_PS 1600 /* Input hold time in ps */ >> + >> struct davinci_nand_pdata { >> uint32_t mask_ale; >> uint32_t mask_cle; >> @@ -120,6 +124,7 @@ struct davinci_nand_info { >> uint32_t core_chipsel; >> >> struct clk *clk; >> + struct aemif_device *aemif; >> }; >> >> static DEFINE_SPINLOCK(davinci_nand_lock); >> @@ -767,9 +772,81 @@ static int davinci_nand_exec_op(struct nand_chip *chip, >> return 0; >> } >> >> +#define TO_CYCLES(ps, period_ns) (DIV_ROUND_UP((ps) / 1000, (period_ns))) >> + >> +static int davinci_nand_setup_interface(struct nand_chip *chip, int chipnr, >> + const struct nand_interface_config *conf) >> +{ >> + struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip)); >> + const struct nand_sdr_timings *sdr; >> + struct aemif_cs_timings timings; >> + s32 cfg, min, cyc_ns; >> + >> + cyc_ns = 1000000000 / clk_get_rate(info->clk); >> + >> + sdr = nand_get_sdr_timings(conf); >> + if (IS_ERR(sdr)) >> + return PTR_ERR(sdr); >> + >> + cfg = TO_CYCLES(sdr->tCLR_min, cyc_ns) - 1; >> + timings.rsetup = cfg > 0 ? cfg : 0; >> + >> + cfg = max_t(s32, TO_CYCLES(sdr->tREA_max + MAX_TSU_PS, cyc_ns), >> + TO_CYCLES(sdr->tRP_min, cyc_ns)) - 1; >> + timings.rstrobe = cfg > 0 ? cfg : 0; >> + >> + min = TO_CYCLES(sdr->tCEA_max + MAX_TSU_PS, cyc_ns) - 2; >> + while ((s32)(timings.rsetup + timings.rstrobe) < min) >> + timings.rstrobe++; > > I see a lot of while loops which just stop if you reach a min/max, I > believe a slightly more robust approach should be attempted, and > returning errors when these limits are crossed would be probably > beneficial on the long term? > This comes from the DaVinci NAND controller's documentation (cf p908 of https://www.ti.com/lit/ug/spruh77c/spruh77c.pdf). A first formula, gives the RSETUP timing, a second one the RSTROBE timing and then a third formula gives a constraint that has to be met by the sum of RSETUP and RSTROBE. Then the validity of the timings themselves is checked by the aemif_set_cs_timings() function. It works the same way for WSETUP, WSTROBE and WHOLD with the other while() loops. >> + >> + cfg = TO_CYCLES((s32)(MAX_TH_PS - sdr->tCHZ_max), cyc_ns) - 1; >> + timings.rhold = cfg > 0 ? cfg : 0; >> + >> + min = TO_CYCLES(sdr->tRC_min, cyc_ns) - 3; >> + while ((s32)(timings.rsetup + timings.rstrobe + timings.rhold) < min) >> + timings.rhold++; >> + >> + cfg = TO_CYCLES((s32)(sdr->tRHZ_max - (timings.rhold + 1) * cyc_ns * 1000), cyc_ns); >> + cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCHZ_max, cyc_ns)) - 1; >> + timings.ta = cfg > 0 ? cfg : 0; >> + >> + cfg = TO_CYCLES(sdr->tWP_min, cyc_ns) - 1; >> + timings.wstrobe = cfg > 0 ? cfg : 0; >> + >> + cfg = max_t(s32, TO_CYCLES(sdr->tCLS_min, cyc_ns), TO_CYCLES(sdr->tALS_min, cyc_ns)); >> + cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCS_min, cyc_ns)) - 1; >> + timings.wsetup = cfg > 0 ? cfg : 0; >> + >> + min = TO_CYCLES(sdr->tDS_min, cyc_ns) - 2; >> + while ((s32)(timings.wsetup + timings.wstrobe) < min) >> + timings.wstrobe++; >> + >> + cfg = max_t(s32, TO_CYCLES(sdr->tCLH_min, cyc_ns), TO_CYCLES(sdr->tALH_min, cyc_ns)); >> + cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCH_min, cyc_ns)); >> + cfg = max_t(s32, cfg, TO_CYCLES(sdr->tDH_min, cyc_ns)) - 1; >> + timings.whold = cfg > 0 ? cfg : 0; >> + >> + min = TO_CYCLES(sdr->tWC_min, cyc_ns) - 2; >> + while ((s32)(timings.wsetup + timings.wstrobe + timings.whold) < min) >> + timings.whold++; >> + >> + dev_dbg(&info->pdev->dev, "RSETUP %x RSTROBE %x RHOLD %x\n", >> + timings.rsetup, timings.rstrobe, timings.rhold); >> + dev_dbg(&info->pdev->dev, "TA %x\n", timings.ta); >> + dev_dbg(&info->pdev->dev, "WSETUP %x WSTROBE %x WHOLD %x\n", >> + timings.wsetup, timings.wstrobe, timings.whold); >> + > > Here you probably want to exit in the NAND_DATA_IFACE_CHECK_ONLY case. > I had missed this NAND_DATA_IFACE_CHECK_ONLY feature. In fact, additionnal checks are done in the aemif_set_cs_timings() below. I'll add an 'aemif_check_cs_timings()' function in AEMIF's driver in next iteration. >> + if (aemif_set_cs_timings(info->aemif, info->core_chipsel, &timings) < 0) >> + dev_info(&info->pdev->dev, >> + "Failed to dynamically update the CS timings, keep them unchanged"); >> + >> + return 0; >> +} >> + > > Thanks, > Miquèl
© 2016 - 2024 Red Hat, Inc.