[v2] Implement setup_interface() in the DaVinci NAND controller

[PATCH v2 6/6] mtd: rawnand: davinci: Implement setup_interface() operation

Posted by Bastien Curutchet 1 year, 3 months ago

The setup_interface() operation isn't implemented. It forces the driver
to use the ONFI mode 0, though it could use more optimal modes.

Implement the setup_interface() operation. It uses the
aemif_set_cs_timings() function from the AEMIF driver to update the
chip select timings. The calculation of the register's contents is
directly extracted from §20.3.2.3 of the DaVinci TRM [1]

These timings are previously set by the AEMIF driver itself from
device-tree properties. Therefore, IMHO, failing to update them in the
setup_interface() isn't critical, which is why 0 is returned even when
timings aren't updated.

MAX_TH_PS and MAX_TSU_PS are the worst case timings based on the
Keystone2 and DaVinci datasheets.

[1] : https://www.ti.com/lit/ug/spruh77c/spruh77c.pdf

Signed-off-by: Bastien Curutchet <bastien.curutchet@bootlin.com>
---
 drivers/mtd/nand/raw/davinci_nand.c | 78 +++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index 563045c7ce08..2d0c564c8d17 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -14,6 +14,7 @@
 #include <linux/err.h>
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
+#include <linux/memory/ti-aemif.h>
 #include <linux/module.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
@@ -44,6 +45,9 @@
 #define	MASK_ALE		0x08
 #define	MASK_CLE		0x10
 
+#define MAX_TSU_PS		3000	/* Input setup time in ps */
+#define MAX_TH_PS		1600	/* Input hold time in ps */
+
 struct davinci_nand_pdata {
 	uint32_t		mask_ale;
 	uint32_t		mask_cle;
@@ -120,6 +124,7 @@ struct davinci_nand_info {
 	uint32_t		core_chipsel;
 
 	struct clk		*clk;
+	struct aemif_device	*aemif;
 };
 
 static DEFINE_SPINLOCK(davinci_nand_lock);
@@ -767,9 +772,81 @@ static int davinci_nand_exec_op(struct nand_chip *chip,
 	return 0;
 }
 
+#define TO_CYCLES(ps, period_ns) (DIV_ROUND_UP((ps) / 1000, (period_ns)))
+
+static int davinci_nand_setup_interface(struct nand_chip *chip, int chipnr,
+					const struct nand_interface_config *conf)
+{
+	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip));
+	const struct nand_sdr_timings *sdr;
+	struct aemif_cs_timings timings;
+	s32 cfg, min, cyc_ns;
+
+	cyc_ns = 1000000000 / clk_get_rate(info->clk);
+
+	sdr = nand_get_sdr_timings(conf);
+	if (IS_ERR(sdr))
+		return PTR_ERR(sdr);
+
+	cfg = TO_CYCLES(sdr->tCLR_min, cyc_ns) - 1;
+	timings.rsetup = cfg > 0 ? cfg : 0;
+
+	cfg = max_t(s32, TO_CYCLES(sdr->tREA_max + MAX_TSU_PS, cyc_ns),
+		    TO_CYCLES(sdr->tRP_min, cyc_ns)) - 1;
+	timings.rstrobe = cfg > 0 ? cfg : 0;
+
+	min = TO_CYCLES(sdr->tCEA_max + MAX_TSU_PS, cyc_ns) - 2;
+	while ((s32)(timings.rsetup + timings.rstrobe) < min)
+		timings.rstrobe++;
+
+	cfg = TO_CYCLES((s32)(MAX_TH_PS - sdr->tCHZ_max), cyc_ns) - 1;
+	timings.rhold = cfg > 0 ? cfg : 0;
+
+	min = TO_CYCLES(sdr->tRC_min, cyc_ns) - 3;
+	while ((s32)(timings.rsetup + timings.rstrobe + timings.rhold) < min)
+		timings.rhold++;
+
+	cfg = TO_CYCLES((s32)(sdr->tRHZ_max - (timings.rhold + 1) * cyc_ns * 1000), cyc_ns);
+	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCHZ_max, cyc_ns)) - 1;
+	timings.ta = cfg > 0 ? cfg : 0;
+
+	cfg = TO_CYCLES(sdr->tWP_min, cyc_ns) - 1;
+	timings.wstrobe = cfg > 0 ? cfg : 0;
+
+	cfg = max_t(s32, TO_CYCLES(sdr->tCLS_min, cyc_ns), TO_CYCLES(sdr->tALS_min, cyc_ns));
+	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCS_min, cyc_ns)) - 1;
+	timings.wsetup = cfg > 0 ? cfg : 0;
+
+	min = TO_CYCLES(sdr->tDS_min, cyc_ns) - 2;
+	while ((s32)(timings.wsetup + timings.wstrobe) < min)
+		timings.wstrobe++;
+
+	cfg = max_t(s32, TO_CYCLES(sdr->tCLH_min, cyc_ns), TO_CYCLES(sdr->tALH_min, cyc_ns));
+	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCH_min, cyc_ns));
+	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tDH_min, cyc_ns)) - 1;
+	timings.whold = cfg > 0 ? cfg : 0;
+
+	min = TO_CYCLES(sdr->tWC_min, cyc_ns) - 2;
+	while ((s32)(timings.wsetup + timings.wstrobe + timings.whold) < min)
+		timings.whold++;
+
+	dev_dbg(&info->pdev->dev, "RSETUP %x RSTROBE %x RHOLD %x\n",
+		timings.rsetup, timings.rstrobe, timings.rhold);
+	dev_dbg(&info->pdev->dev, "TA %x\n", timings.ta);
+	dev_dbg(&info->pdev->dev, "WSETUP %x WSTROBE %x WHOLD %x\n",
+		timings.wsetup, timings.wstrobe, timings.whold);
+
+	if (aemif_set_cs_timings(info->aemif, info->core_chipsel, &timings) < 0)
+		dev_info(&info->pdev->dev,
+			 "Failed to dynamically update the CS timings, keep them unchanged");
+
+	return 0;
+}
+
 static const struct nand_controller_ops davinci_nand_controller_ops = {
 	.attach_chip = davinci_nand_attach_chip,
 	.exec_op = davinci_nand_exec_op,
+	.setup_interface = davinci_nand_setup_interface,
 };
 
 static int nand_davinci_probe(struct platform_device *pdev)
@@ -832,6 +909,7 @@ static int nand_davinci_probe(struct platform_device *pdev)
 	info->pdev		= pdev;
 	info->base		= base;
 	info->vaddr		= vaddr;
+	info->aemif		= dev_get_drvdata(pdev->dev.parent);
 
 	mtd			= nand_to_mtd(&info->chip);
 	mtd->dev.parent		= &pdev->dev;
-- 
2.47.0

Re: [PATCH v2 6/6] mtd: rawnand: davinci: Implement setup_interface() operation

Posted by Miquel Raynal 1 year, 2 months ago

Hi Bastien,

On 06/11/2024 at 09:55:07 +01, Bastien Curutchet <bastien.curutchet@bootlin.com> wrote:

> The setup_interface() operation isn't implemented. It forces the driver
> to use the ONFI mode 0, though it could use more optimal modes.
>
> Implement the setup_interface() operation. It uses the
> aemif_set_cs_timings() function from the AEMIF driver to update the
> chip select timings. The calculation of the register's contents is
> directly extracted from §20.3.2.3 of the DaVinci TRM [1]
>
> These timings are previously set by the AEMIF driver itself from
> device-tree properties. Therefore, IMHO, failing to update them in the
> setup_interface() isn't critical, which is why 0 is returned even when
> timings aren't updated.

Did you experience failures? Because I'd be more conservative and error
out loudly when something is wrong. In general it is a safest approach
on the long term. Here maybe you have good reasons to do differently, in
this case I am all ears.

> MAX_TH_PS and MAX_TSU_PS are the worst case timings based on the
> Keystone2 and DaVinci datasheets.
>
> [1] : https://www.ti.com/lit/ug/spruh77c/spruh77c.pdf
>
> Signed-off-by: Bastien Curutchet <bastien.curutchet@bootlin.com>
> ---
>  drivers/mtd/nand/raw/davinci_nand.c | 78 +++++++++++++++++++++++++++++
>  1 file changed, 78 insertions(+)
>
> diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
> index 563045c7ce08..2d0c564c8d17 100644
> --- a/drivers/mtd/nand/raw/davinci_nand.c
> +++ b/drivers/mtd/nand/raw/davinci_nand.c
> @@ -14,6 +14,7 @@
>  #include <linux/err.h>
>  #include <linux/iopoll.h>
>  #include <linux/kernel.h>
> +#include <linux/memory/ti-aemif.h>
>  #include <linux/module.h>
>  #include <linux/mtd/rawnand.h>
>  #include <linux/mtd/partitions.h>
> @@ -44,6 +45,9 @@
>  #define	MASK_ALE		0x08
>  #define	MASK_CLE		0x10
>  
> +#define MAX_TSU_PS		3000	/* Input setup time in ps */
> +#define MAX_TH_PS		1600	/* Input hold time in ps */
> +
>  struct davinci_nand_pdata {
>  	uint32_t		mask_ale;
>  	uint32_t		mask_cle;
> @@ -120,6 +124,7 @@ struct davinci_nand_info {
>  	uint32_t		core_chipsel;
>  
>  	struct clk		*clk;
> +	struct aemif_device	*aemif;
>  };
>  
>  static DEFINE_SPINLOCK(davinci_nand_lock);
> @@ -767,9 +772,81 @@ static int davinci_nand_exec_op(struct nand_chip *chip,
>  	return 0;
>  }
>  
> +#define TO_CYCLES(ps, period_ns) (DIV_ROUND_UP((ps) / 1000, (period_ns)))
> +
> +static int davinci_nand_setup_interface(struct nand_chip *chip, int chipnr,
> +					const struct nand_interface_config *conf)
> +{
> +	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip));
> +	const struct nand_sdr_timings *sdr;
> +	struct aemif_cs_timings timings;
> +	s32 cfg, min, cyc_ns;
> +
> +	cyc_ns = 1000000000 / clk_get_rate(info->clk);
> +
> +	sdr = nand_get_sdr_timings(conf);
> +	if (IS_ERR(sdr))
> +		return PTR_ERR(sdr);
> +
> +	cfg = TO_CYCLES(sdr->tCLR_min, cyc_ns) - 1;
> +	timings.rsetup = cfg > 0 ? cfg : 0;
> +
> +	cfg = max_t(s32, TO_CYCLES(sdr->tREA_max + MAX_TSU_PS, cyc_ns),
> +		    TO_CYCLES(sdr->tRP_min, cyc_ns)) - 1;
> +	timings.rstrobe = cfg > 0 ? cfg : 0;
> +
> +	min = TO_CYCLES(sdr->tCEA_max + MAX_TSU_PS, cyc_ns) - 2;
> +	while ((s32)(timings.rsetup + timings.rstrobe) < min)
> +		timings.rstrobe++;

I see a lot of while loops which just stop if you reach a min/max, I
believe a slightly more robust approach should be attempted, and
returning errors when these limits are crossed would be probably
beneficial on the long term?

> +
> +	cfg = TO_CYCLES((s32)(MAX_TH_PS - sdr->tCHZ_max), cyc_ns) - 1;
> +	timings.rhold = cfg > 0 ? cfg : 0;
> +
> +	min = TO_CYCLES(sdr->tRC_min, cyc_ns) - 3;
> +	while ((s32)(timings.rsetup + timings.rstrobe + timings.rhold) < min)
> +		timings.rhold++;
> +
> +	cfg = TO_CYCLES((s32)(sdr->tRHZ_max - (timings.rhold + 1) * cyc_ns * 1000), cyc_ns);
> +	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCHZ_max, cyc_ns)) - 1;
> +	timings.ta = cfg > 0 ? cfg : 0;
> +
> +	cfg = TO_CYCLES(sdr->tWP_min, cyc_ns) - 1;
> +	timings.wstrobe = cfg > 0 ? cfg : 0;
> +
> +	cfg = max_t(s32, TO_CYCLES(sdr->tCLS_min, cyc_ns), TO_CYCLES(sdr->tALS_min, cyc_ns));
> +	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCS_min, cyc_ns)) - 1;
> +	timings.wsetup = cfg > 0 ? cfg : 0;
> +
> +	min = TO_CYCLES(sdr->tDS_min, cyc_ns) - 2;
> +	while ((s32)(timings.wsetup + timings.wstrobe) < min)
> +		timings.wstrobe++;
> +
> +	cfg = max_t(s32, TO_CYCLES(sdr->tCLH_min, cyc_ns), TO_CYCLES(sdr->tALH_min, cyc_ns));
> +	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCH_min, cyc_ns));
> +	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tDH_min, cyc_ns)) - 1;
> +	timings.whold = cfg > 0 ? cfg : 0;
> +
> +	min = TO_CYCLES(sdr->tWC_min, cyc_ns) - 2;
> +	while ((s32)(timings.wsetup + timings.wstrobe + timings.whold) < min)
> +		timings.whold++;
> +
> +	dev_dbg(&info->pdev->dev, "RSETUP %x RSTROBE %x RHOLD %x\n",
> +		timings.rsetup, timings.rstrobe, timings.rhold);
> +	dev_dbg(&info->pdev->dev, "TA %x\n", timings.ta);
> +	dev_dbg(&info->pdev->dev, "WSETUP %x WSTROBE %x WHOLD %x\n",
> +		timings.wsetup, timings.wstrobe, timings.whold);
> +

Here you probably want to exit in the NAND_DATA_IFACE_CHECK_ONLY case.

> +	if (aemif_set_cs_timings(info->aemif, info->core_chipsel, &timings) < 0)
> +		dev_info(&info->pdev->dev,
> +			 "Failed to dynamically update the CS timings, keep them unchanged");
> +
> +	return 0;
> +}
> +

Thanks,
Miquèl

Re: [PATCH v2 6/6] mtd: rawnand: davinci: Implement setup_interface() operation

Posted by Bastien Curutchet 1 year, 2 months ago

Hi Miquèl,

On 11/11/24 8:32 PM, Miquel Raynal wrote:
> Hi Bastien,
> 
> On 06/11/2024 at 09:55:07 +01, Bastien Curutchet <bastien.curutchet@bootlin.com> wrote:
> 
>> The setup_interface() operation isn't implemented. It forces the driver
>> to use the ONFI mode 0, though it could use more optimal modes.
>>
>> Implement the setup_interface() operation. It uses the
>> aemif_set_cs_timings() function from the AEMIF driver to update the
>> chip select timings. The calculation of the register's contents is
>> directly extracted from §20.3.2.3 of the DaVinci TRM [1]
>>
>> These timings are previously set by the AEMIF driver itself from
>> device-tree properties. Therefore, IMHO, failing to update them in the
>> setup_interface() isn't critical, which is why 0 is returned even when
>> timings aren't updated.
> 
> Did you experience failures? Because I'd be more conservative and error
> out loudly when something is wrong. In general it is a safest approach
> on the long term. Here maybe you have good reasons to do differently, in
> this case I am all ears.
> 

The DaVinci's configuration isn't very wide so if its reference clock 
rate is too high, mode 0 timings can be too slow to fit while higher 
modes will fit. So returning -EINVAL here will cause the probe to fail 
with 'Failed to configure data interface to SDR timing mode 0' while 
higher modes would have worked.

>> MAX_TH_PS and MAX_TSU_PS are the worst case timings based on the
>> Keystone2 and DaVinci datasheets.
>>
>> [1] : https://www.ti.com/lit/ug/spruh77c/spruh77c.pdf
>>
>> Signed-off-by: Bastien Curutchet <bastien.curutchet@bootlin.com>
>> ---
>>   drivers/mtd/nand/raw/davinci_nand.c | 78 +++++++++++++++++++++++++++++
>>   1 file changed, 78 insertions(+)
>>
>> diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
>> index 563045c7ce08..2d0c564c8d17 100644
>> --- a/drivers/mtd/nand/raw/davinci_nand.c
>> +++ b/drivers/mtd/nand/raw/davinci_nand.c
>> @@ -14,6 +14,7 @@
>>   #include <linux/err.h>
>>   #include <linux/iopoll.h>
>>   #include <linux/kernel.h>
>> +#include <linux/memory/ti-aemif.h>
>>   #include <linux/module.h>
>>   #include <linux/mtd/rawnand.h>
>>   #include <linux/mtd/partitions.h>
>> @@ -44,6 +45,9 @@
>>   #define	MASK_ALE		0x08
>>   #define	MASK_CLE		0x10
>>   
>> +#define MAX_TSU_PS		3000	/* Input setup time in ps */
>> +#define MAX_TH_PS		1600	/* Input hold time in ps */
>> +
>>   struct davinci_nand_pdata {
>>   	uint32_t		mask_ale;
>>   	uint32_t		mask_cle;
>> @@ -120,6 +124,7 @@ struct davinci_nand_info {
>>   	uint32_t		core_chipsel;
>>   
>>   	struct clk		*clk;
>> +	struct aemif_device	*aemif;
>>   };
>>   
>>   static DEFINE_SPINLOCK(davinci_nand_lock);
>> @@ -767,9 +772,81 @@ static int davinci_nand_exec_op(struct nand_chip *chip,
>>   	return 0;
>>   }
>>   
>> +#define TO_CYCLES(ps, period_ns) (DIV_ROUND_UP((ps) / 1000, (period_ns)))
>> +
>> +static int davinci_nand_setup_interface(struct nand_chip *chip, int chipnr,
>> +					const struct nand_interface_config *conf)
>> +{
>> +	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip));
>> +	const struct nand_sdr_timings *sdr;
>> +	struct aemif_cs_timings timings;
>> +	s32 cfg, min, cyc_ns;
>> +
>> +	cyc_ns = 1000000000 / clk_get_rate(info->clk);
>> +
>> +	sdr = nand_get_sdr_timings(conf);
>> +	if (IS_ERR(sdr))
>> +		return PTR_ERR(sdr);
>> +
>> +	cfg = TO_CYCLES(sdr->tCLR_min, cyc_ns) - 1;
>> +	timings.rsetup = cfg > 0 ? cfg : 0;
>> +
>> +	cfg = max_t(s32, TO_CYCLES(sdr->tREA_max + MAX_TSU_PS, cyc_ns),
>> +		    TO_CYCLES(sdr->tRP_min, cyc_ns)) - 1;
>> +	timings.rstrobe = cfg > 0 ? cfg : 0;
>> +
>> +	min = TO_CYCLES(sdr->tCEA_max + MAX_TSU_PS, cyc_ns) - 2;
>> +	while ((s32)(timings.rsetup + timings.rstrobe) < min)
>> +		timings.rstrobe++;
> 
> I see a lot of while loops which just stop if you reach a min/max, I
> believe a slightly more robust approach should be attempted, and
> returning errors when these limits are crossed would be probably
> beneficial on the long term?
> 

This comes from the DaVinci NAND controller's documentation (cf p908 of 
https://www.ti.com/lit/ug/spruh77c/spruh77c.pdf). A first formula, gives 
the RSETUP timing, a second one the RSTROBE timing and then a third 
formula gives a constraint that has to be met by the sum of RSETUP and 
RSTROBE.
Then the validity of the timings themselves is checked by the 
aemif_set_cs_timings() function.

It works the same way for WSETUP, WSTROBE and WHOLD with the other 
while() loops.

>> +
>> +	cfg = TO_CYCLES((s32)(MAX_TH_PS - sdr->tCHZ_max), cyc_ns) - 1;
>> +	timings.rhold = cfg > 0 ? cfg : 0;
>> +
>> +	min = TO_CYCLES(sdr->tRC_min, cyc_ns) - 3;
>> +	while ((s32)(timings.rsetup + timings.rstrobe + timings.rhold) < min)
>> +		timings.rhold++;
>> +
>> +	cfg = TO_CYCLES((s32)(sdr->tRHZ_max - (timings.rhold + 1) * cyc_ns * 1000), cyc_ns);
>> +	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCHZ_max, cyc_ns)) - 1;
>> +	timings.ta = cfg > 0 ? cfg : 0;
>> +
>> +	cfg = TO_CYCLES(sdr->tWP_min, cyc_ns) - 1;
>> +	timings.wstrobe = cfg > 0 ? cfg : 0;
>> +
>> +	cfg = max_t(s32, TO_CYCLES(sdr->tCLS_min, cyc_ns), TO_CYCLES(sdr->tALS_min, cyc_ns));
>> +	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCS_min, cyc_ns)) - 1;
>> +	timings.wsetup = cfg > 0 ? cfg : 0;
>> +
>> +	min = TO_CYCLES(sdr->tDS_min, cyc_ns) - 2;
>> +	while ((s32)(timings.wsetup + timings.wstrobe) < min)
>> +		timings.wstrobe++;
>> +
>> +	cfg = max_t(s32, TO_CYCLES(sdr->tCLH_min, cyc_ns), TO_CYCLES(sdr->tALH_min, cyc_ns));
>> +	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tCH_min, cyc_ns));
>> +	cfg = max_t(s32, cfg, TO_CYCLES(sdr->tDH_min, cyc_ns)) - 1;
>> +	timings.whold = cfg > 0 ? cfg : 0;
>> +
>> +	min = TO_CYCLES(sdr->tWC_min, cyc_ns) - 2;
>> +	while ((s32)(timings.wsetup + timings.wstrobe + timings.whold) < min)
>> +		timings.whold++;
>> +
>> +	dev_dbg(&info->pdev->dev, "RSETUP %x RSTROBE %x RHOLD %x\n",
>> +		timings.rsetup, timings.rstrobe, timings.rhold);
>> +	dev_dbg(&info->pdev->dev, "TA %x\n", timings.ta);
>> +	dev_dbg(&info->pdev->dev, "WSETUP %x WSTROBE %x WHOLD %x\n",
>> +		timings.wsetup, timings.wstrobe, timings.whold);
>> +
> 
> Here you probably want to exit in the NAND_DATA_IFACE_CHECK_ONLY case.
> 

I had missed this NAND_DATA_IFACE_CHECK_ONLY feature. In fact, 
additionnal checks are done in the aemif_set_cs_timings() below. I'll 
add an 'aemif_check_cs_timings()' function in AEMIF's driver in next 
iteration.

>> +	if (aemif_set_cs_timings(info->aemif, info->core_chipsel, &timings) < 0)
>> +		dev_info(&info->pdev->dev,
>> +			 "Failed to dynamically update the CS timings, keep them unchanged");
>> +
>> +	return 0;
>> +}
>> +
> 
> Thanks,
> Miquèl

[PATCH v2 1/6] memory: ti-aemif: Create aemif_set_cs_timings()
[PATCH v2 2/6] memory: ti-aemif: Export aemif_set_cs_timings()
[PATCH v2 3/6] mtd: rawnand: davinci: Always depends on TI_AEMIF
[PATCH v2 4/6] mtd: rawnand: davinci: Order headers alphabetically
[PATCH v2 5/6] mtd: rawnand: davinci: Add clock resource
[PATCH v2 6/6] mtd: rawnand: davinci: Implement setup_interface() operation