[PATCH v2 1/9] drm/panthor: Add 64-bit and poll register accessors

Karunika Choo posted 9 patches 9 months ago
[PATCH v2 1/9] drm/panthor: Add 64-bit and poll register accessors
Posted by Karunika Choo 9 months ago
This patch adds 64-bit register accessors to simplify register access in
Panthor. It also adds 32-bit and 64-bit variants for read_poll_timeout.

Signed-off-by: Karunika Choo <karunika.choo@arm.com>
---
 drivers/gpu/drm/panthor/panthor_regs.h | 55 ++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h
index 42dc3fedb0d4..7ec4a1d04e20 100644
--- a/drivers/gpu/drm/panthor/panthor_regs.h
+++ b/drivers/gpu/drm/panthor/panthor_regs.h
@@ -238,4 +238,59 @@
 #define gpu_read(dev, reg) \
 	readl((dev)->iomem + (reg))
 
+#define gpu_read_relaxed(dev, reg) readl_relaxed((dev)->iomem + (reg))
+
+#define gpu_write64(dev, reg, data)                            \
+	do {                                                   \
+		u64 __val = (u64)(data);                       \
+		gpu_write(dev, reg, lower_32_bits(__val));     \
+		gpu_write(dev, reg + 4, upper_32_bits(__val)); \
+	} while (0)
+
+#define gpu_read64(dev, reg) \
+	(gpu_read(dev, reg) | ((u64)gpu_read(dev, reg + 4) << 32))
+
+#define gpu_read64_relaxed(dev, reg)  \
+	(gpu_read_relaxed(dev, reg) | \
+	 ((u64)gpu_read_relaxed(dev, reg + 4) << 32))
+
+#define gpu_read64_sync(dev, reg)                     \
+	({                                            \
+		u32 lo, hi1, hi2;                     \
+		do {                                  \
+			hi1 = gpu_read(dev, reg + 4); \
+			lo = gpu_read(dev, reg);      \
+			hi2 = gpu_read(dev, reg + 4); \
+		} while (hi1 != hi2);                 \
+		lo | ((u64)hi2 << 32);                \
+	})
+
+#define gpu_read_poll_timeout(dev, reg, val, cond, delay_us, timeout_us)    \
+	read_poll_timeout(gpu_read, val, cond, delay_us, timeout_us, false, \
+			  dev, reg)
+
+#define gpu_read_poll_timeout_atomic(dev, reg, val, cond, delay_us,         \
+				     timeout_us)                            \
+	read_poll_timeout_atomic(gpu_read, val, cond, delay_us, timeout_us, \
+				 false, dev, reg)
+
+#define gpu_read64_poll_timeout(dev, reg, val, cond, delay_us, timeout_us)    \
+	read_poll_timeout(gpu_read64, val, cond, delay_us, timeout_us, false, \
+			  dev, reg)
+
+#define gpu_read64_poll_timeout_atomic(dev, reg, val, cond, delay_us,         \
+				       timeout_us)                            \
+	read_poll_timeout_atomic(gpu_read64, val, cond, delay_us, timeout_us, \
+				 false, dev, reg)
+
+#define gpu_read_relaxed_poll_timeout_atomic(dev, reg, val, cond, delay_us, \
+					     timeout_us)                    \
+	read_poll_timeout_atomic(gpu_read_relaxed, val, cond, delay_us,     \
+				 timeout_us, false, dev, reg)
+
+#define gpu_read64_relaxed_poll_timeout(dev, reg, val, cond, delay_us,         \
+					timeout_us)                            \
+	read_poll_timeout(gpu_read64_relaxed, val, cond, delay_us, timeout_us, \
+			  false, dev, reg)
+
 #endif
-- 
2.47.1
Re: [PATCH v2 1/9] drm/panthor: Add 64-bit and poll register accessors
Posted by Boris Brezillon 9 months ago
On Thu, 20 Mar 2025 11:17:33 +0000
Karunika Choo <karunika.choo@arm.com> wrote:

> This patch adds 64-bit register accessors to simplify register access in
> Panthor. It also adds 32-bit and 64-bit variants for read_poll_timeout.
> 
> Signed-off-by: Karunika Choo <karunika.choo@arm.com>
> ---
>  drivers/gpu/drm/panthor/panthor_regs.h | 55 ++++++++++++++++++++++++++
>  1 file changed, 55 insertions(+)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h
> index 42dc3fedb0d4..7ec4a1d04e20 100644
> --- a/drivers/gpu/drm/panthor/panthor_regs.h
> +++ b/drivers/gpu/drm/panthor/panthor_regs.h
> @@ -238,4 +238,59 @@
>  #define gpu_read(dev, reg) \
>  	readl((dev)->iomem + (reg))
>  
> +#define gpu_read_relaxed(dev, reg) readl_relaxed((dev)->iomem + (reg))
> +
> +#define gpu_write64(dev, reg, data)                            \
> +	do {                                                   \
> +		u64 __val = (u64)(data);                       \
> +		gpu_write(dev, reg, lower_32_bits(__val));     \
> +		gpu_write(dev, reg + 4, upper_32_bits(__val)); \
> +	} while (0)

We're not doing funky name concatenation in these macros, so I'd rather
have them defined as static inline funcs in panthor_device.h. We
probably want to move the gpu_read/write definitions there as well if
we do that.

> +
> +#define gpu_read64(dev, reg) \
> +	(gpu_read(dev, reg) | ((u64)gpu_read(dev, reg + 4) << 32))
> +
> +#define gpu_read64_relaxed(dev, reg)  \
> +	(gpu_read_relaxed(dev, reg) | \
> +	 ((u64)gpu_read_relaxed(dev, reg + 4) << 32))
> +
> +#define gpu_read64_sync(dev, reg)                     \
> +	({                                            \
> +		u32 lo, hi1, hi2;                     \
> +		do {                                  \
> +			hi1 = gpu_read(dev, reg + 4); \
> +			lo = gpu_read(dev, reg);      \
> +			hi2 = gpu_read(dev, reg + 4); \
> +		} while (hi1 != hi2);                 \
> +		lo | ((u64)hi2 << 32);                \
> +	})

I would name that one gpu_read64_counter and make it a static inline
function. Note that we already have panthor_gpu_read_64bit_counter()
which does the same thing, so maybe move it there and rename it along
the way.

> +
> +#define gpu_read_poll_timeout(dev, reg, val, cond, delay_us, timeout_us)    \
> +	read_poll_timeout(gpu_read, val, cond, delay_us, timeout_us, false, \
> +			  dev, reg)
> +
> +#define gpu_read_poll_timeout_atomic(dev, reg, val, cond, delay_us,         \
> +				     timeout_us)                            \
> +	read_poll_timeout_atomic(gpu_read, val, cond, delay_us, timeout_us, \
> +				 false, dev, reg)
> +
> +#define gpu_read64_poll_timeout(dev, reg, val, cond, delay_us, timeout_us)    \
> +	read_poll_timeout(gpu_read64, val, cond, delay_us, timeout_us, false, \
> +			  dev, reg)
> +
> +#define gpu_read64_poll_timeout_atomic(dev, reg, val, cond, delay_us,         \
> +				       timeout_us)                            \
> +	read_poll_timeout_atomic(gpu_read64, val, cond, delay_us, timeout_us, \
> +				 false, dev, reg)
> +
> +#define gpu_read_relaxed_poll_timeout_atomic(dev, reg, val, cond, delay_us, \
> +					     timeout_us)                    \
> +	read_poll_timeout_atomic(gpu_read_relaxed, val, cond, delay_us,     \
> +				 timeout_us, false, dev, reg)
> +
> +#define gpu_read64_relaxed_poll_timeout(dev, reg, val, cond, delay_us,         \
> +					timeout_us)                            \
> +	read_poll_timeout(gpu_read64_relaxed, val, cond, delay_us, timeout_us, \
> +			  false, dev, reg)
> +
>  #endif
Re: [PATCH v2 1/9] drm/panthor: Add 64-bit and poll register accessors
Posted by Karunika Choo 8 months, 1 week ago
On 21/03/2025 07:48, Boris Brezillon wrote:
> On Thu, 20 Mar 2025 11:17:33 +0000
> Karunika Choo <karunika.choo@arm.com> wrote:
>
>> This patch adds 64-bit register accessors to simplify register access in
>> Panthor. It also adds 32-bit and 64-bit variants for read_poll_timeout.
>>
>> Signed-off-by: Karunika Choo <karunika.choo@arm.com>
>> ---
>>  drivers/gpu/drm/panthor/panthor_regs.h | 55 ++++++++++++++++++++++++++
>>  1 file changed, 55 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/panthor/panthor_regs.h
b/drivers/gpu/drm/panthor/panthor_regs.h
>> index 42dc3fedb0d4..7ec4a1d04e20 100644
>> --- a/drivers/gpu/drm/panthor/panthor_regs.h
>> +++ b/drivers/gpu/drm/panthor/panthor_regs.h
>> @@ -238,4 +238,59 @@
>>  #define gpu_read(dev, reg) \
>>  	readl((dev)->iomem + (reg))
>>
>> +#define gpu_read_relaxed(dev, reg) readl_relaxed((dev)->iomem + (reg))
>> +
>> +#define gpu_write64(dev, reg, data)                            \
>> +	do {                                                   \
>> +		u64 __val = (u64)(data);                       \
>> +		gpu_write(dev, reg, lower_32_bits(__val));     \
>> +		gpu_write(dev, reg + 4, upper_32_bits(__val)); \
>> +	} while (0)
>
> We're not doing funky name concatenation in these macros, so I'd rather
> have them defined as static inline funcs in panthor_device.h. We
> probably want to move the gpu_read/write definitions there as well if
> we do that.

I see where you're coming from, and it makes sense. I was thinking it
might be better to keep it in panthor_regs.h since that's the file we
include when accessing GPU registers. That said, we could certainly
convert them to static inline functions if you prefer.

>> +
>> +#define gpu_read64(dev, reg) \
>> +	(gpu_read(dev, reg) | ((u64)gpu_read(dev, reg + 4) << 32))
>> +
>> +#define gpu_read64_relaxed(dev, reg)  \
>> +	(gpu_read_relaxed(dev, reg) | \
>> +	 ((u64)gpu_read_relaxed(dev, reg + 4) << 32))
>> +
>> +#define gpu_read64_sync(dev, reg)                     \
>> +	({                                            \
>> +		u32 lo, hi1, hi2;                     \
>> +		do {                                  \
>> +			hi1 = gpu_read(dev, reg + 4); \
>> +			lo = gpu_read(dev, reg);      \
>> +			hi2 = gpu_read(dev, reg + 4); \
>> +		} while (hi1 != hi2);                 \
>> +		lo | ((u64)hi2 << 32);                \
>> +	})
>
> I would name that one gpu_read64_counter and make it a static inline
> function. Note that we already have panthor_gpu_read_64bit_counter()
> which does the same thing, so maybe move it there and rename it along
> the way.

Happy to rename this to gpu_read64_counter in v3, if you're okay with
us keeping the macros/functions in this file.

Kind regards,
Karunika

>> +
>> +#define gpu_read_poll_timeout(dev, reg, val, cond, delay_us,
timeout_us)    \
>> +	read_poll_timeout(gpu_read, val, cond, delay_us, timeout_us, false, \
>> +			  dev, reg)
>> +
>> +#define gpu_read_poll_timeout_atomic(dev, reg, val, cond, delay_us,
       \
>> +				     timeout_us)                            \
>> +	read_poll_timeout_atomic(gpu_read, val, cond, delay_us, timeout_us, \
>> +				 false, dev, reg)
>> +
>> +#define gpu_read64_poll_timeout(dev, reg, val, cond, delay_us,
timeout_us)    \
>> +	read_poll_timeout(gpu_read64, val, cond, delay_us, timeout_us, false, \
>> +			  dev, reg)
>> +
>> +#define gpu_read64_poll_timeout_atomic(dev, reg, val, cond,
delay_us,         \
>> +				       timeout_us)                            \
>> +	read_poll_timeout_atomic(gpu_read64, val, cond, delay_us, timeout_us, \
>> +				 false, dev, reg)
>> +
>> +#define gpu_read_relaxed_poll_timeout_atomic(dev, reg, val, cond,
delay_us, \
>> +					     timeout_us)                    \
>> +	read_poll_timeout_atomic(gpu_read_relaxed, val, cond, delay_us,     \
>> +				 timeout_us, false, dev, reg)
>> +
>> +#define gpu_read64_relaxed_poll_timeout(dev, reg, val, cond,
delay_us,         \
>> +					timeout_us)                            \
>> +	read_poll_timeout(gpu_read64_relaxed, val, cond, delay_us,
timeout_us, \
>> +			  false, dev, reg)
>> +
>>  #endif
>
Re: [PATCH v2 1/9] drm/panthor: Add 64-bit and poll register accessors
Posted by Boris Brezillon 8 months, 1 week ago
On Wed, 9 Apr 2025 14:00:54 +0100
Karunika Choo <karunika.choo@arm.com> wrote:

> On 21/03/2025 07:48, Boris Brezillon wrote:
> > On Thu, 20 Mar 2025 11:17:33 +0000
> > Karunika Choo <karunika.choo@arm.com> wrote:
> >  
> >> This patch adds 64-bit register accessors to simplify register access in
> >> Panthor. It also adds 32-bit and 64-bit variants for read_poll_timeout.
> >>
> >> Signed-off-by: Karunika Choo <karunika.choo@arm.com>
> >> ---
> >>  drivers/gpu/drm/panthor/panthor_regs.h | 55 ++++++++++++++++++++++++++
> >>  1 file changed, 55 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/panthor/panthor_regs.h  
> b/drivers/gpu/drm/panthor/panthor_regs.h
> >> index 42dc3fedb0d4..7ec4a1d04e20 100644
> >> --- a/drivers/gpu/drm/panthor/panthor_regs.h
> >> +++ b/drivers/gpu/drm/panthor/panthor_regs.h
> >> @@ -238,4 +238,59 @@
> >>  #define gpu_read(dev, reg) \
> >>  	readl((dev)->iomem + (reg))
> >>
> >> +#define gpu_read_relaxed(dev, reg) readl_relaxed((dev)->iomem + (reg))
> >> +
> >> +#define gpu_write64(dev, reg, data)                            \
> >> +	do {                                                   \
> >> +		u64 __val = (u64)(data);                       \
> >> +		gpu_write(dev, reg, lower_32_bits(__val));     \
> >> +		gpu_write(dev, reg + 4, upper_32_bits(__val)); \
> >> +	} while (0)  
> >
> > We're not doing funky name concatenation in these macros, so I'd rather
> > have them defined as static inline funcs in panthor_device.h. We
> > probably want to move the gpu_read/write definitions there as well if
> > we do that.  
> 
> I see where you're coming from, and it makes sense. I was thinking it
> might be better to keep it in panthor_regs.h since that's the file we
> include when accessing GPU registers.

Well, yes, but also gpu_write/read() take a panthor_device, which is
defined in panthor_device.h. I guess we can keep those in
panthor_regs.h and include panthor_device.h from panthor_regs.h if
there's no circular inclusion. I'm fine either way.

> That said, we could certainly
> convert them to static inline functions if you prefer.

Yeah, I'd prefer that.

> 
> >> +
> >> +#define gpu_read64(dev, reg) \
> >> +	(gpu_read(dev, reg) | ((u64)gpu_read(dev, reg + 4) << 32))
> >> +
> >> +#define gpu_read64_relaxed(dev, reg)  \
> >> +	(gpu_read_relaxed(dev, reg) | \
> >> +	 ((u64)gpu_read_relaxed(dev, reg + 4) << 32))
> >> +
> >> +#define gpu_read64_sync(dev, reg)                     \
> >> +	({                                            \
> >> +		u32 lo, hi1, hi2;                     \
> >> +		do {                                  \
> >> +			hi1 = gpu_read(dev, reg + 4); \
> >> +			lo = gpu_read(dev, reg);      \
> >> +			hi2 = gpu_read(dev, reg + 4); \
> >> +		} while (hi1 != hi2);                 \
> >> +		lo | ((u64)hi2 << 32);                \
> >> +	})  
> >
> > I would name that one gpu_read64_counter and make it a static inline
> > function. Note that we already have panthor_gpu_read_64bit_counter()
> > which does the same thing, so maybe move it there and rename it along
> > the way.  
> 
> Happy to rename this to gpu_read64_counter in v3, if you're okay with
> us keeping the macros/functions in this file.

Renaming the function is orthogonal to moving its definition to a
different header, no? I'm not sure I see why one depends on the other.
Re: [PATCH v2 1/9] drm/panthor: Add 64-bit and poll register accessors
Posted by Karunika Choo 8 months, 1 week ago
On 10/04/2025 14:28, Boris Brezillon wrote:
> On Wed, 9 Apr 2025 14:00:54 +0100
> Karunika Choo <karunika.choo@arm.com> wrote:
> 
>> On 21/03/2025 07:48, Boris Brezillon wrote:
>>> On Thu, 20 Mar 2025 11:17:33 +0000
>>> Karunika Choo <karunika.choo@arm.com> wrote:
>>>  
>>>> This patch adds 64-bit register accessors to simplify register access in
>>>> Panthor. It also adds 32-bit and 64-bit variants for read_poll_timeout.
>>>>
>>>> Signed-off-by: Karunika Choo <karunika.choo@arm.com>
>>>> ---
>>>>  drivers/gpu/drm/panthor/panthor_regs.h | 55 ++++++++++++++++++++++++++
>>>>  1 file changed, 55 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/panthor/panthor_regs.h  
>> b/drivers/gpu/drm/panthor/panthor_regs.h
>>>> index 42dc3fedb0d4..7ec4a1d04e20 100644
>>>> --- a/drivers/gpu/drm/panthor/panthor_regs.h
>>>> +++ b/drivers/gpu/drm/panthor/panthor_regs.h
>>>> @@ -238,4 +238,59 @@
>>>>  #define gpu_read(dev, reg) \
>>>>  	readl((dev)->iomem + (reg))
>>>>
>>>> +#define gpu_read_relaxed(dev, reg) readl_relaxed((dev)->iomem + (reg))
>>>> +
>>>> +#define gpu_write64(dev, reg, data)                            \
>>>> +	do {                                                   \
>>>> +		u64 __val = (u64)(data);                       \
>>>> +		gpu_write(dev, reg, lower_32_bits(__val));     \
>>>> +		gpu_write(dev, reg + 4, upper_32_bits(__val)); \
>>>> +	} while (0)  
>>>
>>> We're not doing funky name concatenation in these macros, so I'd rather
>>> have them defined as static inline funcs in panthor_device.h. We
>>> probably want to move the gpu_read/write definitions there as well if
>>> we do that.  
>>
>> I see where you're coming from, and it makes sense. I was thinking it
>> might be better to keep it in panthor_regs.h since that's the file we
>> include when accessing GPU registers.
> 
> Well, yes, but also gpu_write/read() take a panthor_device, which is
> defined in panthor_device.h. I guess we can keep those in
> panthor_regs.h and include panthor_device.h from panthor_regs.h if
> there's no circular inclusion. I'm fine either way.
> 
>> That said, we could certainly
>> convert them to static inline functions if you prefer.
> 
> Yeah, I'd prefer that.
> 

Apologies for the back-and-forth. You’re absolutely right—it’s a good
point that struct panthor_device is defined in panthor_device.h. I
have moved these functions there as static inline functions, in a
separate patch outside this series.

Link: https://lore.kernel.org/lkml/20250410163546.919749-1-karunika.choo@arm.com/

Kind regards,
Karunika Choo

>>
>>>> +
>>>> +#define gpu_read64(dev, reg) \
>>>> +	(gpu_read(dev, reg) | ((u64)gpu_read(dev, reg + 4) << 32))
>>>> +
>>>> +#define gpu_read64_relaxed(dev, reg)  \
>>>> +	(gpu_read_relaxed(dev, reg) | \
>>>> +	 ((u64)gpu_read_relaxed(dev, reg + 4) << 32))
>>>> +
>>>> +#define gpu_read64_sync(dev, reg)                     \
>>>> +	({                                            \
>>>> +		u32 lo, hi1, hi2;                     \
>>>> +		do {                                  \
>>>> +			hi1 = gpu_read(dev, reg + 4); \
>>>> +			lo = gpu_read(dev, reg);      \
>>>> +			hi2 = gpu_read(dev, reg + 4); \
>>>> +		} while (hi1 != hi2);                 \
>>>> +		lo | ((u64)hi2 << 32);                \
>>>> +	})  
>>>
>>> I would name that one gpu_read64_counter and make it a static inline
>>> function. Note that we already have panthor_gpu_read_64bit_counter()
>>> which does the same thing, so maybe move it there and rename it along
>>> the way.  
>>
>> Happy to rename this to gpu_read64_counter in v3, if you're okay with
>> us keeping the macros/functions in this file.
> 
> Renaming the function is orthogonal to moving its definition to a
> different header, no? I'm not sure I see why one depends on the other.