[v3] tegra-video: add CSI support for Tegra20 and Tegra30

[PATCH v3 15/22] staging: media: tegra-video: tegra20: simplify format align calculations

Posted by Svyatoslav Ryhel 4 months, 2 weeks ago

Simplify format align calculations by slightly modifying supported formats
structure. Adjusted U and V offset calculations for planar formats since
YUV420P bits per pixel is 12 (1 full plane for Y + 2 * 1/4 planes for U
and V) so stride is width * 3/2, but offset must be calculated with plain
width since each plain has stride width * 1. This aligns with downstream
behavior which uses same approach for offset calculations.

Signed-off-by: Svyatoslav Ryhel <clamor95@gmail.com>
---
 drivers/staging/media/tegra-video/tegra20.c | 58 +++++++++------------
 drivers/staging/media/tegra-video/vi.h      |  3 +-
 2 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/drivers/staging/media/tegra-video/tegra20.c b/drivers/staging/media/tegra-video/tegra20.c
index 7c3ff843235d..b7a39723dfc2 100644
--- a/drivers/staging/media/tegra-video/tegra20.c
+++ b/drivers/staging/media/tegra-video/tegra20.c
@@ -280,20 +280,8 @@ static void tegra20_fmt_align(struct v4l2_pix_format *pix, unsigned int bpp)
 	pix->width  = clamp(pix->width,  TEGRA20_MIN_WIDTH,  TEGRA20_MAX_WIDTH);
 	pix->height = clamp(pix->height, TEGRA20_MIN_HEIGHT, TEGRA20_MAX_HEIGHT);
 
-	switch (pix->pixelformat) {
-	case V4L2_PIX_FMT_UYVY:
-	case V4L2_PIX_FMT_VYUY:
-	case V4L2_PIX_FMT_YUYV:
-	case V4L2_PIX_FMT_YVYU:
-		pix->bytesperline = roundup(pix->width, 2) * 2;
-		pix->sizeimage = roundup(pix->width, 2) * 2 * pix->height;
-		break;
-	case V4L2_PIX_FMT_YUV420:
-	case V4L2_PIX_FMT_YVU420:
-		pix->bytesperline = roundup(pix->width, 8);
-		pix->sizeimage = roundup(pix->width, 8) * pix->height * 3 / 2;
-		break;
-	}
+	pix->bytesperline = DIV_ROUND_UP(pix->width * bpp, 8);
+	pix->sizeimage = pix->bytesperline * pix->height;
 }
 
 /*
@@ -305,6 +293,7 @@ static void tegra20_channel_queue_setup(struct tegra_vi_channel *chan)
 {
 	unsigned int stride = chan->format.bytesperline;
 	unsigned int height = chan->format.height;
+	unsigned int width = chan->format.width;
 
 	chan->start_offset = 0;
 
@@ -321,8 +310,8 @@ static void tegra20_channel_queue_setup(struct tegra_vi_channel *chan)
 
 	case V4L2_PIX_FMT_YUV420:
 	case V4L2_PIX_FMT_YVU420:
-		chan->addr_offset_u = stride * height;
-		chan->addr_offset_v = chan->addr_offset_u + stride * height / 4;
+		chan->addr_offset_u = width * height;
+		chan->addr_offset_v = chan->addr_offset_u + width * height / 4;
 
 		/* For YVU420, we swap the locations of the U and V planes. */
 		if (chan->format.pixelformat == V4L2_PIX_FMT_YVU420)
@@ -332,14 +321,14 @@ static void tegra20_channel_queue_setup(struct tegra_vi_channel *chan)
 		chan->start_offset_v = chan->addr_offset_v;
 
 		if (chan->vflip) {
-			chan->start_offset   += stride * (height - 1);
-			chan->start_offset_u += (stride / 2) * ((height / 2) - 1);
-			chan->start_offset_v += (stride / 2) * ((height / 2) - 1);
+			chan->start_offset   += width * (height - 1);
+			chan->start_offset_u += (width / 2) * ((height / 2) - 1);
+			chan->start_offset_v += (width / 2) * ((height / 2) - 1);
 		}
 		if (chan->hflip) {
-			chan->start_offset   += stride - 1;
-			chan->start_offset_u += (stride / 2) - 1;
-			chan->start_offset_v += (stride / 2) - 1;
+			chan->start_offset   += width - 1;
+			chan->start_offset_u += (width / 2) - 1;
+			chan->start_offset_v += (width / 2) - 1;
 		}
 		break;
 	}
@@ -576,20 +565,23 @@ static const struct tegra_vi_ops tegra20_vi_ops = {
 	.vi_stop_streaming = tegra20_vi_stop_streaming,
 };
 
-#define TEGRA20_VIDEO_FMT(MBUS_CODE, BPP, FOURCC)	\
-{							\
-	.code    = MEDIA_BUS_FMT_##MBUS_CODE,		\
-	.bpp     = BPP,					\
-	.fourcc  = V4L2_PIX_FMT_##FOURCC,		\
+#define TEGRA20_VIDEO_FMT(DATA_TYPE, BIT_WIDTH, MBUS_CODE, BPP, FOURCC)	\
+{									\
+	.img_dt		= TEGRA_IMAGE_DT_##DATA_TYPE,			\
+	.bit_width	= BIT_WIDTH,					\
+	.code		= MEDIA_BUS_FMT_##MBUS_CODE,			\
+	.bpp		= BPP,						\
+	.fourcc		= V4L2_PIX_FMT_##FOURCC,			\
 }
 
 static const struct tegra_video_format tegra20_video_formats[] = {
-	TEGRA20_VIDEO_FMT(UYVY8_2X8, 2, UYVY),
-	TEGRA20_VIDEO_FMT(VYUY8_2X8, 2, VYUY),
-	TEGRA20_VIDEO_FMT(YUYV8_2X8, 2, YUYV),
-	TEGRA20_VIDEO_FMT(YVYU8_2X8, 2, YVYU),
-	TEGRA20_VIDEO_FMT(UYVY8_2X8, 1, YUV420),
-	TEGRA20_VIDEO_FMT(UYVY8_2X8, 1, YVU420),
+	/* YUV422 */
+	TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 16, UYVY),
+	TEGRA20_VIDEO_FMT(YUV422_8, 16, VYUY8_2X8, 16, VYUY),
+	TEGRA20_VIDEO_FMT(YUV422_8, 16, YUYV8_2X8, 16, YUYV),
+	TEGRA20_VIDEO_FMT(YUV422_8, 16, YVYU8_2X8, 16, YVYU),
+	TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 12, YUV420),
+	TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 12, YVU420),
 };
 
 const struct tegra_vi_soc tegra20_vi_soc = {
diff --git a/drivers/staging/media/tegra-video/vi.h b/drivers/staging/media/tegra-video/vi.h
index bfadde8858d4..5cbc0606ed6c 100644
--- a/drivers/staging/media/tegra-video/vi.h
+++ b/drivers/staging/media/tegra-video/vi.h
@@ -281,7 +281,8 @@ enum tegra_image_dt {
  * @img_dt: MIPI CSI-2 data type (for CSI-2 only)
  * @bit_width: format width in bits per component (for CSI/Tegra210 only)
  * @code: media bus format code
- * @bpp: bytes per pixel (when stored in memory)
+ * @bpp: bytes per pixel (when stored in memory) for Tegra210,
+ *	 bits per pixel for Tegra20/Tegra30
  * @img_fmt: image format (for CSI/Tegra210 only)
  * @fourcc: V4L2 pixel format FCC identifier
  */
-- 
2.48.1

Re: [PATCH v3 15/22] staging: media: tegra-video: tegra20: simplify format align calculations

Posted by Mikko Perttunen 4 months, 1 week ago

On Friday, September 26, 2025 12:16 AM Svyatoslav Ryhel wrote:
> Simplify format align calculations by slightly modifying supported formats
> structure. Adjusted U and V offset calculations for planar formats since
> YUV420P bits per pixel is 12 (1 full plane for Y + 2 * 1/4 planes for U
> and V) so stride is width * 3/2, but offset must be calculated with plain
> width since each plain has stride width * 1. This aligns with downstream

plane

> behavior which uses same approach for offset calculations.
> 
> Signed-off-by: Svyatoslav Ryhel <clamor95@gmail.com>
> ---
>  drivers/staging/media/tegra-video/tegra20.c | 58 +++++++++------------
>  drivers/staging/media/tegra-video/vi.h      |  3 +-
>  2 files changed, 27 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/staging/media/tegra-video/tegra20.c b/drivers/staging/media/tegra-video/tegra20.c
> index 7c3ff843235d..b7a39723dfc2 100644
> --- a/drivers/staging/media/tegra-video/tegra20.c
> +++ b/drivers/staging/media/tegra-video/tegra20.c
> @@ -280,20 +280,8 @@ static void tegra20_fmt_align(struct v4l2_pix_format *pix, unsigned int bpp)
>  	pix->width  = clamp(pix->width,  TEGRA20_MIN_WIDTH,  TEGRA20_MAX_WIDTH);
>  	pix->height = clamp(pix->height, TEGRA20_MIN_HEIGHT, TEGRA20_MAX_HEIGHT);
>  
> -	switch (pix->pixelformat) {
> -	case V4L2_PIX_FMT_UYVY:
> -	case V4L2_PIX_FMT_VYUY:
> -	case V4L2_PIX_FMT_YUYV:
> -	case V4L2_PIX_FMT_YVYU:
> -		pix->bytesperline = roundup(pix->width, 2) * 2;
> -		pix->sizeimage = roundup(pix->width, 2) * 2 * pix->height;
> -		break;
> -	case V4L2_PIX_FMT_YUV420:
> -	case V4L2_PIX_FMT_YVU420:
> -		pix->bytesperline = roundup(pix->width, 8);
> -		pix->sizeimage = roundup(pix->width, 8) * pix->height * 3 / 2;
> -		break;
> -	}
> +	pix->bytesperline = DIV_ROUND_UP(pix->width * bpp, 8);
> +	pix->sizeimage = pix->bytesperline * pix->height;
>  }
>  
>  /*
> @@ -305,6 +293,7 @@ static void tegra20_channel_queue_setup(struct tegra_vi_channel *chan)
>  {
>  	unsigned int stride = chan->format.bytesperline;
>  	unsigned int height = chan->format.height;
> +	unsigned int width = chan->format.width;
>  
>  	chan->start_offset = 0;
>  
> @@ -321,8 +310,8 @@ static void tegra20_channel_queue_setup(struct tegra_vi_channel *chan)
>  
>  	case V4L2_PIX_FMT_YUV420:
>  	case V4L2_PIX_FMT_YVU420:
> -		chan->addr_offset_u = stride * height;
> -		chan->addr_offset_v = chan->addr_offset_u + stride * height / 4;
> +		chan->addr_offset_u = width * height;
> +		chan->addr_offset_v = chan->addr_offset_u + width * height / 4;
>  
>  		/* For YVU420, we swap the locations of the U and V planes. */
>  		if (chan->format.pixelformat == V4L2_PIX_FMT_YVU420)
> @@ -332,14 +321,14 @@ static void tegra20_channel_queue_setup(struct tegra_vi_channel *chan)
>  		chan->start_offset_v = chan->addr_offset_v;
>  
>  		if (chan->vflip) {
> -			chan->start_offset   += stride * (height - 1);
> -			chan->start_offset_u += (stride / 2) * ((height / 2) - 1);
> -			chan->start_offset_v += (stride / 2) * ((height / 2) - 1);
> +			chan->start_offset   += width * (height - 1);
> +			chan->start_offset_u += (width / 2) * ((height / 2) - 1);
> +			chan->start_offset_v += (width / 2) * ((height / 2) - 1);
>  		}
>  		if (chan->hflip) {
> -			chan->start_offset   += stride - 1;
> -			chan->start_offset_u += (stride / 2) - 1;
> -			chan->start_offset_v += (stride / 2) - 1;
> +			chan->start_offset   += width - 1;
> +			chan->start_offset_u += (width / 2) - 1;
> +			chan->start_offset_v += (width / 2) - 1;
>  		}
>  		break;
>  	}
> @@ -576,20 +565,23 @@ static const struct tegra_vi_ops tegra20_vi_ops = {
>  	.vi_stop_streaming = tegra20_vi_stop_streaming,
>  };
>  
> -#define TEGRA20_VIDEO_FMT(MBUS_CODE, BPP, FOURCC)	\
> -{							\
> -	.code    = MEDIA_BUS_FMT_##MBUS_CODE,		\
> -	.bpp     = BPP,					\
> -	.fourcc  = V4L2_PIX_FMT_##FOURCC,		\
> +#define TEGRA20_VIDEO_FMT(DATA_TYPE, BIT_WIDTH, MBUS_CODE, BPP, FOURCC)	\
> +{									\
> +	.img_dt		= TEGRA_IMAGE_DT_##DATA_TYPE,			\
> +	.bit_width	= BIT_WIDTH,					\
> +	.code		= MEDIA_BUS_FMT_##MBUS_CODE,			\
> +	.bpp		= BPP,						\
> +	.fourcc		= V4L2_PIX_FMT_##FOURCC,			\
>  }
>  
>  static const struct tegra_video_format tegra20_video_formats[] = {
> -	TEGRA20_VIDEO_FMT(UYVY8_2X8, 2, UYVY),
> -	TEGRA20_VIDEO_FMT(VYUY8_2X8, 2, VYUY),
> -	TEGRA20_VIDEO_FMT(YUYV8_2X8, 2, YUYV),
> -	TEGRA20_VIDEO_FMT(YVYU8_2X8, 2, YVYU),
> -	TEGRA20_VIDEO_FMT(UYVY8_2X8, 1, YUV420),
> -	TEGRA20_VIDEO_FMT(UYVY8_2X8, 1, YVU420),
> +	/* YUV422 */
> +	TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 16, UYVY),
> +	TEGRA20_VIDEO_FMT(YUV422_8, 16, VYUY8_2X8, 16, VYUY),
> +	TEGRA20_VIDEO_FMT(YUV422_8, 16, YUYV8_2X8, 16, YUYV),
> +	TEGRA20_VIDEO_FMT(YUV422_8, 16, YVYU8_2X8, 16, YVYU),
> +	TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 12, YUV420),
> +	TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 12, YVU420),
>  };

Looking at the code, BPP seems to only be used for the line stride (i.e. bytes per line) calculation. I think we should just make it 8 for the planar formats (possibly with an explaining comment). With the current code, we end up with 'bytesperline' variables in places not being the actual bytes per line, which is confusing.

Actually, we can then just make the 'bpp' field be bytes per pixel as it was before to avoid the discrepancy with Tegra210.

>  
>  const struct tegra_vi_soc tegra20_vi_soc = {
> diff --git a/drivers/staging/media/tegra-video/vi.h b/drivers/staging/media/tegra-video/vi.h
> index bfadde8858d4..5cbc0606ed6c 100644
> --- a/drivers/staging/media/tegra-video/vi.h
> +++ b/drivers/staging/media/tegra-video/vi.h
> @@ -281,7 +281,8 @@ enum tegra_image_dt {
>   * @img_dt: MIPI CSI-2 data type (for CSI-2 only)
>   * @bit_width: format width in bits per component (for CSI/Tegra210 only)
>   * @code: media bus format code
> - * @bpp: bytes per pixel (when stored in memory)
> + * @bpp: bytes per pixel (when stored in memory) for Tegra210,
> + *	 bits per pixel for Tegra20/Tegra30
>   * @img_fmt: image format (for CSI/Tegra210 only)
>   * @fourcc: V4L2 pixel format FCC identifier
>   */
>

Re: [PATCH v3 15/22] staging: media: tegra-video: tegra20: simplify format align calculations

Posted by Svyatoslav Ryhel 4 months, 1 week ago

ср, 1 жовт. 2025 р. о 07:38 Mikko Perttunen <mperttunen@nvidia.com> пише:
>
> On Friday, September 26, 2025 12:16 AM Svyatoslav Ryhel wrote:
> > Simplify format align calculations by slightly modifying supported formats
> > structure. Adjusted U and V offset calculations for planar formats since
> > YUV420P bits per pixel is 12 (1 full plane for Y + 2 * 1/4 planes for U
> > and V) so stride is width * 3/2, but offset must be calculated with plain
> > width since each plain has stride width * 1. This aligns with downstream
>
> plane
>
> > behavior which uses same approach for offset calculations.
> >
> > Signed-off-by: Svyatoslav Ryhel <clamor95@gmail.com>
> > ---
> >  drivers/staging/media/tegra-video/tegra20.c | 58 +++++++++------------
> >  drivers/staging/media/tegra-video/vi.h      |  3 +-
> >  2 files changed, 27 insertions(+), 34 deletions(-)
> >
> > diff --git a/drivers/staging/media/tegra-video/tegra20.c b/drivers/staging/media/tegra-video/tegra20.c
> > index 7c3ff843235d..b7a39723dfc2 100644
> > --- a/drivers/staging/media/tegra-video/tegra20.c
> > +++ b/drivers/staging/media/tegra-video/tegra20.c
> > @@ -280,20 +280,8 @@ static void tegra20_fmt_align(struct v4l2_pix_format *pix, unsigned int bpp)
> >       pix->width  = clamp(pix->width,  TEGRA20_MIN_WIDTH,  TEGRA20_MAX_WIDTH);
> >       pix->height = clamp(pix->height, TEGRA20_MIN_HEIGHT, TEGRA20_MAX_HEIGHT);
> >
> > -     switch (pix->pixelformat) {
> > -     case V4L2_PIX_FMT_UYVY:
> > -     case V4L2_PIX_FMT_VYUY:
> > -     case V4L2_PIX_FMT_YUYV:
> > -     case V4L2_PIX_FMT_YVYU:
> > -             pix->bytesperline = roundup(pix->width, 2) * 2;
> > -             pix->sizeimage = roundup(pix->width, 2) * 2 * pix->height;
> > -             break;
> > -     case V4L2_PIX_FMT_YUV420:
> > -     case V4L2_PIX_FMT_YVU420:
> > -             pix->bytesperline = roundup(pix->width, 8);
> > -             pix->sizeimage = roundup(pix->width, 8) * pix->height * 3 / 2;
> > -             break;
> > -     }
> > +     pix->bytesperline = DIV_ROUND_UP(pix->width * bpp, 8);
> > +     pix->sizeimage = pix->bytesperline * pix->height;
> >  }
> >
> >  /*
> > @@ -305,6 +293,7 @@ static void tegra20_channel_queue_setup(struct tegra_vi_channel *chan)
> >  {
> >       unsigned int stride = chan->format.bytesperline;
> >       unsigned int height = chan->format.height;
> > +     unsigned int width = chan->format.width;
> >
> >       chan->start_offset = 0;
> >
> > @@ -321,8 +310,8 @@ static void tegra20_channel_queue_setup(struct tegra_vi_channel *chan)
> >
> >       case V4L2_PIX_FMT_YUV420:
> >       case V4L2_PIX_FMT_YVU420:
> > -             chan->addr_offset_u = stride * height;
> > -             chan->addr_offset_v = chan->addr_offset_u + stride * height / 4;
> > +             chan->addr_offset_u = width * height;
> > +             chan->addr_offset_v = chan->addr_offset_u + width * height / 4;
> >
> >               /* For YVU420, we swap the locations of the U and V planes. */
> >               if (chan->format.pixelformat == V4L2_PIX_FMT_YVU420)
> > @@ -332,14 +321,14 @@ static void tegra20_channel_queue_setup(struct tegra_vi_channel *chan)
> >               chan->start_offset_v = chan->addr_offset_v;
> >
> >               if (chan->vflip) {
> > -                     chan->start_offset   += stride * (height - 1);
> > -                     chan->start_offset_u += (stride / 2) * ((height / 2) - 1);
> > -                     chan->start_offset_v += (stride / 2) * ((height / 2) - 1);
> > +                     chan->start_offset   += width * (height - 1);
> > +                     chan->start_offset_u += (width / 2) * ((height / 2) - 1);
> > +                     chan->start_offset_v += (width / 2) * ((height / 2) - 1);
> >               }
> >               if (chan->hflip) {
> > -                     chan->start_offset   += stride - 1;
> > -                     chan->start_offset_u += (stride / 2) - 1;
> > -                     chan->start_offset_v += (stride / 2) - 1;
> > +                     chan->start_offset   += width - 1;
> > +                     chan->start_offset_u += (width / 2) - 1;
> > +                     chan->start_offset_v += (width / 2) - 1;
> >               }
> >               break;
> >       }
> > @@ -576,20 +565,23 @@ static const struct tegra_vi_ops tegra20_vi_ops = {
> >       .vi_stop_streaming = tegra20_vi_stop_streaming,
> >  };
> >
> > -#define TEGRA20_VIDEO_FMT(MBUS_CODE, BPP, FOURCC)    \
> > -{                                                    \
> > -     .code    = MEDIA_BUS_FMT_##MBUS_CODE,           \
> > -     .bpp     = BPP,                                 \
> > -     .fourcc  = V4L2_PIX_FMT_##FOURCC,               \
> > +#define TEGRA20_VIDEO_FMT(DATA_TYPE, BIT_WIDTH, MBUS_CODE, BPP, FOURCC)      \
> > +{                                                                    \
> > +     .img_dt         = TEGRA_IMAGE_DT_##DATA_TYPE,                   \
> > +     .bit_width      = BIT_WIDTH,                                    \
> > +     .code           = MEDIA_BUS_FMT_##MBUS_CODE,                    \
> > +     .bpp            = BPP,                                          \
> > +     .fourcc         = V4L2_PIX_FMT_##FOURCC,                        \
> >  }
> >
> >  static const struct tegra_video_format tegra20_video_formats[] = {
> > -     TEGRA20_VIDEO_FMT(UYVY8_2X8, 2, UYVY),
> > -     TEGRA20_VIDEO_FMT(VYUY8_2X8, 2, VYUY),
> > -     TEGRA20_VIDEO_FMT(YUYV8_2X8, 2, YUYV),
> > -     TEGRA20_VIDEO_FMT(YVYU8_2X8, 2, YVYU),
> > -     TEGRA20_VIDEO_FMT(UYVY8_2X8, 1, YUV420),
> > -     TEGRA20_VIDEO_FMT(UYVY8_2X8, 1, YVU420),
> > +     /* YUV422 */
> > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 16, UYVY),
> > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, VYUY8_2X8, 16, VYUY),
> > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, YUYV8_2X8, 16, YUYV),
> > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, YVYU8_2X8, 16, YVYU),
> > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 12, YUV420),
> > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 12, YVU420),
> >  };
>
> Looking at the code, BPP seems to only be used for the line stride (i.e. bytes per line) calculation. I think we should just make it 8 for the planar formats (possibly with an explaining comment). With the current code, we end up with 'bytesperline' variables in places not being the actual bytes per line, which is confusing.
>
> Actually, we can then just make the 'bpp' field be bytes per pixel as it was before to avoid the discrepancy with Tegra210.
>

No, this code is actually cleaner and in sync with what downstream
does, Tegra210 bytes per pixel is confusing since it totally neglects
formats with fractional bytes per pixel, it is impossible to set there
3/2 for example, which is used by YUV420.

According to downstream code bytes_per_line =
soc_mbus_bytes_per_line..., downstream directly name is bytes_per_line
and soc_mbus_bytes_per_line returns width * 3 / 2 which is correct
calculation (12 bits). Meanwhile for planar formats Tegra has 3
different buffers so with offset calculation plain width must be used
(which matches downstream).

> >
> >  const struct tegra_vi_soc tegra20_vi_soc = {
> > diff --git a/drivers/staging/media/tegra-video/vi.h b/drivers/staging/media/tegra-video/vi.h
> > index bfadde8858d4..5cbc0606ed6c 100644
> > --- a/drivers/staging/media/tegra-video/vi.h
> > +++ b/drivers/staging/media/tegra-video/vi.h
> > @@ -281,7 +281,8 @@ enum tegra_image_dt {
> >   * @img_dt: MIPI CSI-2 data type (for CSI-2 only)
> >   * @bit_width: format width in bits per component (for CSI/Tegra210 only)
> >   * @code: media bus format code
> > - * @bpp: bytes per pixel (when stored in memory)
> > + * @bpp: bytes per pixel (when stored in memory) for Tegra210,
> > + *    bits per pixel for Tegra20/Tegra30
> >   * @img_fmt: image format (for CSI/Tegra210 only)
> >   * @fourcc: V4L2 pixel format FCC identifier
> >   */
> >

Re: [PATCH v3 15/22] staging: media: tegra-video: tegra20: simplify format align calculations

Posted by Svyatoslav Ryhel 4 months, 1 week ago

ср, 1 жовт. 2025 р. о 08:07 Svyatoslav Ryhel <clamor95@gmail.com> пише:
>
> ср, 1 жовт. 2025 р. о 07:38 Mikko Perttunen <mperttunen@nvidia.com> пише:
> >
> > On Friday, September 26, 2025 12:16 AM Svyatoslav Ryhel wrote:
> > > Simplify format align calculations by slightly modifying supported formats
> > > structure. Adjusted U and V offset calculations for planar formats since
> > > YUV420P bits per pixel is 12 (1 full plane for Y + 2 * 1/4 planes for U
> > > and V) so stride is width * 3/2, but offset must be calculated with plain
> > > width since each plain has stride width * 1. This aligns with downstream
> >
> > plane
> >
> > > behavior which uses same approach for offset calculations.
> > >
> > > Signed-off-by: Svyatoslav Ryhel <clamor95@gmail.com>
> > > ---
> > >  drivers/staging/media/tegra-video/tegra20.c | 58 +++++++++------------
> > >  drivers/staging/media/tegra-video/vi.h      |  3 +-
> > >  2 files changed, 27 insertions(+), 34 deletions(-)
> > >
> > > diff --git a/drivers/staging/media/tegra-video/tegra20.c b/drivers/staging/media/tegra-video/tegra20.c
> > > index 7c3ff843235d..b7a39723dfc2 100644
> > > --- a/drivers/staging/media/tegra-video/tegra20.c
> > > +++ b/drivers/staging/media/tegra-video/tegra20.c
> > > @@ -280,20 +280,8 @@ static void tegra20_fmt_align(struct v4l2_pix_format *pix, unsigned int bpp)
> > >       pix->width  = clamp(pix->width,  TEGRA20_MIN_WIDTH,  TEGRA20_MAX_WIDTH);
> > >       pix->height = clamp(pix->height, TEGRA20_MIN_HEIGHT, TEGRA20_MAX_HEIGHT);
> > >
> > > -     switch (pix->pixelformat) {
> > > -     case V4L2_PIX_FMT_UYVY:
> > > -     case V4L2_PIX_FMT_VYUY:
> > > -     case V4L2_PIX_FMT_YUYV:
> > > -     case V4L2_PIX_FMT_YVYU:
> > > -             pix->bytesperline = roundup(pix->width, 2) * 2;
> > > -             pix->sizeimage = roundup(pix->width, 2) * 2 * pix->height;
> > > -             break;
> > > -     case V4L2_PIX_FMT_YUV420:
> > > -     case V4L2_PIX_FMT_YVU420:
> > > -             pix->bytesperline = roundup(pix->width, 8);
> > > -             pix->sizeimage = roundup(pix->width, 8) * pix->height * 3 / 2;
> > > -             break;
> > > -     }
> > > +     pix->bytesperline = DIV_ROUND_UP(pix->width * bpp, 8);
> > > +     pix->sizeimage = pix->bytesperline * pix->height;
> > >  }
> > >
> > >  /*
> > > @@ -305,6 +293,7 @@ static void tegra20_channel_queue_setup(struct tegra_vi_channel *chan)
> > >  {
> > >       unsigned int stride = chan->format.bytesperline;
> > >       unsigned int height = chan->format.height;
> > > +     unsigned int width = chan->format.width;
> > >
> > >       chan->start_offset = 0;
> > >
> > > @@ -321,8 +310,8 @@ static void tegra20_channel_queue_setup(struct tegra_vi_channel *chan)
> > >
> > >       case V4L2_PIX_FMT_YUV420:
> > >       case V4L2_PIX_FMT_YVU420:
> > > -             chan->addr_offset_u = stride * height;
> > > -             chan->addr_offset_v = chan->addr_offset_u + stride * height / 4;
> > > +             chan->addr_offset_u = width * height;
> > > +             chan->addr_offset_v = chan->addr_offset_u + width * height / 4;
> > >
> > >               /* For YVU420, we swap the locations of the U and V planes. */
> > >               if (chan->format.pixelformat == V4L2_PIX_FMT_YVU420)
> > > @@ -332,14 +321,14 @@ static void tegra20_channel_queue_setup(struct tegra_vi_channel *chan)
> > >               chan->start_offset_v = chan->addr_offset_v;
> > >
> > >               if (chan->vflip) {
> > > -                     chan->start_offset   += stride * (height - 1);
> > > -                     chan->start_offset_u += (stride / 2) * ((height / 2) - 1);
> > > -                     chan->start_offset_v += (stride / 2) * ((height / 2) - 1);
> > > +                     chan->start_offset   += width * (height - 1);
> > > +                     chan->start_offset_u += (width / 2) * ((height / 2) - 1);
> > > +                     chan->start_offset_v += (width / 2) * ((height / 2) - 1);
> > >               }
> > >               if (chan->hflip) {
> > > -                     chan->start_offset   += stride - 1;
> > > -                     chan->start_offset_u += (stride / 2) - 1;
> > > -                     chan->start_offset_v += (stride / 2) - 1;
> > > +                     chan->start_offset   += width - 1;
> > > +                     chan->start_offset_u += (width / 2) - 1;
> > > +                     chan->start_offset_v += (width / 2) - 1;
> > >               }
> > >               break;
> > >       }
> > > @@ -576,20 +565,23 @@ static const struct tegra_vi_ops tegra20_vi_ops = {
> > >       .vi_stop_streaming = tegra20_vi_stop_streaming,
> > >  };
> > >
> > > -#define TEGRA20_VIDEO_FMT(MBUS_CODE, BPP, FOURCC)    \
> > > -{                                                    \
> > > -     .code    = MEDIA_BUS_FMT_##MBUS_CODE,           \
> > > -     .bpp     = BPP,                                 \
> > > -     .fourcc  = V4L2_PIX_FMT_##FOURCC,               \
> > > +#define TEGRA20_VIDEO_FMT(DATA_TYPE, BIT_WIDTH, MBUS_CODE, BPP, FOURCC)      \
> > > +{                                                                    \
> > > +     .img_dt         = TEGRA_IMAGE_DT_##DATA_TYPE,                   \
> > > +     .bit_width      = BIT_WIDTH,                                    \
> > > +     .code           = MEDIA_BUS_FMT_##MBUS_CODE,                    \
> > > +     .bpp            = BPP,                                          \
> > > +     .fourcc         = V4L2_PIX_FMT_##FOURCC,                        \
> > >  }
> > >
> > >  static const struct tegra_video_format tegra20_video_formats[] = {
> > > -     TEGRA20_VIDEO_FMT(UYVY8_2X8, 2, UYVY),
> > > -     TEGRA20_VIDEO_FMT(VYUY8_2X8, 2, VYUY),
> > > -     TEGRA20_VIDEO_FMT(YUYV8_2X8, 2, YUYV),
> > > -     TEGRA20_VIDEO_FMT(YVYU8_2X8, 2, YVYU),
> > > -     TEGRA20_VIDEO_FMT(UYVY8_2X8, 1, YUV420),
> > > -     TEGRA20_VIDEO_FMT(UYVY8_2X8, 1, YVU420),
> > > +     /* YUV422 */
> > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 16, UYVY),
> > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, VYUY8_2X8, 16, VYUY),
> > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, YUYV8_2X8, 16, YUYV),
> > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, YVYU8_2X8, 16, YVYU),
> > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 12, YUV420),
> > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 12, YVU420),
> > >  };
> >
> > Looking at the code, BPP seems to only be used for the line stride (i.e. bytes per line) calculation. I think we should just make it 8 for the planar formats (possibly with an explaining comment). With the current code, we end up with 'bytesperline' variables in places not being the actual bytes per line, which is confusing.
> >
> > Actually, we can then just make the 'bpp' field be bytes per pixel as it was before to avoid the discrepancy with Tegra210.
> >
>
> No, this code is actually cleaner and in sync with what downstream
> does, Tegra210 bytes per pixel is confusing since it totally neglects
> formats with fractional bytes per pixel, it is impossible to set there
> 3/2 for example, which is used by YUV420.
>
> According to downstream code bytes_per_line =
> soc_mbus_bytes_per_line..., downstream directly name is bytes_per_line
> and soc_mbus_bytes_per_line returns width * 3 / 2 which is correct
> calculation (12 bits). Meanwhile for planar formats Tegra has 3
> different buffers so with offset calculation plain width must be used
> (which matches downstream).
>

If you mean use of BPP by VI, I can propose removing bytesperline and
sizeimage configuration from VI entirely and leave this to per-SoC
fmt_align function which does this already anyway and guards every
time those values are referred. This way there will be no instances
where "places not being the actual bytes per line" comes true.

> > >
> > >  const struct tegra_vi_soc tegra20_vi_soc = {
> > > diff --git a/drivers/staging/media/tegra-video/vi.h b/drivers/staging/media/tegra-video/vi.h
> > > index bfadde8858d4..5cbc0606ed6c 100644
> > > --- a/drivers/staging/media/tegra-video/vi.h
> > > +++ b/drivers/staging/media/tegra-video/vi.h
> > > @@ -281,7 +281,8 @@ enum tegra_image_dt {
> > >   * @img_dt: MIPI CSI-2 data type (for CSI-2 only)
> > >   * @bit_width: format width in bits per component (for CSI/Tegra210 only)
> > >   * @code: media bus format code
> > > - * @bpp: bytes per pixel (when stored in memory)
> > > + * @bpp: bytes per pixel (when stored in memory) for Tegra210,
> > > + *    bits per pixel for Tegra20/Tegra30
> > >   * @img_fmt: image format (for CSI/Tegra210 only)
> > >   * @fourcc: V4L2 pixel format FCC identifier
> > >   */
> > >