[RESEND] media: mediatek: vcodec: Add to support VP9 inner racing mode

Mingjia Zhang posted 1 patch 2 years, 2 months ago
There is a newer version of this series
.../vcodec/vdec/vdec_vp9_req_lat_if.c         | 64 ++++++++++++-------
1 file changed, 40 insertions(+), 24 deletions(-)
[RESEND] media: mediatek: vcodec: Add to support VP9 inner racing mode
Posted by Mingjia Zhang 2 years, 2 months ago
In order to reduce decoder latency, enable VP9 inner racing mode.
Send the LAT trans buffer information to the core as soon as LAT is
triggered to work, instead of waiting until LAT decoding is done.

Signed-off-by: mingjia zhang <mingjia.zhang@mediatek.com>
---
CTS/GTS test pass
---
 .../vcodec/vdec/vdec_vp9_req_lat_if.c         | 64 ++++++++++++-------
 1 file changed, 40 insertions(+), 24 deletions(-)

diff --git a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
index fba06f321baa..35462d45fbf4 100644
--- a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
+++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
@@ -436,6 +436,7 @@ struct vdec_vp9_slice_ref {
  * @frame_ctx:		4 frame context according to VP9 Spec
  * @frame_ctx_helper:	4 frame context according to newest kernel spec
  * @dirty:		state of each frame context
+ * @local_vsi:		local instance vsi information
  * @init_vsi:		vsi used for initialized VP9 instance
  * @vsi:		vsi used for decoding/flush ...
  * @core_vsi:		vsi used for Core stage
@@ -482,6 +483,8 @@ struct vdec_vp9_slice_instance {
 	struct v4l2_vp9_frame_context frame_ctx_helper;
 	unsigned char dirty[4];
 
+	struct vdec_vp9_slice_vsi local_vsi;
+
 	/* MicroP vsi */
 	union {
 		struct vdec_vp9_slice_init_vsi *init_vsi;
@@ -1617,16 +1620,10 @@ static int vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance
 }
 
 static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance,
-				     struct vdec_lat_buf *lat_buf,
-				     struct vdec_vp9_slice_pfc *pfc)
+				     struct vdec_vp9_slice_vsi *vsi)
 {
-	struct vdec_vp9_slice_vsi *vsi;
-
-	vsi = &pfc->vsi;
-	memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
-
 	mtk_vcodec_debug(instance, "Frame %u LAT CRC 0x%08x %lx %lx\n",
-			 pfc->seq, vsi->state.crc[0],
+			 (instance->seq - 1), vsi->state.crc[0],
 			 (unsigned long)vsi->trans.dma_addr,
 			 (unsigned long)vsi->trans.dma_addr_end);
 
@@ -2091,6 +2088,13 @@ static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
 		return ret;
 	}
 
+	if (IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability)) {
+		vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
+		memcpy(&instance->local_vsi, vsi, sizeof(*vsi));
+		vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
+		vsi = &instance->local_vsi;
+	}
+
 	if (instance->irq) {
 		ret = mtk_vcodec_wait_for_done_ctx(ctx,	MTK_INST_IRQ_RECEIVED,
 						   WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0);
@@ -2103,22 +2107,25 @@ static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
 	}
 
 	vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
-	ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc);
+	ret = vdec_vp9_slice_update_lat(instance, vsi);
 
-	/* LAT trans full, no more UBE or decode timeout */
-	if (ret) {
-		mtk_vcodec_err(instance, "VP9 decode error: %d\n", ret);
-		return ret;
-	}
+	if (!IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability))
+		/* LAT trans full, no more UBE or decode timeout */
+		if (ret) {
+			mtk_vcodec_err(instance, "frame[%d] decode error: %d\n",
+				       ret, (instance->seq - 1));
+			return ret;
+		}
 
-	mtk_vcodec_debug(instance, "lat dma addr: 0x%lx 0x%lx\n",
-			 (unsigned long)pfc->vsi.trans.dma_addr,
-			 (unsigned long)pfc->vsi.trans.dma_addr_end);
 
-	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue,
-				       vsi->trans.dma_addr_end +
-				       ctx->msg_queue.wdma_addr.dma_addr);
-	vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
+	vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
+	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi->trans.dma_addr_end);
+	if (!IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability))
+		vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
+
+	mtk_vcodec_debug(instance, "lat trans end addr(0x%lx), ube start addr(0x%lx)\n",
+			 (unsigned long)vsi->trans.dma_addr_end,
+			 (unsigned long)ctx->msg_queue.wdma_addr.dma_addr);
 
 	return 0;
 }
@@ -2194,10 +2201,14 @@ static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
 		goto err;
 	}
 
-	pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
 	mtk_vcodec_debug(instance, "core dma_addr_end 0x%lx\n",
 			 (unsigned long)pfc->vsi.trans.dma_addr_end);
-	vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
+
+	if (IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability))
+		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr);
+	else
+		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
+
 	ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
 
 	return 0;
@@ -2205,7 +2216,12 @@ static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
 err:
 	if (ctx && pfc) {
 		/* always update read pointer */
-		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
+		if (IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability))
+			vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
+						       pfc->vsi.trans.dma_addr);
+		else
+			vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
+						       pfc->vsi.trans.dma_addr_end);
 
 		if (fb)
 			ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req);
-- 
2.25.1
Re: [RESEND] media: mediatek: vcodec: Add to support VP9 inner racing mode
Posted by AngeloGioacchino Del Regno 2 years, 2 months ago
Il 15/07/22 08:49, Mingjia Zhang ha scritto:
> In order to reduce decoder latency, enable VP9 inner racing mode.
> Send lat trans buffer information to core when trigger lat to work,
> need not to wait until lat decode done.
> 
> Signed-off-by: mingjia zhang <mingjia.zhang@mediatek.com>
> ---
> CTS/GTS test pass

CTS/GTS passing is a good indication but, please, test with GStreamer (and
show the output, as well!).

Thanks,
Angelo

> ---
>   .../vcodec/vdec/vdec_vp9_req_lat_if.c         | 64 ++++++++++++-------
>   1 file changed, 40 insertions(+), 24 deletions(-)
>
Re: [RESEND] media: mediatek: vcodec: Add to support VP9 inner racing mode
Posted by mingjia.zhang@mediatek.com 2 years, 2 months ago
Hi Angelo,

Thanks for your reply and useful comments.

In addition to running the CTS/GTS tests, I ran the Fluster tests with
GStreamer locally. The test result is "Ran 240/303 tests successfully".


Thanks,
mingjia


On Fri, 2022-07-15 at 10:34 +0200, AngeloGioacchino Del Regno wrote:
> Il 15/07/22 08:49, Mingjia Zhang ha scritto:
> > In order to reduce decoder latency, enable VP9 inner racing mode.
> > Send lat trans buffer information to core when trigger lat to work,
> > need not to wait until lat decode done.
> > 
> > Signed-off-by: mingjia zhang <mingjia.zhang@mediatek.com>
> > ---
> > CTS/GTS test pass
> 
> CTS/GTS passing is a good indication but, please, test with GStreamer
> (and
> show the output, as well!).
> 
> Thanks,
> Angelo
> 
> > ---
> >   .../vcodec/vdec/vdec_vp9_req_lat_if.c         | 64 ++++++++++++
> > -------
> >   1 file changed, 40 insertions(+), 24 deletions(-)
> >
Re: [RESEND] media: mediatek: vcodec: Add to support VP9 inner racing mode
Posted by AngeloGioacchino Del Regno 2 years, 2 months ago
Il 22/07/22 08:43, mingjia.zhang@mediatek.com ha scritto:
> Hi Angelo,
> 
> Thanks for your reply and useful comments.
> 
> In addition to running cts/gts test, I ran the fluster test with
> GStreamer locally. The test result is "Ran 240/303 tests successfully"
> 
> 

Just a piece of advice: if you want to speed up the review and merging of
these commits, always post the full output of Fluster/GST testing when
sending them.

This will make it easier for the media maintainers to review your code :-)

Anyway, thanks for the Fluster testing!

Cheers,
Angelo

> Thanks,
> mingjia
> 
> 
> On Fri, 2022-07-15 at 10:34 +0200, AngeloGioacchino Del Regno wrote:
>> Il 15/07/22 08:49, Mingjia Zhang ha scritto:
>>> In order to reduce decoder latency, enable VP9 inner racing mode.
>>> Send lat trans buffer information to core when trigger lat to work,
>>> need not to wait until lat decode done.
>>>
>>> Signed-off-by: mingjia zhang <mingjia.zhang@mediatek.com>
>>> ---
>>> CTS/GTS test pass
>>
>> CTS/GTS passing is a good indication but, please, test with GStreamer
>> (and
>> show the output, as well!).
>>
>> Thanks,
>> Angelo
>>
>>> ---
>>>    .../vcodec/vdec/vdec_vp9_req_lat_if.c         | 64 ++++++++++++
>>> -------
>>>    1 file changed, 40 insertions(+), 24 deletions(-)
>>>
>