[PATCH v5] media: amphion: Support dmabuf and v4l2 buffer without binding

Ming Qian posted 1 patch 1 year ago
There is a newer version of this series
drivers/media/platform/amphion/vdec.c     | 235 +++++++++++++++++-----
drivers/media/platform/amphion/vpu.h      |   7 +-
drivers/media/platform/amphion/vpu_dbg.c  |  15 +-
drivers/media/platform/amphion/vpu_v4l2.c |  11 +
4 files changed, 220 insertions(+), 48 deletions(-)
[PATCH v5] media: amphion: Support dmabuf and v4l2 buffer without binding
Posted by Ming Qian 1 year ago
When using VB2_DMABUF, the relationship between dma-buf and v4l2 buffer
may not be one-to-one: a single dma-buf may be queued via different
v4l2 buffers, and different dma-bufs may be queued via the same
v4l2 buffer, so it's not appropriate to use the v4l2 buffer index
as the frame store id.

We can generate a frame store id according to the dma address.
Then for a given dma-buf, the id is fixed.

Driver now manages the frame store and vb2-buffer states independently.

When a dma-buf is queued via another v4l2 buffer before the firmware
has released it, the new buffer must be kept pending until the firmware
releases it.

Signed-off-by: Ming Qian <ming.qian@oss.nxp.com>
---
v5
- Avoid dynamic size calculation in memory allocation, use kmalloc_array
  instead
v4
- remove unnecessary 'out of memory' message
v3
-- fix a typo in NULL pointer check
v2
-- fix an uninitialized issue reported by media-ci

 drivers/media/platform/amphion/vdec.c     | 235 +++++++++++++++++-----
 drivers/media/platform/amphion/vpu.h      |   7 +-
 drivers/media/platform/amphion/vpu_dbg.c  |  15 +-
 drivers/media/platform/amphion/vpu_v4l2.c |  11 +
 4 files changed, 220 insertions(+), 48 deletions(-)

diff --git a/drivers/media/platform/amphion/vdec.c b/drivers/media/platform/amphion/vdec.c
index b3bc9eb16d6a..f4979d537b97 100644
--- a/drivers/media/platform/amphion/vdec.c
+++ b/drivers/media/platform/amphion/vdec.c
@@ -26,6 +26,7 @@
 #include "vpu_cmds.h"
 #include "vpu_rpc.h"
 
+#define VDEC_SLOT_CNT_DFT		32
 #define VDEC_MIN_BUFFER_CAP		8
 #define VDEC_MIN_BUFFER_OUT		8
 
@@ -41,6 +42,14 @@ struct vdec_fs_info {
 	u32 tag;
 };
 
+struct vdec_frame_store_t {
+	struct vpu_vb2_buffer *curr;
+	struct vpu_vb2_buffer *pend;
+	dma_addr_t addr;
+	unsigned int state;
+	u32 tag;
+};
+
 struct vdec_t {
 	u32 seq_hdr_found;
 	struct vpu_buffer udata;
@@ -48,7 +57,8 @@ struct vdec_t {
 	struct vpu_dec_codec_info codec_info;
 	enum vpu_codec_state state;
 
-	struct vpu_vb2_buffer *slots[VB2_MAX_FRAME];
+	struct vdec_frame_store_t *slots;
+	u32 slot_count;
 	u32 req_frame_count;
 	struct vdec_fs_info mbi;
 	struct vdec_fs_info dcp;
@@ -289,6 +299,63 @@ static int vdec_ctrl_init(struct vpu_inst *inst)
 	return 0;
 }
 
+static void vdec_attach_frame_store(struct vpu_inst *inst, struct vb2_buffer *vb)
+{
+	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+	struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf);
+	struct vdec_t *vdec = inst->priv;
+	struct vdec_frame_store_t *new_slots = NULL;
+	dma_addr_t addr;
+	int i;
+
+	addr = vpu_get_vb_phy_addr(vb, 0);
+	for (i = 0; i < vdec->slot_count; i++) {
+		if (addr == vdec->slots[i].addr) {
+			if (vdec->slots[i].curr && vdec->slots[i].curr != vpu_buf) {
+				vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED);
+				vdec->slots[i].pend = vpu_buf;
+			} else {
+				vpu_set_buffer_state(vbuf, vdec->slots[i].state);
+			}
+			vpu_buf->fs_id = i;
+			return;
+		}
+	}
+
+	for (i = 0; i < vdec->slot_count; i++) {
+		if (!vdec->slots[i].addr) {
+			vdec->slots[i].addr = addr;
+			vpu_buf->fs_id = i;
+			return;
+		}
+	}
+
+	new_slots = krealloc_array(vdec->slots, vdec->slot_count * 2,
+				   sizeof(*vdec->slots),
+				   GFP_KERNEL | __GFP_ZERO);
+	if (!new_slots) {
+		vpu_set_buffer_state(vbuf, VPU_BUF_STATE_ERROR);
+		return;
+	}
+
+	vdec->slots = new_slots;
+	vdec->slot_count *= 2;
+
+	vdec->slots[i].addr = addr;
+	vpu_buf->fs_id = i;
+}
+
+static void vdec_reset_frame_store(struct vpu_inst *inst)
+{
+	struct vdec_t *vdec = inst->priv;
+
+	if (!vdec->slots || !vdec->slot_count)
+		return;
+
+	vpu_trace(inst->dev, "inst[%d] reset slots\n", inst->id);
+	memset(vdec->slots, 0, sizeof(*vdec->slots) * vdec->slot_count);
+}
+
 static void vdec_handle_resolution_change(struct vpu_inst *inst)
 {
 	struct vdec_t *vdec = inst->priv;
@@ -750,11 +817,11 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg)
 	struct vb2_v4l2_buffer *src_buf;
 	int ret = 0;
 
-	if (!info || info->id >= ARRAY_SIZE(vdec->slots))
+	if (!info || info->id >= vdec->slot_count)
 		return -EINVAL;
 
 	vpu_inst_lock(inst);
-	vpu_buf = vdec->slots[info->id];
+	vpu_buf = vdec->slots[info->id].curr;
 	if (!vpu_buf) {
 		dev_err(inst->dev, "[%d] decoded invalid frame[%d]\n", inst->id, info->id);
 		ret = -EINVAL;
@@ -775,11 +842,13 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg)
 	if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED)
 		dev_info(inst->dev, "[%d] buf[%d] has been decoded\n", inst->id, info->id);
 	vpu_set_buffer_state(vbuf, VPU_BUF_STATE_DECODED);
+	vdec->slots[info->id].state = VPU_BUF_STATE_DECODED;
 	vdec->decoded_frame_count++;
 	if (vdec->params.display_delay_enable) {
 		struct vpu_format *cur_fmt;
 
 		cur_fmt = vpu_get_format(inst, inst->cap_format.type);
+		vdec->slots[info->id].state = VPU_BUF_STATE_READY;
 		vpu_set_buffer_state(vbuf, VPU_BUF_STATE_READY);
 		for (int i = 0; i < vbuf->vb2_buf.num_planes; i++)
 			vb2_set_plane_payload(&vbuf->vb2_buf,
@@ -802,11 +871,11 @@ static struct vpu_vb2_buffer *vdec_find_buffer(struct vpu_inst *inst, u32 luma)
 	struct vdec_t *vdec = inst->priv;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) {
-		if (!vdec->slots[i])
+	for (i = 0; i < vdec->slot_count; i++) {
+		if (!vdec->slots[i].curr)
 			continue;
-		if (luma == vdec->slots[i]->luma)
-			return vdec->slots[i];
+		if (luma == vdec->slots[i].addr)
+			return vdec->slots[i].curr;
 	}
 
 	return NULL;
@@ -840,11 +909,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame)
 
 	cur_fmt = vpu_get_format(inst, inst->cap_format.type);
 	vbuf = &vpu_buf->m2m_buf.vb;
-	if (vbuf->vb2_buf.index != frame->id)
-		dev_err(inst->dev, "[%d] buffer id(%d, %d) dismatch\n",
-			inst->id, vbuf->vb2_buf.index, frame->id);
+	if (vpu_buf->fs_id != frame->id)
+		dev_err(inst->dev, "[%d] buffer id(%d(%d), %d) dismatch\n",
+			inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index, frame->id);
 
-	if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_READY && vdec->params.display_delay_enable)
+	if (vdec->params.display_delay_enable)
 		return;
 
 	if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_DECODED)
@@ -857,10 +926,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame)
 	vbuf->sequence = vdec->sequence;
 	dev_dbg(inst->dev, "[%d][OUTPUT TS]%32lld\n", inst->id, vbuf->vb2_buf.timestamp);
 
-	v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE);
 	vpu_inst_lock(inst);
+	vdec->slots[vpu_buf->fs_id].state = VPU_BUF_STATE_READY;
 	vdec->display_frame_count++;
 	vpu_inst_unlock(inst);
+	v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE);
 	dev_dbg(inst->dev, "[%d] decoded : %d, display : %d, sequence : %d\n",
 		inst->id, vdec->decoded_frame_count, vdec->display_frame_count, vdec->sequence);
 }
@@ -1103,18 +1173,30 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb
 	if (!vbuf)
 		return -EINVAL;
 
-	if (vdec->slots[vbuf->vb2_buf.index]) {
-		dev_err(inst->dev, "[%d] repeat alloc fs %d\n",
-			inst->id, vbuf->vb2_buf.index);
+	vpu_buf = to_vpu_vb2_buffer(vbuf);
+	if (vpu_buf->fs_id < 0 || vpu_buf->fs_id >= vdec->slot_count) {
+		dev_err(inst->dev, "invalid fs %d for v4l2 buffer %d\n",
+			vpu_buf->fs_id, vbuf->vb2_buf.index);
 		return -EINVAL;
 	}
 
+	if (vdec->slots[vpu_buf->fs_id].curr) {
+		if (vdec->slots[vpu_buf->fs_id].curr != vpu_buf) {
+			vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED);
+			vdec->slots[vpu_buf->fs_id].pend = vpu_buf;
+		} else {
+			vpu_set_buffer_state(vbuf, vdec->slots[vpu_buf->fs_id].state);
+		}
+		dev_err(inst->dev, "[%d] repeat alloc fs %d (v4l2 index %d)\n",
+			inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index);
+		return -EAGAIN;
+	}
+
 	dev_dbg(inst->dev, "[%d] state = %s, alloc fs %d, tag = 0x%x\n",
 		inst->id, vpu_codec_state_name(inst->state), vbuf->vb2_buf.index, vdec->seq_tag);
-	vpu_buf = to_vpu_vb2_buffer(vbuf);
 
 	memset(&info, 0, sizeof(info));
-	info.id = vbuf->vb2_buf.index;
+	info.id = vpu_buf->fs_id;
 	info.type = MEM_RES_FRAME;
 	info.tag = vdec->seq_tag;
 	info.luma_addr = vpu_get_vb_phy_addr(&vbuf->vb2_buf, 0);
@@ -1129,12 +1211,13 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb
 	if (ret)
 		return ret;
 
-	vpu_buf->tag = info.tag;
 	vpu_buf->luma = info.luma_addr;
 	vpu_buf->chroma_u = info.chroma_addr;
 	vpu_buf->chroma_v = 0;
 	vpu_set_buffer_state(vbuf, VPU_BUF_STATE_INUSE);
-	vdec->slots[info.id] = vpu_buf;
+	vdec->slots[info.id].tag = info.tag;
+	vdec->slots[info.id].curr = vpu_buf;
+	vdec->slots[info.id].state = VPU_BUF_STATE_INUSE;
 	vdec->req_frame_count--;
 
 	return 0;
@@ -1195,25 +1278,47 @@ static void vdec_recycle_buffer(struct vpu_inst *inst, struct vb2_v4l2_buffer *v
 	v4l2_m2m_buf_queue(inst->fh.m2m_ctx, vbuf);
 }
 
-static void vdec_clear_slots(struct vpu_inst *inst)
+static void vdec_release_curr_frame_store(struct vpu_inst *inst, u32 id)
 {
 	struct vdec_t *vdec = inst->priv;
 	struct vpu_vb2_buffer *vpu_buf;
 	struct vb2_v4l2_buffer *vbuf;
+
+	if (id >= vdec->slot_count)
+		return;
+	if (!vdec->slots[id].curr)
+		return;
+
+	vpu_buf = vdec->slots[id].curr;
+	vbuf = &vpu_buf->m2m_buf.vb;
+
+	vdec_response_fs_release(inst, id, vdec->slots[id].tag);
+	if (vpu_buf->fs_id == id) {
+		if (vpu_buf->state != VPU_BUF_STATE_READY)
+			vdec_recycle_buffer(inst, vbuf);
+		vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE);
+	}
+
+	vdec->slots[id].curr = NULL;
+	vdec->slots[id].state = VPU_BUF_STATE_IDLE;
+
+	if (vdec->slots[id].pend) {
+		vpu_set_buffer_state(&vdec->slots[id].pend->m2m_buf.vb, VPU_BUF_STATE_IDLE);
+		vdec->slots[id].pend = NULL;
+	}
+}
+
+static void vdec_clear_slots(struct vpu_inst *inst)
+{
+	struct vdec_t *vdec = inst->priv;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) {
-		if (!vdec->slots[i])
+	for (i = 0; i < vdec->slot_count; i++) {
+		if (!vdec->slots[i].curr)
 			continue;
 
-		vpu_buf = vdec->slots[i];
-		vbuf = &vpu_buf->m2m_buf.vb;
-
 		vpu_trace(inst->dev, "clear slot %d\n", i);
-		vdec_response_fs_release(inst, i, vpu_buf->tag);
-		vdec_recycle_buffer(inst, vbuf);
-		vdec->slots[i]->state = VPU_BUF_STATE_IDLE;
-		vdec->slots[i] = NULL;
+		vdec_release_curr_frame_store(inst, i);
 	}
 }
 
@@ -1354,39 +1459,29 @@ static void vdec_event_req_fs(struct vpu_inst *inst, struct vpu_fs_info *fs)
 static void vdec_evnet_rel_fs(struct vpu_inst *inst, struct vpu_fs_info *fs)
 {
 	struct vdec_t *vdec = inst->priv;
-	struct vpu_vb2_buffer *vpu_buf;
-	struct vb2_v4l2_buffer *vbuf;
 
-	if (!fs || fs->id >= ARRAY_SIZE(vdec->slots))
+	if (!fs || fs->id >= vdec->slot_count)
 		return;
 	if (fs->type != MEM_RES_FRAME)
 		return;
 
-	if (fs->id >= vpu_get_num_buffers(inst, inst->cap_format.type)) {
+	if (fs->id >= vdec->slot_count) {
 		dev_err(inst->dev, "[%d] invalid fs(%d) to release\n", inst->id, fs->id);
 		return;
 	}
 
 	vpu_inst_lock(inst);
-	vpu_buf = vdec->slots[fs->id];
-	vdec->slots[fs->id] = NULL;
-
-	if (!vpu_buf) {
+	if (!vdec->slots[fs->id].curr) {
 		dev_dbg(inst->dev, "[%d] fs[%d] has bee released\n", inst->id, fs->id);
 		goto exit;
 	}
 
-	vbuf = &vpu_buf->m2m_buf.vb;
-	if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) {
+	if (vdec->slots[fs->id].state == VPU_BUF_STATE_DECODED) {
 		dev_dbg(inst->dev, "[%d] frame skip\n", inst->id);
 		vdec->sequence++;
 	}
 
-	vdec_response_fs_release(inst, fs->id, vpu_buf->tag);
-	if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_READY)
-		vdec_recycle_buffer(inst, vbuf);
-
-	vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE);
+	vdec_release_curr_frame_store(inst, fs->id);
 	vpu_process_capture_buffer(inst);
 
 exit:
@@ -1582,6 +1677,11 @@ static void vdec_cleanup(struct vpu_inst *inst)
 		return;
 
 	vdec = inst->priv;
+	if (vdec) {
+		kfree(vdec->slots);
+		vdec->slots = NULL;
+		vdec->slot_count = 0;
+	}
 	vfree(vdec);
 	inst->priv = NULL;
 	vfree(inst);
@@ -1713,11 +1813,43 @@ static int vdec_stop_session(struct vpu_inst *inst, u32 type)
 	return 0;
 }
 
-static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i)
+static int vdec_get_slot_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i)
 {
 	struct vdec_t *vdec = inst->priv;
+	struct vpu_vb2_buffer *vpu_buf;
 	int num = -1;
 
+	vpu_inst_lock(inst);
+	if (i >= vdec->slot_count || !vdec->slots[i].addr)
+		goto exit;
+
+	vpu_buf = vdec->slots[i].curr;
+
+	num = scnprintf(str, size, "slot[%2d] :", i);
+	if (vpu_buf) {
+		num += scnprintf(str + num, size - num, " %2d",
+				 vpu_buf->m2m_buf.vb.vb2_buf.index);
+		num += scnprintf(str + num, size - num, "; state = %d", vdec->slots[i].state);
+	} else {
+		num += scnprintf(str + num, size - num, " -1");
+	}
+
+	if (vdec->slots[i].pend)
+		num += scnprintf(str + num, size - num, "; %d",
+				 vdec->slots[i].pend->m2m_buf.vb.vb2_buf.index);
+
+	num += scnprintf(str + num, size - num, "\n");
+exit:
+	vpu_inst_unlock(inst);
+
+	return num;
+}
+
+static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i)
+{
+	struct vdec_t *vdec = inst->priv;
+	int num;
+
 	switch (i) {
 	case 0:
 		num = scnprintf(str, size,
@@ -1771,6 +1903,7 @@ static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i
 				vdec->codec_info.vui_present);
 		break;
 	default:
+		num = vdec_get_slot_debug_info(inst, str, size, i - 10);
 		break;
 	}
 
@@ -1794,6 +1927,8 @@ static struct vpu_inst_ops vdec_inst_ops = {
 	.get_debug_info = vdec_get_debug_info,
 	.wait_prepare = vpu_inst_unlock,
 	.wait_finish = vpu_inst_lock,
+	.attach_frame_store = vdec_attach_frame_store,
+	.reset_frame_store = vdec_reset_frame_store,
 };
 
 static void vdec_init(struct file *file)
@@ -1834,6 +1969,16 @@ static int vdec_open(struct file *file)
 		return -ENOMEM;
 	}
 
+	vdec->slots = kmalloc_array(VDEC_SLOT_CNT_DFT,
+				    sizeof(*vdec->slots),
+				    GFP_KERNEL | __GFP_ZERO);
+	if (!vdec->slots) {
+		vfree(vdec);
+		vfree(inst);
+		return -ENOMEM;
+	}
+	vdec->slot_count = VDEC_SLOT_CNT_DFT;
+
 	inst->ops = &vdec_inst_ops;
 	inst->formats = vdec_formats;
 	inst->type = VPU_CORE_TYPE_DEC;
diff --git a/drivers/media/platform/amphion/vpu.h b/drivers/media/platform/amphion/vpu.h
index 978971712742..d8100da160d1 100644
--- a/drivers/media/platform/amphion/vpu.h
+++ b/drivers/media/platform/amphion/vpu.h
@@ -223,6 +223,8 @@ struct vpu_inst_ops {
 	int (*get_debug_info)(struct vpu_inst *inst, char *str, u32 size, u32 i);
 	void (*wait_prepare)(struct vpu_inst *inst);
 	void (*wait_finish)(struct vpu_inst *inst);
+	void (*attach_frame_store)(struct vpu_inst *inst, struct vb2_buffer *vb);
+	void (*reset_frame_store)(struct vpu_inst *inst);
 };
 
 struct vpu_inst {
@@ -297,7 +299,8 @@ enum {
 	VPU_BUF_STATE_DECODED,
 	VPU_BUF_STATE_READY,
 	VPU_BUF_STATE_SKIP,
-	VPU_BUF_STATE_ERROR
+	VPU_BUF_STATE_ERROR,
+	VPU_BUF_STATE_CHANGED
 };
 
 struct vpu_vb2_buffer {
@@ -306,8 +309,8 @@ struct vpu_vb2_buffer {
 	dma_addr_t chroma_u;
 	dma_addr_t chroma_v;
 	unsigned int state;
-	u32 tag;
 	u32 average_qp;
+	s32 fs_id;
 };
 
 void vpu_writel(struct vpu_dev *vpu, u32 reg, u32 val);
diff --git a/drivers/media/platform/amphion/vpu_dbg.c b/drivers/media/platform/amphion/vpu_dbg.c
index 940e5bda5fa3..497ae4e8a229 100644
--- a/drivers/media/platform/amphion/vpu_dbg.c
+++ b/drivers/media/platform/amphion/vpu_dbg.c
@@ -48,6 +48,7 @@ static char *vpu_stat_name[] = {
 	[VPU_BUF_STATE_READY] = "ready",
 	[VPU_BUF_STATE_SKIP] = "skip",
 	[VPU_BUF_STATE_ERROR] = "error",
+	[VPU_BUF_STATE_CHANGED] = "changed",
 };
 
 static inline const char *to_vpu_stat_name(int state)
@@ -164,6 +165,7 @@ static int vpu_dbg_instance(struct seq_file *s, void *data)
 	for (i = 0; i < vb2_get_num_buffers(vq); i++) {
 		struct vb2_buffer *vb;
 		struct vb2_v4l2_buffer *vbuf;
+		struct vpu_vb2_buffer *vpu_buf;
 
 		vb = vb2_get_buffer(vq, i);
 		if (!vb)
@@ -173,13 +175,24 @@ static int vpu_dbg_instance(struct seq_file *s, void *data)
 			continue;
 
 		vbuf = to_vb2_v4l2_buffer(vb);
+		vpu_buf = to_vpu_vb2_buffer(vbuf);
 
 		num = scnprintf(str, sizeof(str),
-				"capture[%2d] state = %10s, %8s\n",
+				"capture[%2d] state = %10s, %8s",
 				i, vb2_stat_name[vb->state],
 				to_vpu_stat_name(vpu_get_buffer_state(vbuf)));
 		if (seq_write(s, str, num))
 			return 0;
+
+		if (vpu_buf->fs_id >= 0) {
+			num = scnprintf(str, sizeof(str), "; fs %d", vpu_buf->fs_id);
+			if (seq_write(s, str, num))
+				return 0;
+		}
+
+		num = scnprintf(str, sizeof(str), "\n");
+		if (seq_write(s, str, num))
+			return 0;
 	}
 
 	num = scnprintf(str, sizeof(str), "sequence = %d\n", inst->sequence);
diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c
index 50aeb69d2c66..37ef706c29dd 100644
--- a/drivers/media/platform/amphion/vpu_v4l2.c
+++ b/drivers/media/platform/amphion/vpu_v4l2.c
@@ -500,14 +500,25 @@ static int vpu_vb2_queue_setup(struct vb2_queue *vq,
 		call_void_vop(inst, release);
 	}
 
+	if (V4L2_TYPE_IS_CAPTURE(vq->type))
+		call_void_vop(inst, reset_frame_store);
+
 	return 0;
 }
 
 static int vpu_vb2_buf_init(struct vb2_buffer *vb)
 {
 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+	struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf);
+	struct vpu_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
 
+	vpu_buf->fs_id = -1;
 	vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE);
+
+	if (!inst->ops->attach_frame_store || V4L2_TYPE_IS_OUTPUT(vb->type))
+		return 0;
+
+	call_void_vop(inst, attach_frame_store, vb);
 	return 0;
 }
 
-- 
2.43.0-rc1
Re: [PATCH v5] media: amphion: Support dmabuf and v4l2 buffer without binding
Posted by Nicolas Dufresne 9 months, 2 weeks ago
Hi,

Le lundi 20 janvier 2025 à 17:21 +0900, Ming Qian a écrit :
> When using VB2_DMABUF, the relationship between dma-buf and v4l2 buffer
> may not one-to-one, a single dma-buf may be queued via different
> v4l2 buffers, and different dma-bufs may be queued via the same
> v4l2 buffer, so it's not appropriate to use the v4l2 buffer index
> as the frame store id.
> 
> We can generate a frame store id according to the dma address.
> Then for a given dma-buf, the id is fixed.
> 
> Driver now manages the frame store and vb2-buffer states independently.
> 
> When a dmabuf is queued via another v4l2 buffer before the buffer is
> released by firmware, need to pend it until firmware release it.
> 
> Signed-off-by: Ming Qian <ming.qian@oss.nxp.com>

I believe this patch is ready, but unfortunately it no longer applies.
Could you rebase against media-committers/next and resubmit ? Consider
including a base commit in the future; sometimes that allows avoiding
a rebase.

Nicolas

> ---
> v5
> - Avoid dynamic size calculation in memory allocation, use kmalloc_array
>   instead
> v4
> - remove unnecessary 'out of memory' message
> v3
> -- fix a typo in NULL pointer check
> v2
> -- fix an uninitialized issue reported by media-ci
> 
>  drivers/media/platform/amphion/vdec.c     | 235 +++++++++++++++++-----
>  drivers/media/platform/amphion/vpu.h      |   7 +-
>  drivers/media/platform/amphion/vpu_dbg.c  |  15 +-
>  drivers/media/platform/amphion/vpu_v4l2.c |  11 +
>  4 files changed, 220 insertions(+), 48 deletions(-)
> 
> diff --git a/drivers/media/platform/amphion/vdec.c b/drivers/media/platform/amphion/vdec.c
> index b3bc9eb16d6a..f4979d537b97 100644
> --- a/drivers/media/platform/amphion/vdec.c
> +++ b/drivers/media/platform/amphion/vdec.c
> @@ -26,6 +26,7 @@
>  #include "vpu_cmds.h"
>  #include "vpu_rpc.h"
>  
> +#define VDEC_SLOT_CNT_DFT		32
>  #define VDEC_MIN_BUFFER_CAP		8
>  #define VDEC_MIN_BUFFER_OUT		8
>  
> @@ -41,6 +42,14 @@ struct vdec_fs_info {
>  	u32 tag;
>  };
>  
> +struct vdec_frame_store_t {
> +	struct vpu_vb2_buffer *curr;
> +	struct vpu_vb2_buffer *pend;
> +	dma_addr_t addr;
> +	unsigned int state;
> +	u32 tag;
> +};
> +
>  struct vdec_t {
>  	u32 seq_hdr_found;
>  	struct vpu_buffer udata;
> @@ -48,7 +57,8 @@ struct vdec_t {
>  	struct vpu_dec_codec_info codec_info;
>  	enum vpu_codec_state state;
>  
> -	struct vpu_vb2_buffer *slots[VB2_MAX_FRAME];
> +	struct vdec_frame_store_t *slots;
> +	u32 slot_count;
>  	u32 req_frame_count;
>  	struct vdec_fs_info mbi;
>  	struct vdec_fs_info dcp;
> @@ -289,6 +299,63 @@ static int vdec_ctrl_init(struct vpu_inst *inst)
>  	return 0;
>  }
>  
> +static void vdec_attach_frame_store(struct vpu_inst *inst, struct vb2_buffer *vb)
> +{
> +	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> +	struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf);
> +	struct vdec_t *vdec = inst->priv;
> +	struct vdec_frame_store_t *new_slots = NULL;
> +	dma_addr_t addr;
> +	int i;
> +
> +	addr = vpu_get_vb_phy_addr(vb, 0);
> +	for (i = 0; i < vdec->slot_count; i++) {
> +		if (addr == vdec->slots[i].addr) {
> +			if (vdec->slots[i].curr && vdec->slots[i].curr != vpu_buf) {
> +				vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED);
> +				vdec->slots[i].pend = vpu_buf;
> +			} else {
> +				vpu_set_buffer_state(vbuf, vdec->slots[i].state);
> +			}
> +			vpu_buf->fs_id = i;
> +			return;
> +		}
> +	}
> +
> +	for (i = 0; i < vdec->slot_count; i++) {
> +		if (!vdec->slots[i].addr) {
> +			vdec->slots[i].addr = addr;
> +			vpu_buf->fs_id = i;
> +			return;
> +		}
> +	}
> +
> +	new_slots = krealloc_array(vdec->slots, vdec->slot_count * 2,
> +				   sizeof(*vdec->slots),
> +				   GFP_KERNEL | __GFP_ZERO);
> +	if (!new_slots) {
> +		vpu_set_buffer_state(vbuf, VPU_BUF_STATE_ERROR);
> +		return;
> +	}
> +
> +	vdec->slots = new_slots;
> +	vdec->slot_count *= 2;
> +
> +	vdec->slots[i].addr = addr;
> +	vpu_buf->fs_id = i;
> +}
> +
> +static void vdec_reset_frame_store(struct vpu_inst *inst)
> +{
> +	struct vdec_t *vdec = inst->priv;
> +
> +	if (!vdec->slots || !vdec->slot_count)
> +		return;
> +
> +	vpu_trace(inst->dev, "inst[%d] reset slots\n", inst->id);
> +	memset(vdec->slots, 0, sizeof(*vdec->slots) * vdec->slot_count);
> +}
> +
>  static void vdec_handle_resolution_change(struct vpu_inst *inst)
>  {
>  	struct vdec_t *vdec = inst->priv;
> @@ -750,11 +817,11 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg)
>  	struct vb2_v4l2_buffer *src_buf;
>  	int ret = 0;
>  
> -	if (!info || info->id >= ARRAY_SIZE(vdec->slots))
> +	if (!info || info->id >= vdec->slot_count)
>  		return -EINVAL;
>  
>  	vpu_inst_lock(inst);
> -	vpu_buf = vdec->slots[info->id];
> +	vpu_buf = vdec->slots[info->id].curr;
>  	if (!vpu_buf) {
>  		dev_err(inst->dev, "[%d] decoded invalid frame[%d]\n", inst->id, info->id);
>  		ret = -EINVAL;
> @@ -775,11 +842,13 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg)
>  	if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED)
>  		dev_info(inst->dev, "[%d] buf[%d] has been decoded\n", inst->id, info->id);
>  	vpu_set_buffer_state(vbuf, VPU_BUF_STATE_DECODED);
> +	vdec->slots[info->id].state = VPU_BUF_STATE_DECODED;
>  	vdec->decoded_frame_count++;
>  	if (vdec->params.display_delay_enable) {
>  		struct vpu_format *cur_fmt;
>  
>  		cur_fmt = vpu_get_format(inst, inst->cap_format.type);
> +		vdec->slots[info->id].state = VPU_BUF_STATE_READY;
>  		vpu_set_buffer_state(vbuf, VPU_BUF_STATE_READY);
>  		for (int i = 0; i < vbuf->vb2_buf.num_planes; i++)
>  			vb2_set_plane_payload(&vbuf->vb2_buf,
> @@ -802,11 +871,11 @@ static struct vpu_vb2_buffer *vdec_find_buffer(struct vpu_inst *inst, u32 luma)
>  	struct vdec_t *vdec = inst->priv;
>  	int i;
>  
> -	for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) {
> -		if (!vdec->slots[i])
> +	for (i = 0; i < vdec->slot_count; i++) {
> +		if (!vdec->slots[i].curr)
>  			continue;
> -		if (luma == vdec->slots[i]->luma)
> -			return vdec->slots[i];
> +		if (luma == vdec->slots[i].addr)
> +			return vdec->slots[i].curr;
>  	}
>  
>  	return NULL;
> @@ -840,11 +909,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame)
>  
>  	cur_fmt = vpu_get_format(inst, inst->cap_format.type);
>  	vbuf = &vpu_buf->m2m_buf.vb;
> -	if (vbuf->vb2_buf.index != frame->id)
> -		dev_err(inst->dev, "[%d] buffer id(%d, %d) dismatch\n",
> -			inst->id, vbuf->vb2_buf.index, frame->id);
> +	if (vpu_buf->fs_id != frame->id)
> +		dev_err(inst->dev, "[%d] buffer id(%d(%d), %d) dismatch\n",
> +			inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index, frame->id);
>  
> -	if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_READY && vdec->params.display_delay_enable)
> +	if (vdec->params.display_delay_enable)
>  		return;
>  
>  	if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_DECODED)
> @@ -857,10 +926,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame)
>  	vbuf->sequence = vdec->sequence;
>  	dev_dbg(inst->dev, "[%d][OUTPUT TS]%32lld\n", inst->id, vbuf->vb2_buf.timestamp);
>  
> -	v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE);
>  	vpu_inst_lock(inst);
> +	vdec->slots[vpu_buf->fs_id].state = VPU_BUF_STATE_READY;
>  	vdec->display_frame_count++;
>  	vpu_inst_unlock(inst);
> +	v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE);
>  	dev_dbg(inst->dev, "[%d] decoded : %d, display : %d, sequence : %d\n",
>  		inst->id, vdec->decoded_frame_count, vdec->display_frame_count, vdec->sequence);
>  }
> @@ -1103,18 +1173,30 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb
>  	if (!vbuf)
>  		return -EINVAL;
>  
> -	if (vdec->slots[vbuf->vb2_buf.index]) {
> -		dev_err(inst->dev, "[%d] repeat alloc fs %d\n",
> -			inst->id, vbuf->vb2_buf.index);
> +	vpu_buf = to_vpu_vb2_buffer(vbuf);
> +	if (vpu_buf->fs_id < 0 || vpu_buf->fs_id >= vdec->slot_count) {
> +		dev_err(inst->dev, "invalid fs %d for v4l2 buffer %d\n",
> +			vpu_buf->fs_id, vbuf->vb2_buf.index);
>  		return -EINVAL;
>  	}
>  
> +	if (vdec->slots[vpu_buf->fs_id].curr) {
> +		if (vdec->slots[vpu_buf->fs_id].curr != vpu_buf) {
> +			vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED);
> +			vdec->slots[vpu_buf->fs_id].pend = vpu_buf;
> +		} else {
> +			vpu_set_buffer_state(vbuf, vdec->slots[vpu_buf->fs_id].state);
> +		}
> +		dev_err(inst->dev, "[%d] repeat alloc fs %d (v4l2 index %d)\n",
> +			inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index);
> +		return -EAGAIN;
> +	}
> +
>  	dev_dbg(inst->dev, "[%d] state = %s, alloc fs %d, tag = 0x%x\n",
>  		inst->id, vpu_codec_state_name(inst->state), vbuf->vb2_buf.index, vdec->seq_tag);
> -	vpu_buf = to_vpu_vb2_buffer(vbuf);
>  
>  	memset(&info, 0, sizeof(info));
> -	info.id = vbuf->vb2_buf.index;
> +	info.id = vpu_buf->fs_id;
>  	info.type = MEM_RES_FRAME;
>  	info.tag = vdec->seq_tag;
>  	info.luma_addr = vpu_get_vb_phy_addr(&vbuf->vb2_buf, 0);
> @@ -1129,12 +1211,13 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb
>  	if (ret)
>  		return ret;
>  
> -	vpu_buf->tag = info.tag;
>  	vpu_buf->luma = info.luma_addr;
>  	vpu_buf->chroma_u = info.chroma_addr;
>  	vpu_buf->chroma_v = 0;
>  	vpu_set_buffer_state(vbuf, VPU_BUF_STATE_INUSE);
> -	vdec->slots[info.id] = vpu_buf;
> +	vdec->slots[info.id].tag = info.tag;
> +	vdec->slots[info.id].curr = vpu_buf;
> +	vdec->slots[info.id].state = VPU_BUF_STATE_INUSE;
>  	vdec->req_frame_count--;
>  
>  	return 0;
> @@ -1195,25 +1278,47 @@ static void vdec_recycle_buffer(struct vpu_inst *inst, struct vb2_v4l2_buffer *v
>  	v4l2_m2m_buf_queue(inst->fh.m2m_ctx, vbuf);
>  }
>  
> -static void vdec_clear_slots(struct vpu_inst *inst)
> +static void vdec_release_curr_frame_store(struct vpu_inst *inst, u32 id)
>  {
>  	struct vdec_t *vdec = inst->priv;
>  	struct vpu_vb2_buffer *vpu_buf;
>  	struct vb2_v4l2_buffer *vbuf;
> +
> +	if (id >= vdec->slot_count)
> +		return;
> +	if (!vdec->slots[id].curr)
> +		return;
> +
> +	vpu_buf = vdec->slots[id].curr;
> +	vbuf = &vpu_buf->m2m_buf.vb;
> +
> +	vdec_response_fs_release(inst, id, vdec->slots[id].tag);
> +	if (vpu_buf->fs_id == id) {
> +		if (vpu_buf->state != VPU_BUF_STATE_READY)
> +			vdec_recycle_buffer(inst, vbuf);
> +		vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE);
> +	}
> +
> +	vdec->slots[id].curr = NULL;
> +	vdec->slots[id].state = VPU_BUF_STATE_IDLE;
> +
> +	if (vdec->slots[id].pend) {
> +		vpu_set_buffer_state(&vdec->slots[id].pend->m2m_buf.vb, VPU_BUF_STATE_IDLE);
> +		vdec->slots[id].pend = NULL;
> +	}
> +}
> +
> +static void vdec_clear_slots(struct vpu_inst *inst)
> +{
> +	struct vdec_t *vdec = inst->priv;
>  	int i;
>  
> -	for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) {
> -		if (!vdec->slots[i])
> +	for (i = 0; i < vdec->slot_count; i++) {
> +		if (!vdec->slots[i].curr)
>  			continue;
>  
> -		vpu_buf = vdec->slots[i];
> -		vbuf = &vpu_buf->m2m_buf.vb;
> -
>  		vpu_trace(inst->dev, "clear slot %d\n", i);
> -		vdec_response_fs_release(inst, i, vpu_buf->tag);
> -		vdec_recycle_buffer(inst, vbuf);
> -		vdec->slots[i]->state = VPU_BUF_STATE_IDLE;
> -		vdec->slots[i] = NULL;
> +		vdec_release_curr_frame_store(inst, i);
>  	}
>  }
>  
> @@ -1354,39 +1459,29 @@ static void vdec_event_req_fs(struct vpu_inst *inst, struct vpu_fs_info *fs)
>  static void vdec_evnet_rel_fs(struct vpu_inst *inst, struct vpu_fs_info *fs)
>  {
>  	struct vdec_t *vdec = inst->priv;
> -	struct vpu_vb2_buffer *vpu_buf;
> -	struct vb2_v4l2_buffer *vbuf;
>  
> -	if (!fs || fs->id >= ARRAY_SIZE(vdec->slots))
> +	if (!fs || fs->id >= vdec->slot_count)
>  		return;
>  	if (fs->type != MEM_RES_FRAME)
>  		return;
>  
> -	if (fs->id >= vpu_get_num_buffers(inst, inst->cap_format.type)) {
> +	if (fs->id >= vdec->slot_count) {
>  		dev_err(inst->dev, "[%d] invalid fs(%d) to release\n", inst->id, fs->id);
>  		return;
>  	}
>  
>  	vpu_inst_lock(inst);
> -	vpu_buf = vdec->slots[fs->id];
> -	vdec->slots[fs->id] = NULL;
> -
> -	if (!vpu_buf) {
> +	if (!vdec->slots[fs->id].curr) {
>  		dev_dbg(inst->dev, "[%d] fs[%d] has bee released\n", inst->id, fs->id);
>  		goto exit;
>  	}
>  
> -	vbuf = &vpu_buf->m2m_buf.vb;
> -	if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) {
> +	if (vdec->slots[fs->id].state == VPU_BUF_STATE_DECODED) {
>  		dev_dbg(inst->dev, "[%d] frame skip\n", inst->id);
>  		vdec->sequence++;
>  	}
>  
> -	vdec_response_fs_release(inst, fs->id, vpu_buf->tag);
> -	if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_READY)
> -		vdec_recycle_buffer(inst, vbuf);
> -
> -	vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE);
> +	vdec_release_curr_frame_store(inst, fs->id);
>  	vpu_process_capture_buffer(inst);
>  
>  exit:
> @@ -1582,6 +1677,11 @@ static void vdec_cleanup(struct vpu_inst *inst)
>  		return;
>  
>  	vdec = inst->priv;
> +	if (vdec) {
> +		kfree(vdec->slots);
> +		vdec->slots = NULL;
> +		vdec->slot_count = 0;
> +	}
>  	vfree(vdec);
>  	inst->priv = NULL;
>  	vfree(inst);
> @@ -1713,11 +1813,43 @@ static int vdec_stop_session(struct vpu_inst *inst, u32 type)
>  	return 0;
>  }
>  
> -static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i)
> +static int vdec_get_slot_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i)
>  {
>  	struct vdec_t *vdec = inst->priv;
> +	struct vpu_vb2_buffer *vpu_buf;
>  	int num = -1;
>  
> +	vpu_inst_lock(inst);
> +	if (i >= vdec->slot_count || !vdec->slots[i].addr)
> +		goto exit;
> +
> +	vpu_buf = vdec->slots[i].curr;
> +
> +	num = scnprintf(str, size, "slot[%2d] :", i);
> +	if (vpu_buf) {
> +		num += scnprintf(str + num, size - num, " %2d",
> +				 vpu_buf->m2m_buf.vb.vb2_buf.index);
> +		num += scnprintf(str + num, size - num, "; state = %d", vdec->slots[i].state);
> +	} else {
> +		num += scnprintf(str + num, size - num, " -1");
> +	}
> +
> +	if (vdec->slots[i].pend)
> +		num += scnprintf(str + num, size - num, "; %d",
> +				 vdec->slots[i].pend->m2m_buf.vb.vb2_buf.index);
> +
> +	num += scnprintf(str + num, size - num, "\n");
> +exit:
> +	vpu_inst_unlock(inst);
> +
> +	return num;
> +}
> +
> +static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i)
> +{
> +	struct vdec_t *vdec = inst->priv;
> +	int num;
> +
>  	switch (i) {
>  	case 0:
>  		num = scnprintf(str, size,
> @@ -1771,6 +1903,7 @@ static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i
>  				vdec->codec_info.vui_present);
>  		break;
>  	default:
> +		num = vdec_get_slot_debug_info(inst, str, size, i - 10);
>  		break;
>  	}
>  
> @@ -1794,6 +1927,8 @@ static struct vpu_inst_ops vdec_inst_ops = {
>  	.get_debug_info = vdec_get_debug_info,
>  	.wait_prepare = vpu_inst_unlock,
>  	.wait_finish = vpu_inst_lock,
> +	.attach_frame_store = vdec_attach_frame_store,
> +	.reset_frame_store = vdec_reset_frame_store,
>  };
>  
>  static void vdec_init(struct file *file)
> @@ -1834,6 +1969,16 @@ static int vdec_open(struct file *file)
>  		return -ENOMEM;
>  	}
>  
> +	vdec->slots = kmalloc_array(VDEC_SLOT_CNT_DFT,
> +				    sizeof(*vdec->slots),
> +				    GFP_KERNEL | __GFP_ZERO);
> +	if (!vdec->slots) {
> +		vfree(vdec);
> +		vfree(inst);
> +		return -ENOMEM;
> +	}
> +	vdec->slot_count = VDEC_SLOT_CNT_DFT;
> +
>  	inst->ops = &vdec_inst_ops;
>  	inst->formats = vdec_formats;
>  	inst->type = VPU_CORE_TYPE_DEC;
> diff --git a/drivers/media/platform/amphion/vpu.h b/drivers/media/platform/amphion/vpu.h
> index 978971712742..d8100da160d1 100644
> --- a/drivers/media/platform/amphion/vpu.h
> +++ b/drivers/media/platform/amphion/vpu.h
> @@ -223,6 +223,8 @@ struct vpu_inst_ops {
>  	int (*get_debug_info)(struct vpu_inst *inst, char *str, u32 size, u32 i);
>  	void (*wait_prepare)(struct vpu_inst *inst);
>  	void (*wait_finish)(struct vpu_inst *inst);
> +	void (*attach_frame_store)(struct vpu_inst *inst, struct vb2_buffer *vb);
> +	void (*reset_frame_store)(struct vpu_inst *inst);
>  };
>  
>  struct vpu_inst {
> @@ -297,7 +299,8 @@ enum {
>  	VPU_BUF_STATE_DECODED,
>  	VPU_BUF_STATE_READY,
>  	VPU_BUF_STATE_SKIP,
> -	VPU_BUF_STATE_ERROR
> +	VPU_BUF_STATE_ERROR,
> +	VPU_BUF_STATE_CHANGED
>  };
>  
>  struct vpu_vb2_buffer {
> @@ -306,8 +309,8 @@ struct vpu_vb2_buffer {
>  	dma_addr_t chroma_u;
>  	dma_addr_t chroma_v;
>  	unsigned int state;
> -	u32 tag;
>  	u32 average_qp;
> +	s32 fs_id;
>  };
>  
>  void vpu_writel(struct vpu_dev *vpu, u32 reg, u32 val);
> diff --git a/drivers/media/platform/amphion/vpu_dbg.c b/drivers/media/platform/amphion/vpu_dbg.c
> index 940e5bda5fa3..497ae4e8a229 100644
> --- a/drivers/media/platform/amphion/vpu_dbg.c
> +++ b/drivers/media/platform/amphion/vpu_dbg.c
> @@ -48,6 +48,7 @@ static char *vpu_stat_name[] = {
>  	[VPU_BUF_STATE_READY] = "ready",
>  	[VPU_BUF_STATE_SKIP] = "skip",
>  	[VPU_BUF_STATE_ERROR] = "error",
> +	[VPU_BUF_STATE_CHANGED] = "changed",
>  };
>  
>  static inline const char *to_vpu_stat_name(int state)
> @@ -164,6 +165,7 @@ static int vpu_dbg_instance(struct seq_file *s, void *data)
>  	for (i = 0; i < vb2_get_num_buffers(vq); i++) {
>  		struct vb2_buffer *vb;
>  		struct vb2_v4l2_buffer *vbuf;
> +		struct vpu_vb2_buffer *vpu_buf;
>  
>  		vb = vb2_get_buffer(vq, i);
>  		if (!vb)
> @@ -173,13 +175,24 @@ static int vpu_dbg_instance(struct seq_file *s, void *data)
>  			continue;
>  
>  		vbuf = to_vb2_v4l2_buffer(vb);
> +		vpu_buf = to_vpu_vb2_buffer(vbuf);
>  
>  		num = scnprintf(str, sizeof(str),
> -				"capture[%2d] state = %10s, %8s\n",
> +				"capture[%2d] state = %10s, %8s",
>  				i, vb2_stat_name[vb->state],
>  				to_vpu_stat_name(vpu_get_buffer_state(vbuf)));
>  		if (seq_write(s, str, num))
>  			return 0;
> +
> +		if (vpu_buf->fs_id >= 0) {
> +			num = scnprintf(str, sizeof(str), "; fs %d", vpu_buf->fs_id);
> +			if (seq_write(s, str, num))
> +				return 0;
> +		}
> +
> +		num = scnprintf(str, sizeof(str), "\n");
> +		if (seq_write(s, str, num))
> +			return 0;
>  	}
>  
>  	num = scnprintf(str, sizeof(str), "sequence = %d\n", inst->sequence);
> diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c
> index 50aeb69d2c66..37ef706c29dd 100644
> --- a/drivers/media/platform/amphion/vpu_v4l2.c
> +++ b/drivers/media/platform/amphion/vpu_v4l2.c
> @@ -500,14 +500,25 @@ static int vpu_vb2_queue_setup(struct vb2_queue *vq,
>  		call_void_vop(inst, release);
>  	}
>  
> +	if (V4L2_TYPE_IS_CAPTURE(vq->type))
> +		call_void_vop(inst, reset_frame_store);
> +
>  	return 0;
>  }
>  
>  static int vpu_vb2_buf_init(struct vb2_buffer *vb)
>  {
>  	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> +	struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf);
> +	struct vpu_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
>  
> +	vpu_buf->fs_id = -1;
>  	vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE);
> +
> +	if (!inst->ops->attach_frame_store || V4L2_TYPE_IS_OUTPUT(vb->type))
> +		return 0;
> +
> +	call_void_vop(inst, attach_frame_store, vb);
>  	return 0;
>  }
>  
Re: [PATCH v5] media: amphion: Support dmabuf and v4l2 buffer without binding
Posted by Nicolas Dufresne 9 months, 2 weeks ago
Le lundi 20 janvier 2025 à 17:21 +0900, Ming Qian a écrit :
> When using VB2_DMABUF, the relationship between dma-buf and v4l2 buffer
> may not be one-to-one: a single dma-buf may be queued via different
> v4l2 buffers, and different dma-bufs may be queued via the same
> v4l2 buffer, so it's not appropriate to use the v4l2 buffer index
> as the frame store id.
> 
> We can generate a frame store id according to the dma address.
> Then for a given dma-buf, the id is fixed.
> 
> Driver now manages the frame store and vb2-buffer states independently.
> 
> When a dmabuf is queued via another v4l2 buffer before the buffer is
> released by firmware, the driver needs to pend it until the firmware releases it.
> 
> Signed-off-by: Ming Qian <ming.qian@oss.nxp.com>
> ---
> v5
> - Avoid dynamic size calculation in memory allocation, use kmalloc_array
>   instead
> v4
> - remove unnecessary 'out of memory' message
> v3
> -- fix a typo in NULL pointer check
> v2
> -- fix an uninitialized issue reported by media-ci
> 
>  drivers/media/platform/amphion/vdec.c     | 235 +++++++++++++++++-----
>  drivers/media/platform/amphion/vpu.h      |   7 +-
>  drivers/media/platform/amphion/vpu_dbg.c  |  15 +-
>  drivers/media/platform/amphion/vpu_v4l2.c |  11 +
>  4 files changed, 220 insertions(+), 48 deletions(-)
> 
> diff --git a/drivers/media/platform/amphion/vdec.c b/drivers/media/platform/amphion/vdec.c
> index b3bc9eb16d6a..f4979d537b97 100644
> --- a/drivers/media/platform/amphion/vdec.c
> +++ b/drivers/media/platform/amphion/vdec.c
> @@ -26,6 +26,7 @@
>  #include "vpu_cmds.h"
>  #include "vpu_rpc.h"
>  
> +#define VDEC_SLOT_CNT_DFT		32
>  #define VDEC_MIN_BUFFER_CAP		8
>  #define VDEC_MIN_BUFFER_OUT		8
>  
> @@ -41,6 +42,14 @@ struct vdec_fs_info {
>  	u32 tag;
>  };
>  
> +struct vdec_frame_store_t {
> +	struct vpu_vb2_buffer *curr;
> +	struct vpu_vb2_buffer *pend;
> +	dma_addr_t addr;
> +	unsigned int state;
> +	u32 tag;
> +};
> +
>  struct vdec_t {
>  	u32 seq_hdr_found;
>  	struct vpu_buffer udata;
> @@ -48,7 +57,8 @@ struct vdec_t {
>  	struct vpu_dec_codec_info codec_info;
>  	enum vpu_codec_state state;
>  
> -	struct vpu_vb2_buffer *slots[VB2_MAX_FRAME];
> +	struct vdec_frame_store_t *slots;
> +	u32 slot_count;
>  	u32 req_frame_count;
>  	struct vdec_fs_info mbi;
>  	struct vdec_fs_info dcp;
> @@ -289,6 +299,63 @@ static int vdec_ctrl_init(struct vpu_inst *inst)
>  	return 0;
>  }
>  
> +static void vdec_attach_frame_store(struct vpu_inst *inst, struct vb2_buffer *vb)
> +{
> +	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> +	struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf);
> +	struct vdec_t *vdec = inst->priv;
> +	struct vdec_frame_store_t *new_slots = NULL;
> +	dma_addr_t addr;
> +	int i;
> +
> +	addr = vpu_get_vb_phy_addr(vb, 0);
> +	for (i = 0; i < vdec->slot_count; i++) {
> +		if (addr == vdec->slots[i].addr) {
> +			if (vdec->slots[i].curr && vdec->slots[i].curr != vpu_buf) {
> +				vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED);
> +				vdec->slots[i].pend = vpu_buf;
> +			} else {
> +				vpu_set_buffer_state(vbuf, vdec->slots[i].state);
> +			}
> +			vpu_buf->fs_id = i;
> +			return;
> +		}
> +	}
> +
> +	for (i = 0; i < vdec->slot_count; i++) {
> +		if (!vdec->slots[i].addr) {
> +			vdec->slots[i].addr = addr;
> +			vpu_buf->fs_id = i;
> +			return;
> +		}
> +	}
> +
> +	new_slots = krealloc_array(vdec->slots, vdec->slot_count * 2,
> +				   sizeof(*vdec->slots),
> +				   GFP_KERNEL | __GFP_ZERO);
> +	if (!new_slots) {
> +		vpu_set_buffer_state(vbuf, VPU_BUF_STATE_ERROR);
> +		return;
> +	}
> +
> +	vdec->slots = new_slots;
> +	vdec->slot_count *= 2;
> +
> +	vdec->slots[i].addr = addr;
> +	vpu_buf->fs_id = i;
> +}
> +
> +static void vdec_reset_frame_store(struct vpu_inst *inst)
> +{
> +	struct vdec_t *vdec = inst->priv;
> +
> +	if (!vdec->slots || !vdec->slot_count)
> +		return;
> +
> +	vpu_trace(inst->dev, "inst[%d] reset slots\n", inst->id);
> +	memset(vdec->slots, 0, sizeof(*vdec->slots) * vdec->slot_count);
> +}
> +
>  static void vdec_handle_resolution_change(struct vpu_inst *inst)
>  {
>  	struct vdec_t *vdec = inst->priv;
> @@ -750,11 +817,11 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg)
>  	struct vb2_v4l2_buffer *src_buf;
>  	int ret = 0;
>  
> -	if (!info || info->id >= ARRAY_SIZE(vdec->slots))
> +	if (!info || info->id >= vdec->slot_count)
>  		return -EINVAL;
>  
>  	vpu_inst_lock(inst);
> -	vpu_buf = vdec->slots[info->id];
> +	vpu_buf = vdec->slots[info->id].curr;
>  	if (!vpu_buf) {
>  		dev_err(inst->dev, "[%d] decoded invalid frame[%d]\n", inst->id, info->id);
>  		ret = -EINVAL;
> @@ -775,11 +842,13 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg)
>  	if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED)
>  		dev_info(inst->dev, "[%d] buf[%d] has been decoded\n", inst->id, info->id);
>  	vpu_set_buffer_state(vbuf, VPU_BUF_STATE_DECODED);
> +	vdec->slots[info->id].state = VPU_BUF_STATE_DECODED;
>  	vdec->decoded_frame_count++;
>  	if (vdec->params.display_delay_enable) {
>  		struct vpu_format *cur_fmt;
>  
>  		cur_fmt = vpu_get_format(inst, inst->cap_format.type);
> +		vdec->slots[info->id].state = VPU_BUF_STATE_READY;
>  		vpu_set_buffer_state(vbuf, VPU_BUF_STATE_READY);
>  		for (int i = 0; i < vbuf->vb2_buf.num_planes; i++)
>  			vb2_set_plane_payload(&vbuf->vb2_buf,
> @@ -802,11 +871,11 @@ static struct vpu_vb2_buffer *vdec_find_buffer(struct vpu_inst *inst, u32 luma)
>  	struct vdec_t *vdec = inst->priv;
>  	int i;
>  
> -	for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) {
> -		if (!vdec->slots[i])
> +	for (i = 0; i < vdec->slot_count; i++) {
> +		if (!vdec->slots[i].curr)
>  			continue;
> -		if (luma == vdec->slots[i]->luma)
> -			return vdec->slots[i];
> +		if (luma == vdec->slots[i].addr)
> +			return vdec->slots[i].curr;
>  	}
>  
>  	return NULL;
> @@ -840,11 +909,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame)
>  
>  	cur_fmt = vpu_get_format(inst, inst->cap_format.type);
>  	vbuf = &vpu_buf->m2m_buf.vb;
> -	if (vbuf->vb2_buf.index != frame->id)
> -		dev_err(inst->dev, "[%d] buffer id(%d, %d) dismatch\n",
> -			inst->id, vbuf->vb2_buf.index, frame->id);
> +	if (vpu_buf->fs_id != frame->id)
> +		dev_err(inst->dev, "[%d] buffer id(%d(%d), %d) dismatch\n",
> +			inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index, frame->id);

This was not newly introduced, so I'll take it anyway, but can you send a patch
later to fix "dismatch" -> "mismatch"?

>  
> -	if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_READY && vdec->params.display_delay_enable)
> +	if (vdec->params.display_delay_enable)
>  		return;
>  
>  	if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_DECODED)
> @@ -857,10 +926,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame)
>  	vbuf->sequence = vdec->sequence;
>  	dev_dbg(inst->dev, "[%d][OUTPUT TS]%32lld\n", inst->id, vbuf->vb2_buf.timestamp);
>  
> -	v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE);
>  	vpu_inst_lock(inst);
> +	vdec->slots[vpu_buf->fs_id].state = VPU_BUF_STATE_READY;
>  	vdec->display_frame_count++;
>  	vpu_inst_unlock(inst);
> +	v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE);
>  	dev_dbg(inst->dev, "[%d] decoded : %d, display : %d, sequence : %d\n",
>  		inst->id, vdec->decoded_frame_count, vdec->display_frame_count, vdec->sequence);
>  }
> @@ -1103,18 +1173,30 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb
>  	if (!vbuf)
>  		return -EINVAL;
>  
> -	if (vdec->slots[vbuf->vb2_buf.index]) {
> -		dev_err(inst->dev, "[%d] repeat alloc fs %d\n",
> -			inst->id, vbuf->vb2_buf.index);
> +	vpu_buf = to_vpu_vb2_buffer(vbuf);
> +	if (vpu_buf->fs_id < 0 || vpu_buf->fs_id >= vdec->slot_count) {
> +		dev_err(inst->dev, "invalid fs %d for v4l2 buffer %d\n",
> +			vpu_buf->fs_id, vbuf->vb2_buf.index);
>  		return -EINVAL;
>  	}
>  
> +	if (vdec->slots[vpu_buf->fs_id].curr) {
> +		if (vdec->slots[vpu_buf->fs_id].curr != vpu_buf) {
> +			vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED);
> +			vdec->slots[vpu_buf->fs_id].pend = vpu_buf;
> +		} else {
> +			vpu_set_buffer_state(vbuf, vdec->slots[vpu_buf->fs_id].state);
> +		}
> +		dev_err(inst->dev, "[%d] repeat alloc fs %d (v4l2 index %d)\n",
> +			inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index);
> +		return -EAGAIN;
> +	}
> +
>  	dev_dbg(inst->dev, "[%d] state = %s, alloc fs %d, tag = 0x%x\n",
>  		inst->id, vpu_codec_state_name(inst->state), vbuf->vb2_buf.index, vdec->seq_tag);
> -	vpu_buf = to_vpu_vb2_buffer(vbuf);
>  
>  	memset(&info, 0, sizeof(info));
> -	info.id = vbuf->vb2_buf.index;
> +	info.id = vpu_buf->fs_id;
>  	info.type = MEM_RES_FRAME;
>  	info.tag = vdec->seq_tag;
>  	info.luma_addr = vpu_get_vb_phy_addr(&vbuf->vb2_buf, 0);
> @@ -1129,12 +1211,13 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb
>  	if (ret)
>  		return ret;
>  
> -	vpu_buf->tag = info.tag;
>  	vpu_buf->luma = info.luma_addr;
>  	vpu_buf->chroma_u = info.chroma_addr;
>  	vpu_buf->chroma_v = 0;
>  	vpu_set_buffer_state(vbuf, VPU_BUF_STATE_INUSE);
> -	vdec->slots[info.id] = vpu_buf;
> +	vdec->slots[info.id].tag = info.tag;
> +	vdec->slots[info.id].curr = vpu_buf;
> +	vdec->slots[info.id].state = VPU_BUF_STATE_INUSE;
>  	vdec->req_frame_count--;
>  
>  	return 0;
> @@ -1195,25 +1278,47 @@ static void vdec_recycle_buffer(struct vpu_inst *inst, struct vb2_v4l2_buffer *v
>  	v4l2_m2m_buf_queue(inst->fh.m2m_ctx, vbuf);
>  }
>  
> -static void vdec_clear_slots(struct vpu_inst *inst)
> +static void vdec_release_curr_frame_store(struct vpu_inst *inst, u32 id)
>  {
>  	struct vdec_t *vdec = inst->priv;
>  	struct vpu_vb2_buffer *vpu_buf;
>  	struct vb2_v4l2_buffer *vbuf;
> +
> +	if (id >= vdec->slot_count)
> +		return;
> +	if (!vdec->slots[id].curr)
> +		return;
> +
> +	vpu_buf = vdec->slots[id].curr;
> +	vbuf = &vpu_buf->m2m_buf.vb;
> +
> +	vdec_response_fs_release(inst, id, vdec->slots[id].tag);
> +	if (vpu_buf->fs_id == id) {
> +		if (vpu_buf->state != VPU_BUF_STATE_READY)
> +			vdec_recycle_buffer(inst, vbuf);
> +		vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE);
> +	}
> +
> +	vdec->slots[id].curr = NULL;
> +	vdec->slots[id].state = VPU_BUF_STATE_IDLE;
> +
> +	if (vdec->slots[id].pend) {
> +		vpu_set_buffer_state(&vdec->slots[id].pend->m2m_buf.vb, VPU_BUF_STATE_IDLE);
> +		vdec->slots[id].pend = NULL;
> +	}
> +}
> +
> +static void vdec_clear_slots(struct vpu_inst *inst)
> +{
> +	struct vdec_t *vdec = inst->priv;
>  	int i;
>  
> -	for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) {
> -		if (!vdec->slots[i])
> +	for (i = 0; i < vdec->slot_count; i++) {
> +		if (!vdec->slots[i].curr)
>  			continue;
>  
> -		vpu_buf = vdec->slots[i];
> -		vbuf = &vpu_buf->m2m_buf.vb;
> -
>  		vpu_trace(inst->dev, "clear slot %d\n", i);
> -		vdec_response_fs_release(inst, i, vpu_buf->tag);
> -		vdec_recycle_buffer(inst, vbuf);
> -		vdec->slots[i]->state = VPU_BUF_STATE_IDLE;
> -		vdec->slots[i] = NULL;
> +		vdec_release_curr_frame_store(inst, i);
>  	}
>  }
>  
> @@ -1354,39 +1459,29 @@ static void vdec_event_req_fs(struct vpu_inst *inst, struct vpu_fs_info *fs)
>  static void vdec_evnet_rel_fs(struct vpu_inst *inst, struct vpu_fs_info *fs)
>  {
>  	struct vdec_t *vdec = inst->priv;
> -	struct vpu_vb2_buffer *vpu_buf;
> -	struct vb2_v4l2_buffer *vbuf;
>  
> -	if (!fs || fs->id >= ARRAY_SIZE(vdec->slots))
> +	if (!fs || fs->id >= vdec->slot_count)
>  		return;
>  	if (fs->type != MEM_RES_FRAME)
>  		return;
>  
> -	if (fs->id >= vpu_get_num_buffers(inst, inst->cap_format.type)) {
> +	if (fs->id >= vdec->slot_count) {
>  		dev_err(inst->dev, "[%d] invalid fs(%d) to release\n", inst->id, fs->id);
>  		return;
>  	}
>  
>  	vpu_inst_lock(inst);
> -	vpu_buf = vdec->slots[fs->id];
> -	vdec->slots[fs->id] = NULL;
> -
> -	if (!vpu_buf) {
> +	if (!vdec->slots[fs->id].curr) {
>  		dev_dbg(inst->dev, "[%d] fs[%d] has bee released\n", inst->id, fs->id);
>  		goto exit;
>  	}
>  
> -	vbuf = &vpu_buf->m2m_buf.vb;
> -	if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) {
> +	if (vdec->slots[fs->id].state == VPU_BUF_STATE_DECODED) {
>  		dev_dbg(inst->dev, "[%d] frame skip\n", inst->id);
>  		vdec->sequence++;
>  	}
>  
> -	vdec_response_fs_release(inst, fs->id, vpu_buf->tag);
> -	if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_READY)
> -		vdec_recycle_buffer(inst, vbuf);
> -
> -	vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE);
> +	vdec_release_curr_frame_store(inst, fs->id);
>  	vpu_process_capture_buffer(inst);
>  
>  exit:
> @@ -1582,6 +1677,11 @@ static void vdec_cleanup(struct vpu_inst *inst)
>  		return;
>  
>  	vdec = inst->priv;
> +	if (vdec) {
> +		kfree(vdec->slots);
> +		vdec->slots = NULL;
> +		vdec->slot_count = 0;
> +	}
>  	vfree(vdec);
>  	inst->priv = NULL;
>  	vfree(inst);
> @@ -1713,11 +1813,43 @@ static int vdec_stop_session(struct vpu_inst *inst, u32 type)
>  	return 0;
>  }
>  
> -static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i)
> +static int vdec_get_slot_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i)
>  {
>  	struct vdec_t *vdec = inst->priv;
> +	struct vpu_vb2_buffer *vpu_buf;
>  	int num = -1;
>  
> +	vpu_inst_lock(inst);
> +	if (i >= vdec->slot_count || !vdec->slots[i].addr)
> +		goto exit;
> +
> +	vpu_buf = vdec->slots[i].curr;
> +
> +	num = scnprintf(str, size, "slot[%2d] :", i);
> +	if (vpu_buf) {
> +		num += scnprintf(str + num, size - num, " %2d",
> +				 vpu_buf->m2m_buf.vb.vb2_buf.index);
> +		num += scnprintf(str + num, size - num, "; state = %d", vdec->slots[i].state);
> +	} else {
> +		num += scnprintf(str + num, size - num, " -1");
> +	}
> +
> +	if (vdec->slots[i].pend)
> +		num += scnprintf(str + num, size - num, "; %d",
> +				 vdec->slots[i].pend->m2m_buf.vb.vb2_buf.index);
> +
> +	num += scnprintf(str + num, size - num, "\n");
> +exit:
> +	vpu_inst_unlock(inst);
> +
> +	return num;
> +}
> +
> +static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i)
> +{
> +	struct vdec_t *vdec = inst->priv;
> +	int num;
> +
>  	switch (i) {
>  	case 0:
>  		num = scnprintf(str, size,
> @@ -1771,6 +1903,7 @@ static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i
>  				vdec->codec_info.vui_present);
>  		break;
>  	default:
> +		num = vdec_get_slot_debug_info(inst, str, size, i - 10);
>  		break;
>  	}
>  
> @@ -1794,6 +1927,8 @@ static struct vpu_inst_ops vdec_inst_ops = {
>  	.get_debug_info = vdec_get_debug_info,
>  	.wait_prepare = vpu_inst_unlock,
>  	.wait_finish = vpu_inst_lock,
> +	.attach_frame_store = vdec_attach_frame_store,
> +	.reset_frame_store = vdec_reset_frame_store,
>  };
>  
>  static void vdec_init(struct file *file)
> @@ -1834,6 +1969,16 @@ static int vdec_open(struct file *file)
>  		return -ENOMEM;
>  	}
>  
> +	vdec->slots = kmalloc_array(VDEC_SLOT_CNT_DFT,
> +				    sizeof(*vdec->slots),
> +				    GFP_KERNEL | __GFP_ZERO);
> +	if (!vdec->slots) {
> +		vfree(vdec);
> +		vfree(inst);
> +		return -ENOMEM;
> +	}
> +	vdec->slot_count = VDEC_SLOT_CNT_DFT;
> +
>  	inst->ops = &vdec_inst_ops;
>  	inst->formats = vdec_formats;
>  	inst->type = VPU_CORE_TYPE_DEC;
> diff --git a/drivers/media/platform/amphion/vpu.h b/drivers/media/platform/amphion/vpu.h
> index 978971712742..d8100da160d1 100644
> --- a/drivers/media/platform/amphion/vpu.h
> +++ b/drivers/media/platform/amphion/vpu.h
> @@ -223,6 +223,8 @@ struct vpu_inst_ops {
>  	int (*get_debug_info)(struct vpu_inst *inst, char *str, u32 size, u32 i);
>  	void (*wait_prepare)(struct vpu_inst *inst);
>  	void (*wait_finish)(struct vpu_inst *inst);
> +	void (*attach_frame_store)(struct vpu_inst *inst, struct vb2_buffer *vb);
> +	void (*reset_frame_store)(struct vpu_inst *inst);
>  };
>  
>  struct vpu_inst {
> @@ -297,7 +299,8 @@ enum {
>  	VPU_BUF_STATE_DECODED,
>  	VPU_BUF_STATE_READY,
>  	VPU_BUF_STATE_SKIP,
> -	VPU_BUF_STATE_ERROR
> +	VPU_BUF_STATE_ERROR,
> +	VPU_BUF_STATE_CHANGED
>  };
>  
>  struct vpu_vb2_buffer {
> @@ -306,8 +309,8 @@ struct vpu_vb2_buffer {
>  	dma_addr_t chroma_u;
>  	dma_addr_t chroma_v;
>  	unsigned int state;
> -	u32 tag;
>  	u32 average_qp;
> +	s32 fs_id;
>  };
>  
>  void vpu_writel(struct vpu_dev *vpu, u32 reg, u32 val);
> diff --git a/drivers/media/platform/amphion/vpu_dbg.c b/drivers/media/platform/amphion/vpu_dbg.c
> index 940e5bda5fa3..497ae4e8a229 100644
> --- a/drivers/media/platform/amphion/vpu_dbg.c
> +++ b/drivers/media/platform/amphion/vpu_dbg.c
> @@ -48,6 +48,7 @@ static char *vpu_stat_name[] = {
>  	[VPU_BUF_STATE_READY] = "ready",
>  	[VPU_BUF_STATE_SKIP] = "skip",
>  	[VPU_BUF_STATE_ERROR] = "error",
> +	[VPU_BUF_STATE_CHANGED] = "changed",
>  };
>  
>  static inline const char *to_vpu_stat_name(int state)
> @@ -164,6 +165,7 @@ static int vpu_dbg_instance(struct seq_file *s, void *data)
>  	for (i = 0; i < vb2_get_num_buffers(vq); i++) {
>  		struct vb2_buffer *vb;
>  		struct vb2_v4l2_buffer *vbuf;
> +		struct vpu_vb2_buffer *vpu_buf;
>  
>  		vb = vb2_get_buffer(vq, i);
>  		if (!vb)
> @@ -173,13 +175,24 @@ static int vpu_dbg_instance(struct seq_file *s, void *data)
>  			continue;
>  
>  		vbuf = to_vb2_v4l2_buffer(vb);
> +		vpu_buf = to_vpu_vb2_buffer(vbuf);
>  
>  		num = scnprintf(str, sizeof(str),
> -				"capture[%2d] state = %10s, %8s\n",
> +				"capture[%2d] state = %10s, %8s",
>  				i, vb2_stat_name[vb->state],
>  				to_vpu_stat_name(vpu_get_buffer_state(vbuf)));
>  		if (seq_write(s, str, num))
>  			return 0;
> +
> +		if (vpu_buf->fs_id >= 0) {
> +			num = scnprintf(str, sizeof(str), "; fs %d", vpu_buf->fs_id);
> +			if (seq_write(s, str, num))
> +				return 0;
> +		}
> +
> +		num = scnprintf(str, sizeof(str), "\n");
> +		if (seq_write(s, str, num))
> +			return 0;
>  	}
>  
>  	num = scnprintf(str, sizeof(str), "sequence = %d\n", inst->sequence);
> diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c
> index 50aeb69d2c66..37ef706c29dd 100644
> --- a/drivers/media/platform/amphion/vpu_v4l2.c
> +++ b/drivers/media/platform/amphion/vpu_v4l2.c
> @@ -500,14 +500,25 @@ static int vpu_vb2_queue_setup(struct vb2_queue *vq,
>  		call_void_vop(inst, release);
>  	}
>  
> +	if (V4L2_TYPE_IS_CAPTURE(vq->type))
> +		call_void_vop(inst, reset_frame_store);
> +
>  	return 0;
>  }
>  
>  static int vpu_vb2_buf_init(struct vb2_buffer *vb)
>  {
>  	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> +	struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf);
> +	struct vpu_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
>  
> +	vpu_buf->fs_id = -1;
>  	vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE);
> +
> +	if (!inst->ops->attach_frame_store || V4L2_TYPE_IS_OUTPUT(vb->type))
> +		return 0;
> +
> +	call_void_vop(inst, attach_frame_store, vb);
>  	return 0;
>  }
>  

Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>