The existing decode path fed a bitstream to the firmware and then
waited until each frame had been decoded before feeding the next one.
As a result, performance was low and the Wave5 could not reach its
maximum pixel processing rate.

Update the driver to feed the bitstream and decode asynchronously, so
that the full capabilities of the device can be used.
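
Roughly, the new flow hands the v4l2_m2m job-finish off to a
per-instance kernel thread instead of waiting for the decode
interrupt. A minimal sketch of the pattern, simplified from the diff
below (error handling, locking and power management trimmed):

	/* device_run(): feed the ring buffer, queue the PIC_RUN
	 * command, then signal the worker instead of blocking until
	 * the frame is decoded. */
	static void wave5_vpu_dec_device_run(void *priv)
	{
		struct vpu_instance *inst = priv;
		u32 fail_res = 0;

		fill_ringbuffer(inst);          /* copy queued src bufs */
		start_decode(inst, &fail_res);  /* kick the firmware    */
		up(&inst->run_sem);             /* wake the run thread  */
	}

	/* Per-instance worker: finishes the m2m job so the framework
	 * can schedule the next one right away. */
	static int run_thread(void *data)
	{
		struct vpu_instance *inst = data;

		while (!kthread_should_stop()) {
			if (down_interruptible(&inst->run_sem))
				continue;
			if (kthread_should_stop())
				break;
			v4l2_m2m_job_finish(inst->v4l2_m2m_dev,
					    inst->v4l2_fh.m2m_ctx);
		}
		return 0;
	}
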
Signed-off-by: Jackson.lee <jackson.lee@chipsnmedia.com>
Signed-off-by: Nas Chung <nas.chung@chipsnmedia.com>
---
.../platform/chips-media/wave5/wave5-helper.c | 6 +
.../chips-media/wave5/wave5-vpu-dec.c | 380 +++++++++++-------
.../platform/chips-media/wave5/wave5-vpu.c | 5 +-
.../platform/chips-media/wave5/wave5-vpuapi.c | 1 +
.../platform/chips-media/wave5/wave5-vpuapi.h | 6 +
5 files changed, 251 insertions(+), 147 deletions(-)
diff --git a/drivers/media/platform/chips-media/wave5/wave5-helper.c b/drivers/media/platform/chips-media/wave5/wave5-helper.c
index 2c9d8cbca6e4..2412de290eca 100644
--- a/drivers/media/platform/chips-media/wave5/wave5-helper.c
+++ b/drivers/media/platform/chips-media/wave5/wave5-helper.c
@@ -62,6 +62,12 @@ int wave5_vpu_release_device(struct file *filp,
struct vpu_instance *inst = wave5_to_vpu_inst(filp->private_data);
int ret = 0;
+ if (inst->run_thread) {
+ kthread_stop(inst->run_thread);
+ up(&inst->run_sem);
+ inst->run_thread = NULL;
+ }
+
v4l2_m2m_ctx_release(inst->v4l2_fh.m2m_ctx);
if (inst->state != VPU_INST_STATE_NONE) {
u32 fail_res;
diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
index d3ff420c52ce..f0db531247ac 100644
--- a/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
+++ b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
@@ -6,6 +6,9 @@
*/
#include <linux/pm_runtime.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/atomic.h>
#include "wave5-helper.h"
#define VPU_DEC_DEV_NAME "C&M Wave5 VPU decoder"
@@ -101,6 +104,24 @@ static const struct vpu_format dec_fmt_list[FMT_TYPES][MAX_FMTS] = {
}
};
+static int run_thread(void *data)
+{
+ struct vpu_instance *inst = (struct vpu_instance *)data;
+ struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
+
+ while (!kthread_should_stop()) {
+ if (down_interruptible(&inst->run_sem))
+ continue;
+
+ if (kthread_should_stop())
+ break;
+
+ v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
+ }
+
+ return 0;
+}
+
/*
* Make sure that the state switch is allowed and add logging for debugging
* purposes
@@ -230,7 +251,6 @@ static int start_decode(struct vpu_instance *inst, u32 *fail_res)
switch_state(inst, VPU_INST_STATE_STOP);
dev_dbg(inst->dev->dev, "%s: pic run failed / finish job", __func__);
- v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
}
return ret;
@@ -347,7 +367,6 @@ static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst)
struct vb2_v4l2_buffer *dec_buf = NULL;
struct vb2_v4l2_buffer *disp_buf = NULL;
struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
- struct queue_status_info q_status;
dev_dbg(inst->dev->dev, "%s: Fetch output info from firmware.", __func__);
@@ -360,11 +379,22 @@ static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst)
dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &dec_info.rd_ptr,
&dec_info.wr_ptr);
- wave5_handle_src_buffer(inst, dec_info.rd_ptr);
dev_dbg(inst->dev->dev, "%s: dec_info dec_idx %i disp_idx %i", __func__,
dec_info.index_frame_decoded, dec_info.index_frame_display);
+ if (inst->std == W_AVC_DEC &&
+ dec_info.index_frame_decoded == DECODED_IDX_FLAG_SKIP &&
+ dec_info.index_frame_display == DISPLAY_IDX_FLAG_NO_FB) {
+ struct vb2_v4l2_buffer *src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
+
+ if (src_buf)
+ v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
+ return;
+ }
+
+ wave5_handle_src_buffer(inst, dec_info.rd_ptr);
+
if (!vb2_is_streaming(dst_vq)) {
dev_dbg(inst->dev->dev, "%s: capture is not streaming..", __func__);
v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
@@ -441,20 +471,6 @@ static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst)
}
spin_unlock_irqrestore(&inst->state_spinlock, flags);
}
-
- /*
- * During a resolution change and while draining, the firmware may flush
- * the reorder queue regardless of having a matching decoding operation
- * pending. Only terminate the job if there are no more IRQ coming.
- */
- wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
- if (q_status.report_queue_count == 0 &&
- (q_status.instance_queue_count == 0 || dec_info.sequence_changed)) {
- dev_dbg(inst->dev->dev, "%s: finishing job.\n", __func__);
- pm_runtime_mark_last_busy(inst->dev->dev);
- pm_runtime_put_autosuspend(inst->dev->dev);
- v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
- }
}
static int wave5_vpu_dec_querycap(struct file *file, void *fh, struct v4l2_capability *cap)
@@ -465,6 +481,142 @@ static int wave5_vpu_dec_querycap(struct file *file, void *fh, struct v4l2_capab
return 0;
}
+static int write_to_ringbuffer(struct vpu_instance *inst, void *buffer, size_t buffer_size,
+ struct vpu_buf *ring_buffer, dma_addr_t wr_ptr)
+{
+ size_t size;
+ size_t offset = wr_ptr - ring_buffer->daddr;
+ int ret;
+
+ if (wr_ptr + buffer_size > ring_buffer->daddr + ring_buffer->size) {
+ size = ring_buffer->daddr + ring_buffer->size - wr_ptr;
+ ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer, size);
+ if (ret < 0)
+ return ret;
+
+ ret = wave5_vdi_write_memory(inst->dev, ring_buffer, 0, (u8 *)buffer + size,
+ buffer_size - size);
+ if (ret < 0)
+ return ret;
+ } else {
+ ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer,
+ buffer_size);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int fill_ringbuffer(struct vpu_instance *inst)
+{
+ struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
+ struct vpu_src_buffer *vpu_buf;
+ int ret = 0;
+
+ if (m2m_ctx->last_src_buf) {
+ struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
+
+ if (vpu_buf->consumed) {
+ dev_dbg(inst->dev->dev, "last src buffer already written\n");
+ return 0;
+ }
+ }
+
+ list_for_each_entry(vpu_buf, &inst->avail_src_bufs, list) {
+ struct vb2_v4l2_buffer *vbuf = &vpu_buf->v4l2_m2m_buf.vb;
+ struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
+ size_t src_size = vb2_get_plane_payload(&vbuf->vb2_buf, 0);
+ void *src_buf = vb2_plane_vaddr(&vbuf->vb2_buf, 0);
+ dma_addr_t rd_ptr = 0;
+ dma_addr_t wr_ptr = 0;
+ size_t remain_size = 0;
+
+ if (vpu_buf->consumed) {
+ dev_dbg(inst->dev->dev, "already copied src buf (%u) to the ring buffer\n",
+ vbuf->vb2_buf.index);
+ continue;
+ }
+
+ if (!src_buf) {
+ dev_dbg(inst->dev->dev,
+ "%s: Acquiring kernel pointer to src buf (%u), fail\n",
+ __func__, vbuf->vb2_buf.index);
+ break;
+ }
+
+ ret = wave5_vpu_dec_get_bitstream_buffer(inst, &rd_ptr, &wr_ptr, &remain_size);
+ if (ret) {
+ /* Unable to acquire the mutex */
+ dev_err(inst->dev->dev, "Getting the bitstream buffer, fail: %d\n",
+ ret);
+ return ret;
+ }
+
+ dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &rd_ptr, &wr_ptr);
+
+ if (remain_size < src_size) {
+ dev_dbg(inst->dev->dev,
+ "%s: remaining size: %zu < source size: %zu for src buf (%u)\n",
+ __func__, remain_size, src_size, vbuf->vb2_buf.index);
+ break;
+ }
+
+ ret = write_to_ringbuffer(inst, src_buf, src_size, ring_buffer, wr_ptr);
+ if (ret) {
+ dev_err(inst->dev->dev, "Write src buf (%u) to ring buffer, fail: %d\n",
+ vbuf->vb2_buf.index, ret);
+ return ret;
+ }
+
+ ret = wave5_vpu_dec_update_bitstream_buffer(inst, src_size);
+ if (ret) {
+ dev_dbg(inst->dev->dev,
+ "update_bitstream_buffer fail: %d for src buf (%u)\n",
+ ret, vbuf->vb2_buf.index);
+ break;
+ }
+
+ vpu_buf->consumed = true;
+
+ /* Don't write buffers past the last one while draining. */
+ if (v4l2_m2m_is_last_draining_src_buf(m2m_ctx, vbuf)) {
+ dev_dbg(inst->dev->dev, "last src buffer written to the ring buffer\n");
+ break;
+ }
+
+ inst->queuing_num++;
+ list_del_init(&vpu_buf->list);
+ break;
+ }
+
+ return ret;
+}
+
+static void wave5_vpu_dec_feed_remaining(struct vpu_instance *inst)
+{
+ int ret = 0;
+ struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
+ u32 fail_res = 0;
+
+ mutex_lock(&inst->feed_lock);
+ ret = fill_ringbuffer(inst);
+ mutex_unlock(&inst->feed_lock);
+ if (ret) {
+ dev_warn(inst->dev->dev, "Filling ring buffer failed\n");
+ return;
+ }
+
+ ret = start_decode(inst, &fail_res);
+ if (ret) {
+ dev_err(inst->dev->dev,
+ "Frame decoding on m2m context (%p), fail: %d (result: %d)\n",
+ m2m_ctx, ret, fail_res);
+ }
+
+ v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
+}
+
static int wave5_vpu_dec_enum_framesizes(struct file *f, void *fh, struct v4l2_frmsizeenum *fsize)
{
const struct vpu_format *vpu_fmt;
@@ -794,11 +946,21 @@ static int wave5_vpu_dec_stop(struct vpu_instance *inst)
}
if (inst->state != VPU_INST_STATE_NONE) {
+ struct vb2_v4l2_buffer *vbuf;
+ struct vpu_src_buffer *vpu_buf;
+
/*
* Temporarily release the state_spinlock so that subsequent
* calls do not block on a mutex while inside this spinlock.
*/
spin_unlock_irqrestore(&inst->state_spinlock, flags);
+ vbuf = v4l2_m2m_last_src_buf(m2m_ctx);
+ if (vbuf) {
+ vpu_buf = wave5_to_vpu_src_buf(vbuf);
+ if (!vpu_buf->consumed)
+ wave5_vpu_dec_feed_remaining(inst);
+ }
+
ret = wave5_vpu_dec_set_eos_on_firmware(inst);
if (ret)
return ret;
@@ -1116,115 +1278,6 @@ static int wave5_prepare_fb(struct vpu_instance *inst)
return 0;
}
-static int write_to_ringbuffer(struct vpu_instance *inst, void *buffer, size_t buffer_size,
- struct vpu_buf *ring_buffer, dma_addr_t wr_ptr)
-{
- size_t size;
- size_t offset = wr_ptr - ring_buffer->daddr;
- int ret;
-
- if (wr_ptr + buffer_size > ring_buffer->daddr + ring_buffer->size) {
- size = ring_buffer->daddr + ring_buffer->size - wr_ptr;
- ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer, size);
- if (ret < 0)
- return ret;
-
- ret = wave5_vdi_write_memory(inst->dev, ring_buffer, 0, (u8 *)buffer + size,
- buffer_size - size);
- if (ret < 0)
- return ret;
- } else {
- ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer,
- buffer_size);
- if (ret < 0)
- return ret;
- }
-
- return 0;
-}
-
-static int fill_ringbuffer(struct vpu_instance *inst)
-{
- struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
- struct v4l2_m2m_buffer *buf, *n;
- int ret;
-
- if (m2m_ctx->last_src_buf) {
- struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
-
- if (vpu_buf->consumed) {
- dev_dbg(inst->dev->dev, "last src buffer already written\n");
- return 0;
- }
- }
-
- v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
- struct vb2_v4l2_buffer *vbuf = &buf->vb;
- struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
- struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
- size_t src_size = vb2_get_plane_payload(&vbuf->vb2_buf, 0);
- void *src_buf = vb2_plane_vaddr(&vbuf->vb2_buf, 0);
- dma_addr_t rd_ptr = 0;
- dma_addr_t wr_ptr = 0;
- size_t remain_size = 0;
-
- if (vpu_buf->consumed) {
- dev_dbg(inst->dev->dev, "already copied src buf (%u) to the ring buffer\n",
- vbuf->vb2_buf.index);
- continue;
- }
-
- if (!src_buf) {
- dev_dbg(inst->dev->dev,
- "%s: Acquiring kernel pointer to src buf (%u), fail\n",
- __func__, vbuf->vb2_buf.index);
- break;
- }
-
- ret = wave5_vpu_dec_get_bitstream_buffer(inst, &rd_ptr, &wr_ptr, &remain_size);
- if (ret) {
- /* Unable to acquire the mutex */
- dev_err(inst->dev->dev, "Getting the bitstream buffer, fail: %d\n",
- ret);
- return ret;
- }
-
- dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &rd_ptr, &wr_ptr);
-
- if (remain_size < src_size) {
- dev_dbg(inst->dev->dev,
- "%s: remaining size: %zu < source size: %zu for src buf (%u)\n",
- __func__, remain_size, src_size, vbuf->vb2_buf.index);
- break;
- }
-
- ret = write_to_ringbuffer(inst, src_buf, src_size, ring_buffer, wr_ptr);
- if (ret) {
- dev_err(inst->dev->dev, "Write src buf (%u) to ring buffer, fail: %d\n",
- vbuf->vb2_buf.index, ret);
- return ret;
- }
-
- ret = wave5_vpu_dec_update_bitstream_buffer(inst, src_size);
- if (ret) {
- dev_dbg(inst->dev->dev,
- "update_bitstream_buffer fail: %d for src buf (%u)\n",
- ret, vbuf->vb2_buf.index);
- break;
- }
-
- vpu_buf->consumed = true;
-
- /* Don't write buffers passed the last one while draining. */
- if (v4l2_m2m_is_last_draining_src_buf(m2m_ctx, vbuf)) {
- dev_dbg(inst->dev->dev, "last src buffer written to the ring buffer\n");
- break;
- }
- }
-
- return 0;
-}
-
static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb)
{
struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
@@ -1236,6 +1289,11 @@ static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb)
vbuf->sequence = inst->queued_src_buf_num++;
v4l2_m2m_buf_queue(m2m_ctx, vbuf);
+
+ INIT_LIST_HEAD(&vpu_buf->list);
+ mutex_lock(&inst->feed_lock);
+ list_add_tail(&vpu_buf->list, &inst->avail_src_bufs);
+ mutex_unlock(&inst->feed_lock);
}
static void wave5_vpu_dec_buf_queue_dst(struct vb2_buffer *vb)
@@ -1287,10 +1345,13 @@ static void wave5_vpu_dec_buf_queue(struct vb2_buffer *vb)
__func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf, 0),
vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf->vb2_buf, 2));
- if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
+ if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
+ if (inst->empty_queue)
+ inst->empty_queue = false;
wave5_vpu_dec_buf_queue_src(vb);
- else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
+ } else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
wave5_vpu_dec_buf_queue_dst(vb);
+ }
}
static int wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance *inst)
@@ -1369,6 +1430,13 @@ static int streamoff_output(struct vb2_queue *q)
struct vb2_v4l2_buffer *buf;
int ret;
dma_addr_t new_rd_ptr;
+ struct vpu_src_buffer *vpu_buf, *tmp;
+
+ inst->retry = false;
+ inst->queuing_num = 0;
+
+ list_for_each_entry_safe(vpu_buf, tmp, &inst->avail_src_bufs, list)
+ list_del_init(&vpu_buf->list);
while ((buf = v4l2_m2m_src_buf_remove(m2m_ctx))) {
dev_dbg(inst->dev->dev, "%s: (Multiplanar) buf type %4u | index %4u\n",
@@ -1445,6 +1513,7 @@ static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
pm_runtime_resume_and_get(inst->dev->dev);
+ inst->empty_queue = false;
while (check_cmd) {
struct queue_status_info q_status;
@@ -1452,10 +1521,8 @@ static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
- if (q_status.report_queue_count == 0)
- break;
-
- if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
+ if ((inst->state == VPU_INST_STATE_STOP || q_status.instance_queue_count == 0) &&
+ q_status.report_queue_count == 0)
break;
if (wave5_vpu_dec_get_output_info(inst, &dec_output_info))
@@ -1548,13 +1615,24 @@ static void wave5_vpu_dec_device_run(void *priv)
struct queue_status_info q_status;
u32 fail_res = 0;
int ret = 0;
+ unsigned long flags;
dev_dbg(inst->dev->dev, "%s: Fill the ring buffer with new bitstream data", __func__);
pm_runtime_resume_and_get(inst->dev->dev);
- ret = fill_ringbuffer(inst);
- if (ret) {
- dev_warn(inst->dev->dev, "Filling ring buffer failed\n");
- goto finish_job_and_return;
+ if (!inst->retry) {
+ mutex_lock(&inst->feed_lock);
+ ret = fill_ringbuffer(inst);
+ mutex_unlock(&inst->feed_lock);
+ if (ret < 0) {
+ dev_warn(inst->dev->dev, "Filling ring buffer failed\n");
+ goto finish_job_and_return;
+ } else if (!inst->eos &&
+ inst->queuing_num == 0 &&
+ inst->state == VPU_INST_STATE_PIC_RUN) {
+ dev_dbg(inst->dev->dev, "%s: no bitstream for feeding, so skip ", __func__);
+ inst->empty_queue = true;
+ goto finish_job_and_return;
+ }
}
switch (inst->state) {
@@ -1590,7 +1668,9 @@ static void wave5_vpu_dec_device_run(void *priv)
* we had a chance to switch, which leads to an invalid state
* change.
*/
+ spin_lock_irqsave(&inst->state_spinlock, flags);
switch_state(inst, VPU_INST_STATE_PIC_RUN);
+ spin_unlock_irqrestore(&inst->state_spinlock, flags);
/*
* During DRC, the picture decoding remains pending, so just leave the job
@@ -1605,12 +1685,14 @@ static void wave5_vpu_dec_device_run(void *priv)
ret = wave5_prepare_fb(inst);
if (ret) {
dev_warn(inst->dev->dev, "Framebuffer preparation, fail: %d\n", ret);
+ spin_lock_irqsave(&inst->state_spinlock, flags);
switch_state(inst, VPU_INST_STATE_STOP);
+ spin_unlock_irqrestore(&inst->state_spinlock, flags);
break;
}
if (q_status.instance_queue_count) {
- dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
+ v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
return;
}
@@ -1621,11 +1703,17 @@ static void wave5_vpu_dec_device_run(void *priv)
dev_err(inst->dev->dev,
"Frame decoding on m2m context (%p), fail: %d (result: %d)\n",
m2m_ctx, ret, fail_res);
- break;
+ goto finish_job_and_return;
}
/* Return so that we leave this job active */
- dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
- return;
+ if (fail_res == WAVE5_SYSERR_QUEUEING_FAIL) {
+ inst->retry = true;
+ } else {
+ inst->retry = false;
+ if (!inst->eos)
+ inst->queuing_num--;
+ }
+ break;
default:
WARN(1, "Execution of a job in state %s illegal.\n", state_to_str(inst->state));
break;
@@ -1633,9 +1721,7 @@ static void wave5_vpu_dec_device_run(void *priv)
finish_job_and_return:
dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__);
- pm_runtime_mark_last_busy(inst->dev->dev);
- pm_runtime_put_autosuspend(inst->dev->dev);
- v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
+ up(&inst->run_sem);
}
static void wave5_vpu_dec_job_abort(void *priv)
@@ -1686,7 +1772,8 @@ static int wave5_vpu_dec_job_ready(void *priv)
"No capture buffer ready to decode!\n");
break;
} else if (!wave5_is_draining_or_eos(inst) &&
- !v4l2_m2m_num_src_bufs_ready(m2m_ctx)) {
+ (!v4l2_m2m_num_src_bufs_ready(m2m_ctx) ||
+ inst->empty_queue)) {
dev_dbg(inst->dev->dev,
"No bitstream data to decode!\n");
break;
@@ -1726,6 +1813,8 @@ static int wave5_vpu_open_dec(struct file *filp)
inst->ops = &wave5_vpu_dec_inst_ops;
spin_lock_init(&inst->state_spinlock);
+ mutex_init(&inst->feed_lock);
+ INIT_LIST_HEAD(&inst->avail_src_bufs);
inst->codec_info = kzalloc(sizeof(*inst->codec_info), GFP_KERNEL);
if (!inst->codec_info)
@@ -1797,6 +1886,9 @@ static int wave5_vpu_open_dec(struct file *filp)
if (inst->dev->product_code != WAVE515_CODE)
wave5_vdi_allocate_sram(inst->dev);
+ sema_init(&inst->run_sem, 1);
+ inst->run_thread = kthread_run(run_thread, inst, "run thread");
+
ret = mutex_lock_interruptible(&dev->dev_lock);
if (ret)
goto cleanup_inst;
diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu.c b/drivers/media/platform/chips-media/wave5/wave5-vpu.c
index 63a607d10433..a9bd96cbf9ac 100644
--- a/drivers/media/platform/chips-media/wave5/wave5-vpu.c
+++ b/drivers/media/platform/chips-media/wave5/wave5-vpu.c
@@ -51,7 +51,7 @@ static void wave5_vpu_handle_irq(void *dev_id)
u32 seq_done;
u32 cmd_done;
u32 irq_reason;
- struct vpu_instance *inst;
+ struct vpu_instance *inst, *tmp;
struct vpu_device *dev = dev_id;
irq_reason = wave5_vdi_read_register(dev, W5_VPU_VINT_REASON);
@@ -60,8 +60,7 @@ static void wave5_vpu_handle_irq(void *dev_id)
wave5_vdi_write_register(dev, W5_VPU_VINT_REASON_CLR, irq_reason);
wave5_vdi_write_register(dev, W5_VPU_VINT_CLEAR, 0x1);
- list_for_each_entry(inst, &dev->instances, list) {
-
+ list_for_each_entry_safe(inst, tmp, &dev->instances, list) {
if (irq_reason & BIT(INT_WAVE5_INIT_SEQ) ||
irq_reason & BIT(INT_WAVE5_ENC_SET_PARAM)) {
if (dev->product_code == WAVE515_CODE &&
diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpuapi.c b/drivers/media/platform/chips-media/wave5/wave5-vpuapi.c
index e16b990041c2..a5b1966530c0 100644
--- a/drivers/media/platform/chips-media/wave5/wave5-vpuapi.c
+++ b/drivers/media/platform/chips-media/wave5/wave5-vpuapi.c
@@ -247,6 +247,7 @@ int wave5_vpu_dec_close(struct vpu_instance *inst, u32 *fail_res)
unlock_and_return:
mutex_unlock(&vpu_dev->hw_lock);
+ mutex_destroy(&inst->feed_lock);
pm_runtime_put_sync(inst->dev->dev);
return ret;
}
diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h b/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h
index 45615c15beca..7cdb5b5fe3e4 100644
--- a/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h
+++ b/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h
@@ -812,11 +812,17 @@ struct vpu_instance {
bool cbcr_interleave;
bool nv21;
bool eos;
+ bool retry;
+ bool empty_queue;
+ int queuing_num;
+ struct mutex feed_lock; /* lock for feeding bitstream buffers */
struct vpu_buf bitstream_vbuf;
dma_addr_t last_rd_ptr;
size_t remaining_consumed_bytes;
bool needs_reallocation;
+ struct semaphore run_sem;
+ struct task_struct *run_thread;
unsigned int min_src_buf_count;
unsigned int rot_angle;
unsigned int mirror_direction;
--
2.43.0
Hey Jackson,

I did a bit of testing and ran into a couple of different issues when
decoding with more than one thread; could you have a look into these?
I know this is broken before the patch as well, but I feel we should
first get decoding with more than one thread working correctly before
making decoding asynchronous, because that won't make debugging any
easier.
Have a look here: https://paste.debian.net/1337231/
Regards,
Sebastian
Sebastian Fricke
Consultant Software Engineer
Collabora Ltd
Platinum Building, St John's Innovation Park, Cambridge CB4 0DS, UK
Registered in England & Wales no 5513718.