[RFC PATCH 7/7] media: amphion: Add V4L2 memory tracking support

ming.qian@oss.nxp.com posted 7 patches 1 day, 5 hours ago
[RFC PATCH 7/7] media: amphion: Add V4L2 memory tracking support
Posted by ming.qian@oss.nxp.com 1 day, 5 hours ago
From: Ming Qian <ming.qian@oss.nxp.com>

Integrate the V4L2 memtrack framework to track DMA buffer allocations in
the Amphion VPU driver. Memory usage is organized hierarchically (device
-> instance -> queue) and exposed via the V4L2_CID_MEMORY_USAGE control
and debugfs.

Tracked buffers include the firmware boot region, the RPC region, the
decoder udata buffer, the stream ring buffers, and codec-specific frame
buffers (MBI, DCP, enc/ref frames).

Signed-off-by: Ming Qian <ming.qian@oss.nxp.com>
---
 drivers/media/platform/amphion/Kconfig    |  1 +
 drivers/media/platform/amphion/vdec.c     |  9 ++++++
 drivers/media/platform/amphion/venc.c     |  9 ++++++
 drivers/media/platform/amphion/vpu.h      |  7 +++++
 drivers/media/platform/amphion/vpu_core.c |  6 ++++
 drivers/media/platform/amphion/vpu_dbg.c  |  5 ++++
 drivers/media/platform/amphion/vpu_drv.c  |  2 ++
 drivers/media/platform/amphion/vpu_v4l2.c | 35 ++++++++++++++++++++++-
 8 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/amphion/Kconfig b/drivers/media/platform/amphion/Kconfig
index 4a363e07ccc9..2835c25415c5 100644
--- a/drivers/media/platform/amphion/Kconfig
+++ b/drivers/media/platform/amphion/Kconfig
@@ -12,6 +12,7 @@ config VIDEO_AMPHION_VPU
 	select V4L2_MEM2MEM_DEV
 	select VIDEOBUF2_DMA_CONTIG
 	select VIDEOBUF2_VMALLOC
+	select V4L2_MEMTRACK
 	help
 	  Amphion VPU Codec IP contains two parts: Windsor and Malone.
 	  Windsor is encoder that supports H.264, and Malone is decoder
diff --git a/drivers/media/platform/amphion/vdec.c b/drivers/media/platform/amphion/vdec.c
index a9f0521f2e1a..2b863fadb67f 100644
--- a/drivers/media/platform/amphion/vdec.c
+++ b/drivers/media/platform/amphion/vdec.c
@@ -279,6 +279,9 @@ static int vdec_ctrl_init(struct vpu_inst *inst)
 	if (ctrl)
 		ctrl->flags |= V4L2_CTRL_FLAG_VOLATILE;
 
+	v4l2_ctrl_new_std(&inst->ctrl_handler, NULL,
+			  V4L2_CID_MEMORY_USAGE, 0, S64_MAX, 1, 0);
+
 	if (inst->ctrl_handler.error) {
 		ret = inst->ctrl_handler.error;
 		v4l2_ctrl_handler_free(&inst->ctrl_handler);
@@ -1069,6 +1072,8 @@ static int vdec_alloc_fs_buffer(struct vpu_inst *inst, struct vdec_fs_info *fs)
 
 	vpu_free_dma(buffer);
 	buffer->length = fs->size;
+	buffer->memtrack = inst->memtrack;
+	buffer->label = fs->type == MEM_RES_MBI ? "mbi" : "dcp";
 	return vpu_alloc_dma(inst->core, buffer);
 }
 
@@ -1683,6 +1688,8 @@ static int vdec_start(struct vpu_inst *inst)
 	vpu_trace(inst->dev, "[%d]\n", inst->id);
 	if (!vdec->udata.virt) {
 		vdec->udata.length = 0x1000;
+		vdec->udata.memtrack = inst->memtrack;
+		vdec->udata.label = "udata";
 		ret = vpu_alloc_dma(inst->core, &vdec->udata);
 		if (ret) {
 			dev_err(inst->dev, "[%d] alloc udata fail\n", inst->id);
@@ -1694,6 +1701,8 @@ static int vdec_start(struct vpu_inst *inst)
 		stream_buffer_size = vpu_iface_get_stream_buffer_size(inst->core);
 		if (stream_buffer_size > 0) {
 			inst->stream_buffer.length = stream_buffer_size;
+			inst->stream_buffer.memtrack = inst->memtrack;
+			inst->stream_buffer.label = "bitstream-ring-buffer";
 			ret = vpu_alloc_dma(inst->core, &inst->stream_buffer);
 			if (ret) {
 				dev_err(inst->dev, "[%d] alloc stream buffer fail\n", inst->id);
diff --git a/drivers/media/platform/amphion/venc.c b/drivers/media/platform/amphion/venc.c
index 0b3d58b9f2f7..193ee488eba4 100644
--- a/drivers/media/platform/amphion/venc.c
+++ b/drivers/media/platform/amphion/venc.c
@@ -678,6 +678,9 @@ static int venc_ctrl_init(struct vpu_inst *inst)
 	v4l2_ctrl_new_std(&inst->ctrl_handler, NULL,
 			  V4L2_CID_MPEG_VIDEO_AVERAGE_QP, 0, 51, 1, 0);
 
+	v4l2_ctrl_new_std(&inst->ctrl_handler, NULL,
+			  V4L2_CID_MEMORY_USAGE, 0, S64_MAX, 1, 0);
+
 	if (inst->ctrl_handler.error) {
 		ret = inst->ctrl_handler.error;
 		v4l2_ctrl_handler_free(&inst->ctrl_handler);
@@ -929,6 +932,8 @@ static int venc_start_session(struct vpu_inst *inst, u32 type)
 	stream_buffer_size = vpu_iface_get_stream_buffer_size(inst->core);
 	if (stream_buffer_size > 0) {
 		inst->stream_buffer.length = max_t(u32, stream_buffer_size, venc->cpb_size * 3);
+		inst->stream_buffer.memtrack = inst->memtrack;
+		inst->stream_buffer.label = "bitstream-ring-buffer";
 		ret = vpu_alloc_dma(inst->core, &inst->stream_buffer);
 		if (ret)
 			goto error;
@@ -1027,6 +1032,8 @@ static void venc_request_mem_resource(struct vpu_inst *inst,
 
 	for (i = 0; i < enc_frame_num; i++) {
 		venc->enc[i].length = enc_frame_size;
+		venc->enc[i].memtrack = inst->memtrack;
+		venc->enc[i].label = "enc-frame";
 		ret = vpu_alloc_dma(inst->core, &venc->enc[i]);
 		if (ret) {
 			venc_cleanup_mem_resource(inst);
@@ -1035,6 +1042,8 @@ static void venc_request_mem_resource(struct vpu_inst *inst,
 	}
 	for (i = 0; i < ref_frame_num; i++) {
 		venc->ref[i].length = ref_frame_size;
+		venc->ref[i].memtrack = inst->memtrack;
+		venc->ref[i].label = "ref-frame";
 		ret = vpu_alloc_dma(inst->core, &venc->ref[i]);
 		if (ret) {
 			venc_cleanup_mem_resource(inst);
diff --git a/drivers/media/platform/amphion/vpu.h b/drivers/media/platform/amphion/vpu.h
index bfd171a3ded4..08913cc54cb1 100644
--- a/drivers/media/platform/amphion/vpu.h
+++ b/drivers/media/platform/amphion/vpu.h
@@ -9,6 +9,7 @@
 #include <media/v4l2-device.h>
 #include <media/v4l2-ctrls.h>
 #include <media/v4l2-mem2mem.h>
+#include <media/v4l2-memtrack.h>
 #include <linux/mailbox_client.h>
 #include <linux/mailbox_controller.h>
 #include <linux/kfifo.h>
@@ -17,6 +18,7 @@
 #define VPU_TIMEOUT		msecs_to_jiffies(1000)
 #define VPU_INST_NULL_ID	(-1L)
 #define VPU_MSG_BUFFER_SIZE	(8192)
+#define VPU_NOTIFY_DELAY_MS	(200)
 
 enum imx_plat_type {
 	IMX8QXP = 0,
@@ -47,6 +49,8 @@ struct vpu_buffer {
 	u32 length;
 	u32 bytesused;
 	struct device *dev;
+	struct v4l2_memtrack_node *memtrack;
+	const char *label;
 };
 
 struct vpu_func {
@@ -81,6 +85,7 @@ struct vpu_dev {
 	atomic_t ref_dec;
 
 	struct dentry *debugfs;
+	struct v4l2_memtrack_node *memtrack;
 };
 
 struct vpu_format {
@@ -279,6 +284,8 @@ struct vpu_inst {
 	pid_t tgid;
 	struct dentry *debugfs;
 
+	struct v4l2_memtrack_node *memtrack;
+
 	void *priv;
 };
 
diff --git a/drivers/media/platform/amphion/vpu_core.c b/drivers/media/platform/amphion/vpu_core.c
index 85cc4a14f8ed..16c2efc86feb 100644
--- a/drivers/media/platform/amphion/vpu_core.c
+++ b/drivers/media/platform/amphion/vpu_core.c
@@ -150,6 +150,8 @@ static int __vpu_alloc_dma(struct device *dev, struct vpu_buffer *buf)
 	if (!buf->virt)
 		return -ENOMEM;
 
+	if (buf->memtrack)
+		v4l2_memtrack_add(buf->memtrack, buf->length, buf->label);
 	buf->dev = dev;
 
 	return 0;
@@ -160,6 +162,8 @@ void vpu_free_dma(struct vpu_buffer *buf)
 	if (!buf->virt || !buf->dev)
 		return;
 
+	if (buf->memtrack)
+		v4l2_memtrack_sub(buf->memtrack, buf->length, buf->label);
 	dma_free_coherent(buf->dev, buf->length, buf->virt, buf->phys);
 	buf->virt = NULL;
 	buf->phys = 0;
@@ -550,6 +554,7 @@ static int vpu_core_parse_dt(struct vpu_core *core, struct device_node *np)
 
 	core->fw.phys = res.start;
 	core->fw.length = resource_size(&res);
+	v4l2_memtrack_add(core->vpu->memtrack, core->fw.length, "fw");
 
 	ret = of_reserved_mem_region_to_resource(np, 1, &res);
 	if (ret) {
@@ -559,6 +564,7 @@ static int vpu_core_parse_dt(struct vpu_core *core, struct device_node *np)
 
 	core->rpc.phys = res.start;
 	core->rpc.length = resource_size(&res);
+	v4l2_memtrack_add(core->vpu->memtrack, core->rpc.length, "rpc");
 
 	if (core->rpc.length < core->res->rpc_size + core->res->fwlog_size) {
 		dev_err(core->dev, "the rpc-region <%pad, 0x%x> is not enough\n",
diff --git a/drivers/media/platform/amphion/vpu_dbg.c b/drivers/media/platform/amphion/vpu_dbg.c
index 497ae4e8a229..a82e21cc8a67 100644
--- a/drivers/media/platform/amphion/vpu_dbg.c
+++ b/drivers/media/platform/amphion/vpu_dbg.c
@@ -212,6 +212,11 @@ static int vpu_dbg_instance(struct seq_file *s, void *data)
 	if (seq_write(s, str, num))
 		return 0;
 
+	num = scnprintf(str, sizeof(str), "memory usage = %ld\n",
+			v4l2_memtrack_read(inst->memtrack));
+	if (seq_write(s, str, num))
+		return 0;
+
 	num = scnprintf(str, sizeof(str), "flow :\n");
 	if (seq_write(s, str, num))
 		return 0;
diff --git a/drivers/media/platform/amphion/vpu_drv.c b/drivers/media/platform/amphion/vpu_drv.c
index 2cca61f41bea..73e01c55da82 100644
--- a/drivers/media/platform/amphion/vpu_drv.c
+++ b/drivers/media/platform/amphion/vpu_drv.c
@@ -138,6 +138,7 @@ static int vpu_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_vpu_media;
 	vpu->debugfs = debugfs_create_dir("amphion_vpu", NULL);
+	vpu->memtrack = v4l2_memtrack_create_root("amphion-vpu");
 
 	of_platform_populate(dev->of_node, NULL, NULL, dev);
 
@@ -162,6 +163,7 @@ static void vpu_remove(struct platform_device *pdev)
 	struct vpu_dev *vpu = platform_get_drvdata(pdev);
 	struct device *dev = &pdev->dev;
 
+	v4l2_memtrack_destroy_node(vpu->memtrack);
 	debugfs_remove_recursive(vpu->debugfs);
 	vpu->debugfs = NULL;
 
diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c
index 7cccc994fc50..431f5f64e683 100644
--- a/drivers/media/platform/amphion/vpu_v4l2.c
+++ b/drivers/media/platform/amphion/vpu_v4l2.c
@@ -651,6 +651,14 @@ static const struct vb2_ops vpu_vb2_ops = {
 	.buf_queue          = vpu_vb2_buf_queue,
 };
 
+static void vpu_memtrack_ctrl_notify(struct v4l2_memtrack_node *node, size_t total, void *priv)
+{
+	struct v4l2_ctrl *ctrl = priv;	/* control handed over as priv at registration */
+
+	if (ctrl)	/* skip when no control was supplied */
+		v4l2_ctrl_s_ctrl_int64(ctrl, total);	/* report total through the control */
+}
+
 static int vpu_m2m_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
 {
 	struct vpu_inst *inst = priv;
@@ -668,9 +676,13 @@ static int vpu_m2m_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_q
 	src_vq->buf_struct_size = sizeof(struct vpu_vb2_buffer);
 	src_vq->dev = inst->vpu->dev;
 	src_vq->lock = &inst->lock;
+	if (inst->memtrack)
+		src_vq->memtrack = v4l2_memtrack_create_node(inst->memtrack, "output");
 	ret = vb2_queue_init(src_vq);
-	if (ret)
+	if (ret) {
+		v4l2_memtrack_destroy_node(src_vq->memtrack);
 		return ret;
+	}
 
 	dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
 	inst->cap_format.type = dst_vq->type;
@@ -684,8 +696,12 @@ static int vpu_m2m_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_q
 	dst_vq->buf_struct_size = sizeof(struct vpu_vb2_buffer);
 	dst_vq->dev = inst->vpu->dev;
 	dst_vq->lock = &inst->lock;
+	if (inst->memtrack)
+		dst_vq->memtrack = v4l2_memtrack_create_node(inst->memtrack, "capture");
 	ret = vb2_queue_init(dst_vq);
 	if (ret) {
+		v4l2_memtrack_destroy_node(src_vq->memtrack);
+		v4l2_memtrack_destroy_node(dst_vq->memtrack);
 		vb2_queue_release(src_vq);
 		return ret;
 	}
@@ -706,6 +722,12 @@ static int vpu_v4l2_release(struct vpu_inst *inst)
 	vpu_release_core(inst->core);
 	put_device(inst->dev);
 
+	if (inst->memtrack) {
+		v4l2_memtrack_unregister_notify(inst->memtrack);
+		v4l2_memtrack_destroy_node(inst->memtrack);
+		inst->memtrack = NULL;
+	}
+
 	v4l2_ctrl_handler_free(&inst->ctrl_handler);
 	mutex_destroy(&inst->lock);
 
@@ -745,6 +767,8 @@ int vpu_v4l2_open(struct file *file, struct vpu_inst *inst)
 	inst->min_buffer_out = 2;
 	v4l2_fh_init(&inst->fh, func->vfd);
 	v4l2_fh_add(&inst->fh, file);
+	if (vpu->memtrack)
+		inst->memtrack = v4l2_memtrack_create_node(vpu->memtrack, "instance");
 
 	ret = call_vop(inst, ctrl_init);
 	if (ret)
@@ -757,6 +781,14 @@ int vpu_v4l2_open(struct file *file, struct vpu_inst *inst)
 		goto error;
 	}
 
+	if (inst->memtrack) {
+		v4l2_memtrack_set_notify_delay(inst->memtrack, VPU_NOTIFY_DELAY_MS);
+		v4l2_memtrack_register_notify(inst->memtrack,
+					      vpu_memtrack_ctrl_notify,
+					      v4l2_ctrl_find(&inst->ctrl_handler,
+							     V4L2_CID_MEMORY_USAGE));
+	}
+
 	inst->fh.ctrl_handler = &inst->ctrl_handler;
 	inst->state = VPU_CODEC_STATE_DEINIT;
 	inst->workqueue = alloc_ordered_workqueue("vpu_inst", WQ_MEM_RECLAIM);
@@ -775,6 +807,7 @@ int vpu_v4l2_open(struct file *file, struct vpu_inst *inst)
 
 	return 0;
 error:
+	v4l2_memtrack_destroy_node(inst->memtrack);
 	v4l2_fh_del(&inst->fh, file);
 	v4l2_fh_exit(&inst->fh);
 	vpu_inst_put(inst);
-- 
2.53.0