include/trace/events/dma.h | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-)
The dma_map_sg tracepoint can trigger a perf buffer overflow when
tracing large scatter-gather lists. With devices like virtio-gpu
creating large DRM buffers, nents can exceed 1000 entries, resulting
in:
phys_addrs: 1000 * 8 bytes = 8,000 bytes
dma_addrs: 1000 * 8 bytes = 8,000 bytes
lengths: 1000 * 4 bytes = 4,000 bytes
Total: ~20,000 bytes
This exceeds PERF_MAX_TRACE_SIZE (8192 bytes), causing:
WARNING: CPU: 0 PID: 5497 at kernel/trace/trace_event_perf.c:405
perf buffer not large enough, wanted 24620, have 8192
Cap all three dynamic arrays at 128 entries using min() in the array
size calculation. This ensures arrays are only as large as needed
(up to the cap), avoiding unnecessary memory allocation for small
operations while preventing overflow for large ones.
The tracepoint now records the full nents/ents counts and a truncated
flag so users can see when data has been capped.
Changes in v2:
- Use min(nents, DMA_TRACE_MAX_ENTRIES) for dynamic array sizing
instead of fixed DMA_TRACE_MAX_ENTRIES allocation (feedback from
Steven Rostedt)
- This allocates only what's needed up to the cap, avoiding waste
for small operations
Reported-by: syzbot+28cea38c382fd15e751a@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=28cea38c382fd15e751a
Tested-by: syzbot+28cea38c382fd15e751a@syzkaller.appspotmail.com
Signed-off-by: Deepanshu Kartikey <Kartikey406@gmail.com>
---
include/trace/events/dma.h | 25 +++++++++++++++++++------
1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index b3fef140ae15..33e99e792f1a 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -275,6 +275,8 @@ TRACE_EVENT(dma_free_sgt,
sizeof(u64), sizeof(u64)))
);
+#define DMA_TRACE_MAX_ENTRIES 128
+
TRACE_EVENT(dma_map_sg,
TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents,
int ents, enum dma_data_direction dir, unsigned long attrs),
@@ -282,9 +284,12 @@ TRACE_EVENT(dma_map_sg,
TP_STRUCT__entry(
__string(device, dev_name(dev))
- __dynamic_array(u64, phys_addrs, nents)
- __dynamic_array(u64, dma_addrs, ents)
- __dynamic_array(unsigned int, lengths, ents)
+ __field(int, full_nents)
+ __field(int, full_ents)
+ __field(bool, truncated)
+ __dynamic_array(u64, phys_addrs, min(nents, DMA_TRACE_MAX_ENTRIES))
+ __dynamic_array(u64, dma_addrs, min(ents, DMA_TRACE_MAX_ENTRIES))
+ __dynamic_array(unsigned int, lengths, min(ents, DMA_TRACE_MAX_ENTRIES))
__field(enum dma_data_direction, dir)
__field(unsigned long, attrs)
),
@@ -292,11 +297,16 @@ TRACE_EVENT(dma_map_sg,
TP_fast_assign(
struct scatterlist *sg;
int i;
+ int traced_nents = min_t(int, nents, DMA_TRACE_MAX_ENTRIES);
+ int traced_ents = min_t(int, ents, DMA_TRACE_MAX_ENTRIES);
__assign_str(device);
- for_each_sg(sgl, sg, nents, i)
+ __entry->full_nents = nents;
+ __entry->full_ents = ents;
+ __entry->truncated = (nents > DMA_TRACE_MAX_ENTRIES) || (ents > DMA_TRACE_MAX_ENTRIES);
+ for_each_sg(sgl, sg, traced_nents, i)
((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg);
- for_each_sg(sgl, sg, ents, i) {
+ for_each_sg(sgl, sg, traced_ents, i) {
((u64 *)__get_dynamic_array(dma_addrs))[i] =
sg_dma_address(sg);
((unsigned int *)__get_dynamic_array(lengths))[i] =
@@ -306,9 +316,12 @@ TRACE_EVENT(dma_map_sg,
__entry->attrs = attrs;
),
- TP_printk("%s dir=%s dma_addrs=%s sizes=%s phys_addrs=%s attrs=%s",
+ TP_printk("%s dir=%s nents=%d/%d ents=%d/%d%s dma_addrs=%s sizes=%s phys_addrs=%s attrs=%s",
__get_str(device),
decode_dma_data_direction(__entry->dir),
+ min_t(int, __entry->full_nents, DMA_TRACE_MAX_ENTRIES), __entry->full_nents,
+ min_t(int, __entry->full_ents, DMA_TRACE_MAX_ENTRIES), __entry->full_ents,
+ __entry->truncated ? " [TRUNCATED]" : "",
__print_array(__get_dynamic_array(dma_addrs),
__get_dynamic_array_len(dma_addrs) /
sizeof(u64), sizeof(u64)),
--
2.43.0
On 30.01.2026 16:52, Deepanshu Kartikey wrote:
> The dma_map_sg tracepoint can trigger a perf buffer overflow when
> tracing large scatter-gather lists. With devices like virtio-gpu
> creating large DRM buffers, nents can exceed 1000 entries, resulting
> in:
>
> phys_addrs: 1000 * 8 bytes = 8,000 bytes
> dma_addrs: 1000 * 8 bytes = 8,000 bytes
> lengths: 1000 * 4 bytes = 4,000 bytes
> Total: ~20,000 bytes
>
> This exceeds PERF_MAX_TRACE_SIZE (8192 bytes), causing:
>
> WARNING: CPU: 0 PID: 5497 at kernel/trace/trace_event_perf.c:405
> perf buffer not large enough, wanted 24620, have 8192
>
> Cap all three dynamic arrays at 128 entries using min() in the array
> size calculation. This ensures arrays are only as large as needed
> (up to the cap), avoiding unnecessary memory allocation for small
> operations while preventing overflow for large ones.
>
> The tracepoint now records the full nents/ents counts and a truncated
> flag so users can see when data has been capped.
>
> Changes in v2:
> - Use min(nents, DMA_TRACE_MAX_ENTRIES) for dynamic array sizing
> instead of fixed DMA_TRACE_MAX_ENTRIES allocation (feedback from
> Steven Rostedt)
> - This allocates only what's needed up to the cap, avoiding waste
> for small operations
>
> Reported-by: syzbot+28cea38c382fd15e751a@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=28cea38c382fd15e751a
> Tested-by: syzbot+28cea38c382fd15e751a@syzkaller.appspotmail.com
> Signed-off-by: Deepanshu Kartikey <Kartikey406@gmail.com>
Applied to dma-mapping-fixes, thanks!
> ---
> include/trace/events/dma.h | 25 +++++++++++++++++++------
> 1 file changed, 19 insertions(+), 6 deletions(-)
>
> diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
> index b3fef140ae15..33e99e792f1a 100644
> --- a/include/trace/events/dma.h
> +++ b/include/trace/events/dma.h
> @@ -275,6 +275,8 @@ TRACE_EVENT(dma_free_sgt,
> sizeof(u64), sizeof(u64)))
> );
>
> +#define DMA_TRACE_MAX_ENTRIES 128
> +
> TRACE_EVENT(dma_map_sg,
> TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents,
> int ents, enum dma_data_direction dir, unsigned long attrs),
> @@ -282,9 +284,12 @@ TRACE_EVENT(dma_map_sg,
>
> TP_STRUCT__entry(
> __string(device, dev_name(dev))
> - __dynamic_array(u64, phys_addrs, nents)
> - __dynamic_array(u64, dma_addrs, ents)
> - __dynamic_array(unsigned int, lengths, ents)
> + __field(int, full_nents)
> + __field(int, full_ents)
> + __field(bool, truncated)
> + __dynamic_array(u64, phys_addrs, min(nents, DMA_TRACE_MAX_ENTRIES))
> + __dynamic_array(u64, dma_addrs, min(ents, DMA_TRACE_MAX_ENTRIES))
> + __dynamic_array(unsigned int, lengths, min(ents, DMA_TRACE_MAX_ENTRIES))
> __field(enum dma_data_direction, dir)
> __field(unsigned long, attrs)
> ),
> @@ -292,11 +297,16 @@ TRACE_EVENT(dma_map_sg,
> TP_fast_assign(
> struct scatterlist *sg;
> int i;
> + int traced_nents = min_t(int, nents, DMA_TRACE_MAX_ENTRIES);
> + int traced_ents = min_t(int, ents, DMA_TRACE_MAX_ENTRIES);
>
> __assign_str(device);
> - for_each_sg(sgl, sg, nents, i)
> + __entry->full_nents = nents;
> + __entry->full_ents = ents;
> + __entry->truncated = (nents > DMA_TRACE_MAX_ENTRIES) || (ents > DMA_TRACE_MAX_ENTRIES);
> + for_each_sg(sgl, sg, traced_nents, i)
> ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg);
> - for_each_sg(sgl, sg, ents, i) {
> + for_each_sg(sgl, sg, traced_ents, i) {
> ((u64 *)__get_dynamic_array(dma_addrs))[i] =
> sg_dma_address(sg);
> ((unsigned int *)__get_dynamic_array(lengths))[i] =
> @@ -306,9 +316,12 @@ TRACE_EVENT(dma_map_sg,
> __entry->attrs = attrs;
> ),
>
> - TP_printk("%s dir=%s dma_addrs=%s sizes=%s phys_addrs=%s attrs=%s",
> + TP_printk("%s dir=%s nents=%d/%d ents=%d/%d%s dma_addrs=%s sizes=%s phys_addrs=%s attrs=%s",
> __get_str(device),
> decode_dma_data_direction(__entry->dir),
> + min_t(int, __entry->full_nents, DMA_TRACE_MAX_ENTRIES), __entry->full_nents,
> + min_t(int, __entry->full_ents, DMA_TRACE_MAX_ENTRIES), __entry->full_ents,
> + __entry->truncated ? " [TRUNCATED]" : "",
> __print_array(__get_dynamic_array(dma_addrs),
> __get_dynamic_array_len(dma_addrs) /
> sizeof(u64), sizeof(u64)),
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
On 1/30/26 10:52, Deepanshu Kartikey wrote:
> The dma_map_sg tracepoint can trigger a perf buffer overflow when
> tracing large scatter-gather lists. With devices like virtio-gpu
> creating large DRM buffers, nents can exceed 1000 entries, resulting
> in:
>
> phys_addrs: 1000 * 8 bytes = 8,000 bytes
> dma_addrs: 1000 * 8 bytes = 8,000 bytes
> lengths: 1000 * 4 bytes = 4,000 bytes
> Total: ~20,000 bytes
>
> This exceeds PERF_MAX_TRACE_SIZE (8192 bytes), causing:
>
> WARNING: CPU: 0 PID: 5497 at kernel/trace/trace_event_perf.c:405
> perf buffer not large enough, wanted 24620, have 8192
>
> Cap all three dynamic arrays at 128 entries using min() in the array
> size calculation. This ensures arrays are only as large as needed
> (up to the cap), avoiding unnecessary memory allocation for small
> operations while preventing overflow for large ones.
>
> The tracepoint now records the full nents/ents counts and a truncated
> flag so users can see when data has been capped.
>
> Changes in v2:
> - Use min(nents, DMA_TRACE_MAX_ENTRIES) for dynamic array sizing
> instead of fixed DMA_TRACE_MAX_ENTRIES allocation (feedback from
> Steven Rostedt)
> - This allocates only what's needed up to the cap, avoiding waste
> for small operations
>
> Reported-by: syzbot+28cea38c382fd15e751a@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=28cea38c382fd15e751a
> Tested-by: syzbot+28cea38c382fd15e751a@syzkaller.appspotmail.com
> Signed-off-by: Deepanshu Kartikey <Kartikey406@gmail.com>
> ---
> include/trace/events/dma.h | 25 +++++++++++++++++++------
> 1 file changed, 19 insertions(+), 6 deletions(-)
>
> diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
> index b3fef140ae15..33e99e792f1a 100644
> --- a/include/trace/events/dma.h
> +++ b/include/trace/events/dma.h
> @@ -275,6 +275,8 @@ TRACE_EVENT(dma_free_sgt,
> sizeof(u64), sizeof(u64)))
> );
>
> +#define DMA_TRACE_MAX_ENTRIES 128
> +
> TRACE_EVENT(dma_map_sg,
> TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents,
> int ents, enum dma_data_direction dir, unsigned long attrs),
> @@ -282,9 +284,12 @@ TRACE_EVENT(dma_map_sg,
>
> TP_STRUCT__entry(
> __string(device, dev_name(dev))
> - __dynamic_array(u64, phys_addrs, nents)
> - __dynamic_array(u64, dma_addrs, ents)
> - __dynamic_array(unsigned int, lengths, ents)
> + __field(int, full_nents)
> + __field(int, full_ents)
> + __field(bool, truncated)
> + __dynamic_array(u64, phys_addrs, min(nents, DMA_TRACE_MAX_ENTRIES))
> + __dynamic_array(u64, dma_addrs, min(ents, DMA_TRACE_MAX_ENTRIES))
> + __dynamic_array(unsigned int, lengths, min(ents, DMA_TRACE_MAX_ENTRIES))
> __field(enum dma_data_direction, dir)
> __field(unsigned long, attrs)
> ),
> @@ -292,11 +297,16 @@ TRACE_EVENT(dma_map_sg,
> TP_fast_assign(
> struct scatterlist *sg;
> int i;
> + int traced_nents = min_t(int, nents, DMA_TRACE_MAX_ENTRIES);
> + int traced_ents = min_t(int, ents, DMA_TRACE_MAX_ENTRIES);
>
> __assign_str(device);
> - for_each_sg(sgl, sg, nents, i)
> + __entry->full_nents = nents;
> + __entry->full_ents = ents;
> + __entry->truncated = (nents > DMA_TRACE_MAX_ENTRIES) || (ents > DMA_TRACE_MAX_ENTRIES);
> + for_each_sg(sgl, sg, traced_nents, i)
> ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg);
> - for_each_sg(sgl, sg, ents, i) {
> + for_each_sg(sgl, sg, traced_ents, i) {
> ((u64 *)__get_dynamic_array(dma_addrs))[i] =
> sg_dma_address(sg);
> ((unsigned int *)__get_dynamic_array(lengths))[i] =
> @@ -306,9 +316,12 @@ TRACE_EVENT(dma_map_sg,
> __entry->attrs = attrs;
> ),
>
> - TP_printk("%s dir=%s dma_addrs=%s sizes=%s phys_addrs=%s attrs=%s",
> + TP_printk("%s dir=%s nents=%d/%d ents=%d/%d%s dma_addrs=%s sizes=%s phys_addrs=%s attrs=%s",
> __get_str(device),
> decode_dma_data_direction(__entry->dir),
> + min_t(int, __entry->full_nents, DMA_TRACE_MAX_ENTRIES), __entry->full_nents,
> + min_t(int, __entry->full_ents, DMA_TRACE_MAX_ENTRIES), __entry->full_ents,
> + __entry->truncated ? " [TRUNCATED]" : "",
> __print_array(__get_dynamic_array(dma_addrs),
> __get_dynamic_array_len(dma_addrs) /
> sizeof(u64), sizeof(u64)),
Reviewed-by: Sean Anderson <sean.anderson@linux.dev>
Although it's a bit unusual that there's no limit on dynamic arrays like there is for %*ph.
On Fri, 30 Jan 2026 11:27:47 -0500 Sean Anderson <sean.anderson@linux.dev> wrote:
> Reviewed-by: Sean Anderson <sean.anderson@linux.dev>
>
> Although it's a bit unusual that there's no limit on dynamic arrays like there is for %*ph.
Dynamic arrays are done in fast paths. I'd rather not add an extra branch to test cases where it's most likely not going to exceed the limit.
-- Steve
© 2016 - 2026 Red Hat, Inc.