Allow reading the "trace" file for trace remotes. Unlike "trace_pipe",
this is a non-consuming read of the trace buffer: events are left in
place and can be read again by a later reader.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
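---
A minimal userspace sketch of the intended usage. The tracefs path below is
an assumption for illustration only: the remote's directory is created by
trace_remote_init_tracefs() and its exact location is not spelled out in
this patch.

  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
  	char buf[4096];
  	ssize_t len;
  	int fd;

  	/* Hypothetical path: adjust to the remote's actual tracefs directory */
  	fd = open("/sys/kernel/tracing/remotes/<remote>/trace", O_RDONLY);
  	if (fd < 0)
  		return 1;

  	/* Non-consuming read: events stay in the buffer for later readers */
  	while ((len = read(fd, buf, sizeof(buf))) > 0)
  		fwrite(buf, 1, len, stdout);

  	close(fd);
  	return 0;
  }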
diff --git a/kernel/trace/trace_remote.c b/kernel/trace/trace_remote.c
index abf83ff75df5..c011df20df95 100644
--- a/kernel/trace/trace_remote.c
+++ b/kernel/trace/trace_remote.c
@@ -18,14 +18,25 @@
#define TRACEFS_MODE_WRITE 0640
#define TRACEFS_MODE_READ 0440
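+/*
+ * A remote buffer can be read either destructively (trace_pipe) or via an
+ * iterator that leaves the events in place (trace).
+ */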
+enum tri_type {
+ TRI_CONSUMING,
+ TRI_NONCONSUMING,
+};
+
struct trace_remote_iterator {
struct trace_remote *remote;
struct trace_seq seq;
struct delayed_work poll_work;
unsigned long lost_events;
u64 ts;
+ union {
+ struct ring_buffer_iter **rb_iters;
+ struct ring_buffer_iter *rb_iter;
+ };
int cpu;
int evt_cpu;
+ loff_t pos;
+ enum tri_type type;
};
struct trace_remote {
@@ -36,6 +47,8 @@ struct trace_remote {
unsigned long trace_buffer_size;
struct ring_buffer_remote rb_remote;
struct mutex lock;
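+ /*
+ * reader_lock is taken for writing by an all-CPU consuming reader and for
+ * reading by every other reader. Per-CPU readers then serialize on
+ * pcpu_reader_locks[cpu]: write when consuming, read otherwise.
+ */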
+ struct rw_semaphore reader_lock;
+ struct rw_semaphore *pcpu_reader_locks;
unsigned int nr_readers;
unsigned int poll_ms;
bool tracing_on;
@@ -225,6 +238,20 @@ static int trace_remote_get(struct trace_remote *remote, int cpu)
if (ret)
return ret;
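+ /* Per-CPU reader locks are only needed once a per-CPU reader shows up */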
+ if (cpu != RING_BUFFER_ALL_CPUS && !remote->pcpu_reader_locks) {
+ int lock_cpu;
+
+ remote->pcpu_reader_locks = kcalloc(nr_cpu_ids, sizeof(*remote->pcpu_reader_locks),
+ GFP_KERNEL);
+ if (!remote->pcpu_reader_locks) {
+ trace_remote_try_unload(remote);
+ return -ENOMEM;
+ }
+
+ for_each_possible_cpu(lock_cpu)
+ init_rwsem(&remote->pcpu_reader_locks[lock_cpu]);
+ }
+
remote->nr_readers++;
return 0;
@@ -239,6 +266,9 @@ static void trace_remote_put(struct trace_remote *remote)
if (remote->nr_readers)
return;
+ kfree(remote->pcpu_reader_locks);
+ remote->pcpu_reader_locks = NULL;
+
trace_remote_try_unload(remote);
}
@@ -253,6 +283,48 @@ static void __poll_remote(struct work_struct *work)
msecs_to_jiffies(iter->remote->poll_ms));
}
+static int __alloc_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
+{
+ bool once = false;
+
+ if (cpu != RING_BUFFER_ALL_CPUS) {
+ iter->rb_iter = ring_buffer_read_start(iter->remote->trace_buffer, cpu, GFP_KERNEL);
+
+ return iter->rb_iter ? 0 : -ENOMEM;
+ }
+
+ iter->rb_iters = kcalloc(nr_cpu_ids, sizeof(*iter->rb_iters), GFP_KERNEL);
+ if (!iter->rb_iters)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ iter->rb_iters[cpu] = ring_buffer_read_start(iter->remote->trace_buffer, cpu,
+ GFP_KERNEL);
+ if (iter->rb_iters[cpu])
+ once = true;
+ }
+
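+ /* Succeed as long as an iterator could be created for at least one CPU */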
+ return once ? 0 : -ENOMEM;
+}
+
+static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
+{
+ if (!iter->rb_iter)
+ return;
+
+ if (cpu != RING_BUFFER_ALL_CPUS) {
+ ring_buffer_read_finish(iter->rb_iter);
+ return;
+ }
+
+ for_each_possible_cpu(cpu) {
+ if (iter->rb_iters[cpu])
+ ring_buffer_read_finish(iter->rb_iters[cpu]);
+ }
+
+ kfree(iter->rb_iters);
+}
+
static struct trace_remote_iterator
*trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type)
{
@@ -261,6 +333,8 @@ static struct trace_remote_iterator
lockdep_assert_held(&remote->lock);
+ if (type == TRI_NONCONSUMING && !trace_remote_loaded(remote))
+ return NULL;
ret = trace_remote_get(remote, cpu);
if (ret)
@@ -275,9 +349,21 @@ static struct trace_remote_iterator
if (iter) {
iter->remote = remote;
iter->cpu = cpu;
+ iter->type = type;
trace_seq_init(&iter->seq);
- INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
- schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms));
+
+ switch (type) {
+ case TRI_CONSUMING:
+ INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
+ schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms));
+ break;
+ case TRI_NONCONSUMING:
+ ret = __alloc_ring_buffer_iter(iter, cpu);
+ break;
+ }
+
+ if (ret)
+ goto err;
return iter;
}
@@ -301,10 +387,100 @@ static void trace_remote_iter_free(struct trace_remote_iterator *iter)
lockdep_assert_held(&remote->lock);
+ switch (iter->type) {
+ case TRI_CONSUMING:
+ cancel_delayed_work_sync(&iter->poll_work);
+ break;
+ case TRI_NONCONSUMING:
+ __free_ring_buffer_iter(iter, iter->cpu);
+ break;
+ }
+
kfree(iter);
trace_remote_put(remote);
}
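+/*
+ * The reader locks are held for the duration of a read: trace_pipe takes
+ * them around each read(), while the trace file holds them from open() to
+ * release().
+ */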
+static void trace_remote_iter_read_start(struct trace_remote_iterator *iter)
+{
+ struct trace_remote *remote = iter->remote;
+ int cpu = iter->cpu;
+
+ /* Acquire global reader lock */
+ if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
+ down_write(&remote->reader_lock);
+ else
+ down_read(&remote->reader_lock);
+
+ if (cpu == RING_BUFFER_ALL_CPUS)
+ return;
+
+ /*
+ * No need for the remote lock here, iter holds a reference on
+ * remote->nr_readers
+ */
+
+ /* Get the per-CPU one */
+ if (WARN_ON_ONCE(!remote->pcpu_reader_locks))
+ return;
+
+ if (iter->type == TRI_CONSUMING)
+ down_write(&remote->pcpu_reader_locks[cpu]);
+ else
+ down_read(&remote->pcpu_reader_locks[cpu]);
+}
+
+static void trace_remote_iter_read_finished(struct trace_remote_iterator *iter)
+{
+ struct trace_remote *remote = iter->remote;
+ int cpu = iter->cpu;
+
+ /* Release per-CPU reader lock */
+ if (cpu != RING_BUFFER_ALL_CPUS) {
+ /*
+ * No need for the remote lock here, iter holds a reference on
+ * remote->nr_readers
+ */
+ if (iter->type == TRI_CONSUMING)
+ up_write(&remote->pcpu_reader_locks[cpu]);
+ else
+ up_read(&remote->pcpu_reader_locks[cpu]);
+ }
+
+ /* Release global reader lock */
+ if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
+ up_write(&remote->reader_lock);
+ else
+ up_read(&remote->reader_lock);
+}
+
+static struct ring_buffer_iter *__get_rb_iter(struct trace_remote_iterator *iter, int cpu)
+{
+ return iter->cpu != RING_BUFFER_ALL_CPUS ? iter->rb_iter : iter->rb_iters[cpu];
+}
+
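+/* Peek at the next event on @cpu without moving the read position */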
+static struct ring_buffer_event *
+__peek_event(struct trace_remote_iterator *iter, int cpu, u64 *ts, unsigned long *lost_events)
+{
+ struct ring_buffer_event *rb_evt;
+ struct ring_buffer_iter *rb_iter;
+
+ switch (iter->type) {
+ case TRI_CONSUMING:
+ return ring_buffer_peek(iter->remote->trace_buffer, cpu, ts, lost_events);
+ case TRI_NONCONSUMING:
+ rb_iter = __get_rb_iter(iter, cpu);
+ rb_evt = ring_buffer_iter_peek(rb_iter, ts);
+ if (!rb_evt)
+ return NULL;
+
+ *lost_events = ring_buffer_iter_dropped(rb_iter);
+
+ return rb_evt;
+ }
+
+ return NULL;
+}
+
static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
{
struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
@@ -314,7 +490,7 @@ static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
if (ring_buffer_empty_cpu(trace_buffer, cpu))
return false;
- if (!ring_buffer_peek(trace_buffer, cpu, &iter->ts, &iter->lost_events))
+ if (!__peek_event(iter, cpu, &iter->ts, &iter->lost_events))
return false;
iter->evt_cpu = cpu;
@@ -329,7 +505,7 @@ static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
if (ring_buffer_empty_cpu(trace_buffer, cpu))
continue;
- if (!ring_buffer_peek(trace_buffer, cpu, &ts, &lost_events))
+ if (!__peek_event(iter, cpu, &ts, &lost_events))
continue;
if (ts >= iter->ts)
@@ -343,7 +519,21 @@ static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
return iter->ts != U64_MAX;
}
-static int trace_remote_iter_print(struct trace_remote_iterator *iter)
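+/* Move past the event returned by trace_remote_iter_read_event() */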
+static void trace_remote_iter_move(struct trace_remote_iterator *iter)
+{
+ struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
+
+ switch (iter->type) {
+ case TRI_CONSUMING:
+ ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
+ break;
+ case TRI_NONCONSUMING:
+ ring_buffer_iter_advance(__get_rb_iter(iter, iter->evt_cpu));
+ break;
+ }
+}
+
+static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
{
unsigned long usecs_rem;
u64 ts = iter->ts;
@@ -371,7 +561,11 @@ static int trace_pipe_open(struct inode *inode, struct file *filp)
cpu = (long)inode->i_cdev - 1;
guard(mutex)(&remote->lock);
- iter = trace_remote_iter(remote, cpu);
+
+ iter = trace_remote_iter(remote, cpu, TRI_CONSUMING);
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+
filp->private_data = iter;
return IS_ERR(iter) ? PTR_ERR(iter) : 0;
@@ -406,7 +600,8 @@ static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt,
if (ret < 0)
return ret;
- while (trace_remote_iter_next(iter)) {
+ trace_remote_iter_read_start(iter);
+
+ while (trace_remote_iter_read_event(iter)) {
int prev_len = iter->seq.seq.len;
@@ -415,9 +610,11 @@ static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt,
break;
}
- ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
+ trace_remote_iter_move(iter);
}
+ trace_remote_iter_read_finished(iter);
+
goto copy_to_user;
}
@@ -427,6 +624,119 @@ static const struct file_operations trace_pipe_fops = {
.release = trace_pipe_release,
};
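+/* seq_file interface backing non-consuming reads of the trace file */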
+static void *trace_seq_start(struct seq_file *m, loff_t *pos)
+{
+ struct trace_remote_iterator *iter = m->private;
+ loff_t i = *pos;
+
+ if (!iter)
+ return NULL;
+
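+ /*
+ * The remote iterator cannot rewind: catch up with the requested
+ * seq_file position before handing out an event.
+ */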
+ if (iter->pos <= *pos) {
+ do {
+ if (!trace_remote_iter_read_event(iter))
+ return NULL;
+
+ trace_remote_iter_move(iter);
+ iter->pos++;
+ } while (iter->pos <= i);
+ }
+
+ return iter;
+}
+
+static void *trace_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct trace_remote_iterator *iter = m->private;
+
+ ++*pos;
+
+ if (!iter || !trace_remote_iter_read_event(iter))
+ return NULL;
+
+ trace_remote_iter_move(iter);
+ iter->pos++;
+
+ return iter;
+}
+
+static int trace_seq_show(struct seq_file *m, void *v)
+{
+ struct trace_remote_iterator *iter = v;
+
+ trace_seq_init(&iter->seq);
+
+ if (trace_remote_iter_print_event(iter)) {
+ seq_printf(m, "[EVENT %d PRINT TOO BIG]\n", iter->evt->id);
+ return 0;
+ }
+
+ return trace_print_seq(m, &iter->seq);
+}
+
+static void trace_seq_stop(struct seq_file *s, void *v) { }
+
+static const struct seq_operations trace_seq_ops = {
+ .start = trace_seq_start,
+ .next = trace_seq_next,
+ .show = trace_seq_show,
+ .stop = trace_seq_stop,
+};
+
+static int trace_open(struct inode *inode, struct file *filp)
+{
+ struct trace_remote *remote = inode->i_private;
+ struct trace_remote_iterator *iter = NULL;
+ int cpu = RING_BUFFER_ALL_CPUS;
+ int ret;
+
+ if (!(filp->f_mode & FMODE_READ))
+ return 0;
+
+ if (inode->i_cdev)
+ cpu = (long)inode->i_cdev - 1;
+
+ guard(mutex)(&remote->lock);
+
+ iter = trace_remote_iter(remote, cpu, TRI_NONCONSUMING);
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+
+ ret = seq_open(filp, &trace_seq_ops);
+ if (ret) {
+ trace_remote_iter_free(iter);
+ return ret;
+ }
+
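+ /* A NULL iter (remote not yet loaded) results in an empty trace file */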
+ if (iter)
+ trace_remote_iter_read_start(iter);
+
+ ((struct seq_file *)filp->private_data)->private = (void *)iter;
+
+ return 0;
+}
+
+static int trace_release(struct inode *inode, struct file *filp)
+{
+ struct trace_remote_iterator *iter;
+
+ if (!(filp->f_mode & FMODE_READ))
+ return 0;
+
+ iter = ((struct seq_file *)filp->private_data)->private;
+ seq_release(inode, filp);
+
+ if (!iter)
+ return 0;
+
+ guard(mutex)(&iter->remote->lock);
+
+ trace_remote_iter_read_finished(iter);
+ trace_remote_iter_free(iter);
+
+ return 0;
+}
+
static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
{
struct inode *inode = file_inode(filp);
@@ -444,7 +754,11 @@ static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cn
}
static const struct file_operations trace_fops = {
+ .open = trace_open,
.write = trace_write,
+ .read = seq_read,
+ .read_iter = seq_read_iter,
+ .release = trace_release,
};
static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote)
@@ -533,6 +847,7 @@ int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs,
remote->trace_buffer_size = 7 << 10;
remote->poll_ms = 100;
mutex_init(&remote->lock);
+ init_rwsem(&remote->reader_lock);
if (trace_remote_init_tracefs(name, remote)) {
kfree(remote);
--
2.51.0.618.g983fd99d29-goog