With a file, to write data an offset needs to be known. Typically data
follows the event attributes in a file. However, if processing a pipe
the number of event attributes may not be known. It is convenient in
that case to write the attributes after the data. Expand
perf_session__do_write_header to allow this when the data offset and
size are known.
This approach may be useful for more than just taking a pipe file to
write into a data file, `perf inject --itrace` will reserve and
additional 8kb for attributes, which would be unnecessary if the
attributes were written after the data.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/util/header.c | 106 +++++++++++++++++++++++++--------------
1 file changed, 67 insertions(+), 39 deletions(-)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 65c9086610cb..4eb39463067e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3676,32 +3676,50 @@ int perf_header__write_pipe(int fd)
static int perf_session__do_write_header(struct perf_session *session,
struct evlist *evlist,
int fd, bool at_exit,
- struct feat_copier *fc)
+ struct feat_copier *fc,
+ bool write_attrs_after_data)
{
struct perf_file_header f_header;
- struct perf_file_attr f_attr;
struct perf_header *header = &session->header;
struct evsel *evsel;
struct feat_fd ff = {
.fd = fd,
};
- u64 attr_offset;
+ u64 attr_offset = sizeof(f_header), attr_size = 0;
int err;
- lseek(fd, sizeof(f_header), SEEK_SET);
+ if (write_attrs_after_data && at_exit) {
+ /*
+ * Write features at the end of the file first so that
+ * attributes may come after them.
+ */
+ if (!header->data_offset && header->data_size) {
+ pr_err("File contains data but offset unknown\n");
+ err = -1;
+ goto err_out;
+ }
+ header->feat_offset = header->data_offset + header->data_size;
+ err = perf_header__adds_write(header, evlist, fd, fc);
+ if (err < 0)
+ goto err_out;
+ attr_offset = lseek(fd, 0, SEEK_CUR);
+ } else {
+ lseek(fd, attr_offset, SEEK_SET);
+ }
evlist__for_each_entry(session->evlist, evsel) {
- evsel->id_offset = lseek(fd, 0, SEEK_CUR);
- err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
- if (err < 0) {
- pr_debug("failed to write perf header\n");
- free(ff.buf);
- return err;
+ evsel->id_offset = attr_offset;
+ /* Avoid writing at the end of the file until the session is exiting. */
+ if (!write_attrs_after_data || at_exit) {
+ err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
+ if (err < 0) {
+ pr_debug("failed to write perf header\n");
+ goto err_out;
+ }
}
+ attr_offset += evsel->core.ids * sizeof(u64);
}
- attr_offset = lseek(ff.fd, 0, SEEK_CUR);
-
evlist__for_each_entry(evlist, evsel) {
if (evsel->core.attr.size < sizeof(evsel->core.attr)) {
/*
@@ -3711,40 +3729,46 @@ static int perf_session__do_write_header(struct perf_session *session,
*/
evsel->core.attr.size = sizeof(evsel->core.attr);
}
- f_attr = (struct perf_file_attr){
- .attr = evsel->core.attr,
- .ids = {
- .offset = evsel->id_offset,
- .size = evsel->core.ids * sizeof(u64),
+ /* Avoid writing at the end of the file until the session is exiting. */
+ if (!write_attrs_after_data || at_exit) {
+ struct perf_file_attr f_attr = {
+ .attr = evsel->core.attr,
+ .ids = {
+ .offset = evsel->id_offset,
+ .size = evsel->core.ids * sizeof(u64),
+ }
+ };
+ err = do_write(&ff, &f_attr, sizeof(f_attr));
+ if (err < 0) {
+ pr_debug("failed to write perf header attribute\n");
+ goto err_out;
}
- };
- err = do_write(&ff, &f_attr, sizeof(f_attr));
- if (err < 0) {
- pr_debug("failed to write perf header attribute\n");
- free(ff.buf);
- return err;
}
+ attr_size += sizeof(struct perf_file_attr);
}
- if (!header->data_offset)
- header->data_offset = lseek(fd, 0, SEEK_CUR);
+ if (!header->data_offset) {
+ if (write_attrs_after_data)
+ header->data_offset = sizeof(f_header);
+ else
+ header->data_offset = attr_offset + attr_size;
+ }
header->feat_offset = header->data_offset + header->data_size;
- if (at_exit) {
+ if (!write_attrs_after_data && at_exit) {
+ /* Write features now feat_offset is known. */
err = perf_header__adds_write(header, evlist, fd, fc);
- if (err < 0) {
- free(ff.buf);
- return err;
- }
+ if (err < 0)
+ goto err_out;
}
f_header = (struct perf_file_header){
.magic = PERF_MAGIC,
.size = sizeof(f_header),
- .attr_size = sizeof(f_attr),
+ .attr_size = sizeof(struct perf_file_attr),
.attrs = {
.offset = attr_offset,
- .size = evlist->core.nr_entries * sizeof(f_attr),
+ .size = attr_size,
},
.data = {
.offset = header->data_offset,
@@ -3757,21 +3781,24 @@ static int perf_session__do_write_header(struct perf_session *session,
lseek(fd, 0, SEEK_SET);
err = do_write(&ff, &f_header, sizeof(f_header));
- free(ff.buf);
if (err < 0) {
pr_debug("failed to write perf header\n");
- return err;
+ goto err_out;
+ } else {
+ lseek(fd, 0, SEEK_END);
+ err = 0;
}
- lseek(fd, header->data_offset + header->data_size, SEEK_SET);
-
- return 0;
+err_out:
+ free(ff.buf);
+ return err;
}
int perf_session__write_header(struct perf_session *session,
struct evlist *evlist,
int fd, bool at_exit)
{
- return perf_session__do_write_header(session, evlist, fd, at_exit, NULL);
+ return perf_session__do_write_header(session, evlist, fd, at_exit, /*fc=*/NULL,
+ /*write_attrs_after_data=*/false);
}
size_t perf_session__data_offset(const struct evlist *evlist)
@@ -3793,7 +3820,8 @@ int perf_session__inject_header(struct perf_session *session,
int fd,
struct feat_copier *fc)
{
- return perf_session__do_write_header(session, evlist, fd, true, fc);
+ return perf_session__do_write_header(session, evlist, fd, true, fc,
+ /*write_attrs_after_data=*/false);
}
static int perf_header__getbuffer64(struct perf_header *header,
--
2.46.0.295.g3b9ea8a38a-goog
On Thu, Aug 29, 2024 at 08:01:51AM -0700, Ian Rogers wrote:
> With a file, to write data an offset needs to be known. Typically data
> follows the event attributes in a file. However, if processing a pipe
> the number of event attributes may not be known. It is convenient in
> that case to write the attributes after the data. Expand
> perf_session__do_write_header to allow this when the data offset and
> size are known.
>
> This approach may be useful for more than just taking a pipe file to
> write into a data file, `perf inject --itrace` will reserve and
> additional 8kb for attributes, which would be unnecessary if the
> attributes were written after the data.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> tools/perf/util/header.c | 106 +++++++++++++++++++++++++--------------
> 1 file changed, 67 insertions(+), 39 deletions(-)
>
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index 65c9086610cb..4eb39463067e 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -3676,32 +3676,50 @@ int perf_header__write_pipe(int fd)
> static int perf_session__do_write_header(struct perf_session *session,
> struct evlist *evlist,
> int fd, bool at_exit,
> - struct feat_copier *fc)
> + struct feat_copier *fc,
> + bool write_attrs_after_data)
> {
> struct perf_file_header f_header;
> - struct perf_file_attr f_attr;
> struct perf_header *header = &session->header;
> struct evsel *evsel;
> struct feat_fd ff = {
> .fd = fd,
> };
> - u64 attr_offset;
> + u64 attr_offset = sizeof(f_header), attr_size = 0;
> int err;
>
> - lseek(fd, sizeof(f_header), SEEK_SET);
> + if (write_attrs_after_data && at_exit) {
> + /*
> + * Write features at the end of the file first so that
> + * attributes may come after them.
> + */
> + if (!header->data_offset && header->data_size) {
> + pr_err("File contains data but offset unknown\n");
> + err = -1;
> + goto err_out;
> + }
> + header->feat_offset = header->data_offset + header->data_size;
> + err = perf_header__adds_write(header, evlist, fd, fc);
> + if (err < 0)
> + goto err_out;
> + attr_offset = lseek(fd, 0, SEEK_CUR);
> + } else {
> + lseek(fd, attr_offset, SEEK_SET);
> + }
>
> evlist__for_each_entry(session->evlist, evsel) {
> - evsel->id_offset = lseek(fd, 0, SEEK_CUR);
> - err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> - if (err < 0) {
> - pr_debug("failed to write perf header\n");
> - free(ff.buf);
> - return err;
> + evsel->id_offset = attr_offset;
> + /* Avoid writing at the end of the file until the session is exiting. */
> + if (!write_attrs_after_data || at_exit) {
> + err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> + if (err < 0) {
> + pr_debug("failed to write perf header\n");
> + goto err_out;
> + }
> }
> + attr_offset += evsel->core.ids * sizeof(u64);
So in the past we were using lseek(fd, 0, SEEK_CUR) to set the
evsel->id_offset, now you're assuming that do_write will honour the size
parameter, i.e. write evsel->core.ids * sizeof(u64), but:
/* Return: 0 if succeeded, -ERR if failed. */
int do_write(struct feat_fd *ff, const void *buf, size_t size)
{
if (!ff->buf)
return __do_write_fd(ff, buf, size);
return __do_write_buf(ff, buf, size);
}
And then:
static int __do_write_fd(struct feat_fd *ff, const void *buf, size_t size)
{
ssize_t ret = writen(ff->fd, buf, size);
if (ret != (ssize_t)size)
return ret < 0 ? (int)ret : -1;
return 0;
}
I see that writen calls ion that even has a BUG_ON() if it doesn't write
exactly the requested size bytes, I got distracted with __do_write_fd
extra check that ret != size returning ret if not negative...
I.e. your code _should_ be equivalent due to the check in ion(), and
taking that as an assumption you reduce the number of lseek syscalls,
which is a good thing...
I was just trying to see that the !write_attrs_after_data case was
_exactly_ the same as before, which it doesn't look like it is :-\
- Arnaldo
> }
>
> - attr_offset = lseek(ff.fd, 0, SEEK_CUR);
> -
> evlist__for_each_entry(evlist, evsel) {
> if (evsel->core.attr.size < sizeof(evsel->core.attr)) {
> /*
> @@ -3711,40 +3729,46 @@ static int perf_session__do_write_header(struct perf_session *session,
> */
> evsel->core.attr.size = sizeof(evsel->core.attr);
> }
> - f_attr = (struct perf_file_attr){
> - .attr = evsel->core.attr,
> - .ids = {
> - .offset = evsel->id_offset,
> - .size = evsel->core.ids * sizeof(u64),
> + /* Avoid writing at the end of the file until the session is exiting. */
> + if (!write_attrs_after_data || at_exit) {
> + struct perf_file_attr f_attr = {
> + .attr = evsel->core.attr,
> + .ids = {
> + .offset = evsel->id_offset,
> + .size = evsel->core.ids * sizeof(u64),
> + }
> + };
> + err = do_write(&ff, &f_attr, sizeof(f_attr));
> + if (err < 0) {
> + pr_debug("failed to write perf header attribute\n");
> + goto err_out;
> }
> - };
> - err = do_write(&ff, &f_attr, sizeof(f_attr));
> - if (err < 0) {
> - pr_debug("failed to write perf header attribute\n");
> - free(ff.buf);
> - return err;
> }
> + attr_size += sizeof(struct perf_file_attr);
> }
>
> - if (!header->data_offset)
> - header->data_offset = lseek(fd, 0, SEEK_CUR);
> + if (!header->data_offset) {
> + if (write_attrs_after_data)
> + header->data_offset = sizeof(f_header);
> + else
> + header->data_offset = attr_offset + attr_size;
> + }
> header->feat_offset = header->data_offset + header->data_size;
>
> - if (at_exit) {
> + if (!write_attrs_after_data && at_exit) {
> + /* Write features now feat_offset is known. */
> err = perf_header__adds_write(header, evlist, fd, fc);
> - if (err < 0) {
> - free(ff.buf);
> - return err;
> - }
> + if (err < 0)
> + goto err_out;
> }
>
> f_header = (struct perf_file_header){
> .magic = PERF_MAGIC,
> .size = sizeof(f_header),
> - .attr_size = sizeof(f_attr),
> + .attr_size = sizeof(struct perf_file_attr),
> .attrs = {
> .offset = attr_offset,
> - .size = evlist->core.nr_entries * sizeof(f_attr),
> + .size = attr_size,
> },
> .data = {
> .offset = header->data_offset,
> @@ -3757,21 +3781,24 @@ static int perf_session__do_write_header(struct perf_session *session,
>
> lseek(fd, 0, SEEK_SET);
> err = do_write(&ff, &f_header, sizeof(f_header));
> - free(ff.buf);
> if (err < 0) {
> pr_debug("failed to write perf header\n");
> - return err;
> + goto err_out;
> + } else {
> + lseek(fd, 0, SEEK_END);
> + err = 0;
> }
> - lseek(fd, header->data_offset + header->data_size, SEEK_SET);
> -
> - return 0;
> +err_out:
> + free(ff.buf);
> + return err;
> }
>
> int perf_session__write_header(struct perf_session *session,
> struct evlist *evlist,
> int fd, bool at_exit)
> {
> - return perf_session__do_write_header(session, evlist, fd, at_exit, NULL);
> + return perf_session__do_write_header(session, evlist, fd, at_exit, /*fc=*/NULL,
> + /*write_attrs_after_data=*/false);
> }
>
> size_t perf_session__data_offset(const struct evlist *evlist)
> @@ -3793,7 +3820,8 @@ int perf_session__inject_header(struct perf_session *session,
> int fd,
> struct feat_copier *fc)
> {
> - return perf_session__do_write_header(session, evlist, fd, true, fc);
> + return perf_session__do_write_header(session, evlist, fd, true, fc,
> + /*write_attrs_after_data=*/false);
> }
>
> static int perf_header__getbuffer64(struct perf_header *header,
> --
> 2.46.0.295.g3b9ea8a38a-goog
On Thu, Aug 29, 2024 at 12:31 PM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> On Thu, Aug 29, 2024 at 08:01:51AM -0700, Ian Rogers wrote:
> > With a file, to write data an offset needs to be known. Typically data
> > follows the event attributes in a file. However, if processing a pipe
> > the number of event attributes may not be known. It is convenient in
> > that case to write the attributes after the data. Expand
> > perf_session__do_write_header to allow this when the data offset and
> > size are known.
> >
> > This approach may be useful for more than just taking a pipe file to
> > write into a data file, `perf inject --itrace` will reserve and
> > additional 8kb for attributes, which would be unnecessary if the
> > attributes were written after the data.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> > tools/perf/util/header.c | 106 +++++++++++++++++++++++++--------------
> > 1 file changed, 67 insertions(+), 39 deletions(-)
> >
> > diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> > index 65c9086610cb..4eb39463067e 100644
> > --- a/tools/perf/util/header.c
> > +++ b/tools/perf/util/header.c
> > @@ -3676,32 +3676,50 @@ int perf_header__write_pipe(int fd)
> > static int perf_session__do_write_header(struct perf_session *session,
> > struct evlist *evlist,
> > int fd, bool at_exit,
> > - struct feat_copier *fc)
> > + struct feat_copier *fc,
> > + bool write_attrs_after_data)
> > {
> > struct perf_file_header f_header;
> > - struct perf_file_attr f_attr;
> > struct perf_header *header = &session->header;
> > struct evsel *evsel;
> > struct feat_fd ff = {
> > .fd = fd,
> > };
> > - u64 attr_offset;
> > + u64 attr_offset = sizeof(f_header), attr_size = 0;
> > int err;
> >
> > - lseek(fd, sizeof(f_header), SEEK_SET);
> > + if (write_attrs_after_data && at_exit) {
> > + /*
> > + * Write features at the end of the file first so that
> > + * attributes may come after them.
> > + */
> > + if (!header->data_offset && header->data_size) {
> > + pr_err("File contains data but offset unknown\n");
> > + err = -1;
> > + goto err_out;
> > + }
> > + header->feat_offset = header->data_offset + header->data_size;
> > + err = perf_header__adds_write(header, evlist, fd, fc);
> > + if (err < 0)
> > + goto err_out;
> > + attr_offset = lseek(fd, 0, SEEK_CUR);
> > + } else {
> > + lseek(fd, attr_offset, SEEK_SET);
> > + }
> >
> > evlist__for_each_entry(session->evlist, evsel) {
> > - evsel->id_offset = lseek(fd, 0, SEEK_CUR);
> > - err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> > - if (err < 0) {
> > - pr_debug("failed to write perf header\n");
> > - free(ff.buf);
> > - return err;
> > + evsel->id_offset = attr_offset;
> > + /* Avoid writing at the end of the file until the session is exiting. */
> > + if (!write_attrs_after_data || at_exit) {
> > + err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> > + if (err < 0) {
> > + pr_debug("failed to write perf header\n");
> > + goto err_out;
> > + }
> > }
> > + attr_offset += evsel->core.ids * sizeof(u64);
>
> So in the past we were using lseek(fd, 0, SEEK_CUR) to set the
> evsel->id_offset, now you're assuming that do_write will honour the size
> parameter, i.e. write evsel->core.ids * sizeof(u64), but:
>
> /* Return: 0 if succeeded, -ERR if failed. */
> int do_write(struct feat_fd *ff, const void *buf, size_t size)
> {
> if (!ff->buf)
> return __do_write_fd(ff, buf, size);
> return __do_write_buf(ff, buf, size);
> }
>
> And then:
>
> static int __do_write_fd(struct feat_fd *ff, const void *buf, size_t size)
> {
> ssize_t ret = writen(ff->fd, buf, size);
>
> if (ret != (ssize_t)size)
> return ret < 0 ? (int)ret : -1;
> return 0;
> }
>
> I see that writen calls ion that even has a BUG_ON() if it doesn't write
> exactly the requested size bytes, I got distracted with __do_write_fd
> extra check that ret != size returning ret if not negative...
>
> I.e. your code _should_ be equivalent due to the check in ion(), and
> taking that as an assumption you reduce the number of lseek syscalls,
> which is a good thing...
>
> I was just trying to see that the !write_attrs_after_data case was
> _exactly_ the same as before, which it doesn't look like it is :-\
I'm not seeing the difference. Before:
```
lseek(fd, sizeof(f_header), SEEK_SET);
evlist__for_each_entry(session->evlist, evsel) {
evsel->id_offset = lseek(fd, 0, SEEK_CUR); // Initially at sizeof(f_header)
...
err = do_write(&ff, evsel->core.id, evsel->core.ids *
sizeof(u64)); // offset has advanced by "evsel->core.ids *
sizeof(u64)" bytes
...
}
```
After:
```
u64 attr_offset = sizeof(f_header)
...
else {
lseek(fd, attr_offset, SEEK_SET);
}
evlist__for_each_entry(session->evlist, evsel) {
evsel->id_offset = attr_offset;
...
err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
...
attr_offset += evsel->core.ids * sizeof(u64);
}
```
The reason for using math rather than lseek (apart from reducing
syscalls) is that if we're writing the header at the beginning of
generating a file (usually done more to compute the
header->data_offset), we can't use lseek if writing at the end as we
don't know where the end of the data will be yet and don't write out
the bytes.
Thanks,
Ian
> - Arnaldo
>
> > }
> >
> > - attr_offset = lseek(ff.fd, 0, SEEK_CUR);
> > -
> > evlist__for_each_entry(evlist, evsel) {
> > if (evsel->core.attr.size < sizeof(evsel->core.attr)) {
> > /*
> > @@ -3711,40 +3729,46 @@ static int perf_session__do_write_header(struct perf_session *session,
> > */
> > evsel->core.attr.size = sizeof(evsel->core.attr);
> > }
> > - f_attr = (struct perf_file_attr){
> > - .attr = evsel->core.attr,
> > - .ids = {
> > - .offset = evsel->id_offset,
> > - .size = evsel->core.ids * sizeof(u64),
> > + /* Avoid writing at the end of the file until the session is exiting. */
> > + if (!write_attrs_after_data || at_exit) {
> > + struct perf_file_attr f_attr = {
> > + .attr = evsel->core.attr,
> > + .ids = {
> > + .offset = evsel->id_offset,
> > + .size = evsel->core.ids * sizeof(u64),
> > + }
> > + };
> > + err = do_write(&ff, &f_attr, sizeof(f_attr));
> > + if (err < 0) {
> > + pr_debug("failed to write perf header attribute\n");
> > + goto err_out;
> > }
> > - };
> > - err = do_write(&ff, &f_attr, sizeof(f_attr));
> > - if (err < 0) {
> > - pr_debug("failed to write perf header attribute\n");
> > - free(ff.buf);
> > - return err;
> > }
> > + attr_size += sizeof(struct perf_file_attr);
> > }
> >
> > - if (!header->data_offset)
> > - header->data_offset = lseek(fd, 0, SEEK_CUR);
> > + if (!header->data_offset) {
> > + if (write_attrs_after_data)
> > + header->data_offset = sizeof(f_header);
> > + else
> > + header->data_offset = attr_offset + attr_size;
> > + }
> > header->feat_offset = header->data_offset + header->data_size;
> >
> > - if (at_exit) {
> > + if (!write_attrs_after_data && at_exit) {
> > + /* Write features now feat_offset is known. */
> > err = perf_header__adds_write(header, evlist, fd, fc);
> > - if (err < 0) {
> > - free(ff.buf);
> > - return err;
> > - }
> > + if (err < 0)
> > + goto err_out;
> > }
> >
> > f_header = (struct perf_file_header){
> > .magic = PERF_MAGIC,
> > .size = sizeof(f_header),
> > - .attr_size = sizeof(f_attr),
> > + .attr_size = sizeof(struct perf_file_attr),
> > .attrs = {
> > .offset = attr_offset,
> > - .size = evlist->core.nr_entries * sizeof(f_attr),
> > + .size = attr_size,
> > },
> > .data = {
> > .offset = header->data_offset,
> > @@ -3757,21 +3781,24 @@ static int perf_session__do_write_header(struct perf_session *session,
> >
> > lseek(fd, 0, SEEK_SET);
> > err = do_write(&ff, &f_header, sizeof(f_header));
> > - free(ff.buf);
> > if (err < 0) {
> > pr_debug("failed to write perf header\n");
> > - return err;
> > + goto err_out;
> > + } else {
> > + lseek(fd, 0, SEEK_END);
> > + err = 0;
> > }
> > - lseek(fd, header->data_offset + header->data_size, SEEK_SET);
> > -
> > - return 0;
> > +err_out:
> > + free(ff.buf);
> > + return err;
> > }
> >
> > int perf_session__write_header(struct perf_session *session,
> > struct evlist *evlist,
> > int fd, bool at_exit)
> > {
> > - return perf_session__do_write_header(session, evlist, fd, at_exit, NULL);
> > + return perf_session__do_write_header(session, evlist, fd, at_exit, /*fc=*/NULL,
> > + /*write_attrs_after_data=*/false);
> > }
> >
> > size_t perf_session__data_offset(const struct evlist *evlist)
> > @@ -3793,7 +3820,8 @@ int perf_session__inject_header(struct perf_session *session,
> > int fd,
> > struct feat_copier *fc)
> > {
> > - return perf_session__do_write_header(session, evlist, fd, true, fc);
> > + return perf_session__do_write_header(session, evlist, fd, true, fc,
> > + /*write_attrs_after_data=*/false);
> > }
> >
> > static int perf_header__getbuffer64(struct perf_header *header,
> > --
> > 2.46.0.295.g3b9ea8a38a-goog
On Thu, Aug 29, 2024 at 01:12:32PM -0700, Ian Rogers wrote:
> On Thu, Aug 29, 2024 at 12:31 PM Arnaldo Carvalho de Melo
> <acme@kernel.org> wrote:
> >
> > On Thu, Aug 29, 2024 at 08:01:51AM -0700, Ian Rogers wrote:
> > > With a file, to write data an offset needs to be known. Typically data
> > > follows the event attributes in a file. However, if processing a pipe
> > > the number of event attributes may not be known. It is convenient in
> > > that case to write the attributes after the data. Expand
> > > perf_session__do_write_header to allow this when the data offset and
> > > size are known.
> > >
> > > This approach may be useful for more than just taking a pipe file to
> > > write into a data file, `perf inject --itrace` will reserve and
> > > additional 8kb for attributes, which would be unnecessary if the
> > > attributes were written after the data.
> > >
> > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > ---
> > > tools/perf/util/header.c | 106 +++++++++++++++++++++++++--------------
> > > 1 file changed, 67 insertions(+), 39 deletions(-)
> > >
> > > diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> > > index 65c9086610cb..4eb39463067e 100644
> > > --- a/tools/perf/util/header.c
> > > +++ b/tools/perf/util/header.c
> > > @@ -3676,32 +3676,50 @@ int perf_header__write_pipe(int fd)
> > > static int perf_session__do_write_header(struct perf_session *session,
> > > struct evlist *evlist,
> > > int fd, bool at_exit,
> > > - struct feat_copier *fc)
> > > + struct feat_copier *fc,
> > > + bool write_attrs_after_data)
> > > {
> > > struct perf_file_header f_header;
> > > - struct perf_file_attr f_attr;
> > > struct perf_header *header = &session->header;
> > > struct evsel *evsel;
> > > struct feat_fd ff = {
> > > .fd = fd,
> > > };
> > > - u64 attr_offset;
> > > + u64 attr_offset = sizeof(f_header), attr_size = 0;
> > > int err;
> > >
> > > - lseek(fd, sizeof(f_header), SEEK_SET);
> > > + if (write_attrs_after_data && at_exit) {
> > > + /*
> > > + * Write features at the end of the file first so that
> > > + * attributes may come after them.
> > > + */
> > > + if (!header->data_offset && header->data_size) {
> > > + pr_err("File contains data but offset unknown\n");
> > > + err = -1;
> > > + goto err_out;
> > > + }
> > > + header->feat_offset = header->data_offset + header->data_size;
> > > + err = perf_header__adds_write(header, evlist, fd, fc);
> > > + if (err < 0)
> > > + goto err_out;
> > > + attr_offset = lseek(fd, 0, SEEK_CUR);
> > > + } else {
> > > + lseek(fd, attr_offset, SEEK_SET);
> > > + }
> > >
> > > evlist__for_each_entry(session->evlist, evsel) {
> > > - evsel->id_offset = lseek(fd, 0, SEEK_CUR);
> > > - err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> > > - if (err < 0) {
> > > - pr_debug("failed to write perf header\n");
> > > - free(ff.buf);
> > > - return err;
> > > + evsel->id_offset = attr_offset;
> > > + /* Avoid writing at the end of the file until the session is exiting. */
> > > + if (!write_attrs_after_data || at_exit) {
> > > + err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> > > + if (err < 0) {
> > > + pr_debug("failed to write perf header\n");
> > > + goto err_out;
> > > + }
> > > }
> > > + attr_offset += evsel->core.ids * sizeof(u64);
> >
> > So in the past we were using lseek(fd, 0, SEEK_CUR) to set the
> > evsel->id_offset, now you're assuming that do_write will honour the size
> > parameter, i.e. write evsel->core.ids * sizeof(u64), but:
> >
> > /* Return: 0 if succeeded, -ERR if failed. */
> > int do_write(struct feat_fd *ff, const void *buf, size_t size)
> > {
> > if (!ff->buf)
> > return __do_write_fd(ff, buf, size);
> > return __do_write_buf(ff, buf, size);
> > }
> >
> > And then:
> >
> > static int __do_write_fd(struct feat_fd *ff, const void *buf, size_t size)
> > {
> > ssize_t ret = writen(ff->fd, buf, size);
> >
> > if (ret != (ssize_t)size)
> > return ret < 0 ? (int)ret : -1;
> > return 0;
> > }
> >
> > I see that writen calls ion that even has a BUG_ON() if it doesn't write
> > exactly the requested size bytes, I got distracted with __do_write_fd
> > extra check that ret != size returning ret if not negative...
> >
> > I.e. your code _should_ be equivalent due to the check in ion(), and
> > taking that as an assumption you reduce the number of lseek syscalls,
> > which is a good thing...
> >
> > I was just trying to see that the !write_attrs_after_data case was
> > _exactly_ the same as before, which it doesn't look like it is :-\
>
> I'm not seeing the difference. Before:
You noticed the difference: before we used lseek to get the current
offset to use, afterwards we moved to doing plain math.
So I had to check if we could assume that, and with the current code
structure, yes, we can assume that, so seems safe, but it is different
and if the assumption somehow breaks, as the code in __do_write_fd()
guard against (unneeded at the moment as ion has even a BUG_ON for that
not to happen), then the offset will not be where the data is.
Using lseek() is more costly (syscalls) but it is the ultimate answer to
get where in the file the current offset is.
So that is the difference I noticed.
Doing multiple things in the same patch causes these reviewing delays,
doubts, its something we discussed multiple times in the past, and that
continue to cause these discussions.
- Arnaldo
> ```
> lseek(fd, sizeof(f_header), SEEK_SET);
> evlist__for_each_entry(session->evlist, evsel) {
> evsel->id_offset = lseek(fd, 0, SEEK_CUR); // Initially at sizeof(f_header)
> ...
> err = do_write(&ff, evsel->core.id, evsel->core.ids *
> sizeof(u64)); // offset has advanced by "evsel->core.ids *
> sizeof(u64)" bytes
> ...
> }
> ```
> After:
> ```
> u64 attr_offset = sizeof(f_header)
> ...
> else {
> lseek(fd, attr_offset, SEEK_SET);
> }
>
> evlist__for_each_entry(session->evlist, evsel) {
> evsel->id_offset = attr_offset;
> ...
> err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> ...
> attr_offset += evsel->core.ids * sizeof(u64);
> }
> ```
>
> The reason for using math rather than lseek (apart from reducing
> syscalls) is that if we're writing the header at the beginning of
> generating a file (usually done more to compute the
> header->data_offset), we can't use lseek if writing at the end as we
> don't know where the end of the data will be yet and don't write out
> the bytes.
>
> Thanks,
> Ian
>
> > - Arnaldo
> >
> > > }
> > >
> > > - attr_offset = lseek(ff.fd, 0, SEEK_CUR);
> > > -
> > > evlist__for_each_entry(evlist, evsel) {
> > > if (evsel->core.attr.size < sizeof(evsel->core.attr)) {
> > > /*
> > > @@ -3711,40 +3729,46 @@ static int perf_session__do_write_header(struct perf_session *session,
> > > */
> > > evsel->core.attr.size = sizeof(evsel->core.attr);
> > > }
> > > - f_attr = (struct perf_file_attr){
> > > - .attr = evsel->core.attr,
> > > - .ids = {
> > > - .offset = evsel->id_offset,
> > > - .size = evsel->core.ids * sizeof(u64),
> > > + /* Avoid writing at the end of the file until the session is exiting. */
> > > + if (!write_attrs_after_data || at_exit) {
> > > + struct perf_file_attr f_attr = {
> > > + .attr = evsel->core.attr,
> > > + .ids = {
> > > + .offset = evsel->id_offset,
> > > + .size = evsel->core.ids * sizeof(u64),
> > > + }
> > > + };
> > > + err = do_write(&ff, &f_attr, sizeof(f_attr));
> > > + if (err < 0) {
> > > + pr_debug("failed to write perf header attribute\n");
> > > + goto err_out;
> > > }
> > > - };
> > > - err = do_write(&ff, &f_attr, sizeof(f_attr));
> > > - if (err < 0) {
> > > - pr_debug("failed to write perf header attribute\n");
> > > - free(ff.buf);
> > > - return err;
> > > }
> > > + attr_size += sizeof(struct perf_file_attr);
> > > }
> > >
> > > - if (!header->data_offset)
> > > - header->data_offset = lseek(fd, 0, SEEK_CUR);
> > > + if (!header->data_offset) {
> > > + if (write_attrs_after_data)
> > > + header->data_offset = sizeof(f_header);
> > > + else
> > > + header->data_offset = attr_offset + attr_size;
> > > + }
> > > header->feat_offset = header->data_offset + header->data_size;
> > >
> > > - if (at_exit) {
> > > + if (!write_attrs_after_data && at_exit) {
> > > + /* Write features now feat_offset is known. */
> > > err = perf_header__adds_write(header, evlist, fd, fc);
> > > - if (err < 0) {
> > > - free(ff.buf);
> > > - return err;
> > > - }
> > > + if (err < 0)
> > > + goto err_out;
> > > }
> > >
> > > f_header = (struct perf_file_header){
> > > .magic = PERF_MAGIC,
> > > .size = sizeof(f_header),
> > > - .attr_size = sizeof(f_attr),
> > > + .attr_size = sizeof(struct perf_file_attr),
> > > .attrs = {
> > > .offset = attr_offset,
> > > - .size = evlist->core.nr_entries * sizeof(f_attr),
> > > + .size = attr_size,
> > > },
> > > .data = {
> > > .offset = header->data_offset,
> > > @@ -3757,21 +3781,24 @@ static int perf_session__do_write_header(struct perf_session *session,
> > >
> > > lseek(fd, 0, SEEK_SET);
> > > err = do_write(&ff, &f_header, sizeof(f_header));
> > > - free(ff.buf);
> > > if (err < 0) {
> > > pr_debug("failed to write perf header\n");
> > > - return err;
> > > + goto err_out;
> > > + } else {
> > > + lseek(fd, 0, SEEK_END);
> > > + err = 0;
> > > }
> > > - lseek(fd, header->data_offset + header->data_size, SEEK_SET);
> > > -
> > > - return 0;
> > > +err_out:
> > > + free(ff.buf);
> > > + return err;
> > > }
> > >
> > > int perf_session__write_header(struct perf_session *session,
> > > struct evlist *evlist,
> > > int fd, bool at_exit)
> > > {
> > > - return perf_session__do_write_header(session, evlist, fd, at_exit, NULL);
> > > + return perf_session__do_write_header(session, evlist, fd, at_exit, /*fc=*/NULL,
> > > + /*write_attrs_after_data=*/false);
> > > }
> > >
> > > size_t perf_session__data_offset(const struct evlist *evlist)
> > > @@ -3793,7 +3820,8 @@ int perf_session__inject_header(struct perf_session *session,
> > > int fd,
> > > struct feat_copier *fc)
> > > {
> > > - return perf_session__do_write_header(session, evlist, fd, true, fc);
> > > + return perf_session__do_write_header(session, evlist, fd, true, fc,
> > > + /*write_attrs_after_data=*/false);
> > > }
> > >
> > > static int perf_header__getbuffer64(struct perf_header *header,
> > > --
> > > 2.46.0.295.g3b9ea8a38a-goog
On Thu, Aug 29, 2024 at 1:58 PM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> On Thu, Aug 29, 2024 at 01:12:32PM -0700, Ian Rogers wrote:
> > On Thu, Aug 29, 2024 at 12:31 PM Arnaldo Carvalho de Melo
> > <acme@kernel.org> wrote:
> > >
> > > On Thu, Aug 29, 2024 at 08:01:51AM -0700, Ian Rogers wrote:
> > > > With a file, to write data an offset needs to be known. Typically data
> > > > follows the event attributes in a file. However, if processing a pipe
> > > > the number of event attributes may not be known. It is convenient in
> > > > that case to write the attributes after the data. Expand
> > > > perf_session__do_write_header to allow this when the data offset and
> > > > size are known.
> > > >
> > > > This approach may be useful for more than just taking a pipe file to
> > > > write into a data file, `perf inject --itrace` will reserve and
> > > > additional 8kb for attributes, which would be unnecessary if the
> > > > attributes were written after the data.
> > > >
> > > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > > ---
> > > > tools/perf/util/header.c | 106 +++++++++++++++++++++++++--------------
> > > > 1 file changed, 67 insertions(+), 39 deletions(-)
> > > >
> > > > diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> > > > index 65c9086610cb..4eb39463067e 100644
> > > > --- a/tools/perf/util/header.c
> > > > +++ b/tools/perf/util/header.c
> > > > @@ -3676,32 +3676,50 @@ int perf_header__write_pipe(int fd)
> > > > static int perf_session__do_write_header(struct perf_session *session,
> > > > struct evlist *evlist,
> > > > int fd, bool at_exit,
> > > > - struct feat_copier *fc)
> > > > + struct feat_copier *fc,
> > > > + bool write_attrs_after_data)
> > > > {
> > > > struct perf_file_header f_header;
> > > > - struct perf_file_attr f_attr;
> > > > struct perf_header *header = &session->header;
> > > > struct evsel *evsel;
> > > > struct feat_fd ff = {
> > > > .fd = fd,
> > > > };
> > > > - u64 attr_offset;
> > > > + u64 attr_offset = sizeof(f_header), attr_size = 0;
> > > > int err;
> > > >
> > > > - lseek(fd, sizeof(f_header), SEEK_SET);
> > > > + if (write_attrs_after_data && at_exit) {
> > > > + /*
> > > > + * Write features at the end of the file first so that
> > > > + * attributes may come after them.
> > > > + */
> > > > + if (!header->data_offset && header->data_size) {
> > > > + pr_err("File contains data but offset unknown\n");
> > > > + err = -1;
> > > > + goto err_out;
> > > > + }
> > > > + header->feat_offset = header->data_offset + header->data_size;
> > > > + err = perf_header__adds_write(header, evlist, fd, fc);
> > > > + if (err < 0)
> > > > + goto err_out;
> > > > + attr_offset = lseek(fd, 0, SEEK_CUR);
> > > > + } else {
> > > > + lseek(fd, attr_offset, SEEK_SET);
> > > > + }
> > > >
> > > > evlist__for_each_entry(session->evlist, evsel) {
> > > > - evsel->id_offset = lseek(fd, 0, SEEK_CUR);
> > > > - err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> > > > - if (err < 0) {
> > > > - pr_debug("failed to write perf header\n");
> > > > - free(ff.buf);
> > > > - return err;
> > > > + evsel->id_offset = attr_offset;
> > > > + /* Avoid writing at the end of the file until the session is exiting. */
> > > > + if (!write_attrs_after_data || at_exit) {
> > > > + err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> > > > + if (err < 0) {
> > > > + pr_debug("failed to write perf header\n");
> > > > + goto err_out;
> > > > + }
> > > > }
> > > > + attr_offset += evsel->core.ids * sizeof(u64);
> > >
> > > So in the past we were using lseek(fd, 0, SEEK_CUR) to set the
> > > evsel->id_offset, now you're assuming that do_write will honour the size
> > > parameter, i.e. write evsel->core.ids * sizeof(u64), but:
> > >
> > > /* Return: 0 if succeeded, -ERR if failed. */
> > > int do_write(struct feat_fd *ff, const void *buf, size_t size)
> > > {
> > > if (!ff->buf)
> > > return __do_write_fd(ff, buf, size);
> > > return __do_write_buf(ff, buf, size);
> > > }
> > >
> > > And then:
> > >
> > > static int __do_write_fd(struct feat_fd *ff, const void *buf, size_t size)
> > > {
> > > ssize_t ret = writen(ff->fd, buf, size);
> > >
> > > if (ret != (ssize_t)size)
> > > return ret < 0 ? (int)ret : -1;
> > > return 0;
> > > }
> > >
> > > I see that writen calls ion that even has a BUG_ON() if it doesn't write
> > > exactly the requested size bytes, I got distracted with __do_write_fd
> > > extra check that ret != size returning ret if not negative...
> > >
> > > I.e. your code _should_ be equivalent due to the check in ion(), and
> > > taking that as an assumption you reduce the number of lseek syscalls,
> > > which is a good thing...
> > >
> > > I was just trying to see that the !write_attrs_after_data case was
> > > _exactly_ the same as before, which it doesn't look like it is :-\
> >
> > I'm not seeing the difference. Before:
>
> You noticed the difference: before we used lseek to get the current
> offset to use, afterwards we moved to doing plain math.
>
> So I had to check if we could assume that, and with the current code
> structure, yes, we can assume that, so seems safe, but it is different
> and if the assumption somehow breaks, as the code in __do_write_fd()
> guard against (unneeded at the moment as ion has even a BUG_ON for that
> not to happen), then the offset will not be where the data is.
>
> Using lseek() is more costly (syscalls) but it is the ultimate answer to
> get where in the file the current offset is.
>
> So that is the difference I noticed.
>
> Doing multiple things in the same patch causes these reviewing delays,
> doubts, its something we discussed multiple times in the past, and that
> continue to cause these discussions.
Right, but it is something of an unfortunate coincidence of how the
code is structured. The fact that writing the header updates
data_offset which is a thing that other things depend upon while
depending on its value itself, etc. - ie the function does more than
just a write, it also sometimes computes the layout, has inbuilt
assumptions on the values lseek will return, and so on. To get to this
final structure took a fair few iterations and I've separated this
change out from the bulk in the next change to keep the patch size
down. I could have done a patch switching from lseeks to math, then a
patch to add write_attrs_after_data. It probably would have yielded
about 4 lines of shared code, more lines that would have been deleted,
while creating quite a bit of work for me. Ideally when these
functions were created there would have been far more liberal use of
things like immutability, so side-effects are minimized. Yes I could
refactor everything, but time..
Thanks,
Ian
> - Arnaldo
>
> > ```
> > lseek(fd, sizeof(f_header), SEEK_SET);
> > evlist__for_each_entry(session->evlist, evsel) {
> > evsel->id_offset = lseek(fd, 0, SEEK_CUR); // Initially at sizeof(f_header)
> > ...
> > err = do_write(&ff, evsel->core.id, evsel->core.ids *
> > sizeof(u64)); // offset has advanced by "evsel->core.ids *
> > sizeof(u64)" bytes
> > ...
> > }
> > ```
> > After:
> > ```
> > u64 attr_offset = sizeof(f_header)
> > ...
> > else {
> > lseek(fd, attr_offset, SEEK_SET);
> > }
> >
> > evlist__for_each_entry(session->evlist, evsel) {
> > evsel->id_offset = attr_offset;
> > ...
> > err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> > ...
> > attr_offset += evsel->core.ids * sizeof(u64);
> > }
> > ```
> >
> > The reason for using math rather than lseek (apart from reducing
> > syscalls) is that if we're writing the header at the beginning of
> > generating a file (usually done more to compute the
> > header->data_offset), we can't use lseek if writing at the end as we
> > don't know where the end of the data will be yet and don't write out
> > the bytes.
> >
> > Thanks,
> > Ian
> >
> > > - Arnaldo
> > >
> > > > }
> > > >
> > > > - attr_offset = lseek(ff.fd, 0, SEEK_CUR);
> > > > -
> > > > evlist__for_each_entry(evlist, evsel) {
> > > > if (evsel->core.attr.size < sizeof(evsel->core.attr)) {
> > > > /*
> > > > @@ -3711,40 +3729,46 @@ static int perf_session__do_write_header(struct perf_session *session,
> > > > */
> > > > evsel->core.attr.size = sizeof(evsel->core.attr);
> > > > }
> > > > - f_attr = (struct perf_file_attr){
> > > > - .attr = evsel->core.attr,
> > > > - .ids = {
> > > > - .offset = evsel->id_offset,
> > > > - .size = evsel->core.ids * sizeof(u64),
> > > > + /* Avoid writing at the end of the file until the session is exiting. */
> > > > + if (!write_attrs_after_data || at_exit) {
> > > > + struct perf_file_attr f_attr = {
> > > > + .attr = evsel->core.attr,
> > > > + .ids = {
> > > > + .offset = evsel->id_offset,
> > > > + .size = evsel->core.ids * sizeof(u64),
> > > > + }
> > > > + };
> > > > + err = do_write(&ff, &f_attr, sizeof(f_attr));
> > > > + if (err < 0) {
> > > > + pr_debug("failed to write perf header attribute\n");
> > > > + goto err_out;
> > > > }
> > > > - };
> > > > - err = do_write(&ff, &f_attr, sizeof(f_attr));
> > > > - if (err < 0) {
> > > > - pr_debug("failed to write perf header attribute\n");
> > > > - free(ff.buf);
> > > > - return err;
> > > > }
> > > > + attr_size += sizeof(struct perf_file_attr);
> > > > }
> > > >
> > > > - if (!header->data_offset)
> > > > - header->data_offset = lseek(fd, 0, SEEK_CUR);
> > > > + if (!header->data_offset) {
> > > > + if (write_attrs_after_data)
> > > > + header->data_offset = sizeof(f_header);
> > > > + else
> > > > + header->data_offset = attr_offset + attr_size;
> > > > + }
> > > > header->feat_offset = header->data_offset + header->data_size;
> > > >
> > > > - if (at_exit) {
> > > > + if (!write_attrs_after_data && at_exit) {
> > > > + /* Write features now feat_offset is known. */
> > > > err = perf_header__adds_write(header, evlist, fd, fc);
> > > > - if (err < 0) {
> > > > - free(ff.buf);
> > > > - return err;
> > > > - }
> > > > + if (err < 0)
> > > > + goto err_out;
> > > > }
> > > >
> > > > f_header = (struct perf_file_header){
> > > > .magic = PERF_MAGIC,
> > > > .size = sizeof(f_header),
> > > > - .attr_size = sizeof(f_attr),
> > > > + .attr_size = sizeof(struct perf_file_attr),
> > > > .attrs = {
> > > > .offset = attr_offset,
> > > > - .size = evlist->core.nr_entries * sizeof(f_attr),
> > > > + .size = attr_size,
> > > > },
> > > > .data = {
> > > > .offset = header->data_offset,
> > > > @@ -3757,21 +3781,24 @@ static int perf_session__do_write_header(struct perf_session *session,
> > > >
> > > > lseek(fd, 0, SEEK_SET);
> > > > err = do_write(&ff, &f_header, sizeof(f_header));
> > > > - free(ff.buf);
> > > > if (err < 0) {
> > > > pr_debug("failed to write perf header\n");
> > > > - return err;
> > > > + goto err_out;
> > > > + } else {
> > > > + lseek(fd, 0, SEEK_END);
> > > > + err = 0;
> > > > }
> > > > - lseek(fd, header->data_offset + header->data_size, SEEK_SET);
> > > > -
> > > > - return 0;
> > > > +err_out:
> > > > + free(ff.buf);
> > > > + return err;
> > > > }
> > > >
> > > > int perf_session__write_header(struct perf_session *session,
> > > > struct evlist *evlist,
> > > > int fd, bool at_exit)
> > > > {
> > > > - return perf_session__do_write_header(session, evlist, fd, at_exit, NULL);
> > > > + return perf_session__do_write_header(session, evlist, fd, at_exit, /*fc=*/NULL,
> > > > + /*write_attrs_after_data=*/false);
> > > > }
> > > >
> > > > size_t perf_session__data_offset(const struct evlist *evlist)
> > > > @@ -3793,7 +3820,8 @@ int perf_session__inject_header(struct perf_session *session,
> > > > int fd,
> > > > struct feat_copier *fc)
> > > > {
> > > > - return perf_session__do_write_header(session, evlist, fd, true, fc);
> > > > + return perf_session__do_write_header(session, evlist, fd, true, fc,
> > > > + /*write_attrs_after_data=*/false);
> > > > }
> > > >
> > > > static int perf_header__getbuffer64(struct perf_header *header,
> > > > --
> > > > 2.46.0.295.g3b9ea8a38a-goog
On Thu, Aug 29, 2024 at 02:42:38PM -0700, Ian Rogers wrote: > On Thu, Aug 29, 2024 at 1:58 PM Arnaldo Carvalho de Melo <acme@kernel.org> wrote: > > On Thu, Aug 29, 2024 at 01:12:32PM -0700, Ian Rogers wrote: > > > On Thu, Aug 29, 2024 at 12:31 PM Arnaldo Carvalho de Melo <acme@kernel.org> wrote: > > > > On Thu, Aug 29, 2024 at 08:01:51AM -0700, Ian Rogers wrote: > > > I'm not seeing the difference. Before: > > You noticed the difference: before we used lseek to get the current > > offset to use, afterwards we moved to doing plain math. > > So I had to check if we could assume that, and with the current code > > structure, yes, we can assume that, so seems safe, but it is different > > and if the assumption somehow breaks, as the code in __do_write_fd() > > guard against (unneeded at the moment as ion has even a BUG_ON for that > > not to happen), then the offset will not be where the data is. > > Using lseek() is more costly (syscalls) but it is the ultimate answer to > > get where in the file the current offset is. > > So that is the difference I noticed. > > Doing multiple things in the same patch causes these reviewing delays, > > doubts, its something we discussed multiple times in the past, and that > > continue to cause these discussions. > Right, but it is something of an unfortunate coincidence of how the > code is structured. The fact that writing the header updates > data_offset which is a thing that other things depend upon while > depending on its value itself, etc. - ie the function does more than > just a write, it also sometimes computes the layout, has inbuilt > assumptions on the values lseek will return, and so on. To get to this I share your frustrations, code gets complex over time, that is why I at least try to ask these questions, encourage more granular patches, that do just one thing, etc, to avoid having this conversation again years from now, when some other person tries to understand the codebase do bisects, refactor it, etc, just like you're doing now. > final structure took a fair few iterations and I've separated this > change out from the bulk in the next change to keep the patch size > down. I could have done a patch switching from lseeks to math, then a > patch to add write_attrs_after_data. > It probably would have yielded about 4 lines of shared code, more > lines that would have been deleted, while creating quite a bit of work > for me. > Ideally when these functions were created there would have been far > more liberal use of things like immutability, so side-effects are > minimized. Yes I could refactor everything, but time.. As I said, I think your patch is safe as-is, its just that it took more time than needed for reviewing, i.e. it will cost more one side or the other, and as I have to review everything I merge, doing it on my side slows things down the overall process of collecting patches from lots of people. So far I collected 234 patches for v6.12, and there are way more to process, like Howard's perf trace BTF work, that I merged partially, but where I'll have to do work on splitting patches as agreed with him in another thread, etc. - Arnaldo
On Thu, Aug 29, 2024 at 02:42:38PM -0700, Ian Rogers wrote:
> On Thu, Aug 29, 2024 at 1:58 PM Arnaldo Carvalho de Melo
> <acme@kernel.org> wrote:
> >
> > On Thu, Aug 29, 2024 at 01:12:32PM -0700, Ian Rogers wrote:
> > > On Thu, Aug 29, 2024 at 12:31 PM Arnaldo Carvalho de Melo
> > > <acme@kernel.org> wrote:
> > > >
> > > > On Thu, Aug 29, 2024 at 08:01:51AM -0700, Ian Rogers wrote:
> > > > > With a file, to write data an offset needs to be known. Typically data
> > > > > follows the event attributes in a file. However, if processing a pipe
> > > > > the number of event attributes may not be known. It is convenient in
> > > > > that case to write the attributes after the data. Expand
> > > > > perf_session__do_write_header to allow this when the data offset and
> > > > > size are known.
> > > > >
> > > > > This approach may be useful for more than just taking a pipe file to
> > > > > write into a data file, `perf inject --itrace` will reserve and
> > > > > additional 8kb for attributes, which would be unnecessary if the
> > > > > attributes were written after the data.
> > > > >
> > > > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > > > ---
> > > > > tools/perf/util/header.c | 106 +++++++++++++++++++++++++--------------
> > > > > 1 file changed, 67 insertions(+), 39 deletions(-)
> > > > >
> > > > > diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> > > > > index 65c9086610cb..4eb39463067e 100644
> > > > > --- a/tools/perf/util/header.c
> > > > > +++ b/tools/perf/util/header.c
> > > > > @@ -3676,32 +3676,50 @@ int perf_header__write_pipe(int fd)
> > > > > static int perf_session__do_write_header(struct perf_session *session,
> > > > > struct evlist *evlist,
> > > > > int fd, bool at_exit,
> > > > > - struct feat_copier *fc)
> > > > > + struct feat_copier *fc,
> > > > > + bool write_attrs_after_data)
> > > > > {
> > > > > struct perf_file_header f_header;
> > > > > - struct perf_file_attr f_attr;
> > > > > struct perf_header *header = &session->header;
> > > > > struct evsel *evsel;
> > > > > struct feat_fd ff = {
> > > > > .fd = fd,
> > > > > };
> > > > > - u64 attr_offset;
> > > > > + u64 attr_offset = sizeof(f_header), attr_size = 0;
> > > > > int err;
> > > > >
> > > > > - lseek(fd, sizeof(f_header), SEEK_SET);
> > > > > + if (write_attrs_after_data && at_exit) {
> > > > > + /*
> > > > > + * Write features at the end of the file first so that
> > > > > + * attributes may come after them.
> > > > > + */
> > > > > + if (!header->data_offset && header->data_size) {
> > > > > + pr_err("File contains data but offset unknown\n");
> > > > > + err = -1;
> > > > > + goto err_out;
> > > > > + }
> > > > > + header->feat_offset = header->data_offset + header->data_size;
> > > > > + err = perf_header__adds_write(header, evlist, fd, fc);
> > > > > + if (err < 0)
> > > > > + goto err_out;
> > > > > + attr_offset = lseek(fd, 0, SEEK_CUR);
> > > > > + } else {
> > > > > + lseek(fd, attr_offset, SEEK_SET);
> > > > > + }
> > > > >
> > > > > evlist__for_each_entry(session->evlist, evsel) {
> > > > > - evsel->id_offset = lseek(fd, 0, SEEK_CUR);
> > > > > - err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> > > > > - if (err < 0) {
> > > > > - pr_debug("failed to write perf header\n");
> > > > > - free(ff.buf);
> > > > > - return err;
> > > > > + evsel->id_offset = attr_offset;
> > > > > + /* Avoid writing at the end of the file until the session is exiting. */
> > > > > + if (!write_attrs_after_data || at_exit) {
> > > > > + err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> > > > > + if (err < 0) {
> > > > > + pr_debug("failed to write perf header\n");
> > > > > + goto err_out;
> > > > > + }
> > > > > }
> > > > > + attr_offset += evsel->core.ids * sizeof(u64);
> > > >
> > > > So in the past we were using lseek(fd, 0, SEEK_CUR) to set the
> > > > evsel->id_offset, now you're assuming that do_write will honour the size
> > > > parameter, i.e. write evsel->core.ids * sizeof(u64), but:
> > > >
> > > > /* Return: 0 if succeeded, -ERR if failed. */
> > > > int do_write(struct feat_fd *ff, const void *buf, size_t size)
> > > > {
> > > > if (!ff->buf)
> > > > return __do_write_fd(ff, buf, size);
> > > > return __do_write_buf(ff, buf, size);
> > > > }
> > > >
> > > > And then:
> > > >
> > > > static int __do_write_fd(struct feat_fd *ff, const void *buf, size_t size)
> > > > {
> > > > ssize_t ret = writen(ff->fd, buf, size);
> > > >
> > > > if (ret != (ssize_t)size)
> > > > return ret < 0 ? (int)ret : -1;
> > > > return 0;
> > > > }
> > > >
> > > > I see that writen calls ion that even has a BUG_ON() if it doesn't write
> > > > exactly the requested size bytes, I got distracted with __do_write_fd
> > > > extra check that ret != size returning ret if not negative...
> > > >
> > > > I.e. your code _should_ be equivalent due to the check in ion(), and
> > > > taking that as an assumption you reduce the number of lseek syscalls,
> > > > which is a good thing...
> > > >
> > > > I was just trying to see that the !write_attrs_after_data case was
> > > > _exactly_ the same as before, which it doesn't look like it is :-\
> > >
> > > I'm not seeing the difference. Before:
> >
> > You noticed the difference: before we used lseek to get the current
> > offset to use, afterwards we moved to doing plain math.
> >
> > So I had to check if we could assume that, and with the current code
> > structure, yes, we can assume that, so seems safe, but it is different
> > and if the assumption somehow breaks, as the code in __do_write_fd()
> > guard against (unneeded at the moment as ion has even a BUG_ON for that
> > not to happen), then the offset will not be where the data is.
> >
> > Using lseek() is more costly (syscalls) but it is the ultimate answer to
> > get where in the file the current offset is.
> >
> > So that is the difference I noticed.
> >
> > Doing multiple things in the same patch causes these reviewing delays,
> > doubts, its something we discussed multiple times in the past, and that
> > continue to cause these discussions.
>
> Right, but it is something of an unfortunate coincidence of how the
> code is structured. The fact that writing the header updates
> data_offset which is a thing that other things depend upon while
> depending on its value itself, etc. - ie the function does more than
> just a write, it also sometimes computes the layout, has inbuilt
> assumptions on the values lseek will return, and so on. To get to this
> final structure took a fair few iterations and I've separated this
> change out from the bulk in the next change to keep the patch size
> down. I could have done a patch switching from lseeks to math, then a
> patch to add write_attrs_after_data. It probably would have yielded
> about 4 lines of shared code, more lines that would have been deleted,
> while creating quite a bit of work for me. Ideally when these
> functions were created there would have been far more liberal use of
> things like immutability, so side-effects are minimized. Yes I could
> refactor everything, but time..
Maybe I'm too naive but can we skip header updates on pipe data? I'm
curious if this makes sense..
Thanks,
Namhyung
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index a7c859db2e15..b36f84f29295 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2341,6 +2341,9 @@ int cmd_inject(int argc, const char **argv)
if (ret)
goto out_delete;
+ if (data.is_pipe)
+ inject.is_pipe = true;
+
if (!data.is_pipe && inject.output.is_pipe) {
ret = perf_header__write_pipe(perf_data__fd(&inject.output));
if (ret < 0) {
On Thu, Aug 29, 2024 at 9:39 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Thu, Aug 29, 2024 at 02:42:38PM -0700, Ian Rogers wrote:
> > On Thu, Aug 29, 2024 at 1:58 PM Arnaldo Carvalho de Melo
> > <acme@kernel.org> wrote:
> > >
> > > On Thu, Aug 29, 2024 at 01:12:32PM -0700, Ian Rogers wrote:
> > > > On Thu, Aug 29, 2024 at 12:31 PM Arnaldo Carvalho de Melo
> > > > <acme@kernel.org> wrote:
> > > > >
> > > > > On Thu, Aug 29, 2024 at 08:01:51AM -0700, Ian Rogers wrote:
> > > > > > With a file, to write data an offset needs to be known. Typically data
> > > > > > follows the event attributes in a file. However, if processing a pipe
> > > > > > the number of event attributes may not be known. It is convenient in
> > > > > > that case to write the attributes after the data. Expand
> > > > > > perf_session__do_write_header to allow this when the data offset and
> > > > > > size are known.
> > > > > >
> > > > > > This approach may be useful for more than just taking a pipe file to
> > > > > > write into a data file, `perf inject --itrace` will reserve and
> > > > > > additional 8kb for attributes, which would be unnecessary if the
> > > > > > attributes were written after the data.
> > > > > >
> > > > > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > > > > ---
> > > > > > tools/perf/util/header.c | 106 +++++++++++++++++++++++++--------------
> > > > > > 1 file changed, 67 insertions(+), 39 deletions(-)
> > > > > >
> > > > > > diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> > > > > > index 65c9086610cb..4eb39463067e 100644
> > > > > > --- a/tools/perf/util/header.c
> > > > > > +++ b/tools/perf/util/header.c
> > > > > > @@ -3676,32 +3676,50 @@ int perf_header__write_pipe(int fd)
> > > > > > static int perf_session__do_write_header(struct perf_session *session,
> > > > > > struct evlist *evlist,
> > > > > > int fd, bool at_exit,
> > > > > > - struct feat_copier *fc)
> > > > > > + struct feat_copier *fc,
> > > > > > + bool write_attrs_after_data)
> > > > > > {
> > > > > > struct perf_file_header f_header;
> > > > > > - struct perf_file_attr f_attr;
> > > > > > struct perf_header *header = &session->header;
> > > > > > struct evsel *evsel;
> > > > > > struct feat_fd ff = {
> > > > > > .fd = fd,
> > > > > > };
> > > > > > - u64 attr_offset;
> > > > > > + u64 attr_offset = sizeof(f_header), attr_size = 0;
> > > > > > int err;
> > > > > >
> > > > > > - lseek(fd, sizeof(f_header), SEEK_SET);
> > > > > > + if (write_attrs_after_data && at_exit) {
> > > > > > + /*
> > > > > > + * Write features at the end of the file first so that
> > > > > > + * attributes may come after them.
> > > > > > + */
> > > > > > + if (!header->data_offset && header->data_size) {
> > > > > > + pr_err("File contains data but offset unknown\n");
> > > > > > + err = -1;
> > > > > > + goto err_out;
> > > > > > + }
> > > > > > + header->feat_offset = header->data_offset + header->data_size;
> > > > > > + err = perf_header__adds_write(header, evlist, fd, fc);
> > > > > > + if (err < 0)
> > > > > > + goto err_out;
> > > > > > + attr_offset = lseek(fd, 0, SEEK_CUR);
> > > > > > + } else {
> > > > > > + lseek(fd, attr_offset, SEEK_SET);
> > > > > > + }
> > > > > >
> > > > > > evlist__for_each_entry(session->evlist, evsel) {
> > > > > > - evsel->id_offset = lseek(fd, 0, SEEK_CUR);
> > > > > > - err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> > > > > > - if (err < 0) {
> > > > > > - pr_debug("failed to write perf header\n");
> > > > > > - free(ff.buf);
> > > > > > - return err;
> > > > > > + evsel->id_offset = attr_offset;
> > > > > > + /* Avoid writing at the end of the file until the session is exiting. */
> > > > > > + if (!write_attrs_after_data || at_exit) {
> > > > > > + err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
> > > > > > + if (err < 0) {
> > > > > > + pr_debug("failed to write perf header\n");
> > > > > > + goto err_out;
> > > > > > + }
> > > > > > }
> > > > > > + attr_offset += evsel->core.ids * sizeof(u64);
> > > > >
> > > > > So in the past we were using lseek(fd, 0, SEEK_CUR) to set the
> > > > > evsel->id_offset, now you're assuming that do_write will honour the size
> > > > > parameter, i.e. write evsel->core.ids * sizeof(u64), but:
> > > > >
> > > > > /* Return: 0 if succeeded, -ERR if failed. */
> > > > > int do_write(struct feat_fd *ff, const void *buf, size_t size)
> > > > > {
> > > > > if (!ff->buf)
> > > > > return __do_write_fd(ff, buf, size);
> > > > > return __do_write_buf(ff, buf, size);
> > > > > }
> > > > >
> > > > > And then:
> > > > >
> > > > > static int __do_write_fd(struct feat_fd *ff, const void *buf, size_t size)
> > > > > {
> > > > > ssize_t ret = writen(ff->fd, buf, size);
> > > > >
> > > > > if (ret != (ssize_t)size)
> > > > > return ret < 0 ? (int)ret : -1;
> > > > > return 0;
> > > > > }
> > > > >
> > > > > I see that writen calls ion that even has a BUG_ON() if it doesn't write
> > > > > exactly the requested size bytes, I got distracted with __do_write_fd
> > > > > extra check that ret != size returning ret if not negative...
> > > > >
> > > > > I.e. your code _should_ be equivalent due to the check in ion(), and
> > > > > taking that as an assumption you reduce the number of lseek syscalls,
> > > > > which is a good thing...
> > > > >
> > > > > I was just trying to see that the !write_attrs_after_data case was
> > > > > _exactly_ the same as before, which it doesn't look like it is :-\
> > > >
> > > > I'm not seeing the difference. Before:
> > >
> > > You noticed the difference: before we used lseek to get the current
> > > offset to use, afterwards we moved to doing plain math.
> > >
> > > So I had to check if we could assume that, and with the current code
> > > structure, yes, we can assume that, so seems safe, but it is different
> > > and if the assumption somehow breaks, as the code in __do_write_fd()
> > > guard against (unneeded at the moment as ion has even a BUG_ON for that
> > > not to happen), then the offset will not be where the data is.
> > >
> > > Using lseek() is more costly (syscalls) but it is the ultimate answer to
> > > get where in the file the current offset is.
> > >
> > > So that is the difference I noticed.
> > >
> > > Doing multiple things in the same patch causes these reviewing delays,
> > > doubts, its something we discussed multiple times in the past, and that
> > > continue to cause these discussions.
> >
> > Right, but it is something of an unfortunate coincidence of how the
> > code is structured. The fact that writing the header updates
> > data_offset which is a thing that other things depend upon while
> > depending on its value itself, etc. - ie the function does more than
> > just a write, it also sometimes computes the layout, has inbuilt
> > assumptions on the values lseek will return, and so on. To get to this
> > final structure took a fair few iterations and I've separated this
> > change out from the bulk in the next change to keep the patch size
> > down. I could have done a patch switching from lseeks to math, then a
> > patch to add write_attrs_after_data. It probably would have yielded
> > about 4 lines of shared code, more lines that would have been deleted,
> > while creating quite a bit of work for me. Ideally when these
> > functions were created there would have been far more liberal use of
> > things like immutability, so side-effects are minimized. Yes I could
> > refactor everything, but time..
>
> Maybe I'm too naive but can we skip header updates on pipe data? I'm
> curious if this makes sense..
>
> Thanks,
> Namhyung
>
>
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index a7c859db2e15..b36f84f29295 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -2341,6 +2341,9 @@ int cmd_inject(int argc, const char **argv)
> if (ret)
> goto out_delete;
>
> + if (data.is_pipe)
> + inject.is_pipe = true;
> +
I'm not sure what you are saying. We can't know definitively if the
input is a pipe style file or pipe until the header is read, which is
part of session__new and something we pass whether we want to repipe
the header or not. So we've made a decision or not to repipe but
opening the header may change the decision that was already made. As
you say we can opportunistically just copy/repipe the header if we
know the input and output types match, but:
1) generating the header isn't that much work,
2) if the header needs to change for extra attributes, such as with
some of the auxiliary flags, then the repiped header was no good
anyway.
Trying to keep header repiping alive for inject, the only use, is
weird given all the gotchas. I think it is simpler to open, know what
we're dealing with, then generate the output header accordingly -
possibly synthesizing events for the attributes in the case of file to
pipe.
Thanks,
Ian
> if (!data.is_pipe && inject.output.is_pipe) {
> ret = perf_header__write_pipe(perf_data__fd(&inject.output));
> if (ret < 0) {
>
On Thu, Aug 29, 2024 at 10:03 PM Ian Rogers <irogers@google.com> wrote: > > On Thu, Aug 29, 2024 at 9:39 PM Namhyung Kim <namhyung@kernel.org> wrote: > > Maybe I'm too naive but can we skip header updates on pipe data? I'm > > curious if this makes sense.. > > > > Thanks, > > Namhyung > > > > > > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > > index a7c859db2e15..b36f84f29295 100644 > > --- a/tools/perf/builtin-inject.c > > +++ b/tools/perf/builtin-inject.c > > @@ -2341,6 +2341,9 @@ int cmd_inject(int argc, const char **argv) > > if (ret) > > goto out_delete; > > > > + if (data.is_pipe) > > + inject.is_pipe = true; > > + > > I'm not sure what you are saying. We can't know definitively if the > input is a pipe style file or pipe until the header is read, which is > part of session__new and something we pass whether we want to repipe > the header or not. So we've made a decision or not to repipe but > opening the header may change the decision that was already made. As > you say we can opportunistically just copy/repipe the header if we > know the input and output types match, but: > 1) generating the header isn't that much work, > 2) if the header needs to change for extra attributes, such as with > some of the auxiliary flags, then the repiped header was no good > anyway. > Trying to keep header repiping alive for inject, the only use, is > weird given all the gotchas. I think it is simpler to open, know what > we're dealing with, then generate the output header accordingly - > possibly synthesizing events for the attributes in the case of file to > pipe. I'm ok with removing repipe in session__new. What I want is not to overwrite the file header for a data file containing pipe header. In your example, 'perf record -o- > a.data' should have the pipe header in a.data. Then b.data from perf inject should have the pipe header as well, right? Then we don't need to worry about the rewrite IIUC. Thanks, Namhyung
© 2016 - 2026 Red Hat, Inc.