[PATCH] perf jevents: Sort list of input files

Bernhard M. Wiedemann posted 1 patch 2 years, 10 months ago
There is a newer version of this series
tools/perf/pmu-events/jevents.py | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
[PATCH] perf jevents: Sort list of input files
Posted by Bernhard M. Wiedemann 2 years, 10 months ago
Without this, pmu-events.c would be generated with variations in ordering
depending on non-deterministic filesystem readdir order.

I tested that pmu-events.c still has the same number of lines
and that perf list output works.

This patch was done while working on reproducible builds for openSUSE,
but also solves issues in Debian [1] and other distributions.

[1] https://tests.reproducible-builds.org/debian/rb-pkg/unstable/i386/linux.html

Signed-off-by: Bernhard M. Wiedemann <bwiedemann@suse.de>
CC: Ian Rogers <irogers@google.com>
---
 tools/perf/pmu-events/jevents.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index 2bcd07ce609f..f06e1abac7c7 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -381,10 +381,13 @@ def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]:
 
   return events
 
+def sorted_scandir(path: str) -> list[os.DirEntry]:
+  return sorted(os.scandir(path), key=lambda e: e.name)
+
 def preprocess_arch_std_files(archpath: str) -> None:
   """Read in all architecture standard events."""
   global _arch_std_events
-  for item in os.scandir(archpath):
+  for item in sorted_scandir(archpath):
     if item.is_file() and item.name.endswith('.json'):
       for event in read_json_events(item.path, topic=''):
         if event.name:
@@ -497,7 +500,7 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
 def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
   """Process a JSON file during the main walk."""
   def is_leaf_dir(path: str) -> bool:
-    for item in os.scandir(path):
+    for item in sorted_scandir(path):
       if item.is_dir():
         return False
     return True
@@ -889,7 +892,7 @@ def main() -> None:
   def ftw(path: str, parents: Sequence[str],
           action: Callable[[Sequence[str], os.DirEntry], None]) -> None:
     """Replicate the directory/file walking behavior of C's file tree walk."""
-    for item in os.scandir(path):
+    for item in sorted_scandir(path):
       if _args.model != 'all' and item.is_dir():
         # Check if the model matches one in _args.model.
         if len(parents) == _args.model.split(',')[0].count('/'):
@@ -930,7 +933,7 @@ struct compact_pmu_event {
 
 """)
   archs = []
-  for item in os.scandir(_args.starting_dir):
+  for item in sorted_scandir(_args.starting_dir):
     if not item.is_dir():
       continue
     if item.name == _args.arch or _args.arch == 'all' or item.name == 'test':
-- 
2.35.3
Re: [PATCH] perf jevents: Sort list of input files
Posted by Ian Rogers 2 years, 10 months ago
On Mon, Mar 20, 2023 at 1:19 PM Bernhard M. Wiedemann
<bwiedemann@suse.de> wrote:
>
> Without this, pmu-events.c would be generated with variations in ordering
> depending on non-deterministic filesystem readdir order.
>
> I tested that pmu-events.c still has the same number of lines
> and that perf list output works.
>
> This patch was done while working on reproducible builds for openSUSE,
> but also solves issues in Debian [1] and other distributions.
>
> [1] https://tests.reproducible-builds.org/debian/rb-pkg/unstable/i386/linux.html
>
> Signed-off-by: Bernhard M. Wiedemann <bwiedemann@suse.de>
> CC: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/pmu-events/jevents.py | 11 +++++++----
>  1 file changed, 7 insertions(+), 4 deletions(-)

Thanks Bernhard,

I think this may already be addressed by sorting prior to output:
https://lore.kernel.org/r/20220812230949.683239-5-irogers@google.com

Could you confirm?

Thanks,
Ian

>
> diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
> index 2bcd07ce609f..f06e1abac7c7 100755
> --- a/tools/perf/pmu-events/jevents.py
> +++ b/tools/perf/pmu-events/jevents.py
> @@ -381,10 +381,13 @@ def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]:
>
>    return events
>
> +def sorted_scandir(path: str) -> list[os.DirEntry]:
> +  return sorted(os.scandir(path), key=lambda e: e.name)
> +
>  def preprocess_arch_std_files(archpath: str) -> None:
>    """Read in all architecture standard events."""
>    global _arch_std_events
> -  for item in os.scandir(archpath):
> +  for item in sorted_scandir(archpath):
>      if item.is_file() and item.name.endswith('.json'):
>        for event in read_json_events(item.path, topic=''):
>          if event.name:
> @@ -497,7 +500,7 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
>  def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
>    """Process a JSON file during the main walk."""
>    def is_leaf_dir(path: str) -> bool:
> -    for item in os.scandir(path):
> +    for item in sorted_scandir(path):
>        if item.is_dir():
>          return False
>      return True
> @@ -889,7 +892,7 @@ def main() -> None:
>    def ftw(path: str, parents: Sequence[str],
>            action: Callable[[Sequence[str], os.DirEntry], None]) -> None:
>      """Replicate the directory/file walking behavior of C's file tree walk."""
> -    for item in os.scandir(path):
> +    for item in sorted_scandir(path):
>        if _args.model != 'all' and item.is_dir():
>          # Check if the model matches one in _args.model.
>          if len(parents) == _args.model.split(',')[0].count('/'):
> @@ -930,7 +933,7 @@ struct compact_pmu_event {
>
>  """)
>    archs = []
> -  for item in os.scandir(_args.starting_dir):
> +  for item in sorted_scandir(_args.starting_dir):
>      if not item.is_dir():
>        continue
>      if item.name == _args.arch or _args.arch == 'all' or item.name == 'test':
> --
> 2.35.3
>
Re: [PATCH] perf jevents: Sort list of input files
Posted by Bernhard M. Wiedemann 2 years, 10 months ago

On 20/03/2023 21.48, Ian Rogers wrote:
> I think this may already be addressed by sorting prior to output:
> https://lore.kernel.org/r/20220812230949.683239-5-irogers@google.com
> 
> Could you confirm?

Hi Ian,

I was testing on 6.2.6 which includes that patch and it was still affected.
The trouble with sorting at the end is, that there can be influences of 
ordering in earlier processing steps, that don't get ironed out by the 
sort later.

Some more experimenting showed that only the ftw scandir needed sorting, 
which allows to further simplify the patch to

      """Replicate the directory/file walking behavior of C's ...
-    for item in os.scandir(path):
+    for item in sorted(os.scandir(path), key=lambda e: e.name):
        action(parents, item)


Without the patch, a random diff in pmu-events.c starts with
-static const struct compact_pmu_event pme_amdzen2[] = {
+static const struct compact_pmu_event pme_silvermont[] = {


While I'm testing on scratch ext4 filesystems where dirindex causes 
randomness, you could also use the disorderfs FUSE-filesystem with its 
shuffle mode to give you random order.

Ciao
Bernhard M.
Re: [PATCH] perf jevents: Sort list of input files
Posted by Arnaldo Carvalho de Melo 2 years, 10 months ago
Em Mon, Mar 20, 2023 at 10:30:00PM +0100, Bernhard M. Wiedemann escreveu:
> 
> 
> On 20/03/2023 21.48, Ian Rogers wrote:
> > I think this may already be addressed by sorting prior to output:
> > https://lore.kernel.org/r/20220812230949.683239-5-irogers@google.com
> > 
> > Could you confirm?
> 
> Hi Ian,
> 
> I was testing on 6.2.6 which includes that patch and it was still affected.
> The trouble with sorting at the end is, that there can be influences of
> ordering in earlier processing steps, that don't get ironed out by the sort
> later.
> 
> Some more experimenting showed that only the ftw scandir needed sorting,
> which allows to further simplify the patch to
> 
>      """Replicate the directory/file walking behavior of C's ...
> -    for item in os.scandir(path):
> +    for item in sorted(os.scandir(path), key=lambda e: e.name):
>        action(parents, item)
> 
> 
> Without the patch, a random diff in pmu-events.c starts with
> -static const struct compact_pmu_event pme_amdzen2[] = {
> +static const struct compact_pmu_event pme_silvermont[] = {
> 
> 
> While I'm testing on scratch ext4 filesystems where dirindex causes
> randomness, you could also use the disorderfs FUSE-filesystem with its
> shuffle mode to give you random order.

So, Ian acked the current patch, but you found some further
simplification, can you please resubmit?

- Arnaldo
[PATCH] perf jevents: Sort list of input files
Posted by Bernhard M. Wiedemann 2 years, 10 months ago
Without this, pmu-events.c would be generated with variations in ordering
depending on non-deterministic filesystem readdir order.

I tested that pmu-events.c still has the same number of lines
and that perf list output works.

This patch was done while working on reproducible builds for openSUSE,
but also solves issues in Debian [1] and other distributions.

[1] https://tests.reproducible-builds.org/debian/rb-pkg/unstable/i386/linux.html

Signed-off-by: Bernhard M. Wiedemann <bwiedemann@suse.de>
CC: Ian Rogers <irogers@google.com>
---
 tools/perf/pmu-events/jevents.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index 2bcd07ce609f..736ee0a75cf8 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -889,7 +889,7 @@ def main() -> None:
   def ftw(path: str, parents: Sequence[str],
           action: Callable[[Sequence[str], os.DirEntry], None]) -> None:
     """Replicate the directory/file walking behavior of C's file tree walk."""
-    for item in os.scandir(path):
+    for item in sorted(os.scandir(path), key=lambda e: e.name):
       if _args.model != 'all' and item.is_dir():
         # Check if the model matches one in _args.model.
         if len(parents) == _args.model.split(',')[0].count('/'):
-- 
2.35.3
Re: [PATCH] perf jevents: Sort list of input files
Posted by Arnaldo Carvalho de Melo 2 years, 10 months ago
Em Tue, Mar 21, 2023 at 07:30:32AM +0100, Bernhard M. Wiedemann escreveu:
> Without this, pmu-events.c would be generated with variations in ordering
> depending on non-deterministic filesystem readdir order.
> 
> I tested that pmu-events.c still has the same number of lines
> and that perf list output works.
> 
> This patch was done while working on reproducible builds for openSUSE,
> but also solves issues in Debian [1] and other distributions.
> 
> [1] https://tests.reproducible-builds.org/debian/rb-pkg/unstable/i386/linux.html

Thanks, applied.

- Arnaldo

 
> Signed-off-by: Bernhard M. Wiedemann <bwiedemann@suse.de>
> CC: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/pmu-events/jevents.py | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
> index 2bcd07ce609f..736ee0a75cf8 100755
> --- a/tools/perf/pmu-events/jevents.py
> +++ b/tools/perf/pmu-events/jevents.py
> @@ -889,7 +889,7 @@ def main() -> None:
>    def ftw(path: str, parents: Sequence[str],
>            action: Callable[[Sequence[str], os.DirEntry], None]) -> None:
>      """Replicate the directory/file walking behavior of C's file tree walk."""
> -    for item in os.scandir(path):
> +    for item in sorted(os.scandir(path), key=lambda e: e.name):
>        if _args.model != 'all' and item.is_dir():
>          # Check if the model matches one in _args.model.
>          if len(parents) == _args.model.split(',')[0].count('/'):
> -- 
> 2.35.3
> 

-- 

- Arnaldo
Re: [PATCH] perf jevents: Sort list of input files
Posted by Ian Rogers 2 years, 10 months ago
On Mon, Mar 20, 2023 at 2:30 PM Bernhard M. Wiedemann
<bwiedemann@suse.de> wrote:
>
> On 20/03/2023 21.48, Ian Rogers wrote:
> > I think this may already be addressed by sorting prior to output:
> > https://lore.kernel.org/r/20220812230949.683239-5-irogers@google.com
> >
> > Could you confirm?
>
> Hi Ian,
>
> I was testing on 6.2.6 which includes that patch and it was still affected.
> The trouble with sorting at the end is, that there can be influences of
> ordering in earlier processing steps, that don't get ironed out by the
> sort later.
>
> Some more experimenting showed that only the ftw scandir needed sorting,
> which allows to further simplify the patch to
>
>       """Replicate the directory/file walking behavior of C's ...
> -    for item in os.scandir(path):
> +    for item in sorted(os.scandir(path), key=lambda e: e.name):
>         action(parents, item)
>
>
> Without the patch, a random diff in pmu-events.c starts with
> -static const struct compact_pmu_event pme_amdzen2[] = {
> +static const struct compact_pmu_event pme_silvermont[] = {
>
>
> While I'm testing on scratch ext4 filesystems where dirindex causes
> randomness, you could also use the disorderfs FUSE-filesystem with its
> shuffle mode to give you random order.
>
> Ciao
> Bernhard M.

Thanks Bernhard!

Acked-by: Ian Rogers <irogers@google.com>

I wonder about determining the order from the mapfile.csv, but that's
another problem for another day.

Thanks,
Ian