[v1] Add support for Firefox's gecko profile format

[PATCH 4/9] scripts: python: Implement parsing of input data in convertPerfScriptProfile

Posted by Anup Sharma 2 years, 7 months ago

The lines variable is created by splitting the profile string into individual
lines. It allows for iterating over each line for processing.

Each line that is neither empty nor a '#' header line is considered the
start of a sample. It is matched against regular expression patterns to
extract relevant information such as before_time_stamp, time_stamp,
threadNamePidAndTidMatch, threadName, pid, and tid.

The stack frames of the current sample are then parsed in a nested loop.
Each stackFrameLine is matched against a regular expression pattern to
extract rawFunc and mod information.

Also fixed a few checkpatch warnings.

Signed-off-by: Anup Sharma <anupnewsmail@gmail.com>
---
 .../scripts/python/firefox-gecko-converter.py | 62 ++++++++++++++++++-
 1 file changed, 60 insertions(+), 2 deletions(-)

diff --git a/tools/perf/scripts/python/firefox-gecko-converter.py b/tools/perf/scripts/python/firefox-gecko-converter.py
index 0ff70c0349c8..e5bc7a11c3e6 100644
--- a/tools/perf/scripts/python/firefox-gecko-converter.py
+++ b/tools/perf/scripts/python/firefox-gecko-converter.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
 import re
 import sys
 import json
@@ -14,13 +15,13 @@ def isPerfScriptFormat(profile):
     firstLine = profile[:profile.index('\n')]
     return bool(re.match(r'^\S.*?\s+(?:\d+/)?\d+\s+(?:\d+\d+\s+)?[\d.]+:', firstLine))
 
-def convertPerfScriptProfile(profile): 
+def convertPerfScriptProfile(profile):
 
         def addSample(threadName, stackArray, time):
             nonlocal name
             if name != threadName:
                 name = threadName
-            # TODO: 
+            # TODO:
             # get_or_create_stack will create a new stack if it doesn't exist, or return the existing stack if it does.
             # get_or_create_frame will create a new frame if it doesn't exist, or return the existing frame if it does.
             stack = reduce(lambda prefix, stackFrame: get_or_create_stack(get_or_create_frame(stackFrame), prefix), stackArray, None)
@@ -54,3 +55,60 @@ def convertPerfScriptProfile(profile):
             thread = _createtread(threadName, pid, tid)
             threadMap[tid] = thread
         thread['addSample'](threadName, stack, time_stamp)
+
+    lines = profile.split('\n')
+
+    line_index = 0
+    startTime = 0
+    while line_index < len(lines):
+        line = lines[line_index]
+        line_index += 1
+    # perf script --header outputs header lines beginning with #
+        if line == '' or line.startswith('#'):
+            continue
+
+        sample_start_line = line
+
+        sample_start_match = re.match(r'^(.*)\s+([\d.]+):', sample_start_line)
+        if not sample_start_match:
+            print(f'Could not parse line as the start of a sample in the "perf script" profile format: "{sample_start_line}"')
+            continue
+
+        before_time_stamp = sample_start_match[1]
+        time_stamp = float(sample_start_match[2]) * 1000
+        threadNamePidAndTidMatch = re.match(r'^(.*)\s+(?:(\d+)\/)?(\d+)\b', before_time_stamp)
+
+        if not threadNamePidAndTidMatch:
+            print('Could not parse line as the start of a sample in the "perf script" profile format: "%s"' % sampleStartLine)
+            continue
+        threadName = threadNamePidAndTidMatch[1].strip()
+        pid = int(threadNamePidAndTidMatch[2] or 0)
+        tid = int(threadNamePidAndTidMatch[3] or 0)
+        if startTime == 0:
+            startTime = time_stamp
+        # Parse the stack frames of the current sample in a nested loop.
+        stack = []
+        while line_index < len(lines):
+            stackFrameLine = lines[line_index]
+            line_index += 1
+            if stackFrameLine.strip() == '':
+                # Sample ends.
+                break
+            stackFrameMatch = re.match(r'^\s*(\w+)\s*(.+) \(([^)]*)\)', stackFrameLine)
+            if stackFrameMatch:
+                rawFunc = stackFrameMatch[2]
+                mod = stackFrameMatch[3]
+                rawFunc = re.sub(r'\+0x[\da-f]+$', '', rawFunc)
+
+            if rawFunc.startswith('('):
+                continue # skip process names
+
+            if mod:
+                # If we have a module name, provide it.
+                # The code processing the profile will search for
+                # "functionName (in libraryName)" using a regexp,
+                # and automatically create the library information.
+                rawFunc += f' (in {mod})'
+
+            stack.append(rawFunc)
+
-- 
2.34.1

Re: [PATCH 4/9] scripts: python: Implement parsing of input data in convertPerfScriptProfile

Posted by Namhyung Kim 2 years, 7 months ago

Hi Anup,

On Wed, Jun 21, 2023 at 12:41 PM Anup Sharma <anupnewsmail@gmail.com> wrote:
>
> The lines variable is created by splitting the profile string into individual
> lines. It allows for iterating over each line for processing.
>
> The line is considered the start of a sample. It is matched against a regular
> expression pattern to extract relevant information such as before_time_stamp,
> time_stamp, threadNamePidAndTidMatch, threadName, pid, and tid.
>
> The stack frames of the current sample are then parsed in a nested loop.
> Each stackFrameLine is matched against a regular expression pattern to
> extract rawFunc and mod information.
>
> Also fixed few checkpatch warnings.
>
> Signed-off-by: Anup Sharma <anupnewsmail@gmail.com>
> ---
>  .../scripts/python/firefox-gecko-converter.py | 62 ++++++++++++++++++-
>  1 file changed, 60 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/scripts/python/firefox-gecko-converter.py b/tools/perf/scripts/python/firefox-gecko-converter.py
> index 0ff70c0349c8..e5bc7a11c3e6 100644
> --- a/tools/perf/scripts/python/firefox-gecko-converter.py
> +++ b/tools/perf/scripts/python/firefox-gecko-converter.py
> @@ -1,4 +1,5 @@
>  #!/usr/bin/env python3
> +# SPDX-License-Identifier: GPL-2.0

Please put this line in the first commit.

>  import re
>  import sys
>  import json
> @@ -14,13 +15,13 @@ def isPerfScriptFormat(profile):
>      firstLine = profile[:profile.index('\n')]
>      return bool(re.match(r'^\S.*?\s+(?:\d+/)?\d+\s+(?:\d+\d+\s+)?[\d.]+:', firstLine))
>
> -def convertPerfScriptProfile(profile):
> +def convertPerfScriptProfile(profile):

You'd better configure your editor to warn or even fix
the trailing whitespace automatically.

Thanks,
Namhyung


>
>          def addSample(threadName, stackArray, time):
>              nonlocal name
>              if name != threadName:
>                  name = threadName
> -            # TODO:
> +            # TODO:
>              # get_or_create_stack will create a new stack if it doesn't exist, or return the existing stack if it does.
>              # get_or_create_frame will create a new frame if it doesn't exist, or return the existing frame if it does.
>              stack = reduce(lambda prefix, stackFrame: get_or_create_stack(get_or_create_frame(stackFrame), prefix), stackArray, None)
> @@ -54,3 +55,60 @@ def convertPerfScriptProfile(profile):
>              thread = _createtread(threadName, pid, tid)
>              threadMap[tid] = thread
>          thread['addSample'](threadName, stack, time_stamp)
> +
> +    lines = profile.split('\n')
> +
> +    line_index = 0
> +    startTime = 0
> +    while line_index < len(lines):
> +        line = lines[line_index]
> +        line_index += 1
> +    # perf script --header outputs header lines beginning with #
> +        if line == '' or line.startswith('#'):
> +            continue
> +
> +        sample_start_line = line
> +
> +        sample_start_match = re.match(r'^(.*)\s+([\d.]+):', sample_start_line)
> +        if not sample_start_match:
> +            print(f'Could not parse line as the start of a sample in the "perf script" profile format: "{sample_start_line}"')
> +            continue
> +
> +        before_time_stamp = sample_start_match[1]
> +        time_stamp = float(sample_start_match[2]) * 1000
> +        threadNamePidAndTidMatch = re.match(r'^(.*)\s+(?:(\d+)\/)?(\d+)\b', before_time_stamp)
> +
> +        if not threadNamePidAndTidMatch:
> +            print('Could not parse line as the start of a sample in the "perf script" profile format: "%s"' % sampleStartLine)
> +            continue
> +        threadName = threadNamePidAndTidMatch[1].strip()
> +        pid = int(threadNamePidAndTidMatch[2] or 0)
> +        tid = int(threadNamePidAndTidMatch[3] or 0)
> +        if startTime == 0:
> +            startTime = time_stamp
> +        # Parse the stack frames of the current sample in a nested loop.
> +        stack = []
> +        while line_index < len(lines):
> +            stackFrameLine = lines[line_index]
> +            line_index += 1
> +            if stackFrameLine.strip() == '':
> +                # Sample ends.
> +                break
> +            stackFrameMatch = re.match(r'^\s*(\w+)\s*(.+) \(([^)]*)\)', stackFrameLine)
> +            if stackFrameMatch:
> +                rawFunc = stackFrameMatch[2]
> +                mod = stackFrameMatch[3]
> +                rawFunc = re.sub(r'\+0x[\da-f]+$', '', rawFunc)
> +
> +            if rawFunc.startswith('('):
> +                continue # skip process names
> +
> +            if mod:
> +                # If we have a module name, provide it.
> +                # The code processing the profile will search for
> +                # "functionName (in libraryName)" using a regexp,
> +                # and automatically create the library information.
> +                rawFunc += f' (in {mod})'
> +
> +            stack.append(rawFunc)
> +
> --
> 2.34.1
>

Re: [PATCH 4/9] scripts: python: Implement parsing of input data in convertPerfScriptProfile

Posted by Anup Sharma 2 years, 7 months ago

On Fri, Jun 23, 2023 at 05:03:12PM -0700, Namhyung Kim wrote:
> Hi Anup,
> 
> On Wed, Jun 21, 2023 at 12:41 PM Anup Sharma <anupnewsmail@gmail.com> wrote:
> >
> > The lines variable is created by splitting the profile string into individual
> > lines. It allows for iterating over each line for processing.
> >
> > The line is considered the start of a sample. It is matched against a regular
> > expression pattern to extract relevant information such as before_time_stamp,
> > time_stamp, threadNamePidAndTidMatch, threadName, pid, and tid.
> >
> > The stack frames of the current sample are then parsed in a nested loop.
> > Each stackFrameLine is matched against a regular expression pattern to
> > extract rawFunc and mod information.
> >
> > Also fixed few checkpatch warnings.
> >
> > Signed-off-by: Anup Sharma <anupnewsmail@gmail.com>
> > ---
> >  .../scripts/python/firefox-gecko-converter.py | 62 ++++++++++++++++++-
> >  1 file changed, 60 insertions(+), 2 deletions(-)
> >
> > diff --git a/tools/perf/scripts/python/firefox-gecko-converter.py b/tools/perf/scripts/python/firefox-gecko-converter.py
> > index 0ff70c0349c8..e5bc7a11c3e6 100644
> > --- a/tools/perf/scripts/python/firefox-gecko-converter.py
> > +++ b/tools/perf/scripts/python/firefox-gecko-converter.py
> > @@ -1,4 +1,5 @@
> >  #!/usr/bin/env python3
> > +# SPDX-License-Identifier: GPL-2.0
> 
> Please put this line in the first commit.

Sure, I have done this in the latest version.

> >  import re
> >  import sys
> >  import json
> > @@ -14,13 +15,13 @@ def isPerfScriptFormat(profile):
> >      firstLine = profile[:profile.index('\n')]
> >      return bool(re.match(r'^\S.*?\s+(?:\d+/)?\d+\s+(?:\d+\d+\s+)?[\d.]+:', firstLine))
> >
> > -def convertPerfScriptProfile(profile):
> > +def convertPerfScriptProfile(profile):
> 
> You'd better configure your editor to warn or even fix
> the trailing whitespace automatically.

Thanks, I followed your advice and configured my nvim to handle trailing
whitespace automatically. It has significantly improved my workflow.
Here's the updated snippet I added to my vimrc file:

highlight ExtraWhitespace ctermbg=white guibg=white
match ExtraWhitespace /\s\+$/

> Thanks,
> Namhyung
> 
> 
> >
> >          def addSample(threadName, stackArray, time):
> >              nonlocal name
> >              if name != threadName:
> >                  name = threadName
> > -            # TODO:
> > +            # TODO:
> >              # get_or_create_stack will create a new stack if it doesn't exist, or return the existing stack if it does.
> >              # get_or_create_frame will create a new frame if it doesn't exist, or return the existing frame if it does.
> >              stack = reduce(lambda prefix, stackFrame: get_or_create_stack(get_or_create_frame(stackFrame), prefix), stackArray, None)
> > @@ -54,3 +55,60 @@ def convertPerfScriptProfile(profile):
> >              thread = _createtread(threadName, pid, tid)
> >              threadMap[tid] = thread
> >          thread['addSample'](threadName, stack, time_stamp)
> > +
> > +    lines = profile.split('\n')
> > +
> > +    line_index = 0
> > +    startTime = 0
> > +    while line_index < len(lines):
> > +        line = lines[line_index]
> > +        line_index += 1
> > +    # perf script --header outputs header lines beginning with #
> > +        if line == '' or line.startswith('#'):
> > +            continue
> > +
> > +        sample_start_line = line
> > +
> > +        sample_start_match = re.match(r'^(.*)\s+([\d.]+):', sample_start_line)
> > +        if not sample_start_match:
> > +            print(f'Could not parse line as the start of a sample in the "perf script" profile format: "{sample_start_line}"')
> > +            continue
> > +
> > +        before_time_stamp = sample_start_match[1]
> > +        time_stamp = float(sample_start_match[2]) * 1000
> > +        threadNamePidAndTidMatch = re.match(r'^(.*)\s+(?:(\d+)\/)?(\d+)\b', before_time_stamp)
> > +
> > +        if not threadNamePidAndTidMatch:
> > +            print('Could not parse line as the start of a sample in the "perf script" profile format: "%s"' % sampleStartLine)
> > +            continue
> > +        threadName = threadNamePidAndTidMatch[1].strip()
> > +        pid = int(threadNamePidAndTidMatch[2] or 0)
> > +        tid = int(threadNamePidAndTidMatch[3] or 0)
> > +        if startTime == 0:
> > +            startTime = time_stamp
> > +        # Parse the stack frames of the current sample in a nested loop.
> > +        stack = []
> > +        while line_index < len(lines):
> > +            stackFrameLine = lines[line_index]
> > +            line_index += 1
> > +            if stackFrameLine.strip() == '':
> > +                # Sample ends.
> > +                break
> > +            stackFrameMatch = re.match(r'^\s*(\w+)\s*(.+) \(([^)]*)\)', stackFrameLine)
> > +            if stackFrameMatch:
> > +                rawFunc = stackFrameMatch[2]
> > +                mod = stackFrameMatch[3]
> > +                rawFunc = re.sub(r'\+0x[\da-f]+$', '', rawFunc)
> > +
> > +            if rawFunc.startswith('('):
> > +                continue # skip process names
> > +
> > +            if mod:
> > +                # If we have a module name, provide it.
> > +                # The code processing the profile will search for
> > +                # "functionName (in libraryName)" using a regexp,
> > +                # and automatically create the library information.
> > +                rawFunc += f' (in {mod})'
> > +
> > +            stack.append(rawFunc)
> > +
> > --
> > 2.34.1
> >