[RFC PATCH v2 15/16] scripts/get_maintainer.py: implement basic git fallback support

Alex Bennée posted 16 patches 1 day, 19 hours ago
Maintainers: "Alex Bennée" <alex.bennee@linaro.org>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Thomas Huth <thuth@redhat.com>, John Snow <jsnow@redhat.com>, Cleber Rosa <crosa@redhat.com>
[RFC PATCH v2 15/16] scripts/get_maintainer.py: implement basic git fallback support
Posted by Alex Bennée 1 day, 19 hours ago
Implement basic --git fallback support. This also needs --git-since
and the knobs that control the minimum number of signatures, the
maximum number of maintainers and the minimum percentage of commits.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
 scripts/get_maintainer.py | 125 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)

diff --git a/scripts/get_maintainer.py b/scripts/get_maintainer.py
index b41f5342876..de229a20bc2 100755
--- a/scripts/get_maintainer.py
+++ b/scripts/get_maintainer.py
@@ -11,12 +11,16 @@
 # SPDX-License-Identifier: GPL-2.0-or-later
 
 from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
+from dataclasses import dataclass
 from os import path
 from pathlib import Path
 from enum import StrEnum, auto
 from re import compile as re_compile
 from re import sub as re_sub
+from re import IGNORECASE
 from regex import compile as prec_compile
+from git import Repo
+from collections import Counter
 
 #
 # Subsystem MAINTAINER entries
@@ -230,6 +234,96 @@ def process_patch_file(patchfile):
 
     return (msg, file_list)
 
+#
+# Helpers for querying git
+#
+
+
+@dataclass
+class GitOptions:  # settings for the git history lookup helpers
+    repo: Repo  # repository queried through repo.git.log() and repo.commit()
+    singers: bool  # set from --git; NOTE(review): probably meant "signers"
+    since: str  # handed to git log as --since=<value>
+    min_sig: int  # minimum signature count, from --git-min-signatures
+    max_maint: int  # most signers to list, from --git-max-maintainers
+    min_percent: int  # lowest percent of commits a listed signer may have
+
+
+def rank_signers(git_opts, all_signers, total_commits):
+    """
+    Rank signers by tag count and return a list of (Person, count, percent).
+    """
+    if total_commits == 0:
+        return []
+
+    # limits may still be strings if they were given on the command line
+    max_maint = int(git_opts.max_maint)
+    min_sig = int(git_opts.min_sig)
+    min_percent = float(git_opts.min_percent)
+
+    # extract_from_git passes signers newest first; keep the first Person seen
+    counts = Counter(s.email for s in all_signers)
+    email_to_person = {}
+    for p in all_signers:
+        email_to_person.setdefault(p.email, p)
+    ranked_results = []
+    for email, count in counts.most_common(max_maint):
+        percent = min(100.0, (count / total_commits) * 100)
+        if count >= min_sig and percent >= min_percent:
+            ranked_results.append((email_to_person[email], count, percent))
+    return ranked_results
+
+
+# any "*-by:" tag line, case-insensitive; the text after it is person_info
+sig_line_re = re_compile(r"^\s*[\w-]+-by:\s*(?P<person_info>.*)", IGNORECASE)
+
+
+def extract_signers(commit_message):
+    """
+    Return a list of Persons built from the *-by: tags of a commit message.
+    """
+    found = []
+    for text in commit_message.splitlines():
+        tag = sig_line_re.match(text)
+        if tag is None:
+            continue
+        try:
+            found.append(Person(tag.group('person_info')))
+        except BadPerson:  # skip tags that Person() rejects
+            pass
+    return found
+
+
+def extract_from_git(git_opts, src_file):
+    """
+    Work out likely 'maintainers' for a file from its git history.
+    Returns a list of (Person, role) tuples; empty if no commits match.
+    """
+    repo = git_opts.repo
+
+    # the git porcelain gives us the hashes of the commits that touched
+    # the file inside the --since window
+    log_args = ('--follow', f"--since={git_opts.since}", "--format=%H",
+                '--', src_file)
+    hashes = repo.git.log(*log_args).splitlines()
+    if not hashes:
+        return []
+
+    commits = [repo.commit(sha) for sha in hashes]
+
+    # pool the *-by: signers of every commit before ranking them
+    all_signers = []
+    for commit in commits:
+        all_signers += extract_signers(f"{commit.message}")
+
+    ranked = rank_signers(git_opts, all_signers, len(commits))
+
+    # describe each signer's share of the commits in the role text
+    return [
+        (person, f"commit_signer: {count}/{len(commits)}={percent:.0f}%")
+        for person, count, percent in ranked
+    ]
+
 #
 # Helper functions for dealing with the source path
 #
@@ -331,6 +425,22 @@ def main():
     parser.add_argument('--src', type=valid_src_root, default=src,
                         help=f'Root of QEMU source tree{" (default: " + src + ")" if src else ""}')
 
+    # Git Options
+    parser.add_argument('--git', action=BooleanOptionalAction,
+                        default=False,
+                        help="Include recent git *-by: signers (default: don't)")
+    parser.add_argument('--git-since', default="1-year-ago",
+                        help='git history to use when falling back (default: 1-year-ago)')
+    parser.add_argument('--git-fallback',
+                        action=BooleanOptionalAction, default=True,
+                        help='use git when no exact MAINTAINERS pattern (default: fallback)')
+    parser.add_argument('--git-min-signatures', default=1,
+                        help='number of signatures required (default: 1)')
+    parser.add_argument('--git-max-maintainers', default=5,
+                        help='maximum number of git derived maintainers to add (default: 5)')
+    parser.add_argument('--git-min-percent', default=5,
+                        help='minimum percentage of commits to tagged as a maintainer (default: 5)')
+
     args = parser.parse_args()
 
     try:
@@ -369,6 +479,21 @@ def main():
     for rm in maintained:
         print(str(rm))
 
+    # Git fallback
+    if args.git or (args.git_fallback and len(maintained) == 0):
+        repo = Repo(src)
+        git_opts = GitOptions(repo=repo, singers=args.git,
+                              since=args.git_since,
+                              min_sig=args.git_min_signatures,
+                              max_maint=args.git_max_maintainers,
+                              min_percent=args.git_min_percent)
+
+        for f in files:
+            gmaint = extract_from_git(git_opts, f)
+
+            for (person, role) in gmaint:
+                print(f"{person} ({role})")
+
 
 if __name__ == '__main__':
     main()
-- 
2.47.3