scripts/deploy | 8 + scripts/dockerfiles/importer-lore.docker | 5 + scripts/patchew-importer-lore | 244 ++++++++++++++++++ scripts/playbooks/deploy-importers-lore.yml | 38 +++ .../templates/importer-lore-config.j2 | 4 + 5 files changed, 299 insertions(+) create mode 100644 scripts/dockerfiles/importer-lore.docker create mode 100755 scripts/patchew-importer-lore create mode 100644 scripts/playbooks/deploy-importers-lore.yml create mode 100644 scripts/playbooks/templates/importer-lore-config.j2
This is a spiced-up version of Fam's code from
https://github.com/famz/patchew/commit/925998bf6.
The differences are:
- there is a new importer script and playbook, so there are now two importers
- there is config file support so that the playbook can follow
the same model as the existing ones
- there is an age limit so that patches older than a few months
are not imported
- once it is up to date, the script only works on the most
recent repos and does not attempt to clone all of them
- all git invocations are done from Python instead of shell
The configuration file supports all command-line options,
while the playbook is a bit more limited.
Co-developed-by: Fam Zheng <fam.zheng@bytedance.com>
---
scripts/deploy | 8 +
scripts/dockerfiles/importer-lore.docker | 5 +
scripts/patchew-importer-lore | 244 ++++++++++++++++++
scripts/playbooks/deploy-importers-lore.yml | 38 +++
.../templates/importer-lore-config.j2 | 4 +
5 files changed, 299 insertions(+)
create mode 100644 scripts/dockerfiles/importer-lore.docker
create mode 100755 scripts/patchew-importer-lore
create mode 100644 scripts/playbooks/deploy-importers-lore.yml
create mode 100644 scripts/playbooks/templates/importer-lore-config.j2
diff --git a/scripts/deploy b/scripts/deploy
index e05984d..8bd23c4 100755
--- a/scripts/deploy
+++ b/scripts/deploy
@@ -21,6 +21,8 @@ def parse_args():
help="Database host address")
parser.add_argument("--tester", "-t", nargs="*", dest="testers",
help="Tester host address")
+ parser.add_argument("--public-inbox", "-p", nargs="?",
+ help="Importer host address")
parser.add_argument("--importer", "-i", nargs="?",
help="Importer host address")
parser.add_argument("--applier", "-a", nargs="?",
@@ -41,6 +43,9 @@ def generate_inventory_file(args):
[appliers]
%s
+[importers_lore]
+%s
+
[importers]
%s
@@ -49,6 +54,7 @@ def generate_inventory_file(args):
% (args.web_server or "",
args.db_server or "",
args.applier or "",
+ args.public_inbox or "",
args.importer or "",
"\n".join(args.testers or [])))
f.flush()
@@ -68,6 +74,8 @@ def main():
playbooks.append("deploy-testers.yml")
if args.applier:
playbooks.append("deploy-appliers.yml")
+ if args.public_inbox:
+ playbooks.append("deploy-importers-lore.yml")
if args.importer:
playbooks.append("deploy-importers.yml")
if not playbooks:
diff --git a/scripts/dockerfiles/importer-lore.docker b/scripts/dockerfiles/importer-lore.docker
new file mode 100644
index 0000000..1e7e14b
--- /dev/null
+++ b/scripts/dockerfiles/importer-lore.docker
@@ -0,0 +1,5 @@
+# Container image for the public-inbox importer (scripts/patchew-importer-lore).
+FROM fedora:latest
+# Tools installed into the image; the importer script itself invokes git.
+RUN dnf install -y python findutils git wget
+ENV LC_ALL en_US.UTF-8
+# The whole build context is copied to /opt/patchew.
+COPY . /opt/patchew/
+# Run the importer with /data/patchew as data directory and
+# /data/patchew/config as configuration file.
+CMD /opt/patchew/scripts/patchew-importer-lore -d /data/patchew -c /data/patchew/config
diff --git a/scripts/patchew-importer-lore b/scripts/patchew-importer-lore
new file mode 100755
index 0000000..e034998
--- /dev/null
+++ b/scripts/patchew-importer-lore
@@ -0,0 +1,244 @@
+#!/usr/bin/env python3
+#
+# Copyright 2021-2022 Bytedance Inc.
+#
+# Authors:
+# Fam Zheng <fam.zheng@bytedance.com>
+#
+# This work is licensed under the MIT License. Please see the LICENSE file or
+# http://opensource.org/licenses/MIT.
+
+import os
+import sys
+import time
+import argparse
+import logging
+import tempfile
+import subprocess
+import dbm
+
+# Parent directory of the directory containing this script, and the
+# patchew-cli executable expected to live there.
+BASE_DIR = os.path.realpath(os.path.dirname(__file__) + "/..")
+PATCHEW_CLI = os.path.join(BASE_DIR, "patchew-cli")
+
+# Description of every supported setting.  parse_args() turns each key into a
+# long option (underscores replaced by dashes) and a short option taken from
+# "short"; "help" and "metavar" are passed on to argparse, and "default",
+# when present, is the initial value stored in CONFIG.
+CONFIG_ITEMS = {
+ "data_dir": {
+ "short": "d",
+ "help": "directory to put data in",
+ "metavar": "PATH",
+ },
+ "patchew_server": {
+ "short": "S",
+ "help": "Patchew server to log into",
+ "metavar": "HOST",
+ },
+ "patchew_username": {
+ "short": "U",
+ "help": "Username for patchew server",
+ "metavar": "USER",
+ },
+ "patchew_password": {
+ "short": "P",
+ "help": "Password for patchew server",
+ "metavar": "PASSWORD",
+ },
+ "git_root": {
+ "short": "g",
+ "help": "Root of public-inbox repository",
+ "metavar": "URL",
+ },
+ "limit": {
+ "short": "l",
+ "default": "2.months.ago",
+ "help": "How old to import backlog (default 2 months)",
+ "metavar": "DATE",
+ },
+ "max": {
+ "short": "m",
+ "default": "4",
+ "help": "How many public-inbox repositories to import (default 4)",
+ "metavar": "N",
+ },
+ "batch": {
+ "short": "b",
+ "default": "500",
+ "help": "How many messages to import between git-pull",
+ "metavar": "N",
+ },
+}
+
+# Effective settings, filled in by parse_args() and config_from_file().
+CONFIG = {}
+# Highest repository number seen locally by find_commits(); main() uses it to
+# choose the starting repository number for the next pass.
+HIGHEST_REPO = 0
+
+
+def config_from_file(args):
+ global CONFIG
+ import configparser
+
+ parser = configparser.ConfigParser()
+ parser.read(args.config)
+
+ # default section applies to all git repos
+ CONFIG.update(parser["DEFAULT"])
+ if not args.git_root:
+ # no -g flag, there needs to be exactly one non-DEFAULT section
+ if len(parser.sections()) > 1:
+ raise Exception("please specify desired git root")
+ git_root = parser.sections()[0]
+ CONFIG["git_root"] = git_root
+ CONFIG.update(parser[git_root])
+ else:
+ # -g flag, use the named section or just the defaults
+ if args.git_root in parser.sections():
+ CONFIG.update(parser[args.git_root])
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--config", "-c", help="Path to config file", metavar="FILE")
+ for k, v in CONFIG_ITEMS.items():
+ long = "--" + k.replace("_", "-")
+ short = "-" + v["short"]
+ parser.add_argument(short, long, help=v["help"], metavar=v["metavar"])
+ if "default" in v:
+ CONFIG[k] = v["default"]
+
+ args = parser.parse_args()
+ if args.config:
+ config_from_file(args)
+ # Arguments override config file
+ for k in CONFIG_ITEMS.keys():
+ if getattr(args, k) is not None:
+ CONFIG[k] = getattr(args, k)
+
+
+def git_clone(src, dest):
+ logging.info("cloning " + src + " into " + os.path.join(os.getcwd(), dest))
+ subprocess.check_call(["git", "clone", src, dest])
+
+
+def git_pull(wd):
+ logging.info("updating " + os.path.join(os.getcwd(), wd))
+ subprocess.check_call(["git", "pull"], cwd=wd)
+
+
+def find_commits(git_root, first_repo, max_repos):
+ global HIGHEST_REPO
+ base = "public-inbox"
+ if not os.path.exists(base):
+ os.mkdir(base)
+ for i in range(first_repo, -1, -1):
+ if max_repos < 1:
+ break
+
+ i_str = str(i)
+ wd = os.path.join(base, i_str)
+ if not os.path.exists(wd):
+ try:
+ git_clone(git_root + i_str, wd)
+ except subprocess.CalledProcessError:
+ continue
+
+ HIGHEST_REPO = max(HIGHEST_REPO, i)
+ try:
+ git_pull(wd)
+ except subprocess.CalledProcessError:
+ break
+
+ max_repos -= 1
+ p = subprocess.Popen(
+ ["git", "log", "--oneline", "--since=" + CONFIG["limit"], "--format=%h"],
+ cwd=wd,
+ stdout=subprocess.PIPE,
+ encoding="utf-8",
+ )
+ for line in p.stdout:
+ yield (wd, line.strip())
+
+
+def show_commit(d, c):
+ return subprocess.check_output(["git", "show", "%s:m" % c], cwd=d)
+
+
+def import_public_inbox(git_root, max_imports, first_repo, max_repos):
+ if not git_root.endswith("/"):
+ git_root += "/"
+
+ db = dbm.open("patchew-importer-lore.db", "c")
+
+ for (d, commit) in find_commits(git_root, first_repo, max_repos):
+ if max_imports < 1:
+ break
+ if db.get(commit):
+ continue
+ max_imports -= 1
+ with tempfile.NamedTemporaryFile() as tf:
+ try:
+ tf.write(show_commit(d, commit))
+ tf.flush()
+ what = subprocess.check_output(
+ "git log -n 1 {commit} --oneline --format='%aD - %aN <%aE> - %s'".format(
+ commit=commit
+ ),
+ shell=True,
+ cwd=d,
+ encoding="utf-8",
+ )
+ logging.info("importing %s" % what)
+ cmd = [PATCHEW_CLI, "-s", CONFIG["patchew_server"], "import", tf.name]
+ subprocess.check_output(cmd, stderr=subprocess.PIPE)
+ db[commit] = "imported"
+ except Exception as e:
+ logging.error(
+ "failed to import commit %s in archive %s: %s" % (commit, d, e)
+ )
+ db[commit] = "failed"
+ else:
+ time.sleep(60)
+
+
+def main():
+ global CONFIG, HIGHEST_REPO
+
+ parse_args()
+ if not CONFIG["patchew_server"]:
+ logging.error(
+ "you need to specify a patchew server within the config file or with -S"
+ )
+ if not CONFIG["patchew_username"]:
+ logging.error(
+ "you need to specify a patchew username within the config file or with -U"
+ )
+ if not CONFIG["patchew_password"]:
+ logging.error(
+ "you need to specify a patchew username password the config file or with -P"
+ )
+
+ logging.basicConfig(level=logging.DEBUG)
+ if CONFIG["data_dir"]:
+ if not os.path.exists(CONFIG["data_dir"]):
+ os.mkdir(CONFIG["data_dir"])
+ os.chdir(CONFIG["data_dir"])
+ cmd = [
+ PATCHEW_CLI,
+ "-s",
+ CONFIG["patchew_server"],
+ "login",
+ CONFIG["patchew_username"],
+ CONFIG["patchew_password"],
+ ]
+ subprocess.check_call(cmd, stderr=subprocess.STDOUT)
+
+ # no need to be stingy, high repos are checked only once per run
+ first_repo = 40
+ max_repos = int(CONFIG["max"])
+ max_imports = int(CONFIG["batch"])
+ git_root = CONFIG["git_root"]
+ while True:
+ # restart and import the latest mails every once in a while to make
+ # sure new patches are imported timely, before the backlog
+ import_public_inbox(git_root, max_imports, first_repo, max_repos)
+ first_repo = HIGHEST_REPO + 1
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/scripts/playbooks/deploy-importers-lore.yml b/scripts/playbooks/deploy-importers-lore.yml
new file mode 100644
index 0000000..de0a0f2
--- /dev/null
+++ b/scripts/playbooks/deploy-importers-lore.yml
@@ -0,0 +1,38 @@
+# Deploy the public-inbox importer on the hosts of the "importers_lore"
+# inventory group.  The prompted values below are written to the importer's
+# configuration file through templates/importer-lore-config.j2.
+- hosts: importers_lore
+ vars_prompt:
+ - name: instance_name
+ prompt: "The instance name"
+ default: patchew-importer-lore
+ private: no
+ - name: "patchew_server"
+ prompt: "The address of patchew server"
+ default: "https://patchew.org"
+ private: no
+ - name: "importer_user"
+ prompt: "Username for the importer to login to the server"
+ private: no
+ default: "importer"
+ - name: "importer_pass"
+ prompt: "Password for the importer to login to the server"
+ private: yes
+ - name: "git_repo_base"
+ prompt: "URL in which to find public-inbox git repositories"
+ default: "https://lore.kernel.org/lkml/"
+ private: no
+ vars:
+ # All paths are derived from the instance name.
+ base_dir: "/data/{{ instance_name }}"
+ src_dir: "{{ base_dir }}/src"
+ data_dir: "{{ base_dir }}/data"
+ config_file: "{{ data_dir }}/config"
+ tasks:
+ - name: Create data dir
+ file:
+ path: "{{ data_dir }}"
+ state: directory
+ - name: Create config
+ template:
+ src: "templates/importer-lore-config.j2"
+ dest: "{{ config_file }}"
+ # The remaining deployment steps are shared and live in
+ # tasks/docker-deploy.yml.
+ - import_tasks: tasks/docker-deploy.yml
+ vars:
+ instance_role: importer-lore
diff --git a/scripts/playbooks/templates/importer-lore-config.j2 b/scripts/playbooks/templates/importer-lore-config.j2
new file mode 100644
index 0000000..e3a1437
--- /dev/null
+++ b/scripts/playbooks/templates/importer-lore-config.j2
@@ -0,0 +1,4 @@
+[{{ git_repo_base }}]
+patchew_server={{ patchew_server }}
+patchew_username={{ importer_user }}
+patchew_password={{ importer_pass }}
--
2.34.1
_______________________________________________
Patchew-devel mailing list
Patchew-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/patchew-devel
© 2016 - 2024 Red Hat, Inc.