[v2] scripts/ci: add gitlab-failure-analysis script

[PATCH v2] scripts/ci: add gitlab-failure-analysis script
Posted by Alex Bennée 2 weeks, 4 days ago
This is a script designed to collect data from multiple pipelines and
analyse the failure modes they have. By default it will probe the last
3 failed jobs on the staging branch. However this can all be
controlled by the CLI:

  ./scripts/ci/gitlab-failure-analysis --count 2 --branch=testing/next --id 39915562 --status=
  running pipeline 2028486060, total jobs 125, skipped 5, failed 0,  39742 tests, 0 failed tests
  success pipeline 2015018135, total jobs 125, skipped 5, failed 0,  49219 tests, 0 failed tests

You can also skip failing jobs and just dump the tests:

  ./scripts/ci/gitlab-failure-analysis --branch= --id 39915562 --status= --skip-jobs --pipeline 1946202491 1919542960
  failed pipeline 1946202491, total jobs 127, skipped 5, failed 26,  38742 tests, 278 skipped tests, 2 failed tests
    Failed test qemu.qemu:qtest+qtest-s390x / qtest-s390x/boot-serial-test, check-system-opensuse, 1 /s390x/boot-serial/s390-ccw-virtio - FATAL-ERROR: Failed to find expected string. Please check '/tmp/qtest-boot-serial-sW77EA3'
    Failed test qemu.qemu:qtest+qtest-aarch64 / qtest-aarch64/arm-cpu-features, check-system-opensuse, 1 /aarch64/arm/query-cpu-model-expansion - ERROR:../tests/qtest/arm-cpu-features.c:459:test_query_cpu_model_expansion: assertion failed (_error == "The CPU type 'host' requires KVM"): ("The CPU type 'host' requires hardware accelerator" == "The CPU type 'host' requires KVM")
  failed pipeline 1919542960, total jobs 127, skipped 5, failed 2,  48753 tests, 441 skipped tests, 1 failed tests
    Failed test qemu.qemu:unit / test-aio, msys2-64bit, 12 /aio/timer/schedule - ERROR:../tests/unit/test-aio.c:413:test_timer_schedule: assertion failed: (aio_poll(ctx, true))

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
v2
  - allow status selection, handle empty strings as None
  - allow individual pipeline selection
  - extract individual tests
  - allow skipping of jobs
---
 scripts/ci/gitlab-failure-analysis | 117 +++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100755 scripts/ci/gitlab-failure-analysis

diff --git a/scripts/ci/gitlab-failure-analysis b/scripts/ci/gitlab-failure-analysis
new file mode 100755
index 00000000000..906725be973
--- /dev/null
+++ b/scripts/ci/gitlab-failure-analysis
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+#
+# A script to analyse failures in the gitlab pipelines. It requires an
+# API key from gitlab with the following permissions:
+#  - api
+#  - read_repository
+#  - read_user
+#
+
+import argparse
+import gitlab
+import os
+
+#
+# Arguments
+#
+class NoneForEmptyStringAction(argparse.Action):
+    def __call__(self, parser, namespace, value, option_string=None):
+        if value == '':
+            setattr(namespace, self.dest, None)
+        else:
+            setattr(namespace, self.dest, value)
+
+
+parser = argparse.ArgumentParser(description="Analyse failed GitLab CI runs.")
+
+parser.add_argument("--gitlab",
+                    default="https://gitlab.com",
+                    help="GitLab instance URL (default: https://gitlab.com).")
+parser.add_argument("--id", default=11167699,
+                    type=int,
+                    help="GitLab project id (default: 11167699 for qemu-project/qemu)")
+parser.add_argument("--token",
+                    default=os.getenv("GITLAB_TOKEN"),
+                    help="Your personal access token with 'api' scope.")
+parser.add_argument("--branch",
+                    type=str,
+                    default="staging",
+                    action=NoneForEmptyStringAction,
+                    help="The name of the branch (default: 'staging')")
+parser.add_argument("--status",
+                    type=str,
+                    action=NoneForEmptyStringAction,
+                    default="failed",
+                    help="Filter by branch status (default: 'failed')")
+parser.add_argument("--count", type=int,
+                    default=3,
+                    help="The number of failed runs to fetch.")
+parser.add_argument("--skip-jobs",
+                    default=False,
+                    action='store_true',
+                    help="Skip dumping the job info")
+parser.add_argument("--pipeline", type=int,
+                    nargs="+",
+                    default=None,
+                    help="Explicit pipeline ID(s) to fetch.")
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+
+    gl = gitlab.Gitlab(url=args.gitlab, private_token=args.token)
+    project = gl.projects.get(args.id)
+
+
+    pipelines_to_process = []
+
+    # Use explicit pipeline IDs if provided, otherwise fetch a list
+    if args.pipeline:
+        args.count = len(args.pipeline)
+        for p_id in args.pipeline:
+            pipelines_to_process.append(project.pipelines.get(p_id))
+    else:
+        # Use an iterator to fetch the pipelines
+        pipe_iter = project.pipelines.list(iterator=True,
+                                           status=args.status,
+                                           ref=args.branch)
+        # Check each failed pipeline
+        pipelines_to_process = [next(pipe_iter) for _ in range(args.count)]
+
+    # Check each pipeline
+    for p in pipelines_to_process:
+
+        jobs = p.jobs.list(get_all=True)
+        failed_jobs = [j for j in jobs if j.status == "failed"]
+        skipped_jobs = [j for j in jobs if j.status == "skipped"]
+        manual_jobs = [j for j in jobs if j.status == "manual"]
+
+        trs = p.test_report_summary.get()
+        total = trs.total["count"]
+        skipped = trs.total["skipped"]
+        failed = trs.total["failed"]
+
+        print(f"{p.status} pipeline {p.id}, total jobs {len(jobs)}, "
+              f"skipped {len(skipped_jobs)}, "
+              f"failed {len(failed_jobs)}, ",
+              f"{total} tests, "
+              f"{skipped} skipped tests, "
+              f"{failed} failed tests")
+
+        if not args.skip_jobs:
+            for j in failed_jobs:
+                print(f"  Failed job {j.id}, {j.name}, {j.web_url}")
+
+        # It seems we can only extract failing tests from the full
+        # test report, maybe there is some way to filter it.
+
+        if failed > 0:
+            ftr = p.test_report.get()
+            failed_suites = [s for s in ftr.test_suites if
+                             s["failed_count"] > 0]
+            for fs in failed_suites:
+                name = fs["name"]
+                tests = fs["test_cases"]
+                failed_tests = [t for t in tests if t["status"] == 'failed']
+                for t in failed_tests:
+                    print(f"  Failed test {t["classname"]}, {name}, {t["name"]}")
-- 
2.47.3