[libvirt PATCH v3 5/6] ci: util: Add a registry checker for stale images

Erik Skultety posted 6 patches 4 years, 10 months ago
There is a newer version of this series
[libvirt PATCH v3 5/6] ci: util: Add a registry checker for stale images
Posted by Erik Skultety 4 years, 10 months ago
This function checks whether there are any stale Docker images in the
registry that can be purged. Since we're pulling available container
images from our GitLab registry with the 'list-images' action, it
could happen that we'd list old (already unsupported) images and make
them available for the user to consume and run a build in them.
Naturally, the build will most likely fail leaving the user confused.

Signed-off-by: Erik Skultety <eskultet@redhat.com>
---
 ci/helper  | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 ci/util.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/ci/helper b/ci/helper
index 31cf72fbdf..b5255db835 100755
--- a/ci/helper
+++ b/ci/helper
@@ -130,7 +130,7 @@ class Parser:
         refreshparser = subparsers.add_parser(
             "refresh",
             help="refresh data generated with lcitool",
-            parents=[lcitoolparser],
+            parents=[lcitoolparser, gitlabparser],
             formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         )
         refreshparser.add_argument(
@@ -139,6 +139,13 @@ class Parser:
             default=False,
             help="refresh data silently"
         )
+        refreshparser.add_argument(
+            "--check-stale",
+            action="store",
+            choices=["yes", "no"],
+            default="yes",
+            help="check for existence of stale images on the GitLab instance"
+        )
         refreshparser.set_defaults(func=Application.action_refresh)
 
     def parse(self):
@@ -287,10 +294,46 @@ class Application:
             print("Available cross-compiler container images:\n")
             print(spacing + ("\n" + spacing).join(cross))
 
+    def check_stale_images(self):
+        if self.args.check_stale != "yes" or self.args.quiet:
+            return
+
+        namespace = self.args.namespace
+        gitlab_uri = self.args.gitlab_uri
+        registry_uri = util.get_registry_uri(namespace, gitlab_uri)
+        lcitool_hosts = self.lcitool_get_hosts()
+
+        stale_images = util.get_registry_stale_images(registry_uri,
+                                                      lcitool_hosts)
+        if stale_images:
+            spacing = "\n" + 4 * " "
+            stale_fmt = [f"{k} (ID: {v})" for k, v in stale_images.items()]
+            stale_details = spacing + spacing.join(stale_fmt)
+            stale_ids = ' '.join([str(id) for id in stale_images.values()])
+            registry_uri = util.get_registry_uri(namespace, gitlab_uri)
+
+            print(f"""
+The following images are stale and can be purged from the registry:
+{stale_details}
+
+You can remove the above images over the API with the following code snippet:
+
+    $ for image_id in {stale_ids}; do \\
+          curl --request DELETE --header "PRIVATE-TOKEN: <access_token>" \\
+          {registry_uri}/$image_id \\
+      done
+
+You can generate a personal access token here:
+    {gitlab_uri}/-/profile/personal_access_tokens""")
+
     def action_refresh(self):
+        # refresh Dockerfiles and vars files
         self.refresh_containers()
         self.refresh_cirrus()
 
+        # check for stale images
+        self.check_stale_images()
+
     def run(self):
         self.args.func(self)
 
diff --git a/ci/util.py b/ci/util.py
index f9f8320276..d69c246872 100644
--- a/ci/util.py
+++ b/ci/util.py
@@ -38,3 +38,55 @@ def get_registry_images(uri: str) -> List[Dict]:
 
     # read the HTTP response and load the JSON part of it
     return json.loads(r.read().decode())
+
+
+def get_image_distro(image_name: str) -> str:
+    """
+    Extract the name of the distro in the GitLab image registry name, e.g.
+        ci-debian-9-cross-mipsel --> debian-9
+
+    :param image_name: name of the GitLab registry image
+    :return: distro name as a string
+    """
+    name_prefix = "ci-"
+    name_suffix = "-cross-"
+
+    distro = image_name[len(name_prefix):]
+
+    index = distro.find(name_suffix)
+    if index > 0:
+        distro = distro[:index]
+
+    return distro
+
+
+def get_registry_stale_images(registry_uri: str,
+                              supported_distros: List[str]) -> Dict[str, int]:
+    """
+    Check the GitLab image registry for images that we no longer support and
+    which should be deleted.
+
+    :param uri: URI pointing to a GitLab instance's image registry
+    :param supported_distros: list of hosts supported by lcitool
+    :return: dictionary formatted as: {<gitlab_image_name>: <gitlab_image_id>}
+    """
+
+    images = get_registry_images(registry_uri)
+
+    # extract distro names from the list of registry images
+    registry_distros = [get_image_distro(i["name"]) for i in images]
+
+    # - compare the distros powering the images in GitLab registry with
+    #   the list of host available from lcitool
+    # - @unsupported is a set containing the distro names which we no longer
+    #   support; we need to map these back to registry image names
+    unsupported = set(registry_distros) - set(supported_distros)
+    if unsupported:
+        stale_images = {}
+        for distro in unsupported:
+            for img in images:
+                # gitlab images are named like "ci-<distro>-<cross_arch>?"
+                if distro in img["name"]:
+                    stale_images[img["name"]] = img["id"]
+
+    return stale_images
-- 
2.29.2

Re: [libvirt PATCH v3 5/6] ci: util: Add a registry checker for stale images
Posted by Andrea Bolognani 4 years, 10 months ago
On Thu, 2021-03-18 at 09:09 +0100, Erik Skultety wrote:
> +    def check_stale_images(self):
> +        if self.args.check_stale != "yes" or self.args.quiet:
> +            return

I would prefer it if this check were to be performed in the
action_refresh() method to decide whether or not check_stale_images()
should be called.

> +            print(f"""
> +The following images are stale and can be purged from the registry:
> +{stale_details}
> +
> +You can remove the above images over the API with the following code snippet:

"You can delete the images listed above using this shell snippet:"

> +    $ for image_id in {stale_ids}; do \\
> +          curl --request DELETE --header "PRIVATE-TOKEN: <access_token>" \\
> +          {registry_uri}/$image_id \\
> +      done

This will not result in a working shell snippet being displayed. What
you want is

  f"""
     $ for image_id in {stale_ids}; do
           curl --request DELETE --header "PRIVATE-TOKEN: <access_token>" \\
                {registry_uri}/$image_id;
       done
  """

> +
> +You can generate a personal access token here:
> +    {gitlab_uri}/-/profile/personal_access_tokens""")

Empty line before the URL for readability.

I still think that we should use textwrap.dedent(), as it makes the
script much more readable at the cost of a single additional
string.replace() call:

  if stale_images:
      spacing = "\n" + 4 * " "
      stale_fmt = [f"{k} (ID: {v})" for k, v in stale_images.items()]
      stale_details = spacing.join(stale_fmt)
      stale_ids = ' '.join([str(id) for id in stale_images.values()])
      registry_uri = util.get_registry_uri(namespace, gitlab_uri)

      msg = textwrap.dedent(f"""
          The following images are stale and can be purged from the registry:

              STALE_DETAILS

          You can delete the images listed above using this shell snippet:

              $ for image_id in {stale_ids}; do
                    curl --request DELETE --header "PRIVATE-TOKEN: <access_token>" \\
                         {registry_uri}/$image_id; \\
                done

          You can generate a personal access token here:

              {gitlab_uri}/-/profile/personal_access_tokens
      """)
      print(msg.replace("STALE_DETAILS", stale_details))

>      def action_refresh(self):
> +        # refresh Dockerfiles and vars files
>          self.refresh_containers()
>          self.refresh_cirrus()
>  
> +        # check for stale images
> +        self.check_stale_images()

The method names are really quite self-explanatory, so the comments
you're introducing don't add much IMHO... I'd say leave them out.

> +def get_registry_stale_images(registry_uri: str,
> +                              supported_distros: List[str]) -> Dict[str, int]:
> +    """
> +    Check the GitLab image registry for images that we no longer support and
> +    which should be deleted.
> +
> +    :param uri: URI pointing to a GitLab instance's image registry
> +    :param supported_distros: list of hosts supported by lcitool
> +    :return: dictionary formatted as: {<gitlab_image_name>: <gitlab_image_id>}
> +    """
> +
> +    images = get_registry_images(registry_uri)
> +
> +    # extract distro names from the list of registry images
> +    registry_distros = [get_image_distro(i["name"]) for i in images]
> +
> +    # - compare the distros powering the images in GitLab registry with
> +    #   the list of host available from lcitool
> +    # - @unsupported is a set containing the distro names which we no longer
> +    #   support; we need to map these back to registry image names
> +    unsupported = set(registry_distros) - set(supported_distros)
> +    if unsupported:
> +        stale_images = {}
> +        for distro in unsupported:
> +            for img in images:
> +                # gitlab images are named like "ci-<distro>-<cross_arch>?"
> +                if distro in img["name"]:
> +                    stale_images[img["name"]] = img["id"]
> +
> +    return stale_images

As far as I can tell, this can be achieved in a much more
straightforward way with

  def get_registry_stale_images(registry_uri, supported_distros):

      images = get_registry_images(registry_uri)
      stale_images = {}

      for img in images:
          if get_image_distro(img["name"]) not in supported_distros:
              stale_images[img["name"]] = img["id"]

      return stale_images

At least from a quick test, the results appear to be the same. Am I
missing something?

-- 
Andrea Bolognani / Red Hat / Virtualization