[PATCH v5 14/44] tests/functional: add a module for handling asset download & caching

Thomas Huth posted 44 patches 2 weeks, 5 days ago
[PATCH v5 14/44] tests/functional: add a module for handling asset download & caching
Posted by Thomas Huth 2 weeks, 5 days ago
From: Daniel P. Berrangé <berrange@redhat.com>

The 'Asset' class is a simple module that declares a downloadable
asset that can be cached locally. Downloads are stored in the user's
home dir at ~/.cache/qemu/download, using a sha256 sum of the URL.

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
[thuth: Drop sha1 support, use hash on file content for naming instead of URL,
        add the possibility to specify the cache dir via environment variable]
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
 tests/functional/qemu_test/__init__.py |  1 +
 tests/functional/qemu_test/asset.py    | 97 ++++++++++++++++++++++++++
 2 files changed, 98 insertions(+)
 create mode 100644 tests/functional/qemu_test/asset.py

diff --git a/tests/functional/qemu_test/__init__.py b/tests/functional/qemu_test/__init__.py
index 2f1e0bc70d..db05c8f412 100644
--- a/tests/functional/qemu_test/__init__.py
+++ b/tests/functional/qemu_test/__init__.py
@@ -6,6 +6,7 @@
 # later.  See the COPYING file in the top-level directory.
 
 
+from .asset import Asset
 from .config import BUILD_DIR
 from .cmd import has_cmd, has_cmds, run_cmd, is_readable_executable_file, \
     interrupt_interactive_console_until_pattern, wait_for_console_pattern, \
diff --git a/tests/functional/qemu_test/asset.py b/tests/functional/qemu_test/asset.py
new file mode 100644
index 0000000000..c0e675d847
--- /dev/null
+++ b/tests/functional/qemu_test/asset.py
@@ -0,0 +1,97 @@
+# Test utilities for fetching & caching assets
+#
+# Copyright 2024 Red Hat, Inc.
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+import hashlib
+import logging
+import os
+import subprocess
+import urllib.request
+from pathlib import Path
+from shutil import copyfileobj
+
+
+# Instances of this class must be declared as class level variables
+# starting with a name "ASSET_". This enables the pre-caching logic
+# to easily find all referenced assets and download them prior to
+# execution of the tests.
+class Asset:
+
+    def __init__(self, url, hashsum):
+        self.url = url
+        self.hash = hashsum
+        cache_dir_env = os.getenv('QEMU_TEST_CACHE_DIR')
+        if cache_dir_env:
+            self.cache_dir = Path(cache_dir_env, "download")
+        else:
+            self.cache_dir = Path(Path("~").expanduser(),
+                                  ".cache", "qemu", "download")
+        self.cache_file = Path(self.cache_dir, hashsum)
+        self.log = logging.getLogger('qemu-test')
+
+    def __repr__(self):
+        return "Asset: url=%s hash=%s cache=%s" % (
+            self.url, self.hash, self.cache_file)
+
+    def _check(self, cache_file):
+        if self.hash is None:
+            return True
+        if len(self.hash) == 64:
+            sum_prog = 'sha256sum'
+        elif len(self.hash) == 128:
+            sum_prog = 'sha512sum'
+        else:
+            raise Exception("unknown hash type")
+
+        checksum = subprocess.check_output(
+            [sum_prog, str(cache_file)]).split()[0]
+        return self.hash == checksum.decode("utf-8")
+
+    def valid(self):
+        return self.cache_file.exists() and self._check(self.cache_file)
+
+    def fetch(self):
+        if not self.cache_dir.exists():
+            self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+        if self.valid():
+            self.log.debug("Using cached asset %s for %s",
+                           self.cache_file, self.url)
+            return str(self.cache_file)
+
+        self.log.info("Downloading %s to %s...", self.url, self.cache_file)
+        tmp_cache_file = self.cache_file.with_suffix(".download")
+
+        try:
+            resp = urllib.request.urlopen(self.url)
+        except Exception as e:
+            self.log.error("Unable to download %s: %s", self.url, e)
+            raise
+
+        try:
+            with tmp_cache_file.open("wb+") as dst:
+                copyfileobj(resp, dst)
+        except:
+            tmp_cache_file.unlink()
+            raise
+        try:
+            # Set these just for informational purposes
+            os.setxattr(str(tmp_cache_file), "user.qemu-asset-url",
+                        self.url.encode('utf8'))
+            os.setxattr(str(tmp_cache_file), "user.qemu-asset-hash",
+                        self.hash.encode('utf8'))
+        except Exception as e:
+            self.log.debug("Unable to set xattr on %s: %s", tmp_cache_file, e)
+            pass
+
+        if not self._check(tmp_cache_file):
+            tmp_cache_file.unlink()
+            raise Exception("Hash of %s does not match %s" %
+                            (self.url, self.hash))
+        tmp_cache_file.replace(self.cache_file)
+
+        self.log.info("Cached %s at %s" % (self.url, self.cache_file))
+        return str(self.cache_file)
-- 
2.46.0


Re: [PATCH v5 14/44] tests/functional: add a module for handling asset download & caching
Posted by Philippe Mathieu-Daudé 2 weeks, 2 days ago
On 30/8/24 15:38, Thomas Huth wrote:
> From: Daniel P. Berrangé <berrange@redhat.com>
> 
> The 'Asset' class is a simple module that declares a downloadable
> asset that can be cached locally. Downloads are stored in the user's
> home dir at ~/.cache/qemu/download, using a sha256 sum of the URL.
> 
> Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> [thuth: Drop sha1 support, use hash on file content for naming instead of URL,
>          add the possibility to specify the cache dir via environment variable]
> Signed-off-by: Thomas Huth <thuth@redhat.com>
> ---
>   tests/functional/qemu_test/__init__.py |  1 +
>   tests/functional/qemu_test/asset.py    | 97 ++++++++++++++++++++++++++
>   2 files changed, 98 insertions(+)
>   create mode 100644 tests/functional/qemu_test/asset.py
> 
> diff --git a/tests/functional/qemu_test/__init__.py b/tests/functional/qemu_test/__init__.py
> index 2f1e0bc70d..db05c8f412 100644
> --- a/tests/functional/qemu_test/__init__.py
> +++ b/tests/functional/qemu_test/__init__.py
> @@ -6,6 +6,7 @@
>   # later.  See the COPYING file in the top-level directory.
>   
>   
> +from .asset import Asset
>   from .config import BUILD_DIR
>   from .cmd import has_cmd, has_cmds, run_cmd, is_readable_executable_file, \
>       interrupt_interactive_console_until_pattern, wait_for_console_pattern, \
> diff --git a/tests/functional/qemu_test/asset.py b/tests/functional/qemu_test/asset.py
> new file mode 100644
> index 0000000000..c0e675d847
> --- /dev/null
> +++ b/tests/functional/qemu_test/asset.py
> @@ -0,0 +1,97 @@
> +# Test utilities for fetching & caching assets
> +#
> +# Copyright 2024 Red Hat, Inc.
> +#
> +# This work is licensed under the terms of the GNU GPL, version 2 or
> +# later.  See the COPYING file in the top-level directory.
> +
> +import hashlib
> +import logging
> +import os
> +import subprocess
> +import urllib.request
> +from pathlib import Path
> +from shutil import copyfileobj
> +
> +
> +# Instances of this class must be declared as class level variables
> +# starting with a name "ASSET_". This enables the pre-caching logic
> +# to easily find all referenced assets and download them prior to
> +# execution of the tests.
> +class Asset:
> +
> +    def __init__(self, url, hashsum):
> +        self.url = url
> +        self.hash = hashsum
> +        cache_dir_env = os.getenv('QEMU_TEST_CACHE_DIR')
> +        if cache_dir_env:
> +            self.cache_dir = Path(cache_dir_env, "download")
> +        else:
> +            self.cache_dir = Path(Path("~").expanduser(),
> +                                  ".cache", "qemu", "download")

Maybe we should use hashsum[:2] as intermediate bucket directory.
(Suggestion now to not have to move the files again later)

> +        self.cache_file = Path(self.cache_dir, hashsum)
> +        self.log = logging.getLogger('qemu-test')
> +
> +    def __repr__(self):
> +        return "Asset: url=%s hash=%s cache=%s" % (
> +            self.url, self.hash, self.cache_file)
> +
> +    def _check(self, cache_file):
> +        if self.hash is None:
> +            return True
> +        if len(self.hash) == 64:
> +            sum_prog = 'sha256sum'
> +        elif len(self.hash) == 128:
> +            sum_prog = 'sha512sum'
> +        else:
> +            raise Exception("unknown hash type")
> +
> +        checksum = subprocess.check_output(
> +            [sum_prog, str(cache_file)]).split()[0]
> +        return self.hash == checksum.decode("utf-8")
> +
> +    def valid(self):
> +        return self.cache_file.exists() and self._check(self.cache_file)
> +
> +    def fetch(self):
> +        if not self.cache_dir.exists():
> +            self.cache_dir.mkdir(parents=True, exist_ok=True)
> +
> +        if self.valid():
> +            self.log.debug("Using cached asset %s for %s",
> +                           self.cache_file, self.url)
> +            return str(self.cache_file)
> +
> +        self.log.info("Downloading %s to %s...", self.url, self.cache_file)
> +        tmp_cache_file = self.cache_file.with_suffix(".download")
> +
> +        try:
> +            resp = urllib.request.urlopen(self.url)
> +        except Exception as e:
> +            self.log.error("Unable to download %s: %s", self.url, e)
> +            raise
> +
> +        try:
> +            with tmp_cache_file.open("wb+") as dst:
> +                copyfileobj(resp, dst)
> +        except:
> +            tmp_cache_file.unlink()
> +            raise
> +        try:
> +            # Set these just for informational purposes
> +            os.setxattr(str(tmp_cache_file), "user.qemu-asset-url",
> +                        self.url.encode('utf8'))
> +            os.setxattr(str(tmp_cache_file), "user.qemu-asset-hash",
> +                        self.hash.encode('utf8'))
> +        except Exception as e:
> +            self.log.debug("Unable to set xattr on %s: %s", tmp_cache_file, e)
> +            pass
> +
> +        if not self._check(tmp_cache_file):
> +            tmp_cache_file.unlink()
> +            raise Exception("Hash of %s does not match %s" %
> +                            (self.url, self.hash))
> +        tmp_cache_file.replace(self.cache_file)
> +
> +        self.log.info("Cached %s at %s" % (self.url, self.cache_file))
> +        return str(self.cache_file)