[v2] crypto: run qcrypto_pbkdf2_count_iters in a new thread

[PATCH v2] crypto: run qcrypto_pbkdf2_count_iters in a new thread

Posted by Tiago Pasqualini 1 year, 5 months ago

CPU time accounting in the kernel has been demonstrated to have a
sawtooth pattern[1][2]. This can cause the getrusage system call to
not be as accurate as we are expecting, which can cause this calculation
to stall.

The kernel discussions shows that this inaccuracy happens when CPU time
gets big enough, so this patch changes qcrypto_pbkdf2_count_iters to run
in a fresh thread to avoid this inaccuracy. It also adds a sanity check
to fail the process if CPU time is not accounted.

[1] https://lore.kernel.org/lkml/159231011694.16989.16351419333851309713.tip-bot2@tip-bot2/
[2] https://lore.kernel.org/lkml/20221226031010.4079885-1-maxing.lan@bytedance.com/t/#m1c7f2fdc0ea742776a70fd1aa2a2e414c437f534

Resolves: #2398
Signed-off-by: Tiago Pasqualini <tiago.pasqualini@canonical.com>
---
 crypto/pbkdf.c | 53 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 46 insertions(+), 7 deletions(-)

diff --git a/crypto/pbkdf.c b/crypto/pbkdf.c
index 8d198c152c..d1c06ef3ed 100644
--- a/crypto/pbkdf.c
+++ b/crypto/pbkdf.c
@@ -19,6 +19,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/thread.h"
 #include "qapi/error.h"
 #include "crypto/pbkdf.h"
 #ifndef _WIN32
@@ -85,12 +86,28 @@ static int qcrypto_pbkdf2_get_thread_cpu(unsigned long long *val_ms,
 #endif
 }
 
-uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash,
-                                    const uint8_t *key, size_t nkey,
-                                    const uint8_t *salt, size_t nsalt,
-                                    size_t nout,
-                                    Error **errp)
+typedef struct CountItersData {
+    QCryptoHashAlgorithm hash;
+    const uint8_t *key;
+    size_t nkey;
+    const uint8_t *salt;
+    size_t nsalt;
+    size_t nout;
+    uint64_t iterations;
+    Error **errp;
+} CountItersData;
+
+static void *threaded_qcrypto_pbkdf2_count_iters(void *data)
 {
+    CountItersData *iters_data = (CountItersData *) data;
+    QCryptoHashAlgorithm hash = iters_data->hash;
+    const uint8_t *key = iters_data->key;
+    size_t nkey = iters_data->nkey;
+    const uint8_t *salt = iters_data->salt;
+    size_t nsalt = iters_data->nsalt;
+    size_t nout = iters_data->nout;
+    Error **errp = iters_data->errp;
+
     uint64_t ret = -1;
     g_autofree uint8_t *out = g_new(uint8_t, nout);
     uint64_t iterations = (1 << 15);
@@ -114,7 +131,10 @@ uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash,
 
         delta_ms = end_ms - start_ms;
 
-        if (delta_ms > 500) {
+        if (delta_ms == 0) { /* sanity check */
+            error_setg(errp, "Unable to get accurate CPU usage");
+            goto cleanup;
+        } else if (delta_ms > 500) {
             break;
         } else if (delta_ms < 100) {
             iterations = iterations * 10;
@@ -129,5 +149,24 @@ uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash,
 
  cleanup:
     memset(out, 0, nout);
-    return ret;
+    iters_data->iterations = ret;
+    return NULL;
+}
+
+uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash,
+                                    const uint8_t *key, size_t nkey,
+                                    const uint8_t *salt, size_t nsalt,
+                                    size_t nout,
+                                    Error **errp)
+{
+    CountItersData data = {
+        hash, key, nkey, salt, nsalt, nout, 0, errp
+    };
+    QemuThread thread;
+
+    qemu_thread_create(&thread, "pbkdf2", threaded_qcrypto_pbkdf2_count_iters,
+                       &data, QEMU_THREAD_JOINABLE);
+    qemu_thread_join(&thread);
+
+    return data.iterations;
 }
-- 
2.43.0

Re: [PATCH v2] crypto: run qcrypto_pbkdf2_count_iters in a new thread

Posted by Thomas Huth 1 year, 2 months ago

On 05/09/2024 01.52, Tiago Pasqualini wrote:
> CPU time accounting in the kernel has been demonstrated to have a
> sawtooth pattern[1][2]. This can cause the getrusage system call to
> not be as accurate as we are expecting, which can cause this calculation
> to stall.
> 
> The kernel discussions shows that this inaccuracy happens when CPU time
> gets big enough, so this patch changes qcrypto_pbkdf2_count_iters to run
> in a fresh thread to avoid this inaccuracy. It also adds a sanity check
> to fail the process if CPU time is not accounted.
> 
> [1] https://lore.kernel.org/lkml/159231011694.16989.16351419333851309713.tip-bot2@tip-bot2/
> [2] https://lore.kernel.org/lkml/20221226031010.4079885-1-maxing.lan@bytedance.com/t/#m1c7f2fdc0ea742776a70fd1aa2a2e414c437f534
> 
> Resolves: #2398
> Signed-off-by: Tiago Pasqualini <tiago.pasqualini@canonical.com>
> ---
>   crypto/pbkdf.c | 53 +++++++++++++++++++++++++++++++++++++++++++-------
>   1 file changed, 46 insertions(+), 7 deletions(-)
> 
> diff --git a/crypto/pbkdf.c b/crypto/pbkdf.c
> index 8d198c152c..d1c06ef3ed 100644
> --- a/crypto/pbkdf.c
> +++ b/crypto/pbkdf.c
> @@ -19,6 +19,7 @@
>    */
>   
>   #include "qemu/osdep.h"
> +#include "qemu/thread.h"
>   #include "qapi/error.h"
>   #include "crypto/pbkdf.h"
>   #ifndef _WIN32
> @@ -85,12 +86,28 @@ static int qcrypto_pbkdf2_get_thread_cpu(unsigned long long *val_ms,
>   #endif
>   }
>   
> -uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash,
> -                                    const uint8_t *key, size_t nkey,
> -                                    const uint8_t *salt, size_t nsalt,
> -                                    size_t nout,
> -                                    Error **errp)
> +typedef struct CountItersData {
> +    QCryptoHashAlgorithm hash;
> +    const uint8_t *key;
> +    size_t nkey;
> +    const uint8_t *salt;
> +    size_t nsalt;
> +    size_t nout;
> +    uint64_t iterations;
> +    Error **errp;
> +} CountItersData;
> +
> +static void *threaded_qcrypto_pbkdf2_count_iters(void *data)
>   {
> +    CountItersData *iters_data = (CountItersData *) data;
> +    QCryptoHashAlgorithm hash = iters_data->hash;
> +    const uint8_t *key = iters_data->key;
> +    size_t nkey = iters_data->nkey;
> +    const uint8_t *salt = iters_data->salt;
> +    size_t nsalt = iters_data->nsalt;
> +    size_t nout = iters_data->nout;
> +    Error **errp = iters_data->errp;
> +
>       uint64_t ret = -1;
>       g_autofree uint8_t *out = g_new(uint8_t, nout);
>       uint64_t iterations = (1 << 15);
> @@ -114,7 +131,10 @@ uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash,
>   
>           delta_ms = end_ms - start_ms;
>   
> -        if (delta_ms > 500) {
> +        if (delta_ms == 0) { /* sanity check */
> +            error_setg(errp, "Unable to get accurate CPU usage");

  Hi!

While running "make check -j12 SPEED=slow" on a s390x host, I got:

   3/657 qemu:unit / test-crypto-block 
          ERROR            0.27s   killed by signal 6 SIGABRT
 >>> G_TEST_SLOW=1 
UBSAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1:print_stacktrace=1 
MALLOC_PERTURB_=150 G_TEST_BUILDDIR=/home/thuth/s390x/qemu/tests/unit 
MESON_TEST_ITERATION=1 
MSAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1:print_stacktrace=1 
ASAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1 
G_TEST_SRCDIR=/home/thuth/devel/qemu/tests/unit 
/home/thuth/s390x/qemu/tests/unit/test-crypto-block --tap -k
――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― 
✀ 
―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
stderr:
Unexpected error in threaded_qcrypto_pbkdf2_count_iters() at 
../../devel/qemu/crypto/pbkdf.c:135:
/home/thuth/s390x/qemu/tests/unit/test-crypto-block: Unable to get accurate 
CPU usage

(test program exited with status code -6)

TAP parsing error: Too few tests run (expected 27, got 1)
――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――

Could we handle this more gracefully, please? I don't think that this should 
fail the unit tests, should it?

  Thomas


> +            goto cleanup;
> +        } else if (delta_ms > 500) {
>               break;
>           } else if (delta_ms < 100) {
>               iterations = iterations * 10;
> @@ -129,5 +149,24 @@ uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash,
>   
>    cleanup:
>       memset(out, 0, nout);
> -    return ret;
> +    iters_data->iterations = ret;
> +    return NULL;
> +}
> +
> +uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash,
> +                                    const uint8_t *key, size_t nkey,
> +                                    const uint8_t *salt, size_t nsalt,
> +                                    size_t nout,
> +                                    Error **errp)
> +{
> +    CountItersData data = {
> +        hash, key, nkey, salt, nsalt, nout, 0, errp
> +    };
> +    QemuThread thread;
> +
> +    qemu_thread_create(&thread, "pbkdf2", threaded_qcrypto_pbkdf2_count_iters,
> +                       &data, QEMU_THREAD_JOINABLE);
> +    qemu_thread_join(&thread);
> +
> +    return data.iterations;
>   }

Re: [PATCH v2] crypto: run qcrypto_pbkdf2_count_iters in a new thread

Posted by Daniel P. Berrangé 1 year, 2 months ago

On Tue, Dec 10, 2024 at 06:29:49PM +0100, Thomas Huth wrote:
> On 05/09/2024 01.52, Tiago Pasqualini wrote:
> > CPU time accounting in the kernel has been demonstrated to have a
> > sawtooth pattern[1][2]. This can cause the getrusage system call to
> > not be as accurate as we are expecting, which can cause this calculation
> > to stall.
> > 
> > The kernel discussions shows that this inaccuracy happens when CPU time
> > gets big enough, so this patch changes qcrypto_pbkdf2_count_iters to run
> > in a fresh thread to avoid this inaccuracy. It also adds a sanity check
> > to fail the process if CPU time is not accounted.
> > 
> > [1] https://lore.kernel.org/lkml/159231011694.16989.16351419333851309713.tip-bot2@tip-bot2/
> > [2] https://lore.kernel.org/lkml/20221226031010.4079885-1-maxing.lan@bytedance.com/t/#m1c7f2fdc0ea742776a70fd1aa2a2e414c437f534
> > 
> > Resolves: #2398
> > Signed-off-by: Tiago Pasqualini <tiago.pasqualini@canonical.com>
> > ---
> >   crypto/pbkdf.c | 53 +++++++++++++++++++++++++++++++++++++++++++-------
> >   1 file changed, 46 insertions(+), 7 deletions(-)

> > +static void *threaded_qcrypto_pbkdf2_count_iters(void *data)
> >   {
> > +    CountItersData *iters_data = (CountItersData *) data;
> > +    QCryptoHashAlgorithm hash = iters_data->hash;
> > +    const uint8_t *key = iters_data->key;
> > +    size_t nkey = iters_data->nkey;
> > +    const uint8_t *salt = iters_data->salt;
> > +    size_t nsalt = iters_data->nsalt;
> > +    size_t nout = iters_data->nout;
> > +    Error **errp = iters_data->errp;
> > +
> >       uint64_t ret = -1;
> >       g_autofree uint8_t *out = g_new(uint8_t, nout);
> >       uint64_t iterations = (1 << 15);
> > @@ -114,7 +131,10 @@ uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash,
> >           delta_ms = end_ms - start_ms;
> > -        if (delta_ms > 500) {
> > +        if (delta_ms == 0) { /* sanity check */
> > +            error_setg(errp, "Unable to get accurate CPU usage");
> 
>  Hi!
> 
> While running "make check -j12 SPEED=slow" on a s390x host, I got:
> 
>   3/657 qemu:unit / test-crypto-block          ERROR            0.27s
> killed by signal 6 SIGABRT
> >>> G_TEST_SLOW=1 UBSAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1:print_stacktrace=1
> MALLOC_PERTURB_=150 G_TEST_BUILDDIR=/home/thuth/s390x/qemu/tests/unit
> MESON_TEST_ITERATION=1 MSAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1:print_stacktrace=1
> ASAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1
> G_TEST_SRCDIR=/home/thuth/devel/qemu/tests/unit
> /home/thuth/s390x/qemu/tests/unit/test-crypto-block --tap -k
> ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
> ✀ ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
> stderr:
> Unexpected error in threaded_qcrypto_pbkdf2_count_iters() at
> ../../devel/qemu/crypto/pbkdf.c:135:
> /home/thuth/s390x/qemu/tests/unit/test-crypto-block: Unable to get accurate
> CPU usage
> 
> (test program exited with status code -6)
> 
> TAP parsing error: Too few tests run (expected 27, got 1)
> ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
> 
> Could we handle this more gracefully, please? I don't think that this should
> fail the unit tests, should it?

This is a scenario that is not supposed to ever happen. It means
we've had two calls to getrusage which returns exactly the same
CPU time. This should be impossible, as we've definitely burnt
lots of CPU between these two calls. Is this reliably reproducable
only on s390x ?

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

Re: [PATCH v2] crypto: run qcrypto_pbkdf2_count_iters in a new thread

Posted by Daniel P. Berrangé 1 year, 5 months ago

On Wed, Sep 04, 2024 at 08:52:30PM -0300, Tiago Pasqualini wrote:
> CPU time accounting in the kernel has been demonstrated to have a
> sawtooth pattern[1][2]. This can cause the getrusage system call to
> not be as accurate as we are expecting, which can cause this calculation
> to stall.
> 
> The kernel discussions shows that this inaccuracy happens when CPU time
> gets big enough, so this patch changes qcrypto_pbkdf2_count_iters to run
> in a fresh thread to avoid this inaccuracy. It also adds a sanity check
> to fail the process if CPU time is not accounted.
> 
> [1] https://lore.kernel.org/lkml/159231011694.16989.16351419333851309713.tip-bot2@tip-bot2/
> [2] https://lore.kernel.org/lkml/20221226031010.4079885-1-maxing.lan@bytedance.com/t/#m1c7f2fdc0ea742776a70fd1aa2a2e414c437f534
> 
> Resolves: #2398
> Signed-off-by: Tiago Pasqualini <tiago.pasqualini@canonical.com>
> ---
>  crypto/pbkdf.c | 53 +++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 46 insertions(+), 7 deletions(-)

Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>

and queued.

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|