[libvirt] [PATCH v5] qemu: Introduce state_lock_timeout to qemu.conf

Yi Wang posted 1 patch 5 years, 6 months ago
Test syntax-check passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/1536203380-39493-1-git-send-email-wang.yi59@zte.com.cn
There is a newer version of this series
src/qemu/libvirtd_qemu.aug         |  1 +
src/qemu/qemu.conf                 | 10 ++++++++++
src/qemu/qemu_conf.c               | 15 +++++++++++++++
src/qemu/qemu_conf.h               |  2 ++
src/qemu/qemu_domain.c             |  5 +----
src/qemu/test_libvirtd_qemu.aug.in |  1 +
6 files changed, 30 insertions(+), 4 deletions(-)
[libvirt] [PATCH v5] qemu: Introduce state_lock_timeout to qemu.conf
Posted by Yi Wang 5 years, 6 months ago
When doing some job that holds the state lock for a long time,
we may come across the error:
"Timed out during operation: cannot acquire state change lock"
Well, sometimes it's not a problem and users want to continue
to wait, and this patch allows users to decide how long they
can wait for the state lock.

Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
Reviewed-by: Xi Xu <xu.xi8@zte.com.cn>
---
changes in v5:
- adjust position of state lock in aug file
- fix state lock time got from conf file from milliseconds to
  seconds

changes in v4:
- fix syntax-check error

changes in v3:
- add user-friendly description and nb of state lock
- check validity of stateLockTimeout

changes in v2:
- change default value to 30 in qemu.conf
- set the default value in virQEMUDriverConfigNew()
---
 src/qemu/libvirtd_qemu.aug         |  1 +
 src/qemu/qemu.conf                 | 10 ++++++++++
 src/qemu/qemu_conf.c               | 15 +++++++++++++++
 src/qemu/qemu_conf.h               |  2 ++
 src/qemu/qemu_domain.c             |  5 +----
 src/qemu/test_libvirtd_qemu.aug.in |  1 +
 6 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug
index ddc4bbf..a5601e1 100644
--- a/src/qemu/libvirtd_qemu.aug
+++ b/src/qemu/libvirtd_qemu.aug
@@ -100,6 +100,7 @@ module Libvirtd_qemu =
                  | str_entry "lock_manager"
 
    let rpc_entry = int_entry "max_queued"
+                 | int_entry "state_lock_timeout"
                  | int_entry "keepalive_interval"
                  | int_entry "keepalive_count"
 
diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf
index cd57b3c..8920a1a 100644
--- a/src/qemu/qemu.conf
+++ b/src/qemu/qemu.conf
@@ -667,6 +667,16 @@
 #
 #max_queued = 0
 
+
+# When two or more threads want to work with the same domain they use a
+# job lock to mutually exclude each other. However, waiting for the lock
+# is limited up to state_lock_timeout seconds.
+# NB, strong recommendation to set the timeout longer than 30 seconds.
+#
+# Default is 30
+#
+#state_lock_timeout = 60
+
 ###################################################################
 # Keepalive protocol:
 # This allows qemu driver to detect broken connections to remote
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index a4f545e..012f4d1 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -129,6 +129,9 @@ void qemuDomainCmdlineDefFree(qemuDomainCmdlineDefPtr def)
 #endif
 
 
+/* Give up waiting for mutex after 30 seconds */
+#define QEMU_JOB_WAIT_TIME (1000ull * 30)
+
 virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
 {
     virQEMUDriverConfigPtr cfg;
@@ -346,6 +349,8 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
     cfg->glusterDebugLevel = 4;
     cfg->stdioLogD = true;
 
+    cfg->stateLockTimeout = QEMU_JOB_WAIT_TIME;
+
     if (!(cfg->namespaces = virBitmapNew(QEMU_DOMAIN_NS_LAST)))
         goto error;
 
@@ -863,6 +868,10 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg,
     if (virConfGetValueUInt(conf, "keepalive_count", &cfg->keepAliveCount) < 0)
         goto cleanup;
 
+    if (virConfGetValueInt(conf, "state_lock_timeout", &cfg->stateLockTimeout) < 0)
+        goto cleanup;
+    cfg->stateLockTimeout *= 1000;
+
     if (virConfGetValueInt(conf, "seccomp_sandbox", &cfg->seccompSandbox) < 0)
         goto cleanup;
 
@@ -1055,6 +1064,12 @@ virQEMUDriverConfigValidate(virQEMUDriverConfigPtr cfg)
         return -1;
     }
 
+    if (cfg->stateLockTimeout <= 0) {
+        virReportError(VIR_ERR_CONF_SYNTAX, "%s",
+                       _("state_lock_timeout should larger than zero"));
+        return -1;
+    }
+
     return 0;
 }
 
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index a8d84ef..97cf2e1 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -190,6 +190,8 @@ struct _virQEMUDriverConfig {
     int keepAliveInterval;
     unsigned int keepAliveCount;
 
+    int stateLockTimeout;
+
     int seccompSandbox;
 
     char *migrateHost;
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 886e3fb..5a2ca52 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -6652,9 +6652,6 @@ qemuDomainObjCanSetJob(qemuDomainObjPrivatePtr priv,
              priv->job.agentActive == QEMU_AGENT_JOB_NONE));
 }
 
-/* Give up waiting for mutex after 30 seconds */
-#define QEMU_JOB_WAIT_TIME (1000ull * 30)
-
 /**
  * qemuDomainObjBeginJobInternal:
  * @driver: qemu driver
@@ -6714,7 +6711,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
     }
 
     priv->jobs_queued++;
-    then = now + QEMU_JOB_WAIT_TIME;
+    then = now + cfg->stateLockTimeout;
 
  retry:
     if ((!async && job != QEMU_JOB_DESTROY) &&
diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in
index f1e8806..8e072d0 100644
--- a/src/qemu/test_libvirtd_qemu.aug.in
+++ b/src/qemu/test_libvirtd_qemu.aug.in
@@ -82,6 +82,7 @@ module Test_libvirtd_qemu =
 { "relaxed_acs_check" = "1" }
 { "lock_manager" = "lockd" }
 { "max_queued" = "0" }
+{ "state_lock_timeout" = "60" }
 { "keepalive_interval" = "5" }
 { "keepalive_count" = "5" }
 { "seccomp_sandbox" = "1" }
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v5] qemu: Introduce state_lock_timeout to qemu.conf
Posted by John Ferlan 5 years, 6 months ago

On 09/05/2018 11:09 PM, Yi Wang wrote:
> When doing some job holding state lock for a long time,
> we may come across error:

blank line

> "Timed out during operation: cannot acquire state change lock"

blank line

> Well, sometimes it's not a problem and users want to continue
> to wait, and this patch allow users decide how long time they
> can wait the state lock.

As I noted in a previous review... With this patch it's not the
individual user or command that can decide how long to wait, rather it's
set by qemu.conf policy. The duration of the wait just becomes
customize-able. Not that it's any different from the current
implementation; since the value is set in qemuDomainObjBeginJobInternal,
there's no distinguishing between job, asyncjob, and agentjob. There's
also no attempt to allow changing the value via virt-admin.

A "strong recommendation" of longer than 30 seconds doesn't provide much
guidance. The setting of value generally depends on the configuration
and what causes an API to hold the lock for longer than 30 seconds.
That's probably limited to primarily virDomainListGetStats and/or
virConnectGetAllDomainStats. There's other factors in play including CPU
speed, CPU quota allowed, network throughput/latency, disk speed, etc.
Tough to create a value that works for every command/purpose.

Ironically (perhaps) allowing a value of 0 would essentially make it so
no one waited - such as how qemuDomainObjBeginJobNowait operates
essentially. OTOH, in order to have everything wait "as long as
possible" passing -1 would be a lot easier than providing 2147483647 (or
if properly typed to uint, then 4294967295U).

In the long run, regardless of what value is chosen it may never be
"long enough". Similarly how does one know what is causing the "wait". I
have doubts that "typically impatient" users would wait more than a few
seconds before hitting ^c. Since there's no indication that something is
waiting or hung, how is one to truly understand the difference between
the two.

Still, I have a feeling something similar could be done for those
commands that are primarily QUERY jobs for which the consumer desires to
override the default of 30 seconds via a new option controlled by a
flag. Commands could add the --nowait flag just as easily as a --wait
NN. To me that would be far better improvement than a one time config
adjustment for everyone.

Not that the MODIFY jobs are any less important vis-a-vis waiting, but
those I would think would want a shorter wait time. How comfortable do
you feel when you go to modify something simple only to get no response.
Leaves one wondering, did my change take effect or did my change cause
some sort of problem?

Anyway, while not a full on objection, I figured I'd voice my preference
for a different solution and see where it takes things.

John


> 
> Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
> Reviewed-by: Xi Xu <xu.xi8@zte.com.cn>
> ---
> changes in v5:
> - adjust position of state lock in aug file
> - fix state lock time got from conf file from milliseconds to
>   seconds
> 
> changes in v4:
> - fix syntax-check error
> 
> changes in v3:
> - add user-friendly description and nb of state lock
> - check validity of stateLockTimeout
> 
> changes in v2:
> - change default value to 30 in qemu.conf
> - set the default value in virQEMUDriverConfigNew()
> ---
>  src/qemu/libvirtd_qemu.aug         |  1 +
>  src/qemu/qemu.conf                 | 10 ++++++++++
>  src/qemu/qemu_conf.c               | 15 +++++++++++++++
>  src/qemu/qemu_conf.h               |  2 ++
>  src/qemu/qemu_domain.c             |  5 +----
>  src/qemu/test_libvirtd_qemu.aug.in |  1 +
>  6 files changed, 30 insertions(+), 4 deletions(-)
> 
> diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug
> index ddc4bbf..a5601e1 100644
> --- a/src/qemu/libvirtd_qemu.aug
> +++ b/src/qemu/libvirtd_qemu.aug
> @@ -100,6 +100,7 @@ module Libvirtd_qemu =
>                   | str_entry "lock_manager"
>  
>     let rpc_entry = int_entry "max_queued"
> +                 | int_entry "state_lock_timeout"
>                   | int_entry "keepalive_interval"
>                   | int_entry "keepalive_count"
>  
> diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf
> index cd57b3c..8920a1a 100644
> --- a/src/qemu/qemu.conf
> +++ b/src/qemu/qemu.conf
> @@ -667,6 +667,16 @@
>  #
>  #max_queued = 0
>  
> +
> +# When two or more threads want to work with the same domain they use a
> +# job lock to mutually exclude each other. However, waiting for the lock
> +# is limited up to state_lock_timeout seconds.
> +# NB, strong recommendation to set the timeout longer than 30 seconds.
> +#
> +# Default is 30
> +#
> +#state_lock_timeout = 60
> +
>  ###################################################################
>  # Keepalive protocol:
>  # This allows qemu driver to detect broken connections to remote
> diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
> index a4f545e..012f4d1 100644
> --- a/src/qemu/qemu_conf.c
> +++ b/src/qemu/qemu_conf.c
> @@ -129,6 +129,9 @@ void qemuDomainCmdlineDefFree(qemuDomainCmdlineDefPtr def)
>  #endif
>  
>  
> +/* Give up waiting for mutex after 30 seconds */
> +#define QEMU_JOB_WAIT_TIME (1000ull * 30)
> +
>  virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
>  {
>      virQEMUDriverConfigPtr cfg;
> @@ -346,6 +349,8 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
>      cfg->glusterDebugLevel = 4;
>      cfg->stdioLogD = true;
>  
> +    cfg->stateLockTimeout = QEMU_JOB_WAIT_TIME;
> +
>      if (!(cfg->namespaces = virBitmapNew(QEMU_DOMAIN_NS_LAST)))
>          goto error;
>  
> @@ -863,6 +868,10 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg,
>      if (virConfGetValueUInt(conf, "keepalive_count", &cfg->keepAliveCount) < 0)
>          goto cleanup;
>  
> +    if (virConfGetValueInt(conf, "state_lock_timeout", &cfg->stateLockTimeout) < 0)
> +        goto cleanup;
> +    cfg->stateLockTimeout *= 1000;
> +
>      if (virConfGetValueInt(conf, "seccomp_sandbox", &cfg->seccompSandbox) < 0)
>          goto cleanup;
>  
> @@ -1055,6 +1064,12 @@ virQEMUDriverConfigValidate(virQEMUDriverConfigPtr cfg)
>          return -1;
>      }
>  
> +    if (cfg->stateLockTimeout <= 0) {

If this were an unsigned int, then the < 0 check wouldn't necessary.

So then the question becomes, should 0 (zero) be special cased as wait
forever.

> +        virReportError(VIR_ERR_CONF_SYNTAX, "%s",
> +                       _("state_lock_timeout should larger than zero"));

If 0 were left as invalid, then s/should/must be/

> +        return -1;
> +    }
> +
>      return 0;
>  }
>  
> diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
> index a8d84ef..97cf2e1 100644
> --- a/src/qemu/qemu_conf.h
> +++ b/src/qemu/qemu_conf.h
> @@ -190,6 +190,8 @@ struct _virQEMUDriverConfig {
>      int keepAliveInterval;
>      unsigned int keepAliveCount;
>  
> +    int stateLockTimeout;
> +
>      int seccompSandbox;
>  
>      char *migrateHost;
> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 886e3fb..5a2ca52 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -6652,9 +6652,6 @@ qemuDomainObjCanSetJob(qemuDomainObjPrivatePtr priv,
>               priv->job.agentActive == QEMU_AGENT_JOB_NONE));
>  }
>  
> -/* Give up waiting for mutex after 30 seconds */
> -#define QEMU_JOB_WAIT_TIME (1000ull * 30)
> -
>  /**
>   * qemuDomainObjBeginJobInternal:
>   * @driver: qemu driver
> @@ -6714,7 +6711,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
>      }
>  
>      priv->jobs_queued++;
> -    then = now + QEMU_JOB_WAIT_TIME;
> +    then = now + cfg->stateLockTimeout;
>  
>   retry:
>      if ((!async && job != QEMU_JOB_DESTROY) &&
> diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in
> index f1e8806..8e072d0 100644
> --- a/src/qemu/test_libvirtd_qemu.aug.in
> +++ b/src/qemu/test_libvirtd_qemu.aug.in
> @@ -82,6 +82,7 @@ module Test_libvirtd_qemu =
>  { "relaxed_acs_check" = "1" }
>  { "lock_manager" = "lockd" }
>  { "max_queued" = "0" }
> +{ "state_lock_timeout" = "60" }
>  { "keepalive_interval" = "5" }
>  { "keepalive_count" = "5" }
>  { "seccomp_sandbox" = "1" }
> 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v5] qemu: Introduce state_lock_timeout to qemu.conf
Posted by Michal Privoznik 5 years, 6 months ago
On 09/10/2018 10:22 PM, John Ferlan wrote:
> 
> 
> On 09/05/2018 11:09 PM, Yi Wang wrote:
>> When doing some job holding state lock for a long time,
>> we may come across error:
> 
> blank line
> 
>> "Timed out during operation: cannot acquire state change lock"
> 
> blank line
> 
>> Well, sometimes it's not a problem and users want to continue
>> to wait, and this patch allow users decide how long time they
>> can wait the state lock.
> 
> As I noted in a previous review... With this patch it's not the
> individual user or command that can decide how long to wait, rather it's
> set by qemu.conf policy. The duration of the wait just becomes
> customize-able. Not that it's any different the current implementation,
> since the value is set in qemuDomainObjBeginJobInternal that means
> there's no distinguishing between job, asyncjob, and agentjob. There's
> also no attempt to allow changing the value via virt-admin.

I'm not sure how we would implement a per-API timeout. We could perhaps do
per connection but that's also not desired.

And regarding "how does one know how long to wait" - well, they don't.
That's the thing with timeouts. It's not an argument against them
though. Even primitive functions (e.g. those from pthread) have
XXX_timed() variants.

> 
> A "strong recommendation" of longer than 30 seconds doesn't provide much
> guidance. The setting of value generally depends on the configuration
> and what causes an API to hold the lock for longer than 30 seconds.
> That's probably limited to primarily virDomainListGetStats and/or
> virConnectGetAllDomainStats. There's other factors in play including CPU
> speed, CPU quota allowed, network throughput/latency, disk speed, etc.
> Tough to create a value the works for every command/purpose.

Agreed, "strong recommendation" is use-case dependent (although it's
perhaps what initiated this patch). If anything, we should discourage
people from touching this knob.

> 
> Ironically (perhaps) allowing a value of 0 would essentially make it so
> no one waited - such as how qemuDomainObjBeginJobNowait operates
> essentially. OTOH, in order to have everything wait "as long as
> possible" passing -1 would be a lot easier than providing 2147483647 (or
> if properly typed to uint, then 4294967295U).

Passing 0 is explicitly forbidden (although in a bit clumsy way - we
need to check the value right after it's parsed because multiplying it
by 1000 can lead to overflow and a negative value becomes positive or
vice versa).

> 
> In the long run, regardless of what value is chosen it may never be
> "long enough". Similarly how does one know what is causing the "wait". I
> have doubts that "typically impatient" users would wait more than few
> seconds before hitting ^c. Since there's no indication that something is
> waiting or hung, how is one to truly understand the difference between
> the two.
> 
> Still, I have a feeling something similar could be done for those
> commands that are primarily QUERY jobs for which the consumer desires to
> override the default of 30 seconds via a new option controlled by a
> flag. Commands could add the --nowait flag just as easily as a --wait
> NN. To me that would be far better improvement than a one time config
> adjustment for everyone.

Ughr. This is not the way to go IMO. Can you imagine how many APIs we
have and thus how many flags we would have to introduce?
If anything we can have async versions of *some* APIs. But that is long
way to go too.

> 
> Not that the MODIFY jobs are any less important vis-a-vis waiting, but
> those I would think would want a shorter wait time. How comfortable do
> you feel when you go to modify something simple only to get no response.
> Leaves one wondering, did my change take affect or did my change cause
> some sort of problem?
> 
> Anyway, while not a full on objection, I figured I'd voice my preference
> for a different solution and see where it takes things.

I think having this in qemu.conf makes sense. After all, we already have
max_queued, keepalive_* (which is also a timeout, sort of) and probably
some others.

> 
> John
> 
> 
>>
>> Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
>> Reviewed-by: Xi Xu <xu.xi8@zte.com.cn>
>> ---
>> changes in v5:
>> - adjust position of state lock in aug file
>> - fix state lock time got from conf file from milliseconds to
>>   seconds
>>
>> changes in v4:
>> - fix syntax-check error
>>
>> changes in v3:
>> - add user-friendly description and nb of state lock
>> - check validity of stateLockTimeout
>>
>> changes in v2:
>> - change default value to 30 in qemu.conf
>> - set the default value in virQEMUDriverConfigNew()
>> ---
>>  src/qemu/libvirtd_qemu.aug         |  1 +
>>  src/qemu/qemu.conf                 | 10 ++++++++++
>>  src/qemu/qemu_conf.c               | 15 +++++++++++++++
>>  src/qemu/qemu_conf.h               |  2 ++
>>  src/qemu/qemu_domain.c             |  5 +----
>>  src/qemu/test_libvirtd_qemu.aug.in |  1 +
>>  6 files changed, 30 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug
>> index ddc4bbf..a5601e1 100644
>> --- a/src/qemu/libvirtd_qemu.aug
>> +++ b/src/qemu/libvirtd_qemu.aug
>> @@ -100,6 +100,7 @@ module Libvirtd_qemu =
>>                   | str_entry "lock_manager"
>>  
>>     let rpc_entry = int_entry "max_queued"
>> +                 | int_entry "state_lock_timeout"
>>                   | int_entry "keepalive_interval"
>>                   | int_entry "keepalive_count"
>>  
>> diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf
>> index cd57b3c..8920a1a 100644
>> --- a/src/qemu/qemu.conf
>> +++ b/src/qemu/qemu.conf
>> @@ -667,6 +667,16 @@
>>  #
>>  #max_queued = 0
>>  
>> +
>> +# When two or more threads want to work with the same domain they use a
>> +# job lock to mutually exclude each other. However, waiting for the lock
>> +# is limited up to state_lock_timeout seconds.
>> +# NB, strong recommendation to set the timeout longer than 30 seconds.

s/.*/It is strongly recommended to not touch this setting/

>> +#
>> +# Default is 30
>> +#
>> +#state_lock_timeout = 60
>> +
>>  ###################################################################
>>  # Keepalive protocol:
>>  # This allows qemu driver to detect broken connections to remote
>> diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
>> index a4f545e..012f4d1 100644
>> --- a/src/qemu/qemu_conf.c
>> +++ b/src/qemu/qemu_conf.c
>> @@ -129,6 +129,9 @@ void qemuDomainCmdlineDefFree(qemuDomainCmdlineDefPtr def)
>>  #endif
>>  
>>  
>> +/* Give up waiting for mutex after 30 seconds */
>> +#define QEMU_JOB_WAIT_TIME (1000ull * 30)
>> +
>>  virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
>>  {
>>      virQEMUDriverConfigPtr cfg;
>> @@ -346,6 +349,8 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
>>      cfg->glusterDebugLevel = 4;
>>      cfg->stdioLogD = true;
>>  
>> +    cfg->stateLockTimeout = QEMU_JOB_WAIT_TIME;
>> +
>>      if (!(cfg->namespaces = virBitmapNew(QEMU_DOMAIN_NS_LAST)))
>>          goto error;
>>  
>> @@ -863,6 +868,10 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg,
>>      if (virConfGetValueUInt(conf, "keepalive_count", &cfg->keepAliveCount) < 0)
>>          goto cleanup;
>>  
>> +    if (virConfGetValueInt(conf, "state_lock_timeout", &cfg->stateLockTimeout) < 0)
>> +        goto cleanup;
>> +    cfg->stateLockTimeout *= 1000;
>> +
>>      if (virConfGetValueInt(conf, "seccomp_sandbox", &cfg->seccompSandbox) < 0)
>>          goto cleanup;
>>  
>> @@ -1055,6 +1064,12 @@ virQEMUDriverConfigValidate(virQEMUDriverConfigPtr cfg)
>>          return -1;
>>      }
>>  
>> +    if (cfg->stateLockTimeout <= 0) {
> 
> If this were an unsigned int, then the < 0 check wouldn't necessary.
> 
> So then the question becomes, should 0 (zero) be special cased as wait
> forever.

This check needs to be done before multiplication otherwise an overflow
might occur. Also, the multiplication can be done in BeginJobInternal so
that cfg->stateLockTimeout holds value entered by user.

> 
>> +        virReportError(VIR_ERR_CONF_SYNTAX, "%s",
>> +                       _("state_lock_timeout should larger than zero"));
> 
> If 0 were left as invalid, then s/should/must be/
> 
>> +        return -1;
>> +    }
>> +
>>      return 0;
>>  }
>>  

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt][PATCH v5] qemu: Introduce state_lock_timeout toqemu.conf
Posted by wang.yi59@zte.com.cn 5 years, 6 months ago
Thank you both for your patience and discussion, John and Michal.
I will send a new version to fix the issues referred to in the discussion.

> On 09/10/2018 10:22 PM, John Ferlan wrote:
> >
> >
> > On 09/05/2018 11:09 PM, Yi Wang wrote:
> >> When doing some job holding state lock for a long time,
> >> we may come across error:
> >
> > blank line
> >
> >> "Timed out during operation: cannot acquire state change lock"
> >
> > blank line
> >
> >> Well, sometimes it's not a problem and users want to continue
> >> to wait, and this patch allow users decide how long time they
> >> can wait the state lock.
> >
> > As I noted in a previous review... With this patch it's not the
> > individual user or command that can decide how long to wait, rather it's
> > set by qemu.conf policy. The duration of the wait just becomes
> > customize-able. Not that it's any different the current implementation,
> > since the value is set in qemuDomainObjBeginJobInternal that means
> > there's no distinguishing between job, asyncjob, and agentjob. There's
> > also no attempt to allow changing the value via virt-admin.
>
> I'm not sure how we would implement per API timeout. We could perhaps do
> per connection but that's also not desired.
>
> And regarding "how does one know how long to wait" - well, they don't.
> That's the thing with timeouts. It's not an argument against them
> though. Even primitive functions (e.g. those from pthread) have
> XXX_timed() variants.
>
> >
> > A "strong recommendation" of longer than 30 seconds doesn't provide much
> > guidance. The setting of value generally depends on the configuration
> > and what causes an API to hold the lock for longer than 30 seconds.
> > That's probably limited to primarily virDomainListGetStats and/or
> > virConnectGetAllDomainStats. There's other factors in play including CPU
> > speed, CPU quota allowed, network throughput/latency, disk speed, etc.
> > Tough to create a value the works for every command/purpose.
>
> Agreed, "strong recommendation" is use case dependant (although it's
> perhaps what initiated this patch). If anything, we should discourage
> people from touching this knob.
>
> >
> > Ironically (perhaps) allowing a value of 0 would essentially make it so
> > no one waited - such as how qemuDomainObjBeginJobNowait operates
> > essentially. OTOH, in order to have everything wait "as long as
> > possible" passing -1 would be a lot easier than providing 2147483647 (or
> > if properly typed to uint, then 4294967295U).
>
> Passing 0 is explicitly forbidden (although in a bit clumsy way - we
> need to check the value right after it's parsed because multiplying it
> by 1000 can lead to overflow and a negative value becomes positive or
> vice versa).

[......]

> > If this were an unsigned int, then the < 0 check wouldn't necessary.
> >
> > So then the question becomes, should 0 (zero) be special cased as wait
> > forever.
>
> This check needs to be done before multiplication otherwise an overflow
> might occur. Also, the multiplication can be done in BeginJobInternal so
> that cfg->stateLockTimeout holds value entered by user.
>
> >
> >> +        virReportError(VIR_ERR_CONF_SYNTAX, "%s",
> >> +                       _("state_lock_timeout should larger than zero"));
> >
> > If 0 were left as invalid, then s/should/must be/
> >
> >> +        return -1;
> >> +    }
> >> +
> >>      return 0;
> >>  }
> >>
> Michal


---
Best wishes
Yi Wang--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list