[libvirt] [PATCH v4] qemu: Introduce state_lock_timeout to qemu.conf

Yi Wang posted 1 patch 5 years, 7 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/1535445616-8156-1-git-send-email-wang.yi59@zte.com.cn
Test syntax-check passed
There is a newer version of this series
src/qemu/libvirtd_qemu.aug         |  1 +
src/qemu/qemu.conf                 | 10 ++++++++++
src/qemu/qemu_conf.c               | 14 ++++++++++++++
src/qemu/qemu_conf.h               |  2 ++
src/qemu/qemu_domain.c             |  5 +----
src/qemu/test_libvirtd_qemu.aug.in |  1 +
6 files changed, 29 insertions(+), 4 deletions(-)
[libvirt] [PATCH v4] qemu: Introduce state_lock_timeout to qemu.conf
Posted by Yi Wang 5 years, 7 months ago
When doing some job holding state lock for a long time,
we may come across error:
"Timed out during operation: cannot acquire state change lock"
Well, sometimes it's not a problem and users wanner continue
to wait, and this patch allow users decide how long time they
can wait the state lock.

Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
Reviewed-by: Xi Xu <xu.xi8@zte.com.cn>
---
changes in v4:
- fox syntax-check error

changes in v3:
- add user-friendly description and nb of state lock
- check validity of stateLockTimeout

changes in v2:
- change default value to 30 in qemu.conf
- set the default value in virQEMUDriverConfigNew()

v4

Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
---
 src/qemu/libvirtd_qemu.aug         |  1 +
 src/qemu/qemu.conf                 | 10 ++++++++++
 src/qemu/qemu_conf.c               | 14 ++++++++++++++
 src/qemu/qemu_conf.h               |  2 ++
 src/qemu/qemu_domain.c             |  5 +----
 src/qemu/test_libvirtd_qemu.aug.in |  1 +
 6 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug
index ddc4bbf..f7287ae 100644
--- a/src/qemu/libvirtd_qemu.aug
+++ b/src/qemu/libvirtd_qemu.aug
@@ -93,6 +93,7 @@ module Libvirtd_qemu =
                  | limits_entry "max_core"
                  | bool_entry "dump_guest_core"
                  | str_entry "stdio_handler"
+                 | int_entry "state_lock_timeout"
 
    let device_entry = bool_entry "mac_filter"
                  | bool_entry "relaxed_acs_check"
diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf
index cd57b3c..8920a1a 100644
--- a/src/qemu/qemu.conf
+++ b/src/qemu/qemu.conf
@@ -667,6 +667,16 @@
 #
 #max_queued = 0
 
+
+# When two or more threads want to work with the same domain they use a
+# job lock to mutually exclude each other. However, waiting for the lock
+# is limited up to state_lock_timeout seconds.
+# NB, strong recommendation to set the timeout longer than 30 seconds.
+#
+# Default is 30
+#
+#state_lock_timeout = 60
+
 ###################################################################
 # Keepalive protocol:
 # This allows qemu driver to detect broken connections to remote
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index a4f545e..c761cae 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -129,6 +129,9 @@ void qemuDomainCmdlineDefFree(qemuDomainCmdlineDefPtr def)
 #endif
 
 
+/* Give up waiting for mutex after 30 seconds */
+#define QEMU_JOB_WAIT_TIME (1000ull * 30)
+
 virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
 {
     virQEMUDriverConfigPtr cfg;
@@ -346,6 +349,8 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
     cfg->glusterDebugLevel = 4;
     cfg->stdioLogD = true;
 
+    cfg->stateLockTimeout = QEMU_JOB_WAIT_TIME;
+
     if (!(cfg->namespaces = virBitmapNew(QEMU_DOMAIN_NS_LAST)))
         goto error;
 
@@ -863,6 +868,9 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg,
     if (virConfGetValueUInt(conf, "keepalive_count", &cfg->keepAliveCount) < 0)
         goto cleanup;
 
+    if (virConfGetValueInt(conf, "state_lock_timeout", &cfg->stateLockTimeout) < 0)
+        goto cleanup;
+
     if (virConfGetValueInt(conf, "seccomp_sandbox", &cfg->seccompSandbox) < 0)
         goto cleanup;
 
@@ -1055,6 +1063,12 @@ virQEMUDriverConfigValidate(virQEMUDriverConfigPtr cfg)
         return -1;
     }
 
+    if (cfg->stateLockTimeout <= 0) {
+        virReportError(VIR_ERR_CONF_SYNTAX, "%s",
+                       _("state_lock_timeout should larger than zero"));
+        return -1;
+    }
+
     return 0;
 }
 
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index a8d84ef..97cf2e1 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -190,6 +190,8 @@ struct _virQEMUDriverConfig {
     int keepAliveInterval;
     unsigned int keepAliveCount;
 
+    int stateLockTimeout;
+
     int seccompSandbox;
 
     char *migrateHost;
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 886e3fb..5a2ca52 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -6652,9 +6652,6 @@ qemuDomainObjCanSetJob(qemuDomainObjPrivatePtr priv,
              priv->job.agentActive == QEMU_AGENT_JOB_NONE));
 }
 
-/* Give up waiting for mutex after 30 seconds */
-#define QEMU_JOB_WAIT_TIME (1000ull * 30)
-
 /**
  * qemuDomainObjBeginJobInternal:
  * @driver: qemu driver
@@ -6714,7 +6711,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
     }
 
     priv->jobs_queued++;
-    then = now + QEMU_JOB_WAIT_TIME;
+    then = now + cfg->stateLockTimeout;
 
  retry:
     if ((!async && job != QEMU_JOB_DESTROY) &&
diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in
index f1e8806..dc5de96 100644
--- a/src/qemu/test_libvirtd_qemu.aug.in
+++ b/src/qemu/test_libvirtd_qemu.aug.in
@@ -105,3 +105,4 @@ module Test_libvirtd_qemu =
 { "pr_helper" = "/usr/bin/qemu-pr-helper" }
 { "swtpm_user" = "tss" }
 { "swtpm_group" = "tss" }
+{ "state_lock_timeout" = "60" }
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v4] qemu: Introduce state_lock_timeout to qemu.conf
Posted by Ján Tomko 5 years, 6 months ago
On Tue, Aug 28, 2018 at 04:40:16PM +0800, Yi Wang wrote:
>When doing some job holding state lock for a long time,
>we may come across error:
>"Timed out during operation: cannot acquire state change lock"
>Well, sometimes it's not a problem and users wanner continue

s/wanner/want to/

>to wait, and this patch allow users decide how long time they
>can wait the state lock.
>
>Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
>Reviewed-by: Xi Xu <xu.xi8@zte.com.cn>
>---
>changes in v4:
>- fox syntax-check error
>
>changes in v3:
>- add user-friendly description and nb of state lock
>- check validity of stateLockTimeout
>
>changes in v2:
>- change default value to 30 in qemu.conf
>- set the default value in virQEMUDriverConfigNew()
>
>v4
>
>Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
>---
> src/qemu/libvirtd_qemu.aug         |  1 +
> src/qemu/qemu.conf                 | 10 ++++++++++
> src/qemu/qemu_conf.c               | 14 ++++++++++++++
> src/qemu/qemu_conf.h               |  2 ++
> src/qemu/qemu_domain.c             |  5 +----
> src/qemu/test_libvirtd_qemu.aug.in |  1 +
> 6 files changed, 29 insertions(+), 4 deletions(-)
>
>diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug
>index ddc4bbf..f7287ae 100644
>--- a/src/qemu/libvirtd_qemu.aug
>+++ b/src/qemu/libvirtd_qemu.aug
>@@ -93,6 +93,7 @@ module Libvirtd_qemu =
>                  | limits_entry "max_core"
>                  | bool_entry "dump_guest_core"
>                  | str_entry "stdio_handler"
>+                 | int_entry "state_lock_timeout"
>

here you add the option at the end of the 'process_entry' group

>    let device_entry = bool_entry "mac_filter"
>                  | bool_entry "relaxed_acs_check"
>diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf
>index cd57b3c..8920a1a 100644
>--- a/src/qemu/qemu.conf
>+++ b/src/qemu/qemu.conf
>@@ -667,6 +667,16 @@
> #
> #max_queued = 0
>
>+
>+# When two or more threads want to work with the same domain they use a
>+# job lock to mutually exclude each other. However, waiting for the lock
>+# is limited up to state_lock_timeout seconds.
>+# NB, strong recommendation to set the timeout longer than 30 seconds.
>+#
>+# Default is 30
>+#
>+#state_lock_timeout = 60

But here in qemu.conf, you add it between the rpc entries.

It seems we did not follow the structure with 'stdio_handler',
but adding it either right after 'dump_guest_core' or at the end of file
would be better than squeezing it between rpc entries.

>+
> ###################################################################
> # Keepalive protocol:
> # This allows qemu driver to detect broken connections to remote
>diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
>index a4f545e..c761cae 100644
>--- a/src/qemu/qemu_conf.c
>+++ b/src/qemu/qemu_conf.c
>@@ -129,6 +129,9 @@ void qemuDomainCmdlineDefFree(qemuDomainCmdlineDefPtr def)
> #endif
>
>
>+/* Give up waiting for mutex after 30 seconds */
>+#define QEMU_JOB_WAIT_TIME (1000ull * 30)
>+

Here the constant is multiplied by 1000 to convert from seconds to
milliseconds.

> virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
> {
>     virQEMUDriverConfigPtr cfg;
>@@ -346,6 +349,8 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
>     cfg->glusterDebugLevel = 4;
>     cfg->stdioLogD = true;
>
>+    cfg->stateLockTimeout = QEMU_JOB_WAIT_TIME;
>+
>     if (!(cfg->namespaces = virBitmapNew(QEMU_DOMAIN_NS_LAST)))
>         goto error;
>
>@@ -863,6 +868,9 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg,
>     if (virConfGetValueUInt(conf, "keepalive_count", &cfg->keepAliveCount) < 0)
>         goto cleanup;
>
>+    if (virConfGetValueInt(conf, "state_lock_timeout", &cfg->stateLockTimeout) < 0)
>+        goto cleanup;
>+

But the parsed value is passed directly here, so '60' in the config file
would result in 60 ms.

>     if (virConfGetValueInt(conf, "seccomp_sandbox", &cfg->seccompSandbox) < 0)
>         goto cleanup;
>
>@@ -1055,6 +1063,12 @@ virQEMUDriverConfigValidate(virQEMUDriverConfigPtr cfg)
>         return -1;
>     }
>
>+    if (cfg->stateLockTimeout <= 0) {
>+        virReportError(VIR_ERR_CONF_SYNTAX, "%s",
>+                       _("state_lock_timeout should larger than zero"));
>+        return -1;
>+    }
>+
>     return 0;
> }
>
>diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
>index a8d84ef..97cf2e1 100644
>--- a/src/qemu/qemu_conf.h
>+++ b/src/qemu/qemu_conf.h
>@@ -190,6 +190,8 @@ struct _virQEMUDriverConfig {
>     int keepAliveInterval;
>     unsigned int keepAliveCount;
>
>+    int stateLockTimeout;
>+
>     int seccompSandbox;
>
>     char *migrateHost;
>diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
>index 886e3fb..5a2ca52 100644
>--- a/src/qemu/qemu_domain.c
>+++ b/src/qemu/qemu_domain.c
>@@ -6652,9 +6652,6 @@ qemuDomainObjCanSetJob(qemuDomainObjPrivatePtr priv,
>              priv->job.agentActive == QEMU_AGENT_JOB_NONE));
> }
>
>-/* Give up waiting for mutex after 30 seconds */
>-#define QEMU_JOB_WAIT_TIME (1000ull * 30)
>-
> /**
>  * qemuDomainObjBeginJobInternal:
>  * @driver: qemu driver
>@@ -6714,7 +6711,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
>     }
>
>     priv->jobs_queued++;
>-    then = now + QEMU_JOB_WAIT_TIME;
>+    then = now + cfg->stateLockTimeout;
>
>  retry:
>     if ((!async && job != QEMU_JOB_DESTROY) &&
>diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in
>index f1e8806..dc5de96 100644
>--- a/src/qemu/test_libvirtd_qemu.aug.in
>+++ b/src/qemu/test_libvirtd_qemu.aug.in
>@@ -105,3 +105,4 @@ module Test_libvirtd_qemu =
> { "pr_helper" = "/usr/bin/qemu-pr-helper" }
> { "swtpm_user" = "tss" }
> { "swtpm_group" = "tss" }
>+{ "state_lock_timeout" = "60" }

This needs to be ordered properly - install the 'augeas' tool to see
where 'make check' fails.

Jano
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt][PATCH v4] qemu: Introduce state_lock_timeout toqemu.conf
Posted by wang.yi59@zte.com.cn 5 years, 6 months ago
Hi Jano,
thanks for your reply.

> On Tue, Aug 28, 2018 at 04:40:16PM +0800, Yi Wang wrote:
> >When doing some job holding state lock for a long time,
> >we may come across error:
> >"Timed out during operation: cannot acquire state change lock"
> >Well, sometimes it's not a problem and users wanner continue
>
> s/wanner/want to/

Ok, I will pay attention to this later.

> >diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug
> >index ddc4bbf..f7287ae 100644
> >--- a/src/qemu/libvirtd_qemu.aug
> >+++ b/src/qemu/libvirtd_qemu.aug
> >@@ -93,6 +93,7 @@ module Libvirtd_qemu =
> > | limits_entry "max_core"
> > | bool_entry "dump_guest_core"
> > | str_entry "stdio_handler"
> >+ | int_entry "state_lock_timeout"
> >
>
> here you add the option at the end of the 'process_entry' group
>
> > let device_entry = bool_entry "mac_filter"
> > | bool_entry "relaxed_acs_check"
> >diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf
> >index cd57b3c..8920a1a 100644
> >--- a/src/qemu/qemu.conf
> >+++ b/src/qemu/qemu.conf
> >@@ -667,6 +667,16 @@
> > #
> > #max_queued = 0
> >
> >+
> >+# When two or more threads want to work with the same domain they use a
> >+# job lock to mutually exclude each other. However, waiting for the lock
> >+# is limited up to state_lock_timeout seconds.
> >+# NB, strong recommendation to set the timeout longer than 30 seconds.
> >+#
> >+# Default is 30
> >+#
> >+#state_lock_timeout = 60
>
> But here in qemu.conf, you add it between the rpc entries.
>
> It seems we did not follow the structure with 'stdio_handler',
> but adding it either right after 'dump_guest_core' or at the end of file
> would be better than squeezing it between rpc entries.

As Michal suggested in:
https://www.redhat.com/archives/libvir-list/2018-August/msg01693.html
max_queued and state_lock_timeout both refer to the same area, so I put it
here :)

> >@@ -863,6 +868,9 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg,
> > if (virConfGetValueUInt(conf, "keepalive_count", &cfg->keepAliveCount) < 0)
> > goto cleanup;
> >
> >+ if (virConfGetValueInt(conf, "state_lock_timeout", &cfg->stateLockTimeout) < 0)
> >+ goto cleanup;
> >+
>
> But the parsed value is passed directly here, so '60' in the config file
> would result in 60 ms.

Oho, I will fix this.

> >
> > retry:
> > if ((!async && job != QEMU_JOB_DESTROY) &&
> >diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in
> >index f1e8806..dc5de96 100644
> >--- a/src/qemu/test_libvirtd_qemu.aug.in
> >+++ b/src/qemu/test_libvirtd_qemu.aug.in
> >@@ -105,3 +105,4 @@ module Test_libvirtd_qemu =
> > { "pr_helper" = "/usr/bin/qemu-pr-helper" }
> > { "swtpm_user" = "tss" }
> > { "swtpm_group" = "tss" }
> >+{ "state_lock_timeout" = "60" }
>
> This needs to be ordered properly - install the 'augeas' tool to see
> where 'make check' fails.

My fault, sorry for the mistake, and next patch will fix this.
Thanks again, Jano.


---
Best wishes
Yi Wang--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v4] qemu: Introduce state_lock_timeout toqemu.conf
Posted by Ján Tomko 5 years, 6 months ago
On Wed, Sep 05, 2018 at 04:49:59PM +0800, wang.yi59@zte.com.cn wrote:
>Hi Jano,
>thanks for your reply.
>
>> On Tue, Aug 28, 2018 at 04:40:16PM +0800, Yi Wang wrote:

[...]

>> >diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug
>> >index ddc4bbf..f7287ae 100644
>> >--- a/src/qemu/libvirtd_qemu.aug
>> >+++ b/src/qemu/libvirtd_qemu.aug
>> >@@ -93,6 +93,7 @@ module Libvirtd_qemu =
>> > | limits_entry "max_core"
>> > | bool_entry "dump_guest_core"
>> > | str_entry "stdio_handler"
>> >+ | int_entry "state_lock_timeout"
>> >
>>
>> here you add the option at the end of the 'process_entry' group
>>
>> > let device_entry = bool_entry "mac_filter"
>> > | bool_entry "relaxed_acs_check"
>> >diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf
>> >index cd57b3c..8920a1a 100644
>> >--- a/src/qemu/qemu.conf
>> >+++ b/src/qemu/qemu.conf
>> >@@ -667,6 +667,16 @@
>> > #
>> > #max_queued = 0
>> >
>> >+
>> >+# When two or more threads want to work with the same domain they use a
>> >+# job lock to mutually exclude each other. However, waiting for the lock
>> >+# is limited up to state_lock_timeout seconds.
>> >+# NB, strong recommendation to set the timeout longer than 30 seconds.
>> >+#
>> >+# Default is 30
>> >+#
>> >+#state_lock_timeout = 60
>>
>> But here in qemu.conf, you add it between the rpc entries.
>>
>> It seems we did not follow the structure with 'stdio_handler',
>> but adding it either right after 'dump_guest_core' or at the end of file
>> would be better than squeezing it between rpc entries.
>
>As Michal suggested in:
>https://www.redhat.com/archives/libvir-list/2018-August/msg01693.html
>max_queued and state_lock_timeout both refer to the same area, so I put it
>here :)
>

My point was that the position in qemu.conf does not match the grouping
in libvirtd_qemu.aug

So if it's related to max_queued, it should also be in the rpc_entry group
in the aug file.

Jano
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt][PATCH v4] qemu: Introduce state_lock_timeouttoqemu.conf
Posted by wang.yi59@zte.com.cn 5 years, 6 months ago
> On Wed, Sep 05, 2018 at 04:49:59PM +0800, wang.yi59@zte.com.cn wrote:
> >Hi Jano,
> >thanks for your reply.
> >
> >> On Tue, Aug 28, 2018 at 04:40:16PM +0800, Yi Wang wrote:
>
> [...]
>
> >> >diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug
> >> >index ddc4bbf..f7287ae 100644
> >> >--- a/src/qemu/libvirtd_qemu.aug
> >> >+++ b/src/qemu/libvirtd_qemu.aug
> >> >@@ -93,6 +93,7 @@ module Libvirtd_qemu =
> >> > | limits_entry "max_core"
> >> > | bool_entry "dump_guest_core"
> >> > | str_entry "stdio_handler"
> >> >+ | int_entry "state_lock_timeout"
> >> >
> >>
> >> here you add the option at the end of the 'process_entry' group
> >>
> >> > let device_entry = bool_entry "mac_filter"
> >> > | bool_entry "relaxed_acs_check"
> >> >diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf
> >> >index cd57b3c..8920a1a 100644
> >> >--- a/src/qemu/qemu.conf
> >> >+++ b/src/qemu/qemu.conf
> >> >@@ -667,6 +667,16 @@
> >> > #
> >> > #max_queued = 0
> >> >
> >> >+
> >> >+# When two or more threads want to work with the same domain they use a
> >> >+# job lock to mutually exclude each other. However, waiting for the lock
> >> >+# is limited up to state_lock_timeout seconds.
> >> >+# NB, strong recommendation to set the timeout longer than 30 seconds.
> >> >+#
> >> >+# Default is 30
> >> >+#
> >> >+#state_lock_timeout = 60
> >>
> >> But here in qemu.conf, you add it between the rpc entries.
> >>
> >> It seems we did not follow the structure with 'stdio_handler',
> >> but adding it either right after 'dump_guest_core' or at the end of file
> >> would be better than squeezing it between rpc entries.
> >
> >As Michal suggested in:
> >https://www.redhat.com/archives/libvir-list/2018-August/msg01693.html
> >max_queued and state_lock_timeout both refer to the same area, so I put it
> >here :)
> >
>
> My point was that the position in qemu.conf does not match the grouping
> in libvirtd_qemu.aug
>
> So if it's related to max_queued, it should also be in the rpc_entry group
> in the aug file.

I got it, and I will adjust that in the next version. Thanks.


---
Best wishes
Yi Wang--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list