src/qemu/libvirtd_qemu.aug | 1 + src/qemu/qemu.conf | 10 ++++++++++ src/qemu/qemu_conf.c | 14 ++++++++++++++ src/qemu/qemu_conf.h | 2 ++ src/qemu/qemu_domain.c | 5 +---- src/qemu/test_libvirtd_qemu.aug.in | 1 + 6 files changed, 29 insertions(+), 4 deletions(-)
When doing some job holding state lock for a long time,
we may come across error:
"Timed out during operation: cannot acquire state change lock"
Well, sometimes it's not a problem and users want to continue
to wait, and this patch allows users to decide how long they
can wait for the state lock.
Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
Reviewed-by: Xi Xu <xu.xi8@zte.com.cn>
---
changes in v4:
- fix syntax-check error
changes in v3:
- add user-friendly description and nb of state lock
- check validity of stateLockTimeout
changes in v2:
- change default value to 30 in qemu.conf
- set the default value in virQEMUDriverConfigNew()
v4
Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
---
src/qemu/libvirtd_qemu.aug | 1 +
src/qemu/qemu.conf | 10 ++++++++++
src/qemu/qemu_conf.c | 14 ++++++++++++++
src/qemu/qemu_conf.h | 2 ++
src/qemu/qemu_domain.c | 5 +----
src/qemu/test_libvirtd_qemu.aug.in | 1 +
6 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug
index ddc4bbf..f7287ae 100644
--- a/src/qemu/libvirtd_qemu.aug
+++ b/src/qemu/libvirtd_qemu.aug
@@ -93,6 +93,7 @@ module Libvirtd_qemu =
| limits_entry "max_core"
| bool_entry "dump_guest_core"
| str_entry "stdio_handler"
+ | int_entry "state_lock_timeout"
let device_entry = bool_entry "mac_filter"
| bool_entry "relaxed_acs_check"
diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf
index cd57b3c..8920a1a 100644
--- a/src/qemu/qemu.conf
+++ b/src/qemu/qemu.conf
@@ -667,6 +667,16 @@
#
#max_queued = 0
+
+# When two or more threads want to work with the same domain they use a
+# job lock to mutually exclude each other. However, waiting for the lock
+# is limited up to state_lock_timeout seconds.
+# NB, strong recommendation to set the timeout longer than 30 seconds.
+#
+# Default is 30
+#
+#state_lock_timeout = 60
+
###################################################################
# Keepalive protocol:
# This allows qemu driver to detect broken connections to remote
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index a4f545e..c761cae 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -129,6 +129,9 @@ void qemuDomainCmdlineDefFree(qemuDomainCmdlineDefPtr def)
#endif
+/* Give up waiting for mutex after 30 seconds */
+#define QEMU_JOB_WAIT_TIME (1000ull * 30)
+
virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
{
virQEMUDriverConfigPtr cfg;
@@ -346,6 +349,8 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
cfg->glusterDebugLevel = 4;
cfg->stdioLogD = true;
+ cfg->stateLockTimeout = QEMU_JOB_WAIT_TIME;
+
if (!(cfg->namespaces = virBitmapNew(QEMU_DOMAIN_NS_LAST)))
goto error;
@@ -863,6 +868,9 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg,
if (virConfGetValueUInt(conf, "keepalive_count", &cfg->keepAliveCount) < 0)
goto cleanup;
+ if (virConfGetValueInt(conf, "state_lock_timeout", &cfg->stateLockTimeout) < 0)
+ goto cleanup;
+
if (virConfGetValueInt(conf, "seccomp_sandbox", &cfg->seccompSandbox) < 0)
goto cleanup;
@@ -1055,6 +1063,12 @@ virQEMUDriverConfigValidate(virQEMUDriverConfigPtr cfg)
return -1;
}
+ if (cfg->stateLockTimeout <= 0) {
+ virReportError(VIR_ERR_CONF_SYNTAX, "%s",
+ _("state_lock_timeout should larger than zero"));
+ return -1;
+ }
+
return 0;
}
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index a8d84ef..97cf2e1 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -190,6 +190,8 @@ struct _virQEMUDriverConfig {
int keepAliveInterval;
unsigned int keepAliveCount;
+ int stateLockTimeout;
+
int seccompSandbox;
char *migrateHost;
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 886e3fb..5a2ca52 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -6652,9 +6652,6 @@ qemuDomainObjCanSetJob(qemuDomainObjPrivatePtr priv,
priv->job.agentActive == QEMU_AGENT_JOB_NONE));
}
-/* Give up waiting for mutex after 30 seconds */
-#define QEMU_JOB_WAIT_TIME (1000ull * 30)
-
/**
* qemuDomainObjBeginJobInternal:
* @driver: qemu driver
@@ -6714,7 +6711,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
}
priv->jobs_queued++;
- then = now + QEMU_JOB_WAIT_TIME;
+ then = now + cfg->stateLockTimeout;
retry:
if ((!async && job != QEMU_JOB_DESTROY) &&
diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in
index f1e8806..dc5de96 100644
--- a/src/qemu/test_libvirtd_qemu.aug.in
+++ b/src/qemu/test_libvirtd_qemu.aug.in
@@ -105,3 +105,4 @@ module Test_libvirtd_qemu =
{ "pr_helper" = "/usr/bin/qemu-pr-helper" }
{ "swtpm_user" = "tss" }
{ "swtpm_group" = "tss" }
+{ "state_lock_timeout" = "60" }
--
1.8.3.1
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
On Tue, Aug 28, 2018 at 04:40:16PM +0800, Yi Wang wrote: >When doing some job holding state lock for a long time, >we may come across error: >"Timed out during operation: cannot acquire state change lock" >Well, sometimes it's not a problem and users wanner continue s/wanner/want to/ >to wait, and this patch allow users decide how long time they >can wait the state lock. > >Signed-off-by: Yi Wang <wang.yi59@zte.com.cn> >Reviewed-by: Xi Xu <xu.xi8@zte.com.cn> >--- >changes in v4: >- fox syntax-check error > >changes in v3: >- add user-friendly description and nb of state lock >- check validity of stateLockTimeout > >changes in v2: >- change default value to 30 in qemu.conf >- set the default value in virQEMUDriverConfigNew() > >v4 > >Signed-off-by: Yi Wang <wang.yi59@zte.com.cn> >--- > src/qemu/libvirtd_qemu.aug | 1 + > src/qemu/qemu.conf | 10 ++++++++++ > src/qemu/qemu_conf.c | 14 ++++++++++++++ > src/qemu/qemu_conf.h | 2 ++ > src/qemu/qemu_domain.c | 5 +---- > src/qemu/test_libvirtd_qemu.aug.in | 1 + > 6 files changed, 29 insertions(+), 4 deletions(-) > >diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug >index ddc4bbf..f7287ae 100644 >--- a/src/qemu/libvirtd_qemu.aug >+++ b/src/qemu/libvirtd_qemu.aug >@@ -93,6 +93,7 @@ module Libvirtd_qemu = > | limits_entry "max_core" > | bool_entry "dump_guest_core" > | str_entry "stdio_handler" >+ | int_entry "state_lock_timeout" > here you add the option at the end of the 'process_entry' group > let device_entry = bool_entry "mac_filter" > | bool_entry "relaxed_acs_check" >diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf >index cd57b3c..8920a1a 100644 >--- a/src/qemu/qemu.conf >+++ b/src/qemu/qemu.conf >@@ -667,6 +667,16 @@ > # > #max_queued = 0 > >+ >+# When two or more threads want to work with the same domain they use a >+# job lock to mutually exclude each other. However, waiting for the lock >+# is limited up to state_lock_timeout seconds. 
>+# NB, strong recommendation to set the timeout longer than 30 seconds. >+# >+# Default is 30 >+# >+#state_lock_timeout = 60 But here in qemu.conf, you add it between the rpc entries. It seems we did not follow the structure with 'stdio_handler', but adding it either right after 'dump_guest_core' or at the end of file would be better than squeezing it between rpc entries. >+ > ################################################################### > # Keepalive protocol: > # This allows qemu driver to detect broken connections to remote >diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c >index a4f545e..c761cae 100644 >--- a/src/qemu/qemu_conf.c >+++ b/src/qemu/qemu_conf.c >@@ -129,6 +129,9 @@ void qemuDomainCmdlineDefFree(qemuDomainCmdlineDefPtr def) > #endif > > >+/* Give up waiting for mutex after 30 seconds */ >+#define QEMU_JOB_WAIT_TIME (1000ull * 30) >+ Here the constant is multiplied by 1000 to convert from seconds to milliseconds. > virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged) > { > virQEMUDriverConfigPtr cfg; >@@ -346,6 +349,8 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged) > cfg->glusterDebugLevel = 4; > cfg->stdioLogD = true; > >+ cfg->stateLockTimeout = QEMU_JOB_WAIT_TIME; >+ > if (!(cfg->namespaces = virBitmapNew(QEMU_DOMAIN_NS_LAST))) > goto error; > >@@ -863,6 +868,9 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg, > if (virConfGetValueUInt(conf, "keepalive_count", &cfg->keepAliveCount) < 0) > goto cleanup; > >+ if (virConfGetValueInt(conf, "state_lock_timeout", &cfg->stateLockTimeout) < 0) >+ goto cleanup; >+ But the parsed value is passed directly here, so '60' in the config file would result in 60 ms. 
> if (virConfGetValueInt(conf, "seccomp_sandbox", &cfg->seccompSandbox) < 0) > goto cleanup; > >@@ -1055,6 +1063,12 @@ virQEMUDriverConfigValidate(virQEMUDriverConfigPtr cfg) > return -1; > } > >+ if (cfg->stateLockTimeout <= 0) { >+ virReportError(VIR_ERR_CONF_SYNTAX, "%s", >+ _("state_lock_timeout should larger than zero")); >+ return -1; >+ } >+ > return 0; > } > >diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h >index a8d84ef..97cf2e1 100644 >--- a/src/qemu/qemu_conf.h >+++ b/src/qemu/qemu_conf.h >@@ -190,6 +190,8 @@ struct _virQEMUDriverConfig { > int keepAliveInterval; > unsigned int keepAliveCount; > >+ int stateLockTimeout; >+ > int seccompSandbox; > > char *migrateHost; >diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c >index 886e3fb..5a2ca52 100644 >--- a/src/qemu/qemu_domain.c >+++ b/src/qemu/qemu_domain.c >@@ -6652,9 +6652,6 @@ qemuDomainObjCanSetJob(qemuDomainObjPrivatePtr priv, > priv->job.agentActive == QEMU_AGENT_JOB_NONE)); > } > >-/* Give up waiting for mutex after 30 seconds */ >-#define QEMU_JOB_WAIT_TIME (1000ull * 30) >- > /** > * qemuDomainObjBeginJobInternal: > * @driver: qemu driver >@@ -6714,7 +6711,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, > } > > priv->jobs_queued++; >- then = now + QEMU_JOB_WAIT_TIME; >+ then = now + cfg->stateLockTimeout; > > retry: > if ((!async && job != QEMU_JOB_DESTROY) && >diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in >index f1e8806..dc5de96 100644 >--- a/src/qemu/test_libvirtd_qemu.aug.in >+++ b/src/qemu/test_libvirtd_qemu.aug.in >@@ -105,3 +105,4 @@ module Test_libvirtd_qemu = > { "pr_helper" = "/usr/bin/qemu-pr-helper" } > { "swtpm_user" = "tss" } > { "swtpm_group" = "tss" } >+{ "state_lock_timeout" = "60" } This needs to be ordered properly - install the 'augeas' tool to see where 'make check' fails. Jano -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Hi Jano, thanks for your reply. > On Tue, Aug 28, 2018 at 04:40:16PM +0800, Yi Wang wrote: > >When doing some job holding state lock for a long time, > >we may come across error: > >"Timed out during operation: cannot acquire state change lock" > >Well, sometimes it's not a problem and users wanner continue > > s/wanner/want to/ Ok, I will pay attention to this later. > >diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug > >index ddc4bbf..f7287ae 100644 > >--- a/src/qemu/libvirtd_qemu.aug > >+++ b/src/qemu/libvirtd_qemu.aug > >@@ -93,6 +93,7 @@ module Libvirtd_qemu = > > | limits_entry "max_core" > > | bool_entry "dump_guest_core" > > | str_entry "stdio_handler" > >+ | int_entry "state_lock_timeout" > > > > here you add the option at the end of the 'process_entry' group > > > let device_entry = bool_entry "mac_filter" > > | bool_entry "relaxed_acs_check" > >diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf > >index cd57b3c..8920a1a 100644 > >--- a/src/qemu/qemu.conf > >+++ b/src/qemu/qemu.conf > >@@ -667,6 +667,16 @@ > > # > > #max_queued = 0 > > > >+ > >+# When two or more threads want to work with the same domain they use a > >+# job lock to mutually exclude each other. However, waiting for the lock > >+# is limited up to state_lock_timeout seconds. > >+# NB, strong recommendation to set the timeout longer than 30 seconds. > >+# > >+# Default is 30 > >+# > >+#state_lock_timeout = 60 > > But here in qemu.conf, you add it between the rpc entries. > > It seems we did not follow the structure with 'stdio_handler', > but adding it either right after 'dump_guest_core' or at the end of file > would be better than squeezing it between rpc entries. 
As Michal suggested in: https://www.redhat.com/archives/libvir-list/2018-August/msg01693.html max_queued and state_lock_timeout both refer to the same area, so I put it here :) > >@@ -863,6 +868,9 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg, > > if (virConfGetValueUInt(conf, "keepalive_count", &cfg->keepAliveCount) < 0) > > goto cleanup; > > > >+ if (virConfGetValueInt(conf, "state_lock_timeout", &cfg->stateLockTimeout) < 0) > >+ goto cleanup; > >+ > > But the parsed value is passed directly here, so '60' in the config file > would result in 60 ms. Oho, I will fix this. > > > > retry: > > if ((!async && job != QEMU_JOB_DESTROY) && > >diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in > >index f1e8806..dc5de96 100644 > >--- a/src/qemu/test_libvirtd_qemu.aug.in > >+++ b/src/qemu/test_libvirtd_qemu.aug.in > >@@ -105,3 +105,4 @@ module Test_libvirtd_qemu = > > { "pr_helper" = "/usr/bin/qemu-pr-helper" } > > { "swtpm_user" = "tss" } > > { "swtpm_group" = "tss" } > >+{ "state_lock_timeout" = "60" } > > This needs to be ordered properly - install the 'augeas' tool to see > where 'make check' fails. My fault, sorry for the mistake, and next patch will fix this. Thanks again, Jano. --- Best wishes Yi Wang-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
On Wed, Sep 05, 2018 at 04:49:59PM +0800, wang.yi59@zte.com.cn wrote: >Hi Jano, >thanks for your reply. > >> On Tue, Aug 28, 2018 at 04:40:16PM +0800, Yi Wang wrote: [...] >> >diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug >> >index ddc4bbf..f7287ae 100644 >> >--- a/src/qemu/libvirtd_qemu.aug >> >+++ b/src/qemu/libvirtd_qemu.aug >> >@@ -93,6 +93,7 @@ module Libvirtd_qemu = >> > | limits_entry "max_core" >> > | bool_entry "dump_guest_core" >> > | str_entry "stdio_handler" >> >+ | int_entry "state_lock_timeout" >> > >> >> here you add the option at the end of the 'process_entry' group >> >> > let device_entry = bool_entry "mac_filter" >> > | bool_entry "relaxed_acs_check" >> >diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf >> >index cd57b3c..8920a1a 100644 >> >--- a/src/qemu/qemu.conf >> >+++ b/src/qemu/qemu.conf >> >@@ -667,6 +667,16 @@ >> > # >> > #max_queued = 0 >> > >> >+ >> >+# When two or more threads want to work with the same domain they use a >> >+# job lock to mutually exclude each other. However, waiting for the lock >> >+# is limited up to state_lock_timeout seconds. >> >+# NB, strong recommendation to set the timeout longer than 30 seconds. >> >+# >> >+# Default is 30 >> >+# >> >+#state_lock_timeout = 60 >> >> But here in qemu.conf, you add it between the rpc entries. >> >> It seems we did not follow the structure with 'stdio_handler', >> but adding it either right after 'dump_guest_core' or at the end of file >> would be better than squeezing it between rpc entries. > >As Michal suggested in: >https://www.redhat.com/archives/libvir-list/2018-August/msg01693.html >max_queued and state_lock_timeout both refer to the same area, so I put it >here :) > My point was that the position in qemu.conf does not match the grouping in libvirtd_qemu.aug So if it's related to max_queued, it should also be in the rpc_entry group in the aug file. 
Jano -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
> On Wed, Sep 05, 2018 at 04:49:59PM +0800, wang.yi59@zte.com.cn wrote: > >Hi Jano, > >thanks for your reply. > > > >> On Tue, Aug 28, 2018 at 04:40:16PM +0800, Yi Wang wrote: > > [...] > > >> >diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug > >> >index ddc4bbf..f7287ae 100644 > >> >--- a/src/qemu/libvirtd_qemu.aug > >> >+++ b/src/qemu/libvirtd_qemu.aug > >> >@@ -93,6 +93,7 @@ module Libvirtd_qemu = > >> > | limits_entry "max_core" > >> > | bool_entry "dump_guest_core" > >> > | str_entry "stdio_handler" > >> >+ | int_entry "state_lock_timeout" > >> > > >> > >> here you add the option at the end of the 'process_entry' group > >> > >> > let device_entry = bool_entry "mac_filter" > >> > | bool_entry "relaxed_acs_check" > >> >diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf > >> >index cd57b3c..8920a1a 100644 > >> >--- a/src/qemu/qemu.conf > >> >+++ b/src/qemu/qemu.conf > >> >@@ -667,6 +667,16 @@ > >> > # > >> > #max_queued = 0 > >> > > >> >+ > >> >+# When two or more threads want to work with the same domain they use a > >> >+# job lock to mutually exclude each other. However, waiting for the lock > >> >+# is limited up to state_lock_timeout seconds. > >> >+# NB, strong recommendation to set the timeout longer than 30 seconds. > >> >+# > >> >+# Default is 30 > >> >+# > >> >+#state_lock_timeout = 60 > >> > >> But here in qemu.conf, you add it between the rpc entries. > >> > >> It seems we did not follow the structure with 'stdio_handler', > >> but adding it either right after 'dump_guest_core' or at the end of file > >> would be better than squeezing it between rpc entries. 
> > > >As Michal suggested in: > >https://www.redhat.com/archives/libvir-list/2018-August/msg01693.html > >max_queued and state_lock_timeout both refer to the same area, so I put it > >here :) > > > > My point was that the position in qemu.conf does not match the grouping > in libvirtd_qemu.aug > > So if it's related to max_queued, it should also be in the rpc_entry group > in the aug file. I got it, and I will adjust that in the next version. Thanks. --- Best wishes Yi Wang-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
© 2016 - 2024 Red Hat, Inc.