Currently, dying due to one of the core-dumping signals (those for
which core_dump_signal() returns true) results in a deadlock, because
dump_core_and_abort() calls start_exclusive() twice: first via
stop_all_tasks(), and then via preexit_cleanup() ->
qemu_plugin_user_exit().

There are a number of ways to solve this: resume after dumping core;
check cpu_in_exclusive_context() in qemu_plugin_user_exit(); or make
{start,end}_exclusive() recursive. Pick the last option, since it's
the most straightforward one.
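
To illustrate the recursion-counter idea, here is a minimal,
single-threaded sketch (hypothetical helper names, not the actual QEMU
functions): a nested "start" only bumps a per-CPU counter, and only the
matching outermost "end" really leaves the exclusive section.

#include <assert.h>
#include <stdio.h>

static int exclusive_context_count;

static void start_exclusive_sketch(void)
{
    if (exclusive_context_count) {
        /* Already exclusive on this thread: just nest. */
        exclusive_context_count++;
        return;
    }
    /* (In QEMU, this is where all other vCPUs are brought to a stop.) */
    exclusive_context_count++;
}

static void end_exclusive_sketch(void)
{
    assert(exclusive_context_count > 0);
    if (--exclusive_context_count) {
        /* Still nested: stay exclusive until the outermost end. */
        return;
    }
    /* (In QEMU, this is where the other vCPUs may run again.) */
}

int main(void)
{
    start_exclusive_sketch();   /* e.g. stop_all_tasks() */
    start_exclusive_sketch();   /* e.g. qemu_plugin_user_exit(): no deadlock */
    end_exclusive_sketch();
    end_exclusive_sketch();
    printf("nesting count back to %d\n", exclusive_context_count);
    return 0;
}
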
Fixes: da91c1920242 ("linux-user: Clean up when exiting due to a signal")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
cpus-common.c | 12 ++++++++++--
include/hw/core/cpu.h | 4 ++--
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/cpus-common.c b/cpus-common.c
index 793364dc0ed..a0c52cd187f 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -192,6 +192,11 @@ void start_exclusive(void)
     CPUState *other_cpu;
     int running_cpus;
 
+    if (current_cpu->exclusive_context_count) {
+        current_cpu->exclusive_context_count++;
+        return;
+    }
+
     qemu_mutex_lock(&qemu_cpu_list_lock);
     exclusive_idle();
 
@@ -219,13 +224,16 @@ void start_exclusive(void)
      */
     qemu_mutex_unlock(&qemu_cpu_list_lock);
 
-    current_cpu->in_exclusive_context = true;
+    current_cpu->exclusive_context_count++;
 }
 
 /* Finish an exclusive operation. */
 void end_exclusive(void)
 {
-    current_cpu->in_exclusive_context = false;
+    current_cpu->exclusive_context_count--;
+    if (current_cpu->exclusive_context_count) {
+        return;
+    }
 
     qemu_mutex_lock(&qemu_cpu_list_lock);
     qatomic_set(&pending_cpus, 0);
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 2417597236b..671f041bec6 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -349,7 +349,7 @@ struct CPUState {
     bool unplug;
     bool crash_occurred;
     bool exit_request;
-    bool in_exclusive_context;
+    int exclusive_context_count;
     uint32_t cflags_next_tb;
     /* updates protected by BQL */
     uint32_t interrupt_request;
@@ -758,7 +758,7 @@ void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data
  */
 static inline bool cpu_in_exclusive_context(const CPUState *cpu)
 {
-    return cpu->in_exclusive_context;
+    return cpu->exclusive_context_count;
 }
 
 /**
--
2.39.1
On Wed, 2023-02-01 at 01:46 +0100, Ilya Leoshkevich wrote:
> Currently, dying due to one of the core-dumping signals (those for
> which core_dump_signal() returns true) results in a deadlock, because
> dump_core_and_abort() calls start_exclusive() twice: first via
> stop_all_tasks(), and then via preexit_cleanup() ->
> qemu_plugin_user_exit().
> 
> There are a number of ways to solve this: resume after dumping core;
> check cpu_in_exclusive_context() in qemu_plugin_user_exit(); or make
> {start,end}_exclusive() recursive. Pick the last option, since it's
> the most straightforward one.
> 
> Fixes: da91c1920242 ("linux-user: Clean up when exiting due to a
> signal")
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>

Hi,

I noticed that fork()ed CPUs start with in_exclusive_context set (in
this patch it is renamed to exclusive_context_count, but the point
stands). That was not important before, since only pending_cpus decided
what happens in start_exclusive()/end_exclusive(). Now that
exclusive_context_count is also important, we need something like:

--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -161,13 +161,15 @@ void fork_end(int child)
         }
         qemu_init_cpu_list();
         gdbserver_fork(thread_cpu);
-        /* qemu_init_cpu_list() takes care of reinitializing the
-         * exclusive state, so we don't need to end_exclusive() here.
-         */
     } else {
         cpu_list_unlock();
-        end_exclusive();
     }
+    /*
+     * qemu_init_cpu_list() reinitialized the child exclusive state, but we
+     * also need to keep current_cpu consistent, so call end_exclusive() for
+     * both child and parent.
+     */
+    end_exclusive();
 }
 
 __thread CPUState *thread_cpu;

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 1f8c10f8ef9..70fad4bed01 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -6776,6 +6776,7 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
             cpu_clone_regs_parent(env, flags);
             fork_end(0);
         }
+        g_assert(!cpu_in_exclusive_context(cpu));
     }
     return ret;
 }

I can include this in v2, if the overall recursive lock approach is
considered appropriate.

Best regards,
Ilya
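
(Why the child sees a nonzero count at all comes down to plain fork()
semantics: the child inherits a copy of the parent's memory, counter
included, so it must drop it explicitly. A tiny standalone illustration
follows; the counter variable is a hypothetical stand-in, not the QEMU
code.)

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/* Stand-in for CPUState::exclusive_context_count. */
static int exclusive_context_count;

int main(void)
{
    exclusive_context_count++;          /* parent enters an exclusive section */

    pid_t pid = fork();
    if (pid == 0) {
        /* The child inherits the parent's memory image, so the counter is
         * still 1 here and has to be dropped explicitly (cf. fork_end()). */
        printf("child sees count = %d\n", exclusive_context_count);
        _exit(0);
    }

    waitpid(pid, NULL, 0);
    exclusive_context_count--;          /* parent leaves the exclusive section */
    printf("parent count back to %d\n", exclusive_context_count);
    return 0;
}
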
On 2/1/23 03:07, Ilya Leoshkevich wrote:
> I can include this in v2, if the overall recursive lock approach is
> considered appropriate.

I think it's a good idea. Certainly an improvement over removing the
lock from the plugin code, like I tried.

r~