> Zhengui's theory that notify_me doesn't work properly on ARM is more
> promising, but he couldn't provide a clear explanation of why he thought
> notify_me is involved. In particular, I would have expected notify_me to
> be wrong if the qemu_poll_ns call came from aio_ctx_dispatch, for example:
>
>
> glib_pollfds_fill
> g_main_context_prepare
> aio_ctx_prepare
> atomic_or(&ctx->notify_me, 1)
> qemu_poll_ns
> glib_pollfds_poll
> g_main_context_check
> aio_ctx_check
> atomic_and(&ctx->notify_me, ~1)
> g_main_context_dispatch
> aio_ctx_dispatch
> /* do something for event */
> qemu_poll_ns
>
Paolo,

I tried confining execution to a single NUMA domain (cpu & mem) and
still faced the issue. I then added a mutex "ctx->notify_me_lcktest"
to the context to protect "ctx->notify_me", as shown below, and it
seems to have either fixed or mitigated the problem.

Before the change I was able to cause the hang once every 3 or 4 runs;
I have now run qemu-img convert more than 30 times and couldn't
reproduce it again.

Next step is to play with the barriers and check why the existing ones
aren't enough for ordering access to ctx->notify_me ... or should I
try/do something else, in your opinion?
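
For reference, the pairing I understand the existing barriers are meant
to enforce is sketched below. This is illustrative C11-atomics code of
my own, not QEMU code: notify_me and scheduled stand in for
ctx->notify_me and bh->scheduled, and the two helper names are made up.

#include <stdatomic.h>
#include <stdbool.h>

/* Illustrative sketch only (not QEMU code): the Dekker-style pairing
 * between the poller (aio_ctx_prepare + poll) and the notifier
 * (aio_notify), using C11 atomics in place of QEMU's atomic_*(). */
static atomic_uint notify_me;   /* stands in for ctx->notify_me */
static atomic_bool scheduled;   /* stands in for bh->scheduled  */

/* Poller: advertise that we may block, then check for pending work. */
static bool poller_may_block(void)
{
    atomic_fetch_or(&notify_me, 1);            /* "I am about to sleep" */
    atomic_thread_fence(memory_order_seq_cst); /* order store vs. load  */
    return !atomic_load(&scheduled);           /* block only if idle    */
}

/* Notifier: publish the work, then check if the poller must be woken. */
static bool notifier_must_wake(void)
{
    atomic_store(&scheduled, true);            /* publish the event     */
    atomic_thread_fence(memory_order_seq_cst); /* order store vs. load  */
    return atomic_load(&notify_me) != 0;       /* poller may be asleep  */
}

If either of those full barriers is missing, or turns out weaker than
expected on aarch64, the poller can miss "scheduled" at the same time
the notifier misses "notify_me", and both sides go to sleep: that would
match this hang. It would also explain why the mutex helps, since
qemu_mutex_lock/unlock already impose ordering around every notify_me
access.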
This is the arch/machine (Huawei D06):
$ lscpu
Architecture: aarch64
Byte Order: Little Endian
CPU(s): 96
On-line CPU(s) list: 0-95
Thread(s) per core: 1
Core(s) per socket: 48
Socket(s): 2
NUMA node(s): 4
Vendor ID: 0x48
Model: 0
Stepping: 0x0
CPU max MHz: 2000.0000
CPU min MHz: 200.0000
BogoMIPS: 200.00
L1d cache: 64K
L1i cache: 64K
L2 cache: 512K
L3 cache: 32768K
NUMA node0 CPU(s): 0-23
NUMA node1 CPU(s): 24-47
NUMA node2 CPU(s): 48-71
NUMA node3 CPU(s): 72-95
Flags: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics
cpuid asimdrdm dcpop
----
diff --git a/include/block/aio.h b/include/block/aio.h
index 0ca25dfec6..0724086d91 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -84,6 +84,7 @@ struct AioContext {
      * dispatch phase, hence a simple counter is enough for them.
      */
     uint32_t notify_me;
+    QemuMutex notify_me_lcktest;
 
     /* A lock to protect between QEMUBH and AioHandler adders and deleter,
      * and to ensure that no callbacks are removed while we're walking and
diff --git a/util/aio-posix.c b/util/aio-posix.c
index 51c41ed3c9..031d6e2997 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -529,7 +529,9 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
     bool progress;
     int64_t start_time, elapsed_time;
 
+    qemu_mutex_lock(&ctx->notify_me_lcktest);
     assert(ctx->notify_me);
+    qemu_mutex_unlock(&ctx->notify_me_lcktest);
     assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
 
     trace_run_poll_handlers_begin(ctx, max_ns, *timeout);
@@ -601,8 +603,10 @@ bool aio_poll(AioContext *ctx, bool blocking)
      * so disable the optimization now.
      */
     if (blocking) {
+        qemu_mutex_lock(&ctx->notify_me_lcktest);
         assert(in_aio_context_home_thread(ctx));
         atomic_add(&ctx->notify_me, 2);
+        qemu_mutex_unlock(&ctx->notify_me_lcktest);
     }
 
     qemu_lockcnt_inc(&ctx->list_lock);
@@ -647,8 +651,10 @@ bool aio_poll(AioContext *ctx, bool blocking)
     }
 
     if (blocking) {
+        qemu_mutex_lock(&ctx->notify_me_lcktest);
         atomic_sub(&ctx->notify_me, 2);
         aio_notify_accept(ctx);
+        qemu_mutex_unlock(&ctx->notify_me_lcktest);
     }
 
     /* Adjust polling time */
diff --git a/util/async.c b/util/async.c
index c10642a385..140e1e86f5 100644
--- a/util/async.c
+++ b/util/async.c
@@ -221,7 +221,9 @@ aio_ctx_prepare(GSource *source, gint *timeout)
 {
     AioContext *ctx = (AioContext *) source;
 
+    qemu_mutex_lock(&ctx->notify_me_lcktest);
     atomic_or(&ctx->notify_me, 1);
+    qemu_mutex_unlock(&ctx->notify_me_lcktest);
 
     /* We assume there is no timeout already supplied */
     *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
@@ -239,8 +241,10 @@ aio_ctx_check(GSource *source)
     AioContext *ctx = (AioContext *) source;
     QEMUBH *bh;
 
+    qemu_mutex_lock(&ctx->notify_me_lcktest);
     atomic_and(&ctx->notify_me, ~1);
     aio_notify_accept(ctx);
+    qemu_mutex_unlock(&ctx->notify_me_lcktest);
 
     for (bh = ctx->first_bh; bh; bh = bh->next) {
@@ -346,11 +350,13 @@ void aio_notify(AioContext *ctx)
     /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
      * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
      */
-    smp_mb();
+    //smp_mb();
+    qemu_mutex_lock(&ctx->notify_me_lcktest);
     if (ctx->notify_me) {
         event_notifier_set(&ctx->notifier);
         atomic_mb_set(&ctx->notified, true);
     }
+    qemu_mutex_unlock(&ctx->notify_me_lcktest);
 }
 
 void aio_notify_accept(AioContext *ctx)
@@ -424,6 +430,8 @@ AioContext *aio_context_new(Error **errp)
     ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
     QSLIST_INIT(&ctx->scheduled_coroutines);
 
+    qemu_rec_mutex_init(&ctx->notify_me_lcktest);
+
     aio_set_event_notifier(ctx, &ctx->notifier,
                            false,
                            (EventNotifierHandler *)