[PATCH v2 09/13] target/ppc: Improve helper_dcbz for user-only

Richard Henderson posted 13 patches 5 months, 3 weeks ago
There is a newer version of this series
[PATCH v2 09/13] target/ppc: Improve helper_dcbz for user-only
Posted by Richard Henderson 5 months, 3 weeks ago
Mark the reserve_addr check unlikely.  Use tlb_vaddr_to_host
instead of probe_write, relying on the memset itself to test
for page writability.  Use set/clear_helper_retaddr so that
we can properly unwind on segfault.

With this, a trivial loop around guest memset will spend
nearly 50% of runtime within helper_dcbz and host memset.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/ppc/mem_helper.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 24bae3b80c..fa4c4f9fa9 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -280,20 +280,26 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
     addr &= mask;
 
     /* Check reservation */
-    if ((env->reserve_addr & mask) == addr)  {
+    if (unlikely((env->reserve_addr & mask) == addr))  {
         env->reserve_addr = (target_ulong)-1ULL;
     }
 
     /* Try fast path translate */
+#ifdef CONFIG_USER_ONLY
+    haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, mmu_idx);
+#else
     haddr = probe_write(env, addr, dcbz_size, mmu_idx, retaddr);
-    if (haddr) {
-        memset(haddr, 0, dcbz_size);
-    } else {
+    if (unlikely(!haddr)) {
         /* Slow path */
         for (int i = 0; i < dcbz_size; i += 8) {
             cpu_stq_mmuidx_ra(env, addr + i, 0, mmu_idx, retaddr);
         }
     }
+#endif
+
+    set_helper_retaddr(retaddr);
+    memset(haddr, 0, dcbz_size);
+    clear_helper_retaddr();
 }
 
 void helper_dcbz(CPUPPCState *env, target_ulong addr, int mmu_idx)
-- 
2.43.0
Re: [PATCH v2 09/13] target/ppc: Improve helper_dcbz for user-only
Posted by BALATON Zoltan 5 months, 3 weeks ago
On Tue, 9 Jul 2024, Richard Henderson wrote:
> Mark the reserve_addr check unlikely.  Use tlb_vaddr_to_host
> instead of probe_write, relying on the memset itself to test
> for page writability.  Use set/clear_helper_retaddr so that
> we can properly unwind on segfault.
>
> With this, a trivial loop around guest memset will spend
> nearly 50% of runtime within helper_dcbz and host memset.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/ppc/mem_helper.c | 14 ++++++++++----
> 1 file changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
> index 24bae3b80c..fa4c4f9fa9 100644
> --- a/target/ppc/mem_helper.c
> +++ b/target/ppc/mem_helper.c
> @@ -280,20 +280,26 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
>     addr &= mask;
>
>     /* Check reservation */
> -    if ((env->reserve_addr & mask) == addr)  {
> +    if (unlikely((env->reserve_addr & mask) == addr))  {
>         env->reserve_addr = (target_ulong)-1ULL;
>     }
>
>     /* Try fast path translate */
> +#ifdef CONFIG_USER_ONLY
> +    haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, mmu_idx);
> +#else
>     haddr = probe_write(env, addr, dcbz_size, mmu_idx, retaddr);
> -    if (haddr) {
> -        memset(haddr, 0, dcbz_size);
> -    } else {
> +    if (unlikely(!haddr)) {
>         /* Slow path */
>         for (int i = 0; i < dcbz_size; i += 8) {
>             cpu_stq_mmuidx_ra(env, addr + i, 0, mmu_idx, retaddr);
>         }

Is a return needed here to only get to memset below when haddr != NULL?

Regards,
BALATON Zoltan

>     }
> +#endif
> +
> +    set_helper_retaddr(retaddr);
> +    memset(haddr, 0, dcbz_size);
> +    clear_helper_retaddr();
> }
>
> void helper_dcbz(CPUPPCState *env, target_ulong addr, int mmu_idx)
>
Re: [PATCH v2 09/13] target/ppc: Improve helper_dcbz for user-only
Posted by Richard Henderson 5 months, 3 weeks ago
On 7/10/24 05:25, BALATON Zoltan wrote:
> On Tue, 9 Jul 2024, Richard Henderson wrote:
>> Mark the reserve_addr check unlikely.  Use tlb_vaddr_to_host
>> instead of probe_write, relying on the memset itself to test
>> for page writability.  Use set/clear_helper_retaddr so that
>> we can properly unwind on segfault.
>>
>> With this, a trivial loop around guest memset will spend
>> nearly 50% of runtime within helper_dcbz and host memset.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>> target/ppc/mem_helper.c | 14 ++++++++++----
>> 1 file changed, 10 insertions(+), 4 deletions(-)
>>
>> diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
>> index 24bae3b80c..fa4c4f9fa9 100644
>> --- a/target/ppc/mem_helper.c
>> +++ b/target/ppc/mem_helper.c
>> @@ -280,20 +280,26 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
>>     addr &= mask;
>>
>>     /* Check reservation */
>> -    if ((env->reserve_addr & mask) == addr)  {
>> +    if (unlikely((env->reserve_addr & mask) == addr))  {
>>         env->reserve_addr = (target_ulong)-1ULL;
>>     }
>>
>>     /* Try fast path translate */
>> +#ifdef CONFIG_USER_ONLY
>> +    haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, mmu_idx);
>> +#else
>>     haddr = probe_write(env, addr, dcbz_size, mmu_idx, retaddr);
>> -    if (haddr) {
>> -        memset(haddr, 0, dcbz_size);
>> -    } else {
>> +    if (unlikely(!haddr)) {
>>         /* Slow path */
>>         for (int i = 0; i < dcbz_size; i += 8) {
>>             cpu_stq_mmuidx_ra(env, addr + i, 0, mmu_idx, retaddr);
>>         }
> 
> Is a return needed here to only get to memset below when haddr != NULL?

Oops, yes.


r~