[PATCH 36/43] target/ppc/mmu-hash32: Remove some static inlines from header

BALATON Zoltan posted 43 patches 6 months ago
Maintainers: Nicholas Piggin <npiggin@gmail.com>, Daniel Henrique Barboza <danielhb413@gmail.com>, David Gibson <david@gibson.dropbear.id.au>, Harsh Prateek Bora <harshpb@linux.ibm.com>
[PATCH 36/43] target/ppc/mmu-hash32: Remove some static inlines from header
Posted by BALATON Zoltan 6 months ago
Two of these are not used anywhere and the other two are used only
once and can be inlined and removed from the header.

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
---
 target/ppc/mmu-hash32.c |  5 +++--
 target/ppc/mmu-hash32.h | 32 --------------------------------
 2 files changed, 3 insertions(+), 34 deletions(-)

diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
index a2c0ac05d2..7a6a674f8a 100644
--- a/target/ppc/mmu-hash32.c
+++ b/target/ppc/mmu-hash32.c
@@ -206,17 +206,18 @@ static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off,
 {
     hwaddr pte_offset = pteg_off;
     target_ulong pte0, pte1;
+    hwaddr base = ppc_hash32_hpt_base(cpu);
     int i;
 
     for (i = 0; i < HPTES_PER_GROUP; i++) {
-        pte0 = ppc_hash32_load_hpte0(cpu, pte_offset);
+        pte0 = ldl_phys(CPU(cpu)->as, base + pte_offset);
         /*
          * pte0 contains the valid bit and must be read before pte1,
          * otherwise we might see an old pte1 with a new valid bit and
          * thus an inconsistent hpte value
          */
         smp_rmb();
-        pte1 = ppc_hash32_load_hpte1(cpu, pte_offset);
+        pte1 = ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2);
 
         if ((pte0 & HPTE32_V_VALID)
             && (secondary == !!(pte0 & HPTE32_V_SECONDARY))
diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
index 2838de031c..4db55fb0a0 100644
--- a/target/ppc/mmu-hash32.h
+++ b/target/ppc/mmu-hash32.h
@@ -69,38 +69,6 @@ static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu)
     return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0xFFFF;
 }
 
-static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu,
-                                                 hwaddr pte_offset)
-{
-    target_ulong base = ppc_hash32_hpt_base(cpu);
-
-    return ldl_phys(CPU(cpu)->as, base + pte_offset);
-}
-
-static inline target_ulong ppc_hash32_load_hpte1(PowerPCCPU *cpu,
-                                                 hwaddr pte_offset)
-{
-    target_ulong base = ppc_hash32_hpt_base(cpu);
-
-    return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2);
-}
-
-static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu,
-                                          hwaddr pte_offset, target_ulong pte0)
-{
-    target_ulong base = ppc_hash32_hpt_base(cpu);
-
-    stl_phys(CPU(cpu)->as, base + pte_offset, pte0);
-}
-
-static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu,
-                                          hwaddr pte_offset, target_ulong pte1)
-{
-    target_ulong base = ppc_hash32_hpt_base(cpu);
-
-    stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
-}
-
 static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
 {
     return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu);
-- 
2.30.9
Re: [PATCH 36/43] target/ppc/mmu-hash32: Remove some static inlines from header
Posted by Nicholas Piggin 4 months, 3 weeks ago
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Two of these are not used anywhere and the other two are used only
> once and can be inlined and removed from the header.

I'd prefer to put these in the .c file. Probably calculating the
base once would generate marginally better code since it would not
have to keep reloading it (since there is a barrier there it can't
cache the value).

Thanks,
Nick

>
> Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
> ---
>  target/ppc/mmu-hash32.c |  5 +++--
>  target/ppc/mmu-hash32.h | 32 --------------------------------
>  2 files changed, 3 insertions(+), 34 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index a2c0ac05d2..7a6a674f8a 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -206,17 +206,18 @@ static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off,
>  {
>      hwaddr pte_offset = pteg_off;
>      target_ulong pte0, pte1;
> +    hwaddr base = ppc_hash32_hpt_base(cpu);
>      int i;
>  
>      for (i = 0; i < HPTES_PER_GROUP; i++) {
> -        pte0 = ppc_hash32_load_hpte0(cpu, pte_offset);
> +        pte0 = ldl_phys(CPU(cpu)->as, base + pte_offset);
>          /*
>           * pte0 contains the valid bit and must be read before pte1,
>           * otherwise we might see an old pte1 with a new valid bit and
>           * thus an inconsistent hpte value
>           */
>          smp_rmb();
> -        pte1 = ppc_hash32_load_hpte1(cpu, pte_offset);
> +        pte1 = ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2);
>  
>          if ((pte0 & HPTE32_V_VALID)
>              && (secondary == !!(pte0 & HPTE32_V_SECONDARY))
> diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
> index 2838de031c..4db55fb0a0 100644
> --- a/target/ppc/mmu-hash32.h
> +++ b/target/ppc/mmu-hash32.h
> @@ -69,38 +69,6 @@ static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu)
>      return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0xFFFF;
>  }
>  
> -static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu,
> -                                                 hwaddr pte_offset)
> -{
> -    target_ulong base = ppc_hash32_hpt_base(cpu);
> -
> -    return ldl_phys(CPU(cpu)->as, base + pte_offset);
> -}
> -
> -static inline target_ulong ppc_hash32_load_hpte1(PowerPCCPU *cpu,
> -                                                 hwaddr pte_offset)
> -{
> -    target_ulong base = ppc_hash32_hpt_base(cpu);
> -
> -    return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2);
> -}
> -
> -static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu,
> -                                          hwaddr pte_offset, target_ulong pte0)
> -{
> -    target_ulong base = ppc_hash32_hpt_base(cpu);
> -
> -    stl_phys(CPU(cpu)->as, base + pte_offset, pte0);
> -}
> -
> -static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu,
> -                                          hwaddr pte_offset, target_ulong pte1)
> -{
> -    target_ulong base = ppc_hash32_hpt_base(cpu);
> -
> -    stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
> -}
> -
>  static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
>  {
>      return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu);
Re: [PATCH 36/43] target/ppc/mmu-hash32: Remove some static inlines from header
Posted by BALATON Zoltan 4 months, 3 weeks ago
On Thu, 4 Jul 2024, Nicholas Piggin wrote:
> On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
>> Two of these are not used anywhere and the other two are used only
>> once and can be inlined and removed from the header.
>
> I'd prefer to put these in the .c file. Probably calculating the
> base once would generate marginally better code since it would not
> have to keep reloading it (since there is a barrier there it can't
> cache the value).

These aren't even used anywhere but one function and they are inefficient 
becuase they would call ppc_hash32_hpt_base() on each call. Next patch 
even removes base and calculates pte_addr once before the loop, then it's 
quite straight forward what these read from guest memory even without 
inline functions. I see no reason to keep these inline functions.

> Thanks,
> Nick
>
>>
>> Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
>> ---
>>  target/ppc/mmu-hash32.c |  5 +++--
>>  target/ppc/mmu-hash32.h | 32 --------------------------------
>>  2 files changed, 3 insertions(+), 34 deletions(-)
>>
>> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
>> index a2c0ac05d2..7a6a674f8a 100644
>> --- a/target/ppc/mmu-hash32.c
>> +++ b/target/ppc/mmu-hash32.c
>> @@ -206,17 +206,18 @@ static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off,
>>  {
>>      hwaddr pte_offset = pteg_off;
>>      target_ulong pte0, pte1;
>> +    hwaddr base = ppc_hash32_hpt_base(cpu);
>>      int i;
>>
>>      for (i = 0; i < HPTES_PER_GROUP; i++) {
>> -        pte0 = ppc_hash32_load_hpte0(cpu, pte_offset);
>> +        pte0 = ldl_phys(CPU(cpu)->as, base + pte_offset);
>>          /*
>>           * pte0 contains the valid bit and must be read before pte1,
>>           * otherwise we might see an old pte1 with a new valid bit and
>>           * thus an inconsistent hpte value
>>           */
>>          smp_rmb();
>> -        pte1 = ppc_hash32_load_hpte1(cpu, pte_offset);
>> +        pte1 = ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2);
>>
>>          if ((pte0 & HPTE32_V_VALID)
>>              && (secondary == !!(pte0 & HPTE32_V_SECONDARY))
>> diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
>> index 2838de031c..4db55fb0a0 100644
>> --- a/target/ppc/mmu-hash32.h
>> +++ b/target/ppc/mmu-hash32.h
>> @@ -69,38 +69,6 @@ static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu)
>>      return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0xFFFF;
>>  }
>>
>> -static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu,
>> -                                                 hwaddr pte_offset)
>> -{
>> -    target_ulong base = ppc_hash32_hpt_base(cpu);
>> -
>> -    return ldl_phys(CPU(cpu)->as, base + pte_offset);
>> -}
>> -
>> -static inline target_ulong ppc_hash32_load_hpte1(PowerPCCPU *cpu,
>> -                                                 hwaddr pte_offset)
>> -{
>> -    target_ulong base = ppc_hash32_hpt_base(cpu);
>> -
>> -    return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2);
>> -}
>> -
>> -static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu,
>> -                                          hwaddr pte_offset, target_ulong pte0)
>> -{
>> -    target_ulong base = ppc_hash32_hpt_base(cpu);
>> -
>> -    stl_phys(CPU(cpu)->as, base + pte_offset, pte0);
>> -}
>> -
>> -static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu,
>> -                                          hwaddr pte_offset, target_ulong pte1)
>> -{
>> -    target_ulong base = ppc_hash32_hpt_base(cpu);
>> -
>> -    stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
>> -}
>> -
>>  static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
>>  {
>>      return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu);
>
>
Re: [PATCH 36/43] target/ppc/mmu-hash32: Remove some static inlines from header
Posted by Nicholas Piggin 4 months, 2 weeks ago
On Sun Jul 7, 2024 at 6:18 AM AEST, BALATON Zoltan wrote:
> On Thu, 4 Jul 2024, Nicholas Piggin wrote:
> > On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> >> Two of these are not used anywhere and the other two are used only
> >> once and can be inlined and removed from the header.
> >
> > I'd prefer to put these in the .c file. Probably calculating the
> > base once would generate marginally better code since it would not
> > have to keep reloading it (since there is a barrier there it can't
> > cache the value).
>
> These aren't even used anywhere but one function and they are inefficient 
> becuase they would call ppc_hash32_hpt_base() on each call. Next patch 
> even removes base and calculates pte_addr once before the loop, then it's 
> quite straight forward what these read from guest memory even without 
> inline functions. I see no reason to keep these inline functions.

Make them take the hash base instead of cpu if that's the performance
issue to solve. And open coded access can always be converted to use
it.

Thanks,
Nick
Re: [PATCH 36/43] target/ppc/mmu-hash32: Remove some static inlines from header
Posted by BALATON Zoltan 4 months, 2 weeks ago
On Mon, 8 Jul 2024, Nicholas Piggin wrote:
> On Sun Jul 7, 2024 at 6:18 AM AEST, BALATON Zoltan wrote:
>> On Thu, 4 Jul 2024, Nicholas Piggin wrote:
>>> On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
>>>> Two of these are not used anywhere and the other two are used only
>>>> once and can be inlined and removed from the header.
>>>
>>> I'd prefer to put these in the .c file. Probably calculating the
>>> base once would generate marginally better code since it would not
>>> have to keep reloading it (since there is a barrier there it can't
>>> cache the value).
>>
>> These aren't even used anywhere but one function and they are inefficient
>> becuase they would call ppc_hash32_hpt_base() on each call. Next patch
>> even removes base and calculates pte_addr once before the loop, then it's
>> quite straight forward what these read from guest memory even without
>> inline functions. I see no reason to keep these inline functions.
>
> Make them take the hash base instead of cpu if that's the performance
> issue to solve. And open coded access can always be converted to use
> it.

If you look at the next patch you can see the base calculatoin is gone and 
it does pte_addr = ppc_hash32_hpt_base(cpu) + pteg_off once at the 
beginning before the loop, then the function you want to make is just: 
pte0 = ldl_phys(CPU(cpu)->as, pte_addr). I don't think it's worth making 
it a separate fucntion .The other one
pte1 = ldl_phys(CPU(cpu)->as, pte_addr + HASH_PTE_SIZE_32 / 2)
still has some calculation left but that's pretty straight forward. Maybe 
we could add a mcacro for HASH_PTE_SIZE_32 / 2 like HARH_PTE1_OFFS or 
something but I don't think that or having separate functions for this 
would add any more clarity just unnecessary complication. Yout one line 
helpers would only be used by ppc_hash32_pteg_search which is already a 
helper tp get pte values so open coding it within this function is OK in 
my opinion. There are no other places your helper functions would be 
needed.

Regards,
BALATON Zoltlan