Although the hppa_is_pa20() helper is costly due to string comparisms in
object_dynamic_cast(), it is called quite often during memory lookups
and at each start of a block of instruction translations.
Speed hppa_is_pa20() up by calling object_dynamic_cast() only once at
CPU creation and store the result in the is_pa20 of struct CPUArchState.
Signed-off-by: Helge Deller <deller@gmx.de>
diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
index a31dc32a9f..08ac1ec068 100644
--- a/hw/hppa/machine.c
+++ b/hw/hppa/machine.c
@@ -281,6 +281,7 @@ static TranslateFn *machine_HP_common_init_cpus(MachineState *machine)
/* Create CPUs. */
for (unsigned int i = 0; i < smp_cpus; i++) {
cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type));
+ cpu[i]->env.is_pa20 = object_dynamic_cast(OBJECT(cpu[i]), TYPE_HPPA64_CPU);
}
/*
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index e45ba50a59..c37a701f44 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -208,6 +208,7 @@ typedef struct CPUArchState {
uint64_t fr[32];
uint64_t sr[8]; /* stored shifted into place for gva */
+ bool is_pa20;
uint32_t psw; /* All psw bits except the following: */
uint32_t psw_xb; /* X and B, in their normal positions */
target_ulong psw_n; /* boolean */
@@ -294,7 +295,7 @@ struct HPPACPUClass {
static inline bool hppa_is_pa20(CPUHPPAState *env)
{
- return object_dynamic_cast(OBJECT(env_cpu(env)), TYPE_HPPA64_CPU) != NULL;
+ return env->is_pa20;
}
static inline int HPPA_BTLB_ENTRIES(CPUHPPAState *env)
On 12/27/24 23:23, Helge Deller wrote:
> Although the hppa_is_pa20() helper is costly due to string comparisms in
> object_dynamic_cast(), it is called quite often during memory lookups
> and at each start of a block of instruction translations.
> Speed hppa_is_pa20() up by calling object_dynamic_cast() only once at
> CPU creation and store the result in the is_pa20 of struct CPUArchState.
>
> Signed-off-by: Helge Deller <deller@gmx.de>
>
> diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
> index a31dc32a9f..08ac1ec068 100644
> --- a/hw/hppa/machine.c
> +++ b/hw/hppa/machine.c
> @@ -281,6 +281,7 @@ static TranslateFn *machine_HP_common_init_cpus(MachineState *machine)
> /* Create CPUs. */
> for (unsigned int i = 0; i < smp_cpus; i++) {
> cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type));
> + cpu[i]->env.is_pa20 = object_dynamic_cast(OBJECT(cpu[i]), TYPE_HPPA64_CPU);
> }
This belongs in hppa_cpu_initfn, since it's internal to the workings of the cpu.
Otherwise,
> diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
> index e45ba50a59..c37a701f44 100644
> --- a/target/hppa/cpu.h
> +++ b/target/hppa/cpu.h
> @@ -208,6 +208,7 @@ typedef struct CPUArchState {
> uint64_t fr[32];
> uint64_t sr[8]; /* stored shifted into place for gva */
>
> + bool is_pa20;
This placement will interact badly with your reset function.
Probably better to put it at the end of ArchCPU.
r~
On Sat, 28 Dec 2024, Helge Deller wrote:
> Although the hppa_is_pa20() helper is costly due to string comparisms in
> object_dynamic_cast(), it is called quite often during memory lookups
> and at each start of a block of instruction translations.
> Speed hppa_is_pa20() up by calling object_dynamic_cast() only once at
> CPU creation and store the result in the is_pa20 of struct CPUArchState.
>
> Signed-off-by: Helge Deller <deller@gmx.de>
>
> diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
> index a31dc32a9f..08ac1ec068 100644
> --- a/hw/hppa/machine.c
> +++ b/hw/hppa/machine.c
> @@ -281,6 +281,7 @@ static TranslateFn *machine_HP_common_init_cpus(MachineState *machine)
> /* Create CPUs. */
> for (unsigned int i = 0; i < smp_cpus; i++) {
> cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type));
> + cpu[i]->env.is_pa20 = object_dynamic_cast(OBJECT(cpu[i]), TYPE_HPPA64_CPU);
> }
>
> /*
> diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
> index e45ba50a59..c37a701f44 100644
> --- a/target/hppa/cpu.h
> +++ b/target/hppa/cpu.h
> @@ -208,6 +208,7 @@ typedef struct CPUArchState {
> uint64_t fr[32];
> uint64_t sr[8]; /* stored shifted into place for gva */
>
> + bool is_pa20;
> uint32_t psw; /* All psw bits except the following: */
> uint32_t psw_xb; /* X and B, in their normal positions */
> target_ulong psw_n; /* boolean */
> @@ -294,7 +295,7 @@ struct HPPACPUClass {
>
> static inline bool hppa_is_pa20(CPUHPPAState *env)
> {
> - return object_dynamic_cast(OBJECT(env_cpu(env)), TYPE_HPPA64_CPU) != NULL;
> + return env->is_pa20;
> }
Now this function name is longer than what it extends to so maybe it would
be simpler to drop the inline function and use env->is_pa20 directly
where it's needed? Is there a reason to keep the function?
Regards,
BALATON Zoltan
>
> static inline int HPPA_BTLB_ENTRIES(CPUHPPAState *env)
>
>
On 12/28/24 12:16, BALATON Zoltan wrote:
> On Sat, 28 Dec 2024, Helge Deller wrote:
>> Although the hppa_is_pa20() helper is costly due to string comparisms in
>> object_dynamic_cast(), it is called quite often during memory lookups
>> and at each start of a block of instruction translations.
>> Speed hppa_is_pa20() up by calling object_dynamic_cast() only once at
>> CPU creation and store the result in the is_pa20 of struct CPUArchState.
>>
>> Signed-off-by: Helge Deller <deller@gmx.de>
>>
>> diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
>> index a31dc32a9f..08ac1ec068 100644
>> --- a/hw/hppa/machine.c
>> +++ b/hw/hppa/machine.c
>> @@ -281,6 +281,7 @@ static TranslateFn *machine_HP_common_init_cpus(MachineState *machine)
>> /* Create CPUs. */
>> for (unsigned int i = 0; i < smp_cpus; i++) {
>> cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type));
>> + cpu[i]->env.is_pa20 = object_dynamic_cast(OBJECT(cpu[i]), TYPE_HPPA64_CPU);
>> }
>>
>> /*
>> diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
>> index e45ba50a59..c37a701f44 100644
>> --- a/target/hppa/cpu.h
>> +++ b/target/hppa/cpu.h
>> @@ -208,6 +208,7 @@ typedef struct CPUArchState {
>> uint64_t fr[32];
>> uint64_t sr[8]; /* stored shifted into place for gva */
>>
>> + bool is_pa20;
>> uint32_t psw; /* All psw bits except the following: */
>> uint32_t psw_xb; /* X and B, in their normal positions */
>> target_ulong psw_n; /* boolean */
>> @@ -294,7 +295,7 @@ struct HPPACPUClass {
>>
>> static inline bool hppa_is_pa20(CPUHPPAState *env)
>> {
>> - return object_dynamic_cast(OBJECT(env_cpu(env)), TYPE_HPPA64_CPU) != NULL;
>> + return env->is_pa20;
>> }
>
> Now this function name is longer than what it extends to so maybe it
> would be simpler to drop the inline function and use env->is_pa20
> directly where it's needed?
Yes, that's a possible cleanup which can be done afterwards.
> Is there a reason to keep the function?
Personally I like it more than the "env->is_pa20".
Richard, any opinion from your side? Should I send a such a replacement patch?
Helge
On 12/28/24 13:12, Helge Deller wrote:
> On 12/28/24 12:16, BALATON Zoltan wrote:
>>> static inline bool hppa_is_pa20(CPUHPPAState *env)
>>> {
>>> - return object_dynamic_cast(OBJECT(env_cpu(env)), TYPE_HPPA64_CPU) != NULL;
>>> + return env->is_pa20;
>>> }
>>
>> Now this function name is longer than what it extends to so maybe it
>> would be simpler to drop the inline function and use env->is_pa20
>> directly where it's needed?
>
> Yes, that's a possible cleanup which can be done afterwards.
>
>> Is there a reason to keep the function?
>
> Personally I like it more than the "env->is_pa20".
> Richard, any opinion from your side? Should I send a such a replacement patch?
I like keeping the accessor function.
r~
© 2016 - 2025 Red Hat, Inc.