There is only one caller, tlb_set_page_with_attrs. We cannot
inline the entire function because the AddressSpaceDispatch
structure is private to exec.c, and cannot easily be moved to
include/exec/memory-internal.h.
Compute is_ram and is_romd once within tlb_set_page_with_attrs.
Fold the number of tests against these predicates. Compute
cpu_physical_memory_is_clean outside of the tlb lock region.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/exec-all.h | 6 +---
accel/tcg/cputlb.c | 68 ++++++++++++++++++++++++++---------------
exec.c | 22 ++-----------
3 files changed, 47 insertions(+), 49 deletions(-)
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 81b02eb2fe..49db07ba0b 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -509,11 +509,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
hwaddr *xlat, hwaddr *plen,
MemTxAttrs attrs, int *prot);
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
- MemoryRegionSection *section,
- target_ulong vaddr,
- hwaddr paddr, hwaddr xlat,
- int prot,
- target_ulong *address);
+ MemoryRegionSection *section);
#endif
/* vl.c */
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 05212ff244..05530a8b0c 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -704,13 +704,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
MemoryRegionSection *section;
unsigned int index;
target_ulong address;
- target_ulong code_address;
+ target_ulong write_address;
uintptr_t addend;
CPUTLBEntry *te, tn;
hwaddr iotlb, xlat, sz, paddr_page;
target_ulong vaddr_page;
int asidx = cpu_asidx_from_attrs(cpu, attrs);
int wp_flags;
+ bool is_ram, is_romd;
assert_cpu_is_self(cpu);
@@ -739,18 +740,46 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
if (attrs.byte_swap) {
address |= TLB_BSWAP;
}
- if (!memory_region_is_ram(section->mr) &&
- !memory_region_is_romd(section->mr)) {
- /* IO memory case */
- address |= TLB_MMIO;
- addend = 0;
- } else {
+
+ is_ram = memory_region_is_ram(section->mr);
+ is_romd = memory_region_is_romd(section->mr);
+
+ if (is_ram || is_romd) {
+ /* RAM and ROMD both have associated host memory. */
addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
+ } else {
+ /* I/O does not; force the host address to NULL. */
+ addend = 0;
+ }
+
+ write_address = address;
+ if (is_ram) {
+ iotlb = memory_region_get_ram_addr(section->mr) + xlat;
+ /*
+ * Computing is_clean is expensive; avoid all that unless
+ * the page is actually writable.
+ */
+ if (prot & PAGE_WRITE) {
+ if (section->readonly) {
+ write_address |= TLB_ROM;
+ } else if (cpu_physical_memory_is_clean(iotlb)) {
+ write_address |= TLB_NOTDIRTY;
+ }
+ }
+ } else {
+ /* I/O or ROMD */
+ iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
+ /*
+ * Writes to romd devices must go through MMIO to enable write.
+ * Reads to romd devices go through the ram_ptr found above,
+ * but of course reads to I/O must go through MMIO.
+ */
+ write_address |= TLB_MMIO;
+ if (!is_romd) {
+ address = write_address;
+ }
}
- code_address = address;
- iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
- paddr_page, xlat, prot, &address);
wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
TARGET_PAGE_SIZE);
@@ -790,8 +819,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
/*
* At this point iotlb contains a physical section number in the lower
* TARGET_PAGE_BITS, and either
- * + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
- * + the offset within section->mr of the page base (otherwise)
+ * + the ram_addr_t of the page base of the target RAM (RAM)
+ * + the offset within section->mr of the page base (I/O, ROMD)
* We subtract the vaddr_page (which is page aligned and thus won't
* disturb the low bits) to give an offset which can be added to the
* (non-page-aligned) vaddr of the eventual memory access to get
@@ -814,25 +843,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
}
if (prot & PAGE_EXEC) {
- tn.addr_code = code_address;
+ tn.addr_code = address;
} else {
tn.addr_code = -1;
}
tn.addr_write = -1;
if (prot & PAGE_WRITE) {
- tn.addr_write = address;
- if (memory_region_is_romd(section->mr)) {
- /* Use the MMIO path so that the device can switch states. */
- tn.addr_write |= TLB_MMIO;
- } else if (memory_region_is_ram(section->mr)) {
- if (section->readonly) {
- tn.addr_write |= TLB_ROM;
- } else if (cpu_physical_memory_is_clean(
- memory_region_get_ram_addr(section->mr) + xlat)) {
- tn.addr_write |= TLB_NOTDIRTY;
- }
- }
+ tn.addr_write = write_address;
if (prot & PAGE_WRITE_INV) {
tn.addr_write |= TLB_INVALID_MASK;
}
diff --git a/exec.c b/exec.c
index dc7001f115..961d7d6497 100644
--- a/exec.c
+++ b/exec.c
@@ -1459,26 +1459,10 @@ bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
- MemoryRegionSection *section,
- target_ulong vaddr,
- hwaddr paddr, hwaddr xlat,
- int prot,
- target_ulong *address)
+ MemoryRegionSection *section)
{
- hwaddr iotlb;
-
- if (memory_region_is_ram(section->mr)) {
- /* Normal RAM. */
- iotlb = memory_region_get_ram_addr(section->mr) + xlat;
- } else {
- AddressSpaceDispatch *d;
-
- d = flatview_to_dispatch(section->fv);
- iotlb = section - d->map.sections;
- iotlb += xlat;
- }
-
- return iotlb;
+ AddressSpaceDispatch *d = flatview_to_dispatch(section->fv);
+ return section - d->map.sections;
}
#endif /* defined(CONFIG_USER_ONLY) */
--
2.17.1
On 24.09.19 00:59, Richard Henderson wrote:
> There is only one caller, tlb_set_page_with_attrs. We cannot
> inline the entire function because the AddressSpaceDispatch
> structure is private to exec.c, and cannot easily be moved to
> include/exec/memory-internal.h.
>
> Compute is_ram and is_romd once within tlb_set_page_with_attrs.
> Fold the number of tests against these predicates. Compute
> cpu_physical_memory_is_clean outside of the tlb lock region.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> include/exec/exec-all.h | 6 +---
> accel/tcg/cputlb.c | 68 ++++++++++++++++++++++++++---------------
> exec.c | 22 ++-----------
> 3 files changed, 47 insertions(+), 49 deletions(-)
>
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 81b02eb2fe..49db07ba0b 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -509,11 +509,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
> hwaddr *xlat, hwaddr *plen,
> MemTxAttrs attrs, int *prot);
> hwaddr memory_region_section_get_iotlb(CPUState *cpu,
> - MemoryRegionSection *section,
> - target_ulong vaddr,
> - hwaddr paddr, hwaddr xlat,
> - int prot,
> - target_ulong *address);
> + MemoryRegionSection *section);
> #endif
>
> /* vl.c */
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index 05212ff244..05530a8b0c 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -704,13 +704,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> MemoryRegionSection *section;
> unsigned int index;
> target_ulong address;
> - target_ulong code_address;
> + target_ulong write_address;
> uintptr_t addend;
> CPUTLBEntry *te, tn;
> hwaddr iotlb, xlat, sz, paddr_page;
> target_ulong vaddr_page;
> int asidx = cpu_asidx_from_attrs(cpu, attrs);
> int wp_flags;
> + bool is_ram, is_romd;
>
> assert_cpu_is_self(cpu);
>
> @@ -739,18 +740,46 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> if (attrs.byte_swap) {
> address |= TLB_BSWAP;
> }
> - if (!memory_region_is_ram(section->mr) &&
> - !memory_region_is_romd(section->mr)) {
> - /* IO memory case */
> - address |= TLB_MMIO;
> - addend = 0;
> - } else {
> +
> + is_ram = memory_region_is_ram(section->mr);
> + is_romd = memory_region_is_romd(section->mr);
> +
> + if (is_ram || is_romd) {
> + /* RAM and ROMD both have associated host memory. */
> addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
> + } else {
> + /* I/O does not; force the host address to NULL. */
> + addend = 0;
> + }
> +
> + write_address = address;
I guess the only "suboptimal" change is that you now have two checks for
"prot & PAGE_WRITE" twice in the case of ram instead of one.
> + if (is_ram) {
> + iotlb = memory_region_get_ram_addr(section->mr) + xlat;
> + /*
> + * Computing is_clean is expensive; avoid all that unless
> + * the page is actually writable.
> + */
> + if (prot & PAGE_WRITE) {
> + if (section->readonly) {
> + write_address |= TLB_ROM;
> + } else if (cpu_physical_memory_is_clean(iotlb)) {
> + write_address |= TLB_NOTDIRTY;
> + }
> + }
> + } else {
> + /* I/O or ROMD */
> + iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
> + /*
> + * Writes to romd devices must go through MMIO to enable write.
> + * Reads to romd devices go through the ram_ptr found above,
> + * but of course reads to I/O must go through MMIO.
> + */
> + write_address |= TLB_MMIO;
... and here you calculate write_address even if probably unused.
Can your move the calculation of the write_address completely into the
"prot & PAGE_WRITE" case below?
(I'm not looking at the full code, so could as well be that I am missing
something :) )
--
Thanks,
David / dhildenb
On 9/24/19 12:59 AM, David Hildenbrand wrote:
>> + is_ram = memory_region_is_ram(section->mr);
>> + is_romd = memory_region_is_romd(section->mr);
>> +
>> + if (is_ram || is_romd) {
>> + /* RAM and ROMD both have associated host memory. */
>> addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
>> + } else {
>> + /* I/O does not; force the host address to NULL. */
>> + addend = 0;
>> + }
>> +
>> + write_address = address;
>
> I guess the only "suboptimal" change is that you now have two checks for
> "prot & PAGE_WRITE" twice in the case of ram instead of one.
It's a single bit test on a register operand -- as cheap as can be. If you
look at the entire code, there *must* be more than one test. You can rearrange
the code to choose exactly where those tests are, but you'll have to have them
somewhere.
>> + /* I/O or ROMD */
>> + iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
>> + /*
>> + * Writes to romd devices must go through MMIO to enable write.
>> + * Reads to romd devices go through the ram_ptr found above,
>> + * but of course reads to I/O must go through MMIO.
>> + */
>> + write_address |= TLB_MMIO;
>
> ... and here you calculate write_address even if probably unused.
Well... while the page might not be writable (but I'd bet that it is -- I/O
memory is almost never read-only), and therefore write_address is technically
unused, the variable is practically used in the next line:
if (!is_romd) {
address = write_address
}
which will compile to a conditional move.
> Can your move the calculation of the write_address completely into the
> "prot & PAGE_WRITE" case below?
We'd prefer not to, since the code below is within the cpu tlb lock region.
We'd prefer to keep all of the expensive operations outside that.
r~
On 25.09.19 19:55, Richard Henderson wrote:
> On 9/24/19 12:59 AM, David Hildenbrand wrote:
>>> + is_ram = memory_region_is_ram(section->mr);
>>> + is_romd = memory_region_is_romd(section->mr);
>>> +
>>> + if (is_ram || is_romd) {
>>> + /* RAM and ROMD both have associated host memory. */
>>> addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
>>> + } else {
>>> + /* I/O does not; force the host address to NULL. */
>>> + addend = 0;
>>> + }
>>> +
>>> + write_address = address;
>>
>> I guess the only "suboptimal" change is that you now have two checks for
>> "prot & PAGE_WRITE" twice in the case of ram instead of one.
>
> It's a single bit test on a register operand -- as cheap as can be. If you
> look at the entire code, there *must* be more than one test. You can rearrange
> the code to choose exactly where those tests are, but you'll have to have them
> somewhere.
>
>>> + /* I/O or ROMD */
>>> + iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
>>> + /*
>>> + * Writes to romd devices must go through MMIO to enable write.
>>> + * Reads to romd devices go through the ram_ptr found above,
>>> + * but of course reads to I/O must go through MMIO.
>>> + */
>>> + write_address |= TLB_MMIO;
>>
>> ... and here you calculate write_address even if probably unused.
>
> Well... while the page might not be writable (but I'd bet that it is -- I/O
> memory is almost never read-only), and therefore write_address is technically
> unused, the variable is practically used in the next line:
>
> if (!is_romd) {
> address = write_address
> }
>
> which will compile to a conditional move.
>
>> Can your move the calculation of the write_address completely into the
>> "prot & PAGE_WRITE" case below?
>
> We'd prefer not to, since the code below is within the cpu tlb lock region.
> We'd prefer to keep all of the expensive operations outside that.
Makes all sense to me then and looks sane :)
>
>
> r~
>
--
Thanks,
David / dhildenb
Richard Henderson <richard.henderson@linaro.org> writes:
> There is only one caller, tlb_set_page_with_attrs. We cannot
> inline the entire function because the AddressSpaceDispatch
> structure is private to exec.c, and cannot easily be moved to
> include/exec/memory-internal.h.
>
> Compute is_ram and is_romd once within tlb_set_page_with_attrs.
> Fold the number of tests against these predicates. Compute
> cpu_physical_memory_is_clean outside of the tlb lock region.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> ---
> include/exec/exec-all.h | 6 +---
> accel/tcg/cputlb.c | 68 ++++++++++++++++++++++++++---------------
> exec.c | 22 ++-----------
> 3 files changed, 47 insertions(+), 49 deletions(-)
>
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 81b02eb2fe..49db07ba0b 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -509,11 +509,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
> hwaddr *xlat, hwaddr *plen,
> MemTxAttrs attrs, int *prot);
> hwaddr memory_region_section_get_iotlb(CPUState *cpu,
> - MemoryRegionSection *section,
> - target_ulong vaddr,
> - hwaddr paddr, hwaddr xlat,
> - int prot,
> - target_ulong *address);
> + MemoryRegionSection *section);
> #endif
>
> /* vl.c */
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index 05212ff244..05530a8b0c 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -704,13 +704,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> MemoryRegionSection *section;
> unsigned int index;
> target_ulong address;
> - target_ulong code_address;
> + target_ulong write_address;
> uintptr_t addend;
> CPUTLBEntry *te, tn;
> hwaddr iotlb, xlat, sz, paddr_page;
> target_ulong vaddr_page;
> int asidx = cpu_asidx_from_attrs(cpu, attrs);
> int wp_flags;
> + bool is_ram, is_romd;
>
> assert_cpu_is_self(cpu);
>
> @@ -739,18 +740,46 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> if (attrs.byte_swap) {
> address |= TLB_BSWAP;
> }
> - if (!memory_region_is_ram(section->mr) &&
> - !memory_region_is_romd(section->mr)) {
> - /* IO memory case */
> - address |= TLB_MMIO;
> - addend = 0;
> - } else {
> +
> + is_ram = memory_region_is_ram(section->mr);
> + is_romd = memory_region_is_romd(section->mr);
> +
> + if (is_ram || is_romd) {
> + /* RAM and ROMD both have associated host memory. */
> addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
> + } else {
> + /* I/O does not; force the host address to NULL. */
> + addend = 0;
> + }
> +
> + write_address = address;
> + if (is_ram) {
> + iotlb = memory_region_get_ram_addr(section->mr) + xlat;
> + /*
> + * Computing is_clean is expensive; avoid all that unless
> + * the page is actually writable.
> + */
> + if (prot & PAGE_WRITE) {
> + if (section->readonly) {
> + write_address |= TLB_ROM;
> + } else if (cpu_physical_memory_is_clean(iotlb)) {
> + write_address |= TLB_NOTDIRTY;
> + }
> + }
> + } else {
> + /* I/O or ROMD */
> + iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
> + /*
> + * Writes to romd devices must go through MMIO to enable write.
> + * Reads to romd devices go through the ram_ptr found above,
> + * but of course reads to I/O must go through MMIO.
> + */
> + write_address |= TLB_MMIO;
> + if (!is_romd) {
> + address = write_address;
> + }
> }
>
> - code_address = address;
> - iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
> - paddr_page, xlat, prot, &address);
> wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
> TARGET_PAGE_SIZE);
>
> @@ -790,8 +819,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> /*
> * At this point iotlb contains a physical section number in the lower
> * TARGET_PAGE_BITS, and either
> - * + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
> - * + the offset within section->mr of the page base (otherwise)
> + * + the ram_addr_t of the page base of the target RAM (RAM)
> + * + the offset within section->mr of the page base (I/O, ROMD)
> * We subtract the vaddr_page (which is page aligned and thus won't
> * disturb the low bits) to give an offset which can be added to the
> * (non-page-aligned) vaddr of the eventual memory access to get
> @@ -814,25 +843,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> }
>
> if (prot & PAGE_EXEC) {
> - tn.addr_code = code_address;
> + tn.addr_code = address;
> } else {
> tn.addr_code = -1;
> }
>
> tn.addr_write = -1;
> if (prot & PAGE_WRITE) {
> - tn.addr_write = address;
> - if (memory_region_is_romd(section->mr)) {
> - /* Use the MMIO path so that the device can switch states. */
> - tn.addr_write |= TLB_MMIO;
> - } else if (memory_region_is_ram(section->mr)) {
> - if (section->readonly) {
> - tn.addr_write |= TLB_ROM;
> - } else if (cpu_physical_memory_is_clean(
> - memory_region_get_ram_addr(section->mr) + xlat)) {
> - tn.addr_write |= TLB_NOTDIRTY;
> - }
> - }
> + tn.addr_write = write_address;
> if (prot & PAGE_WRITE_INV) {
> tn.addr_write |= TLB_INVALID_MASK;
> }
> diff --git a/exec.c b/exec.c
> index dc7001f115..961d7d6497 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1459,26 +1459,10 @@ bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
>
> /* Called from RCU critical section */
> hwaddr memory_region_section_get_iotlb(CPUState *cpu,
> - MemoryRegionSection *section,
> - target_ulong vaddr,
> - hwaddr paddr, hwaddr xlat,
> - int prot,
> - target_ulong *address)
> + MemoryRegionSection *section)
> {
> - hwaddr iotlb;
> -
> - if (memory_region_is_ram(section->mr)) {
> - /* Normal RAM. */
> - iotlb = memory_region_get_ram_addr(section->mr) + xlat;
> - } else {
> - AddressSpaceDispatch *d;
> -
> - d = flatview_to_dispatch(section->fv);
> - iotlb = section - d->map.sections;
> - iotlb += xlat;
> - }
> -
> - return iotlb;
> + AddressSpaceDispatch *d = flatview_to_dispatch(section->fv);
> + return section - d->map.sections;
> }
> #endif /* defined(CONFIG_USER_ONLY) */
--
Alex Bennée
© 2016 - 2026 Red Hat, Inc.