We want to store statistics for each TB even after flushes.
We do not want to modify or grow the TB struct.
So we create a new struct to contain these statistics and
link it to each TB when it is created.
Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
---
accel/tcg/translate-all.c | 40 +++++++++++++++++++++++++++++++++++++++
include/exec/exec-all.h | 21 ++++++++++++++++++++
include/exec/tb-context.h | 1 +
include/qemu/log.h | 1 +
4 files changed, 63 insertions(+)
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 5d1e08b169..f7e99f90e0 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1118,6 +1118,18 @@ static inline void code_gen_alloc(size_t tb_size)
}
}
+static gint statistics_cmp(gconstpointer p1, gconstpointer p2)
+{
+ const TBStatistics *a = (TBStatistics *) p1;
+ const TBStatistics *b = (TBStatistics *) p2;
+
+ return (a->pc == b->pc &&
+ a->cs_base == b->cs_base &&
+ a->flags == b->flags &&
+ a->page_addr[0] == b->page_addr[0] &&
+ a->page_addr[1] == b->page_addr[1]) ? 0 : 1;
+}
+
static bool tb_cmp(const void *ap, const void *bp)
{
const TranslationBlock *a = ap;
@@ -1586,6 +1598,29 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
#endif
}
+static void tb_insert_statistics_structure(TranslationBlock *tb) {
+ TBStatistics *new_stats = g_new0(TBStatistics, 1);
+ new_stats->pc = tb->pc;
+ new_stats->cs_base = tb->cs_base;
+ new_stats->flags = tb->flags;
+ new_stats->page_addr[0] = tb->page_addr[0];
+ new_stats->page_addr[1] = tb->page_addr[1];
+
+ GList *lookup_result = g_list_find_custom(tb_ctx.tb_statistics, new_stats, statistics_cmp);
+
+ if (lookup_result) {
+ /* If there is already a TBStatistic for this TB from a previous flush
+ * then just make the new TB point to the older TBStatistic
+ */
+ free(new_stats);
+ tb->tb_stats = lookup_result->data;
+ } else {
+ /* If not, then points to the new tb_statistics and add it to the hash */
+ tb->tb_stats = new_stats;
+ tb_ctx.tb_statistics = g_list_prepend(tb_ctx.tb_statistics, new_stats);
+ }
+}
+
/* add a new TB and link it to the physical page tables. phys_page2 is
* (-1) to indicate that only one page contains the TB.
*
@@ -1636,6 +1671,11 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
void *existing_tb = NULL;
uint32_t h;
+ if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
+ /* create and link to its TB a structure to store statistics */
+ tb_insert_statistics_structure(tb);
+ }
+
/* add in the hash table */
h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
tb->trace_vcpu_dstate);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 16034ee651..359100ef3b 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -324,6 +324,24 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
#define CODE_GEN_AVG_BLOCK_SIZE 150
#endif
+typedef struct TBStatistics TBStatistics;
+
+/*
+ * This struct stores statistics such as execution count of the TranslationBlocks.
+ * Each TB has its own TBStatistics. TBStatistics is suppose to live even after
+ * flushes.
+ */
+struct TBStatistics {
+ target_ulong pc;
+ target_ulong cs_base;
+ uint32_t flags;
+ tb_page_addr_t page_addr[2];
+
+ // total number of times that the related TB have being executed
+ uint32_t exec_count;
+ uint32_t exec_count_overflows;
+};
+
/*
* Translation Cache-related fields of a TB.
* This struct exists just for convenience; we keep track of TB's in a binary
@@ -403,6 +421,9 @@ struct TranslationBlock {
uintptr_t jmp_list_head;
uintptr_t jmp_list_next[2];
uintptr_t jmp_dest[2];
+
+ // Pointer to a struct where statistics from the TB is stored
+ TBStatistics *tb_stats;
};
extern bool parallel_cpus;
diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
index feb585e0a7..a78ce92e60 100644
--- a/include/exec/tb-context.h
+++ b/include/exec/tb-context.h
@@ -35,6 +35,7 @@ struct TBContext {
/* statistics */
unsigned tb_flush_count;
+ GList *tb_statistics;
};
extern TBContext tb_ctx;
diff --git a/include/qemu/log.h b/include/qemu/log.h
index b097a6cae1..2fca65dd01 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -45,6 +45,7 @@ static inline bool qemu_log_separate(void)
/* LOG_TRACE (1 << 15) is defined in log-for-trace.h */
#define CPU_LOG_TB_OP_IND (1 << 16)
#define CPU_LOG_TB_FPU (1 << 17)
+#define CPU_LOG_HOT_TBS (1 << 18)
/* Lock output for a series of related logs. Since this is not needed
* for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we
--
2.22.0
vandersonmr <vandersonmr2@gmail.com> writes:
> We want to store statistics for each TB even after flushes.
> We do not want to modify or grow the TB struct.
> So we create a new struct to contain this statistics and
> link it to each TB while they are created.
>
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> ---
> accel/tcg/translate-all.c | 40 +++++++++++++++++++++++++++++++++++++++
> include/exec/exec-all.h | 21 ++++++++++++++++++++
> include/exec/tb-context.h | 1 +
> include/qemu/log.h | 1 +
> 4 files changed, 63 insertions(+)
>
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 5d1e08b169..f7e99f90e0 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1118,6 +1118,18 @@ static inline void code_gen_alloc(size_t tb_size)
> }
> }
>
> +static gint statistics_cmp(gconstpointer p1, gconstpointer p2)
> +{
> + const TBStatistics *a = (TBStatistics *) p1;
> + const TBStatistics *b = (TBStatistics *) p2;
> +
> + return (a->pc == b->pc &&
> + a->cs_base == b->cs_base &&
> + a->flags == b->flags &&
> + a->page_addr[0] == b->page_addr[0] &&
> + a->page_addr[1] == b->page_addr[1]) ? 0 : 1;
> +}
> +
There are a bunch of white space errors that need fixing up as you
probably already know from patchew ;-)
> static bool tb_cmp(const void *ap, const void *bp)
> {
> const TranslationBlock *a = ap;
> @@ -1586,6 +1598,29 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
> #endif
> }
>
> +static void tb_insert_statistics_structure(TranslationBlock *tb) {
> + TBStatistics *new_stats = g_new0(TBStatistics, 1);
> + new_stats->pc = tb->pc;
> + new_stats->cs_base = tb->cs_base;
> + new_stats->flags = tb->flags;
> + new_stats->page_addr[0] = tb->page_addr[0];
> + new_stats->page_addr[1] = tb->page_addr[1];
> +
> + GList *lookup_result = g_list_find_custom(tb_ctx.tb_statistics, new_stats, statistics_cmp);
> +
> + if (lookup_result) {
> + /* If there is already a TBStatistic for this TB from a previous flush
> + * then just make the new TB point to the older TBStatistic
> + */
> + free(new_stats);
> + tb->tb_stats = lookup_result->data;
> + } else {
> + /* If not, then points to the new tb_statistics and add it to the hash */
> + tb->tb_stats = new_stats;
> + tb_ctx.tb_statistics = g_list_prepend(tb_ctx.tb_statistics,
> new_stats);
This is going to race as nothing prevents two threads attempting to
insert records at the same time.
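For illustration only (not part of the patch), the simplest fix would be to
serialise the insertion, e.g. with a dedicated lock -- a minimal sketch,
assuming a new tb_stats_lock initialised with qemu_mutex_init() at startup:

    static QemuMutex tb_stats_lock; /* hypothetical, protects tb_ctx.tb_statistics */

    qemu_mutex_lock(&tb_stats_lock);
    lookup_result = g_list_find_custom(tb_ctx.tb_statistics, new_stats,
                                       statistics_cmp);
    if (lookup_result) {
        g_free(new_stats);
        tb->tb_stats = lookup_result->data;
    } else {
        tb->tb_stats = new_stats;
        tb_ctx.tb_statistics = g_list_prepend(tb_ctx.tb_statistics, new_stats);
    }
    qemu_mutex_unlock(&tb_stats_lock);

Although, as suggested below, a lock-friendly data structure is probably the
better answer.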
> + }
> +}
> +
> /* add a new TB and link it to the physical page tables. phys_page2 is
> * (-1) to indicate that only one page contains the TB.
> *
> @@ -1636,6 +1671,11 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
> void *existing_tb = NULL;
> uint32_t h;
>
> + if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
> + /* create and link to its TB a structure to store statistics */
> + tb_insert_statistics_structure(tb);
> + }
> +
> /* add in the hash table */
> h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
> tb->trace_vcpu_dstate);
Perhaps we could just have a second qht array which allows for
"unlocked" insertion of records. You could take advantage of the fact that
the hash would be the same:
h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
tb->trace_vcpu_dstate);
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
/* remove TB from the page(s) if we couldn't insert it */
if (unlikely(existing_tb)) {
...
tb = existing_tb;
} else if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
TBStatistics *new_stats = g_new0(TBStatistics, 1);
bool ok;
new_stats->pc = tb->pc;
new_stats->cs_base = tb->cs_base;
new_stats->flags = tb->flags;
new_stats->page_addr[0] = tb->page_addr[0];
new_stats->page_addr[1] = tb->page_addr[1];
ok = qht_insert(&tb_ctx.tb_stats, new_stats, h, NULL);
/*
* This should never fail as we succeeded in inserting the
* TB itself which means we haven't seen this combination yet.
*/
g_assert(ok);
}
It would also avoid the clunkiness of having to allocate and then
free an unused structure.
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 16034ee651..359100ef3b 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -324,6 +324,24 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
> #define CODE_GEN_AVG_BLOCK_SIZE 150
> #endif
>
> +typedef struct TBStatistics TBStatistics;
> +
> +/*
> + * This struct stores statistics such as execution count of the TranslationBlocks.
> + * Each TB has its own TBStatistics. TBStatistics is suppose to live even after
> + * flushes.
> + */
> +struct TBStatistics {
> + target_ulong pc;
> + target_ulong cs_base;
> + uint32_t flags;
> + tb_page_addr_t page_addr[2];
> +
> + // total number of times that the related TB have being executed
No c++ style comments
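For reference, checkpatch wants a C-style block comment here, i.e. something
like:

    /* total number of times that the related TB has been executed */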
> + uint32_t exec_count;
> + uint32_t exec_count_overflows;
> +};
> +
> /*
> * Translation Cache-related fields of a TB.
> * This struct exists just for convenience; we keep track of TB's in a binary
> @@ -403,6 +421,9 @@ struct TranslationBlock {
> uintptr_t jmp_list_head;
> uintptr_t jmp_list_next[2];
> uintptr_t jmp_dest[2];
> +
> + // Pointer to a struct where statistics from the TB is stored
No c++ style comments
> + TBStatistics *tb_stats;
> };
>
> extern bool parallel_cpus;
> diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
> index feb585e0a7..a78ce92e60 100644
> --- a/include/exec/tb-context.h
> +++ b/include/exec/tb-context.h
> @@ -35,6 +35,7 @@ struct TBContext {
>
> /* statistics */
> unsigned tb_flush_count;
> + GList *tb_statistics;
> };
>
> extern TBContext tb_ctx;
> diff --git a/include/qemu/log.h b/include/qemu/log.h
> index b097a6cae1..2fca65dd01 100644
> --- a/include/qemu/log.h
> +++ b/include/qemu/log.h
> @@ -45,6 +45,7 @@ static inline bool qemu_log_separate(void)
> /* LOG_TRACE (1 << 15) is defined in log-for-trace.h */
> #define CPU_LOG_TB_OP_IND (1 << 16)
> #define CPU_LOG_TB_FPU (1 << 17)
> +#define CPU_LOG_HOT_TBS (1 << 18)
>
> /* Lock output for a series of related logs. Since this is not needed
> * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we
--
Alex Bennée
Alex Bennée <alex.bennee@linaro.org> writes:
> vandersonmr <vandersonmr2@gmail.com> writes:
>
>> We want to store statistics for each TB even after flushes.
>> We do not want to modify or grow the TB struct.
>> So we create a new struct to contain this statistics and
>> link it to each TB while they are created.
>>
>> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
>> ---
>> accel/tcg/translate-all.c | 40 +++++++++++++++++++++++++++++++++++++++
>> include/exec/exec-all.h | 21 ++++++++++++++++++++
>> include/exec/tb-context.h | 1 +
>> include/qemu/log.h | 1 +
>> 4 files changed, 63 insertions(+)
>>
>> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
>> index 5d1e08b169..f7e99f90e0 100644
>> --- a/accel/tcg/translate-all.c
>> +++ b/accel/tcg/translate-all.c
>> @@ -1118,6 +1118,18 @@ static inline void code_gen_alloc(size_t tb_size)
>> }
>> }
>>
>> +static gint statistics_cmp(gconstpointer p1, gconstpointer p2)
>> +{
>> + const TBStatistics *a = (TBStatistics *) p1;
>> + const TBStatistics *b = (TBStatistics *) p2;
>> +
>> + return (a->pc == b->pc &&
>> + a->cs_base == b->cs_base &&
>> + a->flags == b->flags &&
>> + a->page_addr[0] == b->page_addr[0] &&
>> + a->page_addr[1] == b->page_addr[1]) ? 0 : 1;
>> +}
>> +
>
> There are a bunch of white space errors that need fixing up as you
> probably already know from patchew ;-)
>
>> static bool tb_cmp(const void *ap, const void *bp)
>> {
>> const TranslationBlock *a = ap;
>> @@ -1586,6 +1598,29 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
>> #endif
>> }
>>
>> +static void tb_insert_statistics_structure(TranslationBlock *tb) {
>> + TBStatistics *new_stats = g_new0(TBStatistics, 1);
>> + new_stats->pc = tb->pc;
>> + new_stats->cs_base = tb->cs_base;
>> + new_stats->flags = tb->flags;
>> + new_stats->page_addr[0] = tb->page_addr[0];
>> + new_stats->page_addr[1] = tb->page_addr[1];
>> +
>> + GList *lookup_result = g_list_find_custom(tb_ctx.tb_statistics, new_stats, statistics_cmp);
>> +
>> + if (lookup_result) {
>> + /* If there is already a TBStatistic for this TB from a previous flush
>> + * then just make the new TB point to the older TBStatistic
>> + */
>> + free(new_stats);
>> + tb->tb_stats = lookup_result->data;
>> + } else {
>> + /* If not, then points to the new tb_statistics and add it to the hash */
>> + tb->tb_stats = new_stats;
>> + tb_ctx.tb_statistics = g_list_prepend(tb_ctx.tb_statistics,
>> new_stats);
>
> This is going to race as nothing prevents two threads attempting to
> insert records at the same time.
>
>> + }
>> +}
>> +
>> /* add a new TB and link it to the physical page tables. phys_page2 is
>> * (-1) to indicate that only one page contains the TB.
>> *
>> @@ -1636,6 +1671,11 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
>> void *existing_tb = NULL;
>> uint32_t h;
>>
>> + if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
>> + /* create and link to its TB a structure to store statistics */
>> + tb_insert_statistics_structure(tb);
>> + }
>> +
>> /* add in the hash table */
>> h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
>> tb->trace_vcpu_dstate);
>
> Perhaps we could just have a second qht array which allows for
> "unlocked" insertion of record. You could take advantage of the fact the
> hash would be the same:
>
> h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
> tb->trace_vcpu_dstate);
> qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
>
> /* remove TB from the page(s) if we couldn't insert it */
> if (unlikely(existing_tb)) {
> ...
> tb = existing_tb;
> } else if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
> TBStatistics *new_stats = g_new0(TBStatistics, 1);
> bool ok;
> new_stats->pc = tb->pc;
> new_stats->cs_base = tb->cs_base;
> new_stats->flags = tb->flags;
> new_stats->page_addr[0] = tb->page_addr[0];
> new_stats->page_addr[1] = tb->page_addr[1];
> ok = qht_insert(&tb_ctx.tb_stats, new_stats, h, NULL);
> /*
> * This should never fail as we succeeded in inserting the
> * TB itself which means we haven't seen this combination yet.
> */
> g_assert(ok);
> }
>
> It would also avoid the clunkiness of having to allocate and then
> freeing an unused structure.
Actually, thinking on this, we still have to handle it. We may have
tb_flushed away a translation and just be regenerating the same block.
As TBStatistics should transcend tb_flush events we need to re-use its
structure.
It would be worth counting the regens as well so we can see blocks that
keep getting re-translated after each flush.
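Purely as an illustration (the field name is an assumption, not part of the
patch), TBStatistics could grow a counter that the lookup path bumps every
time the same block is regenerated after a flush:

    struct TBStatistics {
        ...
        /* hypothetical: number of times this block has been (re)translated */
        uint32_t translation_count;
    };

    if (lookup_result) {
        TBStatistics *stats = lookup_result->data;
        stats->translation_count++;   /* same block seen again after tb_flush */
        g_free(new_stats);
        tb->tb_stats = stats;
    }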
>
>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
>> index 16034ee651..359100ef3b 100644
>> --- a/include/exec/exec-all.h
>> +++ b/include/exec/exec-all.h
>> @@ -324,6 +324,24 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
>> #define CODE_GEN_AVG_BLOCK_SIZE 150
>> #endif
>>
>> +typedef struct TBStatistics TBStatistics;
>> +
>> +/*
>> + * This struct stores statistics such as execution count of the TranslationBlocks.
>> + * Each TB has its own TBStatistics. TBStatistics is suppose to live even after
>> + * flushes.
>> + */
>> +struct TBStatistics {
>> + target_ulong pc;
>> + target_ulong cs_base;
>> + uint32_t flags;
>> + tb_page_addr_t page_addr[2];
>> +
>> + // total number of times that the related TB have being executed
>
> No c++ style comments
>
>> + uint32_t exec_count;
>> + uint32_t exec_count_overflows;
>> +};
>> +
>> /*
>> * Translation Cache-related fields of a TB.
>> * This struct exists just for convenience; we keep track of TB's in a binary
>> @@ -403,6 +421,9 @@ struct TranslationBlock {
>> uintptr_t jmp_list_head;
>> uintptr_t jmp_list_next[2];
>> uintptr_t jmp_dest[2];
>> +
>> + // Pointer to a struct where statistics from the TB is stored
>
> No c++ style comments
>
>> + TBStatistics *tb_stats;
>> };
>>
>> extern bool parallel_cpus;
>> diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
>> index feb585e0a7..a78ce92e60 100644
>> --- a/include/exec/tb-context.h
>> +++ b/include/exec/tb-context.h
>> @@ -35,6 +35,7 @@ struct TBContext {
>>
>> /* statistics */
>> unsigned tb_flush_count;
>> + GList *tb_statistics;
>> };
>>
>> extern TBContext tb_ctx;
>> diff --git a/include/qemu/log.h b/include/qemu/log.h
>> index b097a6cae1..2fca65dd01 100644
>> --- a/include/qemu/log.h
>> +++ b/include/qemu/log.h
>> @@ -45,6 +45,7 @@ static inline bool qemu_log_separate(void)
>> /* LOG_TRACE (1 << 15) is defined in log-for-trace.h */
>> #define CPU_LOG_TB_OP_IND (1 << 16)
>> #define CPU_LOG_TB_FPU (1 << 17)
>> +#define CPU_LOG_HOT_TBS (1 << 18)
>>
>> /* Lock output for a series of related logs. Since this is not needed
>> * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we
--
Alex Bennée
When tb_flush removes a block and it is recreated, this shouldn't
create a new TBStatistics but reuse the one found by:
lookup_result = g_list_find_custom(tb_ctx.tb_statistics, new_stats,
                                   statistics_cmp);
So the tb_statistics will be reused, and we could just add this
regen counter inside the if (lookup_result) branch.
Isn't that correct?
Vanderson M. Rosario
On Tue, Jun 25, 2019 at 1:28 PM Alex Bennée <alex.bennee@linaro.org> wrote:
>
> Alex Bennée <alex.bennee@linaro.org> writes:
>
> > vandersonmr <vandersonmr2@gmail.com> writes:
> >
> >> We want to store statistics for each TB even after flushes.
> >> We do not want to modify or grow the TB struct.
> >> So we create a new struct to contain this statistics and
> >> link it to each TB while they are created.
> >>
> >> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> >> ---
> >> accel/tcg/translate-all.c | 40 +++++++++++++++++++++++++++++++++++++++
> >> include/exec/exec-all.h | 21 ++++++++++++++++++++
> >> include/exec/tb-context.h | 1 +
> >> include/qemu/log.h | 1 +
> >> 4 files changed, 63 insertions(+)
> >>
> >> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> >> index 5d1e08b169..f7e99f90e0 100644
> >> --- a/accel/tcg/translate-all.c
> >> +++ b/accel/tcg/translate-all.c
> >> @@ -1118,6 +1118,18 @@ static inline void code_gen_alloc(size_t tb_size)
> >> }
> >> }
> >>
> >> +static gint statistics_cmp(gconstpointer p1, gconstpointer p2)
> >> +{
> >> + const TBStatistics *a = (TBStatistics *) p1;
> >> + const TBStatistics *b = (TBStatistics *) p2;
> >> +
> >> + return (a->pc == b->pc &&
> >> + a->cs_base == b->cs_base &&
> >> + a->flags == b->flags &&
> >> + a->page_addr[0] == b->page_addr[0] &&
> >> + a->page_addr[1] == b->page_addr[1]) ? 0 : 1;
> >> +}
> >> +
> >
> > There are a bunch of white space errors that need fixing up as you
> > probably already know from patchew ;-)
> >
> >> static bool tb_cmp(const void *ap, const void *bp)
> >> {
> >> const TranslationBlock *a = ap;
> >> @@ -1586,6 +1598,29 @@ static inline void tb_page_add(PageDesc *p,
> TranslationBlock *tb,
> >> #endif
> >> }
> >>
> >> +static void tb_insert_statistics_structure(TranslationBlock *tb) {
> >> + TBStatistics *new_stats = g_new0(TBStatistics, 1);
> >> + new_stats->pc = tb->pc;
> >> + new_stats->cs_base = tb->cs_base;
> >> + new_stats->flags = tb->flags;
> >> + new_stats->page_addr[0] = tb->page_addr[0];
> >> + new_stats->page_addr[1] = tb->page_addr[1];
> >> +
> >> + GList *lookup_result = g_list_find_custom(tb_ctx.tb_statistics,
> new_stats, statistics_cmp);
> >> +
> >> + if (lookup_result) {
> >> + /* If there is already a TBStatistic for this TB from a
> previous flush
> >> + * then just make the new TB point to the older TBStatistic
> >> + */
> >> + free(new_stats);
> >> + tb->tb_stats = lookup_result->data;
> >> + } else {
> >> + /* If not, then points to the new tb_statistics and add it
> to the hash */
> >> + tb->tb_stats = new_stats;
> >> + tb_ctx.tb_statistics = g_list_prepend(tb_ctx.tb_statistics,
> >> new_stats);
> >
> > This is going to race as nothing prevents two threads attempting to
> > insert records at the same time.
> >
> >> + }
> >> +}
> >> +
> >> /* add a new TB and link it to the physical page tables. phys_page2 is
> >> * (-1) to indicate that only one page contains the TB.
> >> *
> >> @@ -1636,6 +1671,11 @@ tb_link_page(TranslationBlock *tb,
> tb_page_addr_t phys_pc,
> >> void *existing_tb = NULL;
> >> uint32_t h;
> >>
> >> + if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
> >> + /* create and link to its TB a structure to store
> statistics */
> >> + tb_insert_statistics_structure(tb);
> >> + }
> >> +
> >> /* add in the hash table */
> >> h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags &
> CF_HASH_MASK,
> >> tb->trace_vcpu_dstate);
> >
> > Perhaps we could just have a second qht array which allows for
> > "unlocked" insertion of record. You could take advantage of the fact the
> > hash would be the same:
> >
> > h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags &
> CF_HASH_MASK,
> > tb->trace_vcpu_dstate);
> > qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
> >
> > /* remove TB from the page(s) if we couldn't insert it */
> > if (unlikely(existing_tb)) {
> > ...
> > tb = existing_tb;
> > } else if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
> > TBStatistics *new_stats = g_new0(TBStatistics, 1);
> > bool ok;
> > new_stats->pc = tb->pc;
> > new_stats->cs_base = tb->cs_base;
> > new_stats->flags = tb->flags;
> > new_stats->page_addr[0] = tb->page_addr[0];
> > new_stats->page_addr[1] = tb->page_addr[1];
> > ok = qht_insert(&tb_ctx.tb_stats, new_stats, h, NULL);
> > /*
> > * This should never fail as we succeeded in inserting the
> > * TB itself which means we haven't seen this combination
> yet.
> > */
> > g_assert(ok);
> > }
> >
> > It would also avoid the clunkiness of having to allocate and then
> > freeing an unused structure.
>
>
> Actually thinking on this we still have to handle it. We may have
> tb_flushed away a translation and just be regenerating the same block.
> As TBStatistics should transcend tb_flush events we need to re-use it's
> structure.
>
> It would be worth counting the regens as well so we can see blocks that
> keep getting re-translated after each flush.
>
> >
> >> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> >> index 16034ee651..359100ef3b 100644
> >> --- a/include/exec/exec-all.h
> >> +++ b/include/exec/exec-all.h
> >> @@ -324,6 +324,24 @@ static inline void
> tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
> >> #define CODE_GEN_AVG_BLOCK_SIZE 150
> >> #endif
> >>
> >> +typedef struct TBStatistics TBStatistics;
> >> +
> >> +/*
> >> + * This struct stores statistics such as execution count of the
> TranslationBlocks.
> >> + * Each TB has its own TBStatistics. TBStatistics is suppose to live
> even after
> >> + * flushes.
> >> + */
> >> +struct TBStatistics {
> >> + target_ulong pc;
> >> + target_ulong cs_base;
> >> + uint32_t flags;
> >> + tb_page_addr_t page_addr[2];
> >> +
> >> + // total number of times that the related TB have being executed
> >
> > No c++ style comments
> >
> >> + uint32_t exec_count;
> >> + uint32_t exec_count_overflows;
> >> +};
> >> +
> >> /*
> >> * Translation Cache-related fields of a TB.
> >> * This struct exists just for convenience; we keep track of TB's in a
> binary
> >> @@ -403,6 +421,9 @@ struct TranslationBlock {
> >> uintptr_t jmp_list_head;
> >> uintptr_t jmp_list_next[2];
> >> uintptr_t jmp_dest[2];
> >> +
> >> + // Pointer to a struct where statistics from the TB is stored
> >
> > No c++ style comments
> >
> >> + TBStatistics *tb_stats;
> >> };
> >>
> >> extern bool parallel_cpus;
> >> diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
> >> index feb585e0a7..a78ce92e60 100644
> >> --- a/include/exec/tb-context.h
> >> +++ b/include/exec/tb-context.h
> >> @@ -35,6 +35,7 @@ struct TBContext {
> >>
> >> /* statistics */
> >> unsigned tb_flush_count;
> >> + GList *tb_statistics;
> >> };
> >>
> >> extern TBContext tb_ctx;
> >> diff --git a/include/qemu/log.h b/include/qemu/log.h
> >> index b097a6cae1..2fca65dd01 100644
> >> --- a/include/qemu/log.h
> >> +++ b/include/qemu/log.h
> >> @@ -45,6 +45,7 @@ static inline bool qemu_log_separate(void)
> >> /* LOG_TRACE (1 << 15) is defined in log-for-trace.h */
> >> #define CPU_LOG_TB_OP_IND (1 << 16)
> >> #define CPU_LOG_TB_FPU (1 << 17)
> >> +#define CPU_LOG_HOT_TBS (1 << 18)
> >>
> >> /* Lock output for a series of related logs. Since this is not needed
> >> * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we
>
>
> --
> Alex Bennée
>
Vanderson Martins do Rosario <vandersonmr2@gmail.com> writes:
> When the tb_flush removes a block and it's recreated, this shouldn't
> be creating a new block but using the one that is found by:
>
> lookup_result = g_list_find_custom(tb_ctx.tb_statistics, new_stats,
> statistics_cmp);
>
> So the tb_statisticics will be reused and we could just add this
> regen counter in the if statement: if (lookup_result)
>
> isn't that correct?
Yes, although as I said earlier you want to use a QHT hash table rather
than a g_list to avoid racing with multiple translations at once.
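A rough sketch of what that could look like (tb_ctx.tb_stats and tb_stats_cmp
are assumptions here, mirroring the earlier suggestion; the table would be
initialised with qht_init() next to tb_ctx.htable):

    static bool tb_stats_cmp(const void *ap, const void *bp)
    {
        const TBStatistics *a = ap;
        const TBStatistics *b = bp;

        return a->pc == b->pc &&
               a->cs_base == b->cs_base &&
               a->flags == b->flags &&
               a->page_addr[0] == b->page_addr[0] &&
               a->page_addr[1] == b->page_addr[1];
    }

    /* in tb_link_page(), reusing the hash h computed for tb_ctx.htable */
    void *existing = NULL;

    if (qht_insert(&tb_ctx.tb_stats, new_stats, h, &existing)) {
        tb->tb_stats = new_stats;   /* first time we translate this block */
    } else {
        g_free(new_stats);          /* regenerated after a flush: reuse record */
        tb->tb_stats = existing;
    }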
>
> Vanderson M. Rosario
>
>
> On Tue, Jun 25, 2019 at 1:28 PM Alex Bennée <alex.bennee@linaro.org> wrote:
>
>>
>> Alex Bennée <alex.bennee@linaro.org> writes:
>>
>> > vandersonmr <vandersonmr2@gmail.com> writes:
>> >
>> >> We want to store statistics for each TB even after flushes.
>> >> We do not want to modify or grow the TB struct.
>> >> So we create a new struct to contain this statistics and
>> >> link it to each TB while they are created.
>> >>
>> >> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
>> >> ---
>> >> accel/tcg/translate-all.c | 40 +++++++++++++++++++++++++++++++++++++++
>> >> include/exec/exec-all.h | 21 ++++++++++++++++++++
>> >> include/exec/tb-context.h | 1 +
>> >> include/qemu/log.h | 1 +
>> >> 4 files changed, 63 insertions(+)
>> >>
>> >> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
>> >> index 5d1e08b169..f7e99f90e0 100644
>> >> --- a/accel/tcg/translate-all.c
>> >> +++ b/accel/tcg/translate-all.c
>> >> @@ -1118,6 +1118,18 @@ static inline void code_gen_alloc(size_t tb_size)
>> >> }
>> >> }
>> >>
>> >> +static gint statistics_cmp(gconstpointer p1, gconstpointer p2)
>> >> +{
>> >> + const TBStatistics *a = (TBStatistics *) p1;
>> >> + const TBStatistics *b = (TBStatistics *) p2;
>> >> +
>> >> + return (a->pc == b->pc &&
>> >> + a->cs_base == b->cs_base &&
>> >> + a->flags == b->flags &&
>> >> + a->page_addr[0] == b->page_addr[0] &&
>> >> + a->page_addr[1] == b->page_addr[1]) ? 0 : 1;
>> >> +}
>> >> +
>> >
>> > There are a bunch of white space errors that need fixing up as you
>> > probably already know from patchew ;-)
>> >
>> >> static bool tb_cmp(const void *ap, const void *bp)
>> >> {
>> >> const TranslationBlock *a = ap;
>> >> @@ -1586,6 +1598,29 @@ static inline void tb_page_add(PageDesc *p,
>> TranslationBlock *tb,
>> >> #endif
>> >> }
>> >>
>> >> +static void tb_insert_statistics_structure(TranslationBlock *tb) {
>> >> + TBStatistics *new_stats = g_new0(TBStatistics, 1);
>> >> + new_stats->pc = tb->pc;
>> >> + new_stats->cs_base = tb->cs_base;
>> >> + new_stats->flags = tb->flags;
>> >> + new_stats->page_addr[0] = tb->page_addr[0];
>> >> + new_stats->page_addr[1] = tb->page_addr[1];
>> >> +
>> >> + GList *lookup_result = g_list_find_custom(tb_ctx.tb_statistics,
>> new_stats, statistics_cmp);
>> >> +
>> >> + if (lookup_result) {
>> >> + /* If there is already a TBStatistic for this TB from a
>> previous flush
>> >> + * then just make the new TB point to the older TBStatistic
>> >> + */
>> >> + free(new_stats);
>> >> + tb->tb_stats = lookup_result->data;
>> >> + } else {
>> >> + /* If not, then points to the new tb_statistics and add it
>> to the hash */
>> >> + tb->tb_stats = new_stats;
>> >> + tb_ctx.tb_statistics = g_list_prepend(tb_ctx.tb_statistics,
>> >> new_stats);
>> >
>> > This is going to race as nothing prevents two threads attempting to
>> > insert records at the same time.
>> >
>> >> + }
>> >> +}
>> >> +
>> >> /* add a new TB and link it to the physical page tables. phys_page2 is
>> >> * (-1) to indicate that only one page contains the TB.
>> >> *
>> >> @@ -1636,6 +1671,11 @@ tb_link_page(TranslationBlock *tb,
>> tb_page_addr_t phys_pc,
>> >> void *existing_tb = NULL;
>> >> uint32_t h;
>> >>
>> >> + if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
>> >> + /* create and link to its TB a structure to store
>> statistics */
>> >> + tb_insert_statistics_structure(tb);
>> >> + }
>> >> +
>> >> /* add in the hash table */
>> >> h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags &
>> CF_HASH_MASK,
>> >> tb->trace_vcpu_dstate);
>> >
>> > Perhaps we could just have a second qht array which allows for
>> > "unlocked" insertion of record. You could take advantage of the fact the
>> > hash would be the same:
>> >
>> > h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags &
>> CF_HASH_MASK,
>> > tb->trace_vcpu_dstate);
>> > qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
>> >
>> > /* remove TB from the page(s) if we couldn't insert it */
>> > if (unlikely(existing_tb)) {
>> > ...
>> > tb = existing_tb;
>> > } else if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
>> > TBStatistics *new_stats = g_new0(TBStatistics, 1);
>> > bool ok;
>> > new_stats->pc = tb->pc;
>> > new_stats->cs_base = tb->cs_base;
>> > new_stats->flags = tb->flags;
>> > new_stats->page_addr[0] = tb->page_addr[0];
>> > new_stats->page_addr[1] = tb->page_addr[1];
>> > ok = qht_insert(&tb_ctx.tb_stats, new_stats, h, NULL);
>> > /*
>> > * This should never fail as we succeeded in inserting the
>> > * TB itself which means we haven't seen this combination
>> yet.
>> > */
>> > g_assert(ok);
>> > }
>> >
>> > It would also avoid the clunkiness of having to allocate and then
>> > freeing an unused structure.
>>
>>
>> Actually thinking on this we still have to handle it. We may have
>> tb_flushed away a translation and just be regenerating the same block.
>> As TBStatistics should transcend tb_flush events we need to re-use it's
>> structure.
>>
>> It would be worth counting the regens as well so we can see blocks that
>> keep getting re-translated after each flush.
>>
>> >
>> >> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
>> >> index 16034ee651..359100ef3b 100644
>> >> --- a/include/exec/exec-all.h
>> >> +++ b/include/exec/exec-all.h
>> >> @@ -324,6 +324,24 @@ static inline void
>> tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
>> >> #define CODE_GEN_AVG_BLOCK_SIZE 150
>> >> #endif
>> >>
>> >> +typedef struct TBStatistics TBStatistics;
>> >> +
>> >> +/*
>> >> + * This struct stores statistics such as execution count of the
>> TranslationBlocks.
>> >> + * Each TB has its own TBStatistics. TBStatistics is suppose to live
>> even after
>> >> + * flushes.
>> >> + */
>> >> +struct TBStatistics {
>> >> + target_ulong pc;
>> >> + target_ulong cs_base;
>> >> + uint32_t flags;
>> >> + tb_page_addr_t page_addr[2];
>> >> +
>> >> + // total number of times that the related TB have being executed
>> >
>> > No c++ style comments
>> >
>> >> + uint32_t exec_count;
>> >> + uint32_t exec_count_overflows;
>> >> +};
>> >> +
>> >> /*
>> >> * Translation Cache-related fields of a TB.
>> >> * This struct exists just for convenience; we keep track of TB's in a
>> binary
>> >> @@ -403,6 +421,9 @@ struct TranslationBlock {
>> >> uintptr_t jmp_list_head;
>> >> uintptr_t jmp_list_next[2];
>> >> uintptr_t jmp_dest[2];
>> >> +
>> >> + // Pointer to a struct where statistics from the TB is stored
>> >
>> > No c++ style comments
>> >
>> >> + TBStatistics *tb_stats;
>> >> };
>> >>
>> >> extern bool parallel_cpus;
>> >> diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
>> >> index feb585e0a7..a78ce92e60 100644
>> >> --- a/include/exec/tb-context.h
>> >> +++ b/include/exec/tb-context.h
>> >> @@ -35,6 +35,7 @@ struct TBContext {
>> >>
>> >> /* statistics */
>> >> unsigned tb_flush_count;
>> >> + GList *tb_statistics;
>> >> };
>> >>
>> >> extern TBContext tb_ctx;
>> >> diff --git a/include/qemu/log.h b/include/qemu/log.h
>> >> index b097a6cae1..2fca65dd01 100644
>> >> --- a/include/qemu/log.h
>> >> +++ b/include/qemu/log.h
>> >> @@ -45,6 +45,7 @@ static inline bool qemu_log_separate(void)
>> >> /* LOG_TRACE (1 << 15) is defined in log-for-trace.h */
>> >> #define CPU_LOG_TB_OP_IND (1 << 16)
>> >> #define CPU_LOG_TB_FPU (1 << 17)
>> >> +#define CPU_LOG_HOT_TBS (1 << 18)
>> >>
>> >> /* Lock output for a series of related logs. Since this is not needed
>> >> * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we
>>
>>
>> --
>> Alex Bennée
>>
--
Alex Bennée
Alex Bennée <alex.bennee@linaro.org> writes:
> Vanderson Martins do Rosario <vandersonmr2@gmail.com> writes:
>
>> When the tb_flush removes a block and it's recreated, this shouldn't
>> be creating a new block but using the one that is found by:
>>
>> lookup_result = g_list_find_custom(tb_ctx.tb_statistics, new_stats,
>> statistics_cmp);
>>
>> So the tb_statisticics will be reused and we could just add this
>> regen counter in the if statement: if (lookup_result)
>>
>> isn't that correct?
>
> Yes, although as I said earlier you want to use a QHT hash table rather
> than a g_list to avoid racing with multiple translations at once.
>
>>
>> Vanderson M. Rosario
>>
>>
>> On Tue, Jun 25, 2019 at 1:28 PM Alex Bennée <alex.bennee@linaro.org> wrote:
>>
>>>
>>> Alex Bennée <alex.bennee@linaro.org> writes:
>>>
>>> > vandersonmr <vandersonmr2@gmail.com> writes:
>>> >
>>> >> We want to store statistics for each TB even after flushes.
>>> >> We do not want to modify or grow the TB struct.
>>> >> So we create a new struct to contain this statistics and
>>> >> link it to each TB while they are created.
>>> >>
>>> >> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
>>> >> ---
>>> >> accel/tcg/translate-all.c | 40 +++++++++++++++++++++++++++++++++++++++
>>> >> include/exec/exec-all.h | 21 ++++++++++++++++++++
>>> >> include/exec/tb-context.h | 1 +
>>> >> include/qemu/log.h | 1 +
>>> >> 4 files changed, 63 insertions(+)
>>> >>
>>> >> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
>>> >> index 5d1e08b169..f7e99f90e0 100644
>>> >> --- a/accel/tcg/translate-all.c
>>> >> +++ b/accel/tcg/translate-all.c
>>> >> @@ -1118,6 +1118,18 @@ static inline void code_gen_alloc(size_t tb_size)
>>> >> }
>>> >> }
>>> >>
>>> >> +static gint statistics_cmp(gconstpointer p1, gconstpointer p2)
>>> >> +{
>>> >> + const TBStatistics *a = (TBStatistics *) p1;
>>> >> + const TBStatistics *b = (TBStatistics *) p2;
>>> >> +
>>> >> + return (a->pc == b->pc &&
>>> >> + a->cs_base == b->cs_base &&
>>> >> + a->flags == b->flags &&
>>> >> + a->page_addr[0] == b->page_addr[0] &&
>>> >> + a->page_addr[1] == b->page_addr[1]) ? 0 : 1;
>>> >> +}
>>> >> +
>>> >
>>> > There are a bunch of white space errors that need fixing up as you
>>> > probably already know from patchew ;-)
>>> >
>>> >> static bool tb_cmp(const void *ap, const void *bp)
>>> >> {
>>> >> const TranslationBlock *a = ap;
>>> >> @@ -1586,6 +1598,29 @@ static inline void tb_page_add(PageDesc *p,
>>> TranslationBlock *tb,
>>> >> #endif
>>> >> }
>>> >>
>>> >> +static void tb_insert_statistics_structure(TranslationBlock *tb) {
>>> >> + TBStatistics *new_stats = g_new0(TBStatistics, 1);
>>> >> + new_stats->pc = tb->pc;
>>> >> + new_stats->cs_base = tb->cs_base;
>>> >> + new_stats->flags = tb->flags;
>>> >> + new_stats->page_addr[0] = tb->page_addr[0];
>>> >> + new_stats->page_addr[1] = tb->page_addr[1];
>>> >> +
>>> >> + GList *lookup_result = g_list_find_custom(tb_ctx.tb_statistics,
>>> new_stats, statistics_cmp);
>>> >> +
>>> >> + if (lookup_result) {
>>> >> + /* If there is already a TBStatistic for this TB from a
>>> previous flush
>>> >> + * then just make the new TB point to the older TBStatistic
>>> >> + */
>>> >> + free(new_stats);
>>> >> + tb->tb_stats = lookup_result->data;
>>> >> + } else {
>>> >> + /* If not, then points to the new tb_statistics and add it
>>> to the hash */
>>> >> + tb->tb_stats = new_stats;
>>> >> + tb_ctx.tb_statistics = g_list_prepend(tb_ctx.tb_statistics,
>>> >> new_stats);
>>> >
>>> > This is going to race as nothing prevents two threads attempting to
>>> > insert records at the same time.
>>> >
>>> >> + }
>>> >> +}
>>> >> +
>>> >> /* add a new TB and link it to the physical page tables. phys_page2 is
>>> >> * (-1) to indicate that only one page contains the TB.
>>> >> *
>>> >> @@ -1636,6 +1671,11 @@ tb_link_page(TranslationBlock *tb,
>>> tb_page_addr_t phys_pc,
>>> >> void *existing_tb = NULL;
>>> >> uint32_t h;
>>> >>
>>> >> + if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
>>> >> + /* create and link to its TB a structure to store
>>> statistics */
>>> >> + tb_insert_statistics_structure(tb);
>>> >> + }
>>> >> +
>>> >> /* add in the hash table */
>>> >> h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags &
>>> CF_HASH_MASK,
>>> >> tb->trace_vcpu_dstate);
>>> >
>>> > Perhaps we could just have a second qht array which allows for
>>> > "unlocked" insertion of record. You could take advantage of the fact the
>>> > hash would be the same:
>>> >
>>> > h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags &
>>> CF_HASH_MASK,
>>> > tb->trace_vcpu_dstate);
>>> > qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
>>> >
>>> > /* remove TB from the page(s) if we couldn't insert it */
>>> > if (unlikely(existing_tb)) {
>>> > ...
>>> > tb = existing_tb;
>>> > } else if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
>>> > TBStatistics *new_stats = g_new0(TBStatistics, 1);
>>> > bool ok;
>>> > new_stats->pc = tb->pc;
>>> > new_stats->cs_base = tb->cs_base;
>>> > new_stats->flags = tb->flags;
>>> > new_stats->page_addr[0] = tb->page_addr[0];
>>> > new_stats->page_addr[1] = tb->page_addr[1];
>>> > ok = qht_insert(&tb_ctx.tb_stats, new_stats, h, NULL);
>>> > /*
>>> > * This should never fail as we succeeded in inserting the
>>> > * TB itself which means we haven't seen this combination
>>> yet.
>>> > */
>>> > g_assert(ok);
>>> > }
>>> >
>>> > It would also avoid the clunkiness of having to allocate and then
>>> > freeing an unused structure.
>>>
>>>
>>> Actually thinking on this we still have to handle it. We may have
>>> tb_flushed away a translation and just be regenerating the same block.
>>> As TBStatistics should transcend tb_flush events we need to re-use it's
>>> structure.
>>>
>>> It would be worth counting the regens as well so we can see blocks that
>>> keep getting re-translated after each flush.
Another issue is lifetime. We really want to have the statistics
available before we generate the code (so we can pass a direct pointer
to the helper), and we can't wait until after we qht_insert the TB into
the buffer (as any thread can start executing that buffer from that
point). So we need to ensure tb->tb_stats is live and ready at
translation time.
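To make that concrete, one possible shape (names and placement are
assumptions, not necessarily what the series ends up doing) is a tiny helper
taking that direct pointer, emitted before the guest instructions of each
block:

    /* hypothetical declaration, e.g. in accel/tcg/tcg-runtime.h:
     *     DEF_HELPER_1(inc_exec_freq, void, ptr)
     */
    void HELPER(inc_exec_freq)(void *ptr)
    {
        TBStatistics *stats = ptr;
        atomic_inc(&stats->exec_count);
    }

    /* emitted at translation time -- which is why tb->tb_stats must
     * already be valid here */
    TCGv_ptr stats_ptr = tcg_const_ptr(tb->tb_stats);
    gen_helper_inc_exec_freq(stats_ptr);
    tcg_temp_free_ptr(stats_ptr);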
--
Alex Bennée