accel/tcg/plugin-gen.c | 10 ++++----- include/exec/helper-head.h | 2 -- include/tcg/tcg.h | 46 +++++++++++++------------------------- tcg/tcg.c | 35 +++++++++++++++++++++-------- 4 files changed, 47 insertions(+), 46 deletions(-)
From: Richard Henderson <richard.henderson@linaro.org>
We have been allocating a worst case number of arguments
to support calls. Instead, allow the size to vary.
By default leave space for 4 args, to maximize reuse,
but allow calls to increase the number of args to 32.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
[PMD: Split patch in two]
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
accel/tcg/plugin-gen.c | 10 ++++-----
include/exec/helper-head.h | 2 --
include/tcg/tcg.h | 46 +++++++++++++-------------------------
tcg/tcg.c | 35 +++++++++++++++++++++--------
4 files changed, 47 insertions(+), 46 deletions(-)
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index 62e775d34d..c7d6514840 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -258,12 +258,12 @@ static TCGOp *rm_ops(TCGOp *op)
static TCGOp *copy_op_nocheck(TCGOp **begin_op, TCGOp *op)
{
- unsigned nargs = ARRAY_SIZE(op->args);
+ TCGOp *old_op = QTAILQ_NEXT(*begin_op, link);
+ unsigned nargs = old_op->nargs;
- *begin_op = QTAILQ_NEXT(*begin_op, link);
- tcg_debug_assert(*begin_op);
- op = tcg_op_insert_after(tcg_ctx, op, (*begin_op)->opc, nargs);
- memcpy(op->args, (*begin_op)->args, sizeof(op->args));
+ *begin_op = old_op;
+ op = tcg_op_insert_after(tcg_ctx, op, old_op->opc, nargs);
+ memcpy(op->args, old_op->args, sizeof(op->args[0]) * nargs);
return op;
}
diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h
index e242fed46e..8bdf0f6ea2 100644
--- a/include/exec/helper-head.h
+++ b/include/exec/helper-head.h
@@ -133,6 +133,4 @@
#define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \
DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7)
-/* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */
-
#endif /* EXEC_HELPER_HEAD_H */
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index c55fa21a89..d430ea10c8 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -38,20 +38,6 @@
/* XXX: make safe guess about sizes */
#define MAX_OP_PER_INSTR 266
-#if HOST_LONG_BITS == 32
-#define MAX_OPC_PARAM_PER_ARG 2
-#else
-#define MAX_OPC_PARAM_PER_ARG 1
-#endif
-#define MAX_OPC_PARAM_IARGS 7
-#define MAX_OPC_PARAM_OARGS 1
-#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)
-
-/* A Call op needs up to 4 + 2N parameters on 32-bit archs,
- * and up to 4 + N parameters on 64-bit archs
- * (N = number of input arguments + output arguments). */
-#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS))
-
#define CPU_TEMP_BUF_NLONGS 128
#define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long))
@@ -493,34 +479,34 @@ typedef struct TCGTempSet {
unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
} TCGTempSet;
-/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding,
- this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands.
- There are never more than 2 outputs, which means that we can store all
- dead + sync data within 16 bits. */
-#define DEAD_ARG 4
-#define SYNC_ARG 1
-typedef uint16_t TCGLifeData;
+/*
+ * With 1 128-bit output, a 32-bit host requires 4 output parameters,
+ * which leaves a maximum of 28 other slots. Which is enough for 7
+ * 128-bit operands.
+ */
+#define DEAD_ARG (1 << 4)
+#define SYNC_ARG (1 << 0)
+typedef uint32_t TCGLifeData;
-/* The layout here is designed to avoid a bitfield crossing of
- a 32-bit boundary, which would cause GCC to add extra padding. */
typedef struct TCGOp {
- TCGOpcode opc : 8; /* 8 */
+ TCGOpcode opc : 8;
+ unsigned nargs : 8;
/* Parameters for this opcode. See below. */
- unsigned param1 : 4; /* 12 */
- unsigned param2 : 4; /* 16 */
+ unsigned param1 : 8;
+ unsigned param2 : 8;
/* Lifetime data of the operands. */
- unsigned life : 16; /* 32 */
+ TCGLifeData life;
/* Next and previous opcodes. */
QTAILQ_ENTRY(TCGOp) link;
- /* Arguments for the opcode. */
- TCGArg args[MAX_OPC_PARAM];
-
/* Register preferences for the output(s). */
TCGRegSet output_pref[2];
+
+ /* Arguments for the opcode. */
+ TCGArg args[];
} TCGOp;
#define TCGOP_CALLI(X) (X)->param1
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 3f172cb1d6..ccbe947222 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1513,7 +1513,12 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
}
}
- max_args = ARRAY_SIZE(op->args);
+ /*
+ * A Call op needs up to 4 + 2N parameters on 32-bit archs,
+ * and up to 4 + N parameters on 64-bit archs
+ * (N = number of input arguments + output arguments).
+ */
+ max_args = (64 / TCG_TARGET_REG_BITS) * nargs + 4;
op = tcg_emit_op(INDEX_op_call, max_args);
pi = 0;
@@ -2298,19 +2303,31 @@ void tcg_remove_ops_after(TCGOp *op)
static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
{
TCGContext *s = tcg_ctx;
- TCGOp *op;
+ TCGOp *op = NULL;
- assert(nargs < ARRAY_SIZE(op->args));
- if (likely(QTAILQ_EMPTY(&s->free_ops))) {
- op = tcg_malloc(sizeof(TCGOp));
- } else {
- op = QTAILQ_FIRST(&s->free_ops);
- QTAILQ_REMOVE(&s->free_ops, op, link);
+ if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
+ QTAILQ_FOREACH(op, &s->free_ops, link) {
+ if (nargs <= op->nargs) {
+ QTAILQ_REMOVE(&s->free_ops, op, link);
+ nargs = op->nargs;
+ goto found;
+ }
+ }
}
+
+ /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
+ nargs = MAX(4, nargs);
+ op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
+
+ found:
memset(op, 0, offsetof(TCGOp, link));
op->opc = opc;
- s->nb_ops++;
+ op->nargs = nargs;
+ /* Check for bitfield overflow. */
+ tcg_debug_assert(op->nargs == nargs);
+
+ s->nb_ops++;
return op;
}
--
2.38.1
On 18/12/22 22:18, Philippe Mathieu-Daudé wrote: > From: Richard Henderson <richard.henderson@linaro.org> > > We have been allocating a worst case number of arguments > to support calls. Instead, allow the size to vary. > By default leave space for 4 args, to maximize reuse, > but allow calls to increase the number of args to 32. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > [PMD: Split patch in two] > Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> > --- > accel/tcg/plugin-gen.c | 10 ++++----- > include/exec/helper-head.h | 2 -- > include/tcg/tcg.h | 46 +++++++++++++------------------------- > tcg/tcg.c | 35 +++++++++++++++++++++-------- > 4 files changed, 47 insertions(+), 46 deletions(-) > diff --git a/tcg/tcg.c b/tcg/tcg.c > index 3f172cb1d6..ccbe947222 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -1513,7 +1513,12 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) > } > } > > - max_args = ARRAY_SIZE(op->args); > + /* > + * A Call op needs up to 4 + 2N parameters on 32-bit archs, > + * and up to 4 + N parameters on 64-bit archs > + * (N = number of input arguments + output arguments). > + */ > + max_args = (64 / TCG_TARGET_REG_BITS) * nargs + 4; > op = tcg_emit_op(INDEX_op_call, max_args); > > pi = 0; > @@ -2298,19 +2303,31 @@ void tcg_remove_ops_after(TCGOp *op) > static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) > { > TCGContext *s = tcg_ctx; > - TCGOp *op; > + TCGOp *op = NULL; > > - assert(nargs < ARRAY_SIZE(op->args)); > - if (likely(QTAILQ_EMPTY(&s->free_ops))) { > - op = tcg_malloc(sizeof(TCGOp)); > - } else { > - op = QTAILQ_FIRST(&s->free_ops); > - QTAILQ_REMOVE(&s->free_ops, op, link); > + if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { > + QTAILQ_FOREACH(op, &s->free_ops, link) { > + if (nargs <= op->nargs) { > + QTAILQ_REMOVE(&s->free_ops, op, link); > + nargs = op->nargs; > + goto found; > + } > + } > } > + > + /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ > + nargs = MAX(4, nargs); > + op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); > + > + found: > memset(op, 0, offsetof(TCGOp, link)); > op->opc = opc; > - s->nb_ops++; > + op->nargs = nargs; We can move this assignation before the 'found' label. > > + /* Check for bitfield overflow. */ > + tcg_debug_assert(op->nargs == nargs); > + > + s->nb_ops++; > return op; > } > Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
On 12/18/22 13:49, Philippe Mathieu-Daudé wrote: >> + found: >> memset(op, 0, offsetof(TCGOp, link)); >> op->opc = opc; >> - s->nb_ops++; >> + op->nargs = nargs; > > We can move this assignation before the 'found' label. No, affected by the memset. r~
On 18/12/22 23:44, Richard Henderson wrote: > On 12/18/22 13:49, Philippe Mathieu-Daudé wrote: >>> + found: >>> memset(op, 0, offsetof(TCGOp, link)); >>> op->opc = opc; >>> - s->nb_ops++; >>> + op->nargs = nargs; >> >> We can move this assignation before the 'found' label. > > No, affected by the memset. Oh, I missed that, good point.
© 2016 - 2024 Red Hat, Inc.