When clearing a bitfield we don't need to lead the
source register. Use deposit_z_i32() with the BFC
opcode to save a load_reg() call.
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
target/arm/tcg/translate.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index b71ac2d0d5..1a6938d1b3 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -7255,7 +7255,7 @@ static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
{
int msb = a->msb, lsb = a->lsb;
- TCGv_i32 t_in, t_rd;
+ TCGv_i32 t_rd;
int width;
if (!ENABLE_ARCH_6T2) {
@@ -7268,15 +7268,14 @@ static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
}
width = msb + 1 - lsb;
+ t_rd = load_reg(s, a->rd);
if (a->rn == 15) {
/* BFC */
- t_in = tcg_constant_i32(0);
+ tcg_gen_deposit_z_i32(t_rd, t_rd, lsb, width);
} else {
/* BFI */
- t_in = load_reg(s, a->rn);
+ tcg_gen_deposit_i32(t_rd, t_rd, load_reg(s, a->rn), lsb, width);
}
- t_rd = load_reg(s, a->rd);
- tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
store_reg(s, a->rd, t_rd);
return true;
}
--
2.41.0
On Tue, 22 Aug 2023 at 10:51, Philippe Mathieu-Daudé <philmd@linaro.org> wrote: > > When clearing a bitfield we don't need to lead the "load" ? > source register. Use deposit_z_i32() with the BFC > opcode to save a load_reg() call. > > Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> > --- > target/arm/tcg/translate.c | 9 ++++----- > 1 file changed, 4 insertions(+), 5 deletions(-) > > diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c > index b71ac2d0d5..1a6938d1b3 100644 > --- a/target/arm/tcg/translate.c > +++ b/target/arm/tcg/translate.c > @@ -7255,7 +7255,7 @@ static bool trans_UBFX(DisasContext *s, arg_UBFX *a) > static bool trans_BFCI(DisasContext *s, arg_BFCI *a) > { > int msb = a->msb, lsb = a->lsb; > - TCGv_i32 t_in, t_rd; > + TCGv_i32 t_rd; > int width; > > if (!ENABLE_ARCH_6T2) { > @@ -7268,15 +7268,14 @@ static bool trans_BFCI(DisasContext *s, arg_BFCI *a) > } > > width = msb + 1 - lsb; > + t_rd = load_reg(s, a->rd); > if (a->rn == 15) { > /* BFC */ > - t_in = tcg_constant_i32(0); > + tcg_gen_deposit_z_i32(t_rd, t_rd, lsb, width); > } else { > /* BFI */ > - t_in = load_reg(s, a->rn); > + tcg_gen_deposit_i32(t_rd, t_rd, load_reg(s, a->rn), lsb, width); > } > - t_rd = load_reg(s, a->rd); > - tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width); > store_reg(s, a->rd, t_rd); > return true; The comment says we are saving a load_reg() call, but the code change doesn't seem to do that. Before the change: * for BFC we call load_reg for rd * for BFI we call load_reg for rn and rd After the change: * for BFC we call load_reg for rd * for BFI we call load_reg for rn and rd So we're not saving any load_reg() calls as far as I can see ? -- PMM
On 29/8/23 15:20, Peter Maydell wrote: > On Tue, 22 Aug 2023 at 10:51, Philippe Mathieu-Daudé <philmd@linaro.org> wrote: >> >> When clearing a bitfield we don't need to lead the > > "load" ? > >> source register. Use deposit_z_i32() with the BFC >> opcode to save a load_reg() call. >> >> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> >> --- >> target/arm/tcg/translate.c | 9 ++++----- >> 1 file changed, 4 insertions(+), 5 deletions(-) >> >> diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c >> index b71ac2d0d5..1a6938d1b3 100644 >> --- a/target/arm/tcg/translate.c >> +++ b/target/arm/tcg/translate.c >> @@ -7255,7 +7255,7 @@ static bool trans_UBFX(DisasContext *s, arg_UBFX *a) >> static bool trans_BFCI(DisasContext *s, arg_BFCI *a) >> { >> int msb = a->msb, lsb = a->lsb; >> - TCGv_i32 t_in, t_rd; >> + TCGv_i32 t_rd; >> int width; >> >> if (!ENABLE_ARCH_6T2) { >> @@ -7268,15 +7268,14 @@ static bool trans_BFCI(DisasContext *s, arg_BFCI *a) >> } >> >> width = msb + 1 - lsb; >> + t_rd = load_reg(s, a->rd); >> if (a->rn == 15) { >> /* BFC */ >> - t_in = tcg_constant_i32(0); >> + tcg_gen_deposit_z_i32(t_rd, t_rd, lsb, width); >> } else { >> /* BFI */ >> - t_in = load_reg(s, a->rn); >> + tcg_gen_deposit_i32(t_rd, t_rd, load_reg(s, a->rn), lsb, width); >> } >> - t_rd = load_reg(s, a->rd); >> - tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width); >> store_reg(s, a->rd, t_rd); >> return true; > > The comment says we are saving a load_reg() call, but the > code change doesn't seem to do that. Before the change: > * for BFC we call load_reg for rd > * for BFI we call load_reg for rn and rd > > After the change: > * for BFC we call load_reg for rd > * for BFI we call load_reg for rn and rd > > So we're not saving any load_reg() calls as far as I can see ? Indeed you are right. The optimizations are in tcg_gen_deposit_z_i32() vs using tcg_gen_deposit_i32(). I'll reword the description. Thanks, Phil.
On 8/22/23 02:51, Philippe Mathieu-Daudé wrote: > When clearing a bitfield we don't need to lead the > source register. Use deposit_z_i32() with the BFC > opcode to save a load_reg() call. > > Signed-off-by: Philippe Mathieu-Daudé<philmd@linaro.org> > --- > target/arm/tcg/translate.c | 9 ++++----- > 1 file changed, 4 insertions(+), 5 deletions(-) Reviewed-by: Richard Henderson <richard.henderson@linaro.org> r~
© 2016 - 2024 Red Hat, Inc.