[PATCH v5 06/12] ati-vga: Create setup_2d_blt_dst helper

Chad Jablonski posted 12 patches 3 weeks, 1 day ago
There is a newer version of this series
[PATCH v5 06/12] ati-vga: Create setup_2d_blt_dst helper
Posted by Chad Jablonski 3 weeks, 1 day ago
A large amount of the common setup involved in a blit deals with the
destination. This moves that setup to a helper function which
initializes a struct (ATIBltDst) holding all of the dst state. This setup
will be shared between blits from memory as well as from HOST_DATA.

Otherwise this is a pure refactor of the ati_2d_blt function to make use
of the setup_2d_blt_dst helper and the struct it initializes. There should
be no change in behavior in this patch.

Signed-off-by: Chad Jablonski <chad@jablonski.xyz>
---
 hw/display/ati_2d.c | 195 +++++++++++++++++++++++++-------------------
 1 file changed, 112 insertions(+), 83 deletions(-)

diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
index a8c4c534b9..75bd38e9b0 100644
--- a/hw/display/ati_2d.c
+++ b/hw/display/ati_2d.c
@@ -13,6 +13,7 @@
 #include "qemu/log.h"
 #include "ui/pixel_ops.h"
 #include "ui/console.h"
+#include "ui/rect.h"
 
 /*
  * NOTE:
@@ -24,7 +25,16 @@
  * possible.
  */
 
-static int ati_bpp_from_datatype(ATIVGAState *s)
+typedef struct {
+    QemuRect rect;
+    int bpp;
+    int stride;
+    bool top_to_bottom;
+    bool left_to_right;
+    uint8_t *bits;
+} ATIBltDst;
+
+static int ati_bpp_from_datatype(const ATIVGAState *s)
 {
     switch (s->regs.dp_datatype & 0xf) {
     case 2:
@@ -43,57 +53,76 @@ static int ati_bpp_from_datatype(ATIVGAState *s)
     }
 }
 
+static void setup_2d_blt_dst(const ATIVGAState *s, ATIBltDst *dst)
+{
+    unsigned dst_x, dst_y;
+    dst->bpp = ati_bpp_from_datatype(s);
+    dst->stride = s->regs.dst_pitch;
+    dst->left_to_right = s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT;
+    dst->top_to_bottom = s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM;
+    dst->bits = s->vga.vram_ptr + s->regs.dst_offset;
+    dst_x = (dst->left_to_right ?
+            s->regs.dst_x : s->regs.dst_x + 1 - s->regs.dst_width);
+    dst_y = (dst->top_to_bottom ?
+            s->regs.dst_y : s->regs.dst_y + 1 - s->regs.dst_height);
+    qemu_rect_init(&dst->rect, dst_x, dst_y,
+                   s->regs.dst_width, s->regs.dst_height);
+    if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) {
+        dst->bits += s->regs.crtc_offset & 0x07ffffff;
+        dst->stride *= dst->bpp;
+    }
+}
+
 void ati_2d_blt(ATIVGAState *s)
 {
     /* FIXME it is probably more complex than this and may need to be */
     /* rewritten but for now as a start just to get some output: */
     DisplaySurface *ds = qemu_console_surface(s->vga.con);
+    uint8_t *end = s->vga.vram_ptr + s->vga.vram_size;
+    int dst_stride_words;
+    ATIBltDst _dst; /* TEMP: avoid churn in future patches */
+    ATIBltDst *dst = &_dst;
+
     DPRINTF("%p %u ds: %p %d %d rop: %x\n", s->vga.vram_ptr,
             s->vga.vbe_start_addr, surface_data(ds), surface_stride(ds),
             surface_bits_per_pixel(ds),
             (s->regs.dp_mix & GMC_ROP3_MASK) >> 16);
-    unsigned dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
-                      s->regs.dst_x : s->regs.dst_x + 1 - s->regs.dst_width);
-    unsigned dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
-                      s->regs.dst_y : s->regs.dst_y + 1 - s->regs.dst_height);
-    int bpp = ati_bpp_from_datatype(s);
-    if (!bpp) {
+
+    setup_2d_blt_dst(s, dst);
+
+    if (!dst->bpp) {
         qemu_log_mask(LOG_GUEST_ERROR, "Invalid bpp\n");
         return;
     }
-    int dst_stride = s->regs.dst_pitch;
-    if (!dst_stride) {
+    if (!dst->stride) {
         qemu_log_mask(LOG_GUEST_ERROR, "Zero dest pitch\n");
         return;
     }
-    uint8_t *dst_bits = s->vga.vram_ptr + s->regs.dst_offset;
-
-    if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) {
-        dst_bits += s->regs.crtc_offset & 0x07ffffff;
-        dst_stride *= bpp;
-    }
-    uint8_t *end = s->vga.vram_ptr + s->vga.vram_size;
-    if (dst_x > 0x3fff || dst_y > 0x3fff || dst_bits >= end
-        || dst_bits + dst_x
-         + (dst_y + s->regs.dst_height) * dst_stride >= end) {
+    if (dst->rect.x > 0x3fff || dst->rect.y > 0x3fff || dst->bits >= end
+        || dst->bits + dst->rect.x
+         + (dst->rect.y + dst->rect.height) * dst->stride >= end) {
         qemu_log_mask(LOG_UNIMP, "blt outside vram not implemented\n");
         return;
     }
+
+    dst_stride_words = dst->stride / sizeof(uint32_t);
+
     DPRINTF("%d %d %d, %d %d %d, (%d,%d) -> (%d,%d) %dx%d %c %c\n",
             s->regs.src_offset, s->regs.dst_offset, s->regs.default_offset,
-            s->regs.src_pitch, s->regs.dst_pitch, s->regs.default_pitch,
-            s->regs.src_x, s->regs.src_y, dst_x, dst_y,
-            s->regs.dst_width, s->regs.dst_height,
-            (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? '>' : '<'),
-            (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? 'v' : '^'));
+            s->regs.src_pitch, dst->stride, s->regs.default_pitch,
+            s->regs.src_x, s->regs.src_y, dst->rect.x, dst->rect.y,
+            dst->rect.width, dst->rect.height,
+            (dst->left_to_right ? '>' : '<'),
+            (dst->top_to_bottom ? 'v' : '^'));
+
     switch (s->regs.dp_mix & GMC_ROP3_MASK) {
     case ROP3_SRCCOPY:
     {
         bool fallback = false;
-        unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
-                       s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
-        unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
-                       s->regs.src_y : s->regs.src_y + 1 - s->regs.dst_height);
+        unsigned src_x = (dst->left_to_right ?
+                       s->regs.src_x : s->regs.src_x + 1 - dst->rect.width);
+        unsigned src_y = (dst->top_to_bottom ?
+                       s->regs.src_y : s->regs.src_y + 1 - dst->rect.height);
         int src_stride = s->regs.src_pitch;
         if (!src_stride) {
             qemu_log_mask(LOG_GUEST_ERROR, "Zero source pitch\n");
@@ -103,44 +132,47 @@ void ati_2d_blt(ATIVGAState *s)
 
         if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) {
             src_bits += s->regs.crtc_offset & 0x07ffffff;
-            src_stride *= bpp;
+            src_stride *= dst->bpp;
         }
         if (src_x > 0x3fff || src_y > 0x3fff || src_bits >= end
             || src_bits + src_x
-             + (src_y + s->regs.dst_height) * src_stride >= end) {
+             + (src_y + dst->rect.height) * src_stride >= end) {
             qemu_log_mask(LOG_UNIMP, "blt outside vram not implemented\n");
             return;
         }
 
         src_stride /= sizeof(uint32_t);
-        dst_stride /= sizeof(uint32_t);
         DPRINTF("pixman_blt(%p, %p, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d)\n",
-                src_bits, dst_bits, src_stride, dst_stride, bpp, bpp,
-                src_x, src_y, dst_x, dst_y,
-                s->regs.dst_width, s->regs.dst_height);
+                src_bits, dst->bits,
+                src_stride, dst_stride_words,
+                dst->bpp, dst->bpp,
+                src_x, src_y, dst->rect.x, dst->rect.y,
+                dst->rect.width, dst->rect.height);
 #ifdef CONFIG_PIXMAN
         if ((s->use_pixman & BIT(1)) &&
-            s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
-            s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
-            fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
-                                   src_stride, dst_stride, bpp, bpp,
-                                   src_x, src_y, dst_x, dst_y,
-                                   s->regs.dst_width, s->regs.dst_height);
+            dst->left_to_right && dst->top_to_bottom) {
+            fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst->bits,
+                                   src_stride, dst_stride_words,
+                                   dst->bpp, dst->bpp,
+                                   src_x, src_y, dst->rect.x, dst->rect.y,
+                                   dst->rect.width, dst->rect.height);
         } else if (s->use_pixman & BIT(1)) {
             /* FIXME: We only really need a temporary if src and dst overlap */
-            int llb = s->regs.dst_width * (bpp / 8);
+            int llb = dst->rect.width * (dst->bpp / 8);
             int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
             uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
-                                     s->regs.dst_height);
+                                     dst->rect.height);
             fallback = !pixman_blt((uint32_t *)src_bits, tmp,
-                                   src_stride, tmp_stride, bpp, bpp,
+                                   src_stride, tmp_stride,
+                                   dst->bpp, dst->bpp,
                                    src_x, src_y, 0, 0,
-                                   s->regs.dst_width, s->regs.dst_height);
+                                   dst->rect.width, dst->rect.height);
             if (!fallback) {
-                fallback = !pixman_blt(tmp, (uint32_t *)dst_bits,
-                                       tmp_stride, dst_stride, bpp, bpp,
-                                       0, 0, dst_x, dst_y,
-                                       s->regs.dst_width, s->regs.dst_height);
+                fallback = !pixman_blt(tmp, (uint32_t *)dst->bits,
+                                       tmp_stride, dst_stride_words,
+                                       dst->bpp, dst->bpp,
+                                       0, 0, dst->rect.x, dst->rect.y,
+                                       dst->rect.width, dst->rect.height);
             }
             g_free(tmp);
         } else
@@ -149,35 +181,33 @@ void ati_2d_blt(ATIVGAState *s)
             fallback = true;
         }
         if (fallback) {
-            unsigned int y, i, j, bypp = bpp / 8;
+            unsigned int y, i, j, bypp = dst->bpp / 8;
             unsigned int src_pitch = src_stride * sizeof(uint32_t);
-            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
-
-            for (y = 0; y < s->regs.dst_height; y++) {
-                i = dst_x * bypp;
+            for (y = 0; y < dst->rect.height; y++) {
+                i = dst->rect.x * bypp;
                 j = src_x * bypp;
-                if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
-                    i += (dst_y + y) * dst_pitch;
+                if (dst->top_to_bottom) {
+                    i += (dst->rect.y + y) * dst->stride;
                     j += (src_y + y) * src_pitch;
                 } else {
-                    i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch;
-                    j += (src_y + s->regs.dst_height - 1 - y) * src_pitch;
+                    i += (dst->rect.y + dst->rect.height - 1 - y) * dst->stride;
+                    j += (src_y + dst->rect.height - 1 - y) * src_pitch;
                 }
-                memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp);
+                memmove(&dst->bits[i], &src_bits[j], dst->rect.width * bypp);
             }
         }
-        if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
-            dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
+        if (dst->bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
+            dst->bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
             s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
             memory_region_set_dirty(&s->vga.vram, s->vga.vbe_start_addr +
                                     s->regs.dst_offset +
-                                    dst_y * surface_stride(ds),
-                                    s->regs.dst_height * surface_stride(ds));
+                                    dst->rect.y * surface_stride(ds),
+                                    dst->rect.height * surface_stride(ds));
         }
-        s->regs.dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
-                         dst_x + s->regs.dst_width : dst_x);
-        s->regs.dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
-                         dst_y + s->regs.dst_height : dst_y);
+        s->regs.dst_x = (dst->left_to_right ?
+                         dst->rect.x + dst->rect.width : dst->rect.x);
+        s->regs.dst_y = (dst->top_to_bottom ?
+                         dst->rect.y + dst->rect.height : dst->rect.y);
         break;
     }
     case ROP3_PATCOPY:
@@ -200,36 +230,35 @@ void ati_2d_blt(ATIVGAState *s)
             break;
         }
 
-        dst_stride /= sizeof(uint32_t);
         DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n",
-                dst_bits, dst_stride, bpp, dst_x, dst_y,
-                s->regs.dst_width, s->regs.dst_height, filler);
+                dst->bits, dst_stride_words, dst->bpp, dst->rect.x, dst->rect.y,
+                dst->rect.width, dst->rect.height, filler);
 #ifdef CONFIG_PIXMAN
         if (!(s->use_pixman & BIT(0)) ||
-            !pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y,
-                    s->regs.dst_width, s->regs.dst_height, filler))
+            !pixman_fill((uint32_t *)dst->bits,
+                    dst_stride_words, dst->bpp, dst->rect.x, dst->rect.y,
+                    dst->rect.width, dst->rect.height, filler))
 #endif
         {
             /* fallback when pixman failed or we don't want to call it */
-            unsigned int x, y, i, bypp = bpp / 8;
-            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
-            for (y = 0; y < s->regs.dst_height; y++) {
-                i = dst_x * bypp + (dst_y + y) * dst_pitch;
-                for (x = 0; x < s->regs.dst_width; x++, i += bypp) {
-                    stn_he_p(&dst_bits[i], bypp, filler);
+            unsigned int x, y, i, bypp = dst->bpp / 8;
+            for (y = 0; y < dst->rect.height; y++) {
+                i = dst->rect.x * bypp + (dst->rect.y + y) * dst->stride;
+                for (x = 0; x < dst->rect.width; x++, i += bypp) {
+                    stn_he_p(&dst->bits[i], bypp, filler);
                 }
             }
         }
-        if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
-            dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
+        if (dst->bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
+            dst->bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
             s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
             memory_region_set_dirty(&s->vga.vram, s->vga.vbe_start_addr +
                                     s->regs.dst_offset +
-                                    dst_y * surface_stride(ds),
-                                    s->regs.dst_height * surface_stride(ds));
+                                    dst->rect.y * surface_stride(ds),
+                                    dst->rect.height * surface_stride(ds));
         }
-        s->regs.dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
-                         dst_y + s->regs.dst_height : dst_y);
+        s->regs.dst_y = (dst->top_to_bottom ?
+                         dst->rect.y + dst->rect.height : dst->rect.y);
         break;
     }
     default:
-- 
2.51.2
Re: [PATCH v5 06/12] ati-vga: Create setup_2d_blt_dst helper
Posted by BALATON Zoltan 2 weeks, 3 days ago
On Thu, 15 Jan 2026, Chad Jablonski wrote:
> A large amount of the common setup involved in a blit deals with the
> destination. This moves that setup to a helper function which
> initializes a struct (ATIBltDst) holding all of the dst state. This setup
> will be shared between blits from memory as well as from HOST_DATA.
>
> Otherwise this is a pure refactor of the ati_2d_blt function to make use
> of the setup_2d_blt_dst helper and the struct it initializes. There should
> be no change in behavior in this patch.
>
> Signed-off-by: Chad Jablonski <chad@jablonski.xyz>
> ---
> hw/display/ati_2d.c | 195 +++++++++++++++++++++++++-------------------
> 1 file changed, 112 insertions(+), 83 deletions(-)
>
> diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
> index a8c4c534b9..75bd38e9b0 100644
> --- a/hw/display/ati_2d.c
> +++ b/hw/display/ati_2d.c
> @@ -13,6 +13,7 @@
> #include "qemu/log.h"
> #include "ui/pixel_ops.h"
> #include "ui/console.h"
> +#include "ui/rect.h"
>
> /*
>  * NOTE:
> @@ -24,7 +25,16 @@
>  * possible.
>  */
>
> -static int ati_bpp_from_datatype(ATIVGAState *s)
> +typedef struct {
> +    QemuRect rect;
> +    int bpp;
> +    int stride;
> +    bool top_to_bottom;
> +    bool left_to_right;
> +    uint8_t *bits;
> +} ATIBltDst;

I'll need to look at this and further patches in more detail but I wonder 
if we need separate types for BltDst and BltSrc or could we use a single 
type for both maybe not using some fields for Src. But if that would not 
simplify things then these can be separate too.

> +
> +static int ati_bpp_from_datatype(const ATIVGAState *s)
> {
>     switch (s->regs.dp_datatype & 0xf) {
>     case 2:
> @@ -43,57 +53,76 @@ static int ati_bpp_from_datatype(ATIVGAState *s)
>     }
> }
>
> +static void setup_2d_blt_dst(const ATIVGAState *s, ATIBltDst *dst)
> +{
> +    unsigned dst_x, dst_y;
> +    dst->bpp = ati_bpp_from_datatype(s);
> +    dst->stride = s->regs.dst_pitch;
> +    dst->left_to_right = s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT;
> +    dst->top_to_bottom = s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM;
> +    dst->bits = s->vga.vram_ptr + s->regs.dst_offset;
> +    dst_x = (dst->left_to_right ?
> +            s->regs.dst_x : s->regs.dst_x + 1 - s->regs.dst_width);
> +    dst_y = (dst->top_to_bottom ?
> +            s->regs.dst_y : s->regs.dst_y + 1 - s->regs.dst_height);
> +    qemu_rect_init(&dst->rect, dst_x, dst_y,
> +                   s->regs.dst_width, s->regs.dst_height);
> +    if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) {
> +        dst->bits += s->regs.crtc_offset & 0x07ffffff;
> +        dst->stride *= dst->bpp;
> +    }
> +}
> +
> void ati_2d_blt(ATIVGAState *s)
> {
>     /* FIXME it is probably more complex than this and may need to be */
>     /* rewritten but for now as a start just to get some output: */
>     DisplaySurface *ds = qemu_console_surface(s->vga.con);
> +    uint8_t *end = s->vga.vram_ptr + s->vga.vram_size;
> +    int dst_stride_words;
> +    ATIBltDst _dst; /* TEMP: avoid churn in future patches */
> +    ATIBltDst *dst = &_dst;

These will be removed but names beginning with _ are reserved and should 
not be used so maybe this (and _src in next patch) should be dst_ or 
something else? Can the churn be avoided by reorganising changes or moving 
converting to pointer in a separate patch? This patch is quite long and 
mixes pointer conversion with other refactoring so separating those may 
help in review.

Regards,
BALATON Zoltan
Re: [PATCH v5 06/12] ati-vga: Create setup_2d_blt_dst helper
Posted by Chad Jablonski 1 week, 4 days ago
>>
>> -static int ati_bpp_from_datatype(ATIVGAState *s)
>> +typedef struct {
>> +    QemuRect rect;
>> +    int bpp;
>> +    int stride;
>> +    bool top_to_bottom;
>> +    bool left_to_right;
>> +    uint8_t *bits;
>> +} ATIBltDst;
>
> I'll need to look at this and further patches in more detail but I wonder 
> if we need separate types for BltDst and BltSrc or could we use a single 
> type for both maybe not using some fields for Src. But if that would not 
> simplify things then these can be separate too.
>

I do think that we would end up with unused fields in some cases. To me
this mapped well conceptually to the ROP3 operations. For example the
fill operations won't use src at all. It's possible we would even end up
with ATIBltBrush or similar in the future. But definitely open to
changing it if you feel strongly after looking at it in more detail!

>> +    ATIBltDst _dst; /* TEMP: avoid churn in future patches */
>> +    ATIBltDst *dst = &_dst;
>
> These will be removed but names beginning with _ are reserved and should 
> not be used so maybe this (and _src in next patch) should be dst_ or 
> something else? Can the churn be avoided by reorganising changes or moving 
> converting to pointer in a separate patch? This patch is quite long and 
> mixes pointer conversion with other refactoring so separating those may 
> help in review.
>

Ah, yep, renaming the _dst and _src makes sense! As far as the churn, as I
look at this again I think it's primarily because there's an inconsistency
in how the register values are accessed and this is attempting to unify that.
The local variables become struct members (e.g. bpp becomes dst->bpp) and so
do the direct register references (e.g. s->regs.dst_pitch becomes dst->stride).
Whether dst is a pointer or a local struct doesn't change that.

I was thinking we could initialize the dst struct and then assign the locals to
those values to avoid churn (e.g. unsigned dst_x = dst->rect.x) and anything
using those locals wouldn't need to be updated. But then the problem still
remains with the direct register references.

Another thought is that we could first create local variables for each of these,
even those that are currently direct register references, and do the replacement
in the function. Then in a separate patch assign the locals to the struct
members. The locals could then either stick around indefinitely or be replaced
themselves with direct references to the struct in another patch.

Do you have any preference, or maybe even other ideas about which might be
clearer?
Re: [PATCH v5 06/12] ati-vga: Create setup_2d_blt_dst helper
Posted by BALATON Zoltan 1 week, 4 days ago
On Mon, 26 Jan 2026, Chad Jablonski wrote:
>>> -static int ati_bpp_from_datatype(ATIVGAState *s)
>>> +typedef struct {
>>> +    QemuRect rect;
>>> +    int bpp;
>>> +    int stride;
>>> +    bool top_to_bottom;
>>> +    bool left_to_right;
>>> +    uint8_t *bits;
>>> +} ATIBltDst;
>>
>> I'll need to look at this and further patches in more detail but I wonder
>> if we need separate types for BltDst and BltSrc or could we use a single
>> type for both maybe not using some fields for Src. But if that would not
>> simplify things then these can be separate too.
>>
>
> I do think that we would end up with unused fields in some cases. To me
> this mapped well conceptually to the ROP3 operations. For example the
> fill operations won't use src at all. It's possible we would even end up
> with ATIBltBrush or similar in the future. But definitely open to
> changing it if you feel strongly after looking at it in more detail!

I don't feel strongly about it but I though it might be simpler to reuse 
the same type instead of adding a new struct for everything. I wonder if 
something like

typedef {
     QemuRect rect;
     int bpp;
     int stride;
     uint8_t *bits;
} ATIBltRect;

ati_2d_do_blt(ATIBltRect src, ATIBltRect dst, bool top_to_bottom, bool left_to_right)

or similar could work (even if you don't use src.rect.width/height) and be 
simpler or if you still have ATIVGAState as parameter to ati_2d_do_blt 
then you don't even need to pass the bools as those could be get from the 
device state which should not change during host data blits (or if they do 
real hardware may also change what it does).

Or maybe ati_2d_do_blt really only needs a copy of the relevant registers 
(e.g. from dst_offset to dp_write_mask copied to an ATI2DCtx or similar 
struct) and work from that instead of ATIVGAState directly so the host 
data route can pass it's own copy with modified register contents and can 
update these without affecting the registers.

But if this does not make sense to you and you think having separate 
structs are clearer I'm OK with that too, these could be modified later as 
well. I think I'll wait for the next version before going into the details 
of the end of the series as I don't expect to need any more major rewrite 
so it will probably already be close to be good enough and only need maybe 
one more revision but this next version may change some these patches so 
the next version may look a bit different from the current version.

>>> +    ATIBltDst _dst; /* TEMP: avoid churn in future patches */
>>> +    ATIBltDst *dst = &_dst;
>>
>> These will be removed but names beginning with _ are reserved and should
>> not be used so maybe this (and _src in next patch) should be dst_ or
>> something else? Can the churn be avoided by reorganising changes or moving
>> converting to pointer in a separate patch? This patch is quite long and
>> mixes pointer conversion with other refactoring so separating those may
>> help in review.
>
> Ah, yep, renaming the _dst and _src makes sense! As far as the churn, as I
> look at this again I think it's primarily because there's an inconsistency
> in how the register values are accessed and this is attempting to unify that.
> The local variables become struct members (e.g. bpp becomes dst->bpp) and so
> do the direct register references (e.g. s->regs.dst_pitch becomes dst->stride).
> Whether dst is a pointer or a local struct doesn't change that.
>
> I was thinking we could initialize the dst struct and then assign the locals to
> those values to avoid churn (e.g. unsigned dst_x = dst->rect.x) and anything
> using those locals wouldn't need to be updated. But then the problem still
> remains with the direct register references.
>
> Another thought is that we could first create local variables for each of these,
> even those that are currently direct register references, and do the replacement
> in the function. Then in a separate patch assign the locals to the struct
> members. The locals could then either stick around indefinitely or be replaced
> themselves with direct references to the struct in another patch.
>
> Do you have any preference, or maybe even other ideas about which might be
> clearer?

The only point is that try not to mix other changes with reorganisation. 
It's OK to change the function to use different parameters or locals even 
if that changes a lot of it but try do that in separate patch that only 
does that and nothing else so it's easy to see no other change was made. 
It may even help to split that reorganisation into more patches for each 
new parameter and convert them in separate patch (like you had separate 
patch for Src already). Generally patches should be doing one thing only 
or only contain very simple additional changes such as changing a comment 
or fixing up some style but something that changes multiple lines 
throughout a function should be in its own patch if possible separate from 
other changes as much as possible.

Regards,
BALATON Zoltan
Re: [PATCH v5 06/12] ati-vga: Create setup_2d_blt_dst helper
Posted by Chad Jablonski 1 week, 4 days ago
>
> I don't feel strongly about it but I though it might be simpler to reuse 
> the same type instead of adding a new struct for everything. I wonder if 
> something like
>
> typedef {
>      QemuRect rect;
>      int bpp;
>      int stride;
>      uint8_t *bits;
> } ATIBltRect;
>
> ati_2d_do_blt(ATIBltRect src, ATIBltRect dst, bool top_to_bottom, bool left_to_right)
>
> or similar could work (even if you don't use src.rect.width/height) and be 
> simpler or if you still have ATIVGAState as parameter to ati_2d_do_blt 
> then you don't even need to pass the bools as those could be get from the 
> device state which should not change during host data blits (or if they do 
> real hardware may also change what it does).
>
> Or maybe ati_2d_do_blt really only needs a copy of the relevant registers 
> (e.g. from dst_offset to dp_write_mask copied to an ATI2DCtx or similar 
> struct) and work from that instead of ATIVGAState directly so the host 
> data route can pass it's own copy with modified register contents and can 
> update these without affecting the registers.
>
> But if this does not make sense to you and you think having separate 
> structs are clearer I'm OK with that too, these could be modified later as 
> well. I think I'll wait for the next version before going into the details 
> of the end of the series as I don't expect to need any more major rewrite 
> so it will probably already be close to be good enough and only need maybe 
> one more revision but this next version may change some these patches so 
> the next version may look a bit different from the current version.
>

I see. That makes a lot of sense. And now that you mention the potential
ATI2DCtx solution I do like that it removes direct dependency on the registers
inside of the blit function. I wonder if we could go a step even further and
store the actual derived values in the struct instead of a copy of raw register
values. Something like:

typedef struct {
  /* src */
  uint8_t *src_bits;
  int src_x, src_y;
  int src_stride;

  /* dst */
  uint8_t *dst_bits;
  QemuRect dst_rect;
  int dst_stride;
  int bpp;

  QemuRect scissor;
  uint32_t rop3;
  bool left_to_right;
  bool top_to_bottom;
  uint32_t frgd_clr;
  uint32_t bkgd_clr;
} ATI2DCtx;

We would still need to pass VGACommonState in to access a few things (vram and
the palette) and use_pixman but we wouldn't need the full device at that point.
So the function signature would look like:

static void ati_2d_do_blt(VGACommonState *vga, ATI2DCtx *ctx, bool use_pixman)

A blt ctx setup function could then fill this struct in the most common case (memory)
and the host_data flush could easily modify it without needing any of the register access
details.

One downside of course is that if we discover we need to mutate registers
during the blit we will need to rethink this a bit. But so far it isn't looking
like that is necessary.
Re: [PATCH v5 06/12] ati-vga: Create setup_2d_blt_dst helper
Posted by BALATON Zoltan 1 week, 3 days ago
On Mon, 26 Jan 2026, Chad Jablonski wrote:
>> I don't feel strongly about it but I though it might be simpler to reuse
>> the same type instead of adding a new struct for everything. I wonder if
>> something like
>>
>> typedef {
>>      QemuRect rect;
>>      int bpp;
>>      int stride;
>>      uint8_t *bits;
>> } ATIBltRect;
>>
>> ati_2d_do_blt(ATIBltRect src, ATIBltRect dst, bool top_to_bottom, bool left_to_right)
>>
>> or similar could work (even if you don't use src.rect.width/height) and be
>> simpler or if you still have ATIVGAState as parameter to ati_2d_do_blt
>> then you don't even need to pass the bools as those could be get from the
>> device state which should not change during host data blits (or if they do
>> real hardware may also change what it does).
>>
>> Or maybe ati_2d_do_blt really only needs a copy of the relevant registers
>> (e.g. from dst_offset to dp_write_mask copied to an ATI2DCtx or similar
>> struct) and work from that instead of ATIVGAState directly so the host
>> data route can pass it's own copy with modified register contents and can
>> update these without affecting the registers.
>>
>> But if this does not make sense to you and you think having separate
>> structs are clearer I'm OK with that too, these could be modified later as
>> well. I think I'll wait for the next version before going into the details
>> of the end of the series as I don't expect to need any more major rewrite
>> so it will probably already be close to be good enough and only need maybe
>> one more revision but this next version may change some these patches so
>> the next version may look a bit different from the current version.
>>
>
> I see. That makes a lot of sense. And now that you mention the potential
> ATI2DCtx solution I do like that it removes direct dependency on the registers
> inside of the blit function. I wonder if we could go a step even further and
> store the actual derived values in the struct instead of a copy of raw register
> values. Something like:
>
> typedef struct {
>  /* src */
>  uint8_t *src_bits;
>  int src_x, src_y;
>  int src_stride;
>
>  /* dst */
>  uint8_t *dst_bits;
>  QemuRect dst_rect;
>  int dst_stride;
>  int bpp;
>
>  QemuRect scissor;
>  uint32_t rop3;
>  bool left_to_right;
>  bool top_to_bottom;
>  uint32_t frgd_clr;
>  uint32_t bkgd_clr;
> } ATI2DCtx;

That would work. For consistency I'd rather either go with dst_x dst_y 
dst_w, etc. and likewise for scissor (which could be abbreviated sc) or 
also use QemuRect src even if w and h are never used just so all usage 
will be uniform and you don't have to remember why src is special.

> We would still need to pass VGACommonState in to access a few things (vram and
> the palette) and use_pixman but we wouldn't need the full device at that point.
> So the function signature would look like:
>
> static void ati_2d_do_blt(VGACommonState *vga, ATI2DCtx *ctx, bool use_pixman)

The use_pixman may need to be an int and mirror the x-pixman property and 
used like the property now so we can control different operations 
separately instead of just on or off for all.

> A blt ctx setup function could then fill this struct in the most common case (memory)
> and the host_data flush could easily modify it without needing any of the register access
> details.
>
> One downside of course is that if we discover we need to mutate registers
> during the blit we will need to rethink this a bit. But so far it isn't looking
> like that is necessary.

Maybe if any changes to regs are needed those can be done in whatever 
calls ati_2d_do_blt so hopefully this won't be a problem. But we may need 
to add more values to ctx in the future but that should be OK.

Regards,
BALATON Zoltan