Writing to any of the HOST_DATA0-7 registers pushes the written data
into a buffer. A final write to HOST_DATA_LAST writes data to the
buffer and triggers the pending blit operation.
For now the buffer is a fixed-size 4 MiB array. A write that would overflow
it is logged and dropped. This size seems large enough given what I've seen
in testing. Future work could size the buffer dynamically based on the
destination dimensions if needed.
This sets things up for implementation of HOST_DATA as a blit operation
source in a future patch.
Signed-off-by: Chad Jablonski <chad@jablonski.xyz>
---
hw/display/ati.c | 15 +++++++++++++++
hw/display/ati_dbg.c | 9 +++++++++
hw/display/ati_int.h | 3 +++
hw/display/ati_regs.h | 9 +++++++++
4 files changed, 36 insertions(+)
diff --git a/hw/display/ati.c b/hw/display/ati.c
index 4ff17209c4..0a686750ae 100644
--- a/hw/display/ati.c
+++ b/hw/display/ati.c
@@ -969,6 +969,20 @@ static void ati_mm_write(void *opaque, hwaddr addr,
case SRC_SC_BOTTOM_RIGHT:
s->regs.src_sc_bottom_right = data;
break;
+ case HOST_DATA0 ... HOST_DATA7:
+ case HOST_DATA_LAST:
+ if (s->host_data_pos + 4 > sizeof(s->host_data_buffer)) {
+ qemu_log_mask(LOG_UNIMP, "HOST_DATA buffer overflow "
+ "(buffer size: %zu bytes)\n",
+ sizeof(s->host_data_buffer));
+ return;
+ }
+ stn_he_p(&s->host_data_buffer[s->host_data_pos], 4, data);
+ s->host_data_pos += 4;
+ if (addr == HOST_DATA_LAST) {
+ ati_2d_blt(s);
+ }
+ break;
default:
break;
}
@@ -1074,6 +1088,7 @@ static void ati_vga_reset(DeviceState *dev)
/* reset vga */
vga_common_reset(&s->vga);
s->mode = VGA_MODE;
+ s->host_data_pos = 0;
}
static void ati_vga_exit(PCIDevice *dev)
diff --git a/hw/display/ati_dbg.c b/hw/display/ati_dbg.c
index 3ffa7f35df..5c799d540a 100644
--- a/hw/display/ati_dbg.c
+++ b/hw/display/ati_dbg.c
@@ -252,6 +252,15 @@ static struct ati_regdesc ati_reg_names[] = {
{"MC_SRC1_CNTL", 0x19D8},
{"TEX_CNTL", 0x1800},
{"RAGE128_MPP_TB_CONFIG", 0x01c0},
+ {"HOST_DATA0", 0x17c0},
+ {"HOST_DATA1", 0x17c4},
+ {"HOST_DATA2", 0x17c8},
+ {"HOST_DATA3", 0x17cc},
+ {"HOST_DATA4", 0x17d0},
+ {"HOST_DATA5", 0x17d4},
+ {"HOST_DATA6", 0x17d8},
+ {"HOST_DATA7", 0x17dc},
+ {"HOST_DATA_LAST", 0x17e0},
{NULL, -1}
};
diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h
index aab3cbf81a..16e5d29a5a 100644
--- a/hw/display/ati_int.h
+++ b/hw/display/ati_int.h
@@ -15,6 +15,7 @@
#include "hw/i2c/bitbang_i2c.h"
#include "vga_int.h"
#include "qom/object.h"
+#include "qemu/units.h"
/*#define DEBUG_ATI*/
@@ -108,6 +109,8 @@ struct ATIVGAState {
MemoryRegion io;
MemoryRegion mm;
ATIVGARegs regs;
+ uint32_t host_data_pos;
+ uint8_t host_data_buffer[4 * MiB];
};
const char *ati_reg_name(int num);
diff --git a/hw/display/ati_regs.h b/hw/display/ati_regs.h
index 2b56b9fb66..9b52b61dcb 100644
--- a/hw/display/ati_regs.h
+++ b/hw/display/ati_regs.h
@@ -252,6 +252,15 @@
#define DP_T12_CNTL 0x178c
#define DST_BRES_T1_LNTH 0x1790
#define DST_BRES_T2_LNTH 0x1794
+#define HOST_DATA0 0x17c0
+#define HOST_DATA1 0x17c4
+#define HOST_DATA2 0x17c8
+#define HOST_DATA3 0x17cc
+#define HOST_DATA4 0x17d0
+#define HOST_DATA5 0x17d4
+#define HOST_DATA6 0x17d8
+#define HOST_DATA7 0x17dc
+#define HOST_DATA_LAST 0x17e0
#define SCALE_SRC_HEIGHT_WIDTH 0x1994
#define SCALE_OFFSET_0 0x1998
#define SCALE_PITCH 0x199c
--
2.51.0
On Sun, 2 Nov 2025, Chad Jablonski wrote:
> Writing to any of the HOST_DATA0-7 registers pushes the written data
> into a buffer. A final write to HOST_DATA_LAST writes data to the
> buffer and triggers the pending blit operation.
>
> The buffer for now is a static 4MiB and overflows are checked. This
> seems like a large enough value given what I've seen in testing. Future
> work could dynamically size the buffer based on the destination dimensions if
> needed.

I wonder where the real chip stores this information?

Regards,
BALATON Zoltan
On Mon, 3 Nov 2025, BALATON Zoltan wrote:
> On Sun, 2 Nov 2025, Chad Jablonski wrote:
>> Writing to any of the HOST_DATA0-7 registers pushes the written data
>> into a buffer. A final write to HOST_DATA_LAST writes data to the
>> buffer and triggers the pending blit operation.
>>
>> The buffer for now is a static 4MiB and overflows are checked. This
>> seems like a large enough value given what I've seen in testing. Future
>> work could dynamically size the buffer based on the destination dimensions
>> if
>> needed.
>
> I wonder where the real chip stores this information?

I don't think there's a separate buffer for this on real card and the
command FIFO is not long enough to store it so it should probably use
vram. But how does it know which part of that can be used? Maybe you could
write some pattern into HOST_DATAx registers (like 0xaaaaaaaa, 0x55555555
but longer than the FIFO to make sure it's not staying there) and then
before writing HOST_DATA_LAST look for that pattern in vram to see if it
appears anywhere. Maybe some register points there or the card has some
memory management I don't know about? (I don't know much about GPUs so
it's quite possible I have no idea how it should work.) If the pattern is
not found I don't have any better idea to find out how this should work.
(We could keep the separate buffer in emulation for now but I'm curious
how the real chip does it and if we can emulate that.)

Regards,
BALATON Zoltan
>>
>> I wonder where the real chip stores this information?
>
> I don't think there's a separate buffer for this on real card and the
> command FIFO is not long enough to store it so it should probably use
> vram. But how does it know which part of that can be used? Maybe you could
> write some pattern into HOST_DATAx registers (like 0xaaaaaaaa, 0x55555555
> but longer than the FIFO to make sure it's not staying there) and then
> before writing HOST_DATA_LAST look for that pattern in vram to see if it
> appears anywhere. Maybe some register points there or the card has some
> memory management I don't know about? (I don't know much about GPUs so
> it's quite possible I have no idea how it should work.) If the pattern is
> not found I don't have any better idea to find out how this should work.
> (We could keep the separate buffer in emulation for now but I'm curious
> how the real chip does it and if we can emulate that.)
>
> Regards,
> BALATON Zoltan

Hi BALATON,

You're absolutely right. After spending some time setting up a nicer test
environment I'm able to confirm this behavior on the Rage 128 Pro Ultra TF:

    write HOST_DATA0
    write HOST_DATA1
    write HOST_DATA2
    write HOST_DATA3
    -> Data appears in the framebuffer at the destination
    write HOST_DATA4
    write HOST_DATA5
    write HOST_DATA6
    write HOST_DATA7
    -> Data appears in the framebuffer at the destination

The card does not wait for HOST_DATA_LAST to flush to the destination. So it
would appear that there is no buffer at all or even a special area in VRAM.
It's looking like there is a 128-bit accumulator which makes total sense
given the architecture of the card.

I'd like to do some additional testing but you were right to question this.
I'll address it in patch v3.
© 2016 - 2026 Red Hat, Inc.