The amount of data transmitted each time is the maximum value between s and w,
but it should be noted that the remaining transmission amount is less than the width.
Configure swidth to 1 byte, width to 4 bytes, and transfer size to 4.
Configuration
../configure --target-list=arm-softmmu --enable-debug
Reproducer
./qemu-system-arm -M versatilepb -m 128M -nographic -S \
-device loader,addr=0x00000000,data=0x44332211,data-len=4 \
-device loader,addr=0x00001000,data=0x00000000,data-len=4 \
-device loader,addr=0x10130030,data=0x00000001,data-len=4 \
-device loader,addr=0x10130100,data=0x00000000,data-len=4 \
-device loader,addr=0x10130104,data=0x00001000,data-len=4 \
-device loader,addr=0x10130108,data=0x00000000,data-len=4 \
-device loader,addr=0x1013010C,data=0x9e47f004,data-len=4 \
-device loader,addr=0x10130110,data=0x0000c001,data-len=4
Qemu monitor
(qemu) xp /1wx 0x00001000
00001000: 0x00002211
The result correctly after fixed
(qemu) xp /1wx 0x00001000
00001000: 0x44332211
Signed-off-by: Tao Ding <dingtao0430@163.com>
---
hw/dma/pl080.c | 32 ++++++++++++++++++++------------
1 file changed, 20 insertions(+), 12 deletions(-)
diff --git a/hw/dma/pl080.c b/hw/dma/pl080.c
index 0979a687aa..4f97943b28 100644
--- a/hw/dma/pl080.c
+++ b/hw/dma/pl080.c
@@ -160,26 +160,34 @@ again:
/* Transfer one element. */
/* ??? Should transfer multiple elements for a burst request. */
- /* ??? Unclear what the proper behavior is when source and
- destination widths are different. */
swidth = 1 << ((ch->ctrl >> 18) & 7);
dwidth = 1 << ((ch->ctrl >> 21) & 7);
- for (n = 0; n < dwidth; n+= swidth) {
+ xsize = (dwidth < swidth) ? swidth : dwidth;
+ xsize = (xsize < size * swidth) ? xsize : (size * swidth);
+ for (n = 0; n < xsize; n += swidth) {
address_space_read(&s->downstream_as, ch->src,
MEMTXATTRS_UNSPECIFIED, buff + n, swidth);
- if (ch->ctrl & PL080_CCTRL_SI)
+ if (ch->ctrl & PL080_CCTRL_SI) {
ch->src += swidth;
+ }
}
- xsize = (dwidth < swidth) ? swidth : dwidth;
- /* ??? This may pad the value incorrectly for dwidth < 32. */
- for (n = 0; n < xsize; n += dwidth) {
- address_space_write(&s->downstream_as, ch->dest + n,
- MEMTXATTRS_UNSPECIFIED, buff + n, dwidth);
- if (ch->ctrl & PL080_CCTRL_DI)
- ch->dest += swidth;
+ if (xsize < dwidth) {
+ address_space_write(&s->downstream_as, ch->dest,
+ MEMTXATTRS_UNSPECIFIED, buff, xsize);
+ if (ch->ctrl & PL080_CCTRL_DI) {
+ ch->dest += dwidth;
+ }
+ } else {
+ for (n = 0; n < xsize; n += dwidth) {
+ address_space_write(&s->downstream_as, ch->dest,
+ MEMTXATTRS_UNSPECIFIED, buff + n, dwidth);
+ if (ch->ctrl & PL080_CCTRL_DI) {
+ ch->dest += dwidth;
+ }
+ }
}
- size--;
+ size -= xsize / swidth;
ch->ctrl = (ch->ctrl & 0xfffff000) | size;
if (size == 0) {
/* Transfer complete. */
--
2.43.0
On Thu, 12 Mar 2026 at 08:02, Tao Ding <dingtao0430@163.com> wrote:
>
> The amount of data transmitted each time is the maximum value between s and w,
> but it should be noted that the remaining transmission amount is less than the width.
>
> Configure swidth to 1 byte, width to 4 bytes, and transfer size to 4.
>
> Configuration
> ../configure --target-list=arm-softmmu --enable-debug
> Reproducer
> ./qemu-system-arm -M versatilepb -m 128M -nographic -S \
> -device loader,addr=0x00000000,data=0x44332211,data-len=4 \
> -device loader,addr=0x00001000,data=0x00000000,data-len=4 \
> -device loader,addr=0x10130030,data=0x00000001,data-len=4 \
> -device loader,addr=0x10130100,data=0x00000000,data-len=4 \
> -device loader,addr=0x10130104,data=0x00001000,data-len=4 \
> -device loader,addr=0x10130108,data=0x00000000,data-len=4 \
> -device loader,addr=0x1013010C,data=0x9e47f004,data-len=4 \
> -device loader,addr=0x10130110,data=0x0000c001,data-len=4
>
> Qemu monitor
> (qemu) xp /1wx 0x00001000
> 00001000: 0x00002211
>
> The result correctly after fixed
> (qemu) xp /1wx 0x00001000
> 00001000: 0x44332211
>
> Signed-off-by: Tao Ding <dingtao0430@163.com>
> ---
> hw/dma/pl080.c | 32 ++++++++++++++++++++------------
> 1 file changed, 20 insertions(+), 12 deletions(-)
>
> diff --git a/hw/dma/pl080.c b/hw/dma/pl080.c
> index 0979a687aa..4f97943b28 100644
> --- a/hw/dma/pl080.c
> +++ b/hw/dma/pl080.c
> @@ -160,26 +160,34 @@ again:
>
> /* Transfer one element. */
> /* ??? Should transfer multiple elements for a burst request. */
> - /* ??? Unclear what the proper behavior is when source and
> - destination widths are different. */
> swidth = 1 << ((ch->ctrl >> 18) & 7);
> dwidth = 1 << ((ch->ctrl >> 21) & 7);
> - for (n = 0; n < dwidth; n+= swidth) {
> + xsize = (dwidth < swidth) ? swidth : dwidth;
I think this can be more clearly written as
xsize = MAX(swidth, dwidth);
since we're editing the line anyway.
> + xsize = (xsize < size * swidth) ? xsize : (size * swidth);
xsize = MIN(xsize, size * swidth);
So this is handling the case where our destination width is larger
than the remaining amount the transfer size says we should be reading
from the source, which is cases like
"source width = 1, dest width = 4, transfer count not a multiple of 4".
The TRM is unfortunately very vague about what the hardware does here.
I think the hardware designers considered this to be a programming error:
in the "Software considerations" section of the TRM one of the items is:
* If the source width is less than the destination width, the
TransferSize value multiplied by the source width must be an
integral multiple of the destination width.
> + for (n = 0; n < xsize; n += swidth) {
> address_space_read(&s->downstream_as, ch->src,
> MEMTXATTRS_UNSPECIFIED, buff + n, swidth);
> - if (ch->ctrl & PL080_CCTRL_SI)
> + if (ch->ctrl & PL080_CCTRL_SI) {
> ch->src += swidth;
> + }
> }
> - xsize = (dwidth < swidth) ? swidth : dwidth;
> - /* ??? This may pad the value incorrectly for dwidth < 32. */
> - for (n = 0; n < xsize; n += dwidth) {
> - address_space_write(&s->downstream_as, ch->dest + n,
> - MEMTXATTRS_UNSPECIFIED, buff + n, dwidth);
> - if (ch->ctrl & PL080_CCTRL_DI)
> - ch->dest += swidth;
> + if (xsize < dwidth) {
> + address_space_write(&s->downstream_as, ch->dest,
> + MEMTXATTRS_UNSPECIFIED, buff, xsize);
> + if (ch->ctrl & PL080_CCTRL_DI) {
> + ch->dest += dwidth;
> + }
Here we choose to handle the "swidth and tsize say we should
read e.g. 2 bytes from the source but dwidth is e.g. 4" case
by performing a 2 byte write to the destination. I don't think this
is what the hardware would do.
There is this interesting bit in the "software considerations" section:
* While writing, the TransferSize bit-field is like a control register
because it determines how many transfers the DMAC performs. However,
during read-back, TransferSize behaves like a status register because
it returns the number of remaining transfers in terms of source width.
So when TransferSize is read back, it returns the number of
destination-transfer-completed stored in a separate counter called
TrfSizeDst multiplied by a factor.
My best guess from stuff like this is that the way the PL080 works
internally is that for each dest-write it needs to do for the transfer
it does the appropriate number of source-reads. If the guest
misprograms it such that that means it reads more than
(transferwidth * swidth) that will result in reading more data
from the source than intended, but that would be a software bug.
I would suggest doing something like that, but at any rate
commenting and doing a LOG_GUEST_ERROR if there's a mismatch in
swidth/dwidth/tsize such that we wouldn't have enough bytes to
do a full destination-write.
> + } else {
> + for (n = 0; n < xsize; n += dwidth) {
> + address_space_write(&s->downstream_as, ch->dest,
> + MEMTXATTRS_UNSPECIFIED, buff + n, dwidth);
> + if (ch->ctrl & PL080_CCTRL_DI) {
> + ch->dest += dwidth;
> + }
> + }
> }
>
> - size--;
> + size -= xsize / swidth;
This change is correct (the transfer size field in the control register
is supposed to count down by 1 for each swidth-sized data unit we
transfer, and our loop can do more than one swidth read), and probably
the most important bugfix in this patch, because it's the one that
matters for normal correct use of the PL080.
> ch->ctrl = (ch->ctrl & 0xfffff000) | size;
> if (size == 0) {
> /* Transfer complete. */
> --
> 2.43.0
thanks
-- PMM
Thank you for your valuable review.
I agree with your suggestion, it is necessary to satisfy that transfersize * swidth is
an integer multiple of dwidth. So, some of the code is actually redundant in this patch.
Before that, swidth,dwidth and transfersize will be checked, and if the conditions are not met,
there will be qemu_log_mask(LOG_GUEST_ERROR).
Can I modify this patch and submit it as patch v2.
>> ---
>> hw/dma/pl080.c | 32 ++++++++++++++++++++------------
>> 1 file changed, 20 insertions(+), 12 deletions(-)
>>
>> diff --git a/hw/dma/pl080.c b/hw/dma/pl080.c
>> index 0979a687aa..4f97943b28 100644
>> --- a/hw/dma/pl080.c
>> +++ b/hw/dma/pl080.c
>> @@ -160,26 +160,34 @@ again:
>>
>> /* Transfer one element. */
>> /* ??? Should transfer multiple elements for a burst request. */
>> - /* ??? Unclear what the proper behavior is when source and
>> - destination widths are different. */
>> swidth = 1 << ((ch->ctrl >> 18) & 7);
>> dwidth = 1 << ((ch->ctrl >> 21) & 7);
>> - for (n = 0; n < dwidth; n+= swidth) {
>> + xsize = (dwidth < swidth) ? swidth : dwidth;
> I think this can be more clearly written as
> xsize = MAX(swidth, dwidth);
> since we're editing the line anyway.
>
>> + xsize = (xsize < size * swidth) ? xsize : (size * swidth);
> xsize = MIN(xsize, size * swidth);
>
> So this is handling the case where our destination width is larger
> than the remaining amount the transfer size says we should be reading
> from the source, which is cases like
> "source width = 1, dest width = 4, transfer count not a multiple of 4".
> The TRM is unfortunately very vague about what the hardware does here.
> I think the hardware designers considered this to be a programming error:
> in the "Software considerations" section of the TRM one of the items is:
>
> * If the source width is less than the destination width, the
> TransferSize value multiplied by the source width must be an
> integral multiple of the destination width.
>
>> + for (n = 0; n < xsize; n += swidth) {
>> address_space_read(&s->downstream_as, ch->src,
>> MEMTXATTRS_UNSPECIFIED, buff + n, swidth);
>> - if (ch->ctrl & PL080_CCTRL_SI)
>> + if (ch->ctrl & PL080_CCTRL_SI) {
>> ch->src += swidth;
>> + }
>> }
>> - xsize = (dwidth < swidth) ? swidth : dwidth;
>> - /* ??? This may pad the value incorrectly for dwidth < 32. */
>> - for (n = 0; n < xsize; n += dwidth) {
>> - address_space_write(&s->downstream_as, ch->dest + n,
>> - MEMTXATTRS_UNSPECIFIED, buff + n, dwidth);
>> - if (ch->ctrl & PL080_CCTRL_DI)
>> - ch->dest += swidth;
>> + if (xsize < dwidth) {
>> + address_space_write(&s->downstream_as, ch->dest,
>> + MEMTXATTRS_UNSPECIFIED, buff, xsize);
>> + if (ch->ctrl & PL080_CCTRL_DI) {
>> + ch->dest += dwidth;
>> + }
> Here we choose to handle the "swidth and tsize say we should
> read e.g. 2 bytes from the source but dwidth is e.g. 4" case
> by performing a 2 byte write to the destination. I don't think this
> is what the hardware would do.
>
> There is this interesting bit in the "software considerations" section:
>
> * While writing, the TransferSize bit-field is like a control register
> because it determines how many transfers the DMAC performs. However,
> during read-back, TransferSize behaves like a status register because
> it returns the number of remaining transfers in terms of source width.
> So when TransferSize is read back, it returns the number of
> destination-transfer-completed stored in a separate counter called
> TrfSizeDst multiplied by a factor.
>
> My best guess from stuff like this is that the way the PL080 works
> internally is that for each dest-write it needs to do for the transfer
> it does the appropriate number of source-reads. If the guest
> misprograms it such that that means it reads more than
> (transferwidth * swidth) that will result in reading more data
> from the source than intended, but that would be a software bug.
>
> I would suggest doing something like that, but at any rate
> commenting and doing a LOG_GUEST_ERROR if there's a mismatch in
> swidth/dwidth/tsize such that we wouldn't have enough bytes to
> do a full destination-write.
>
>> + } else {
>> + for (n = 0; n < xsize; n += dwidth) {
>> + address_space_write(&s->downstream_as, ch->dest,
>> + MEMTXATTRS_UNSPECIFIED, buff + n, dwidth);
>> + if (ch->ctrl & PL080_CCTRL_DI) {
>> + ch->dest += dwidth;
>> + }
>> + }
>> }
>>
>> - size--;
>> + size -= xsize / swidth;
> This change is correct (the transfer size field in the control register
> is supposed to count down by 1 for each swidth-sized data unit we
> transfer, and our loop can do more than one swidth read), and probably
> the most important bugfix in this patch, because it's the one that
> matters for normal correct use of the PL080.
>
>> ch->ctrl = (ch->ctrl & 0xfffff000) | size;
>> if (size == 0) {
>> /* Transfer complete. */
>> --
>> 2.43.0
> thanks
> -- PMM
When source and destination widths are different, the transfer size register was not being decremented correctly, causing data corruption. Compared to the previous version(PATCH v1 2/3), simplified the code and the judgment of constraint relationships between size, swidth, and dwidth has been added. Tao Ding (1): Fix transfer size register decrement in pl080 hw/dma/pl080.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) base-commit: 8e711856d7639cbffa51405f2cc2366e3d9e3a23 -- 2.43.0
Configure swidth to 1 byte, width to 4 bytes, and transfer size to 4.
Configuration
../configure --target-list=arm-softmmu --enable-debug
Reproducer
./qemu-system-arm -M versatilepb -m 128M -nographic -S \
-device loader,addr=0x00000000,data=0x44332211,data-len=4 \
-device loader,addr=0x00001000,data=0x00000000,data-len=4 \
-device loader,addr=0x10130030,data=0x00000001,data-len=4 \
-device loader,addr=0x10130100,data=0x00000000,data-len=4 \
-device loader,addr=0x10130104,data=0x00001000,data-len=4 \
-device loader,addr=0x10130108,data=0x00000000,data-len=4 \
-device loader,addr=0x1013010C,data=0x9e47f004,data-len=4 \
-device loader,addr=0x10130110,data=0x0000c001,data-len=4
Qemu monitor
(qemu) xp /1wx 0x00001000
00001000: 0x00002211
The result correctly after fixed
(qemu) xp /1wx 0x00001000
00001000: 0x44332211
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Tao Ding <dingtao0430@163.com>
---
hw/dma/pl080.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/hw/dma/pl080.c b/hw/dma/pl080.c
index 627ccbbd81..4a90c7bb27 100644
--- a/hw/dma/pl080.c
+++ b/hw/dma/pl080.c
@@ -179,23 +179,28 @@ again:
c, extract32(ch->ctrl, 21, 3));
continue;
}
-
- for (n = 0; n < dwidth; n+= swidth) {
+ if ((size * swidth) % dwidth) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "pl080: channel %d: transfer size mismatch: size=%d swidth=%d dwidth=%d\n",
+ c, size, swidth, dwidth);
+ continue;
+ }
+ xsize = MAX(swidth, dwidth);
+ for (n = 0; n < xsize; n += swidth) {
address_space_read(&s->downstream_as, ch->src,
MEMTXATTRS_UNSPECIFIED, buff + n, swidth);
if (ch->ctrl & PL080_CCTRL_SI)
ch->src += swidth;
}
- xsize = (dwidth < swidth) ? swidth : dwidth;
/* ??? This may pad the value incorrectly for dwidth < 32. */
for (n = 0; n < xsize; n += dwidth) {
- address_space_write(&s->downstream_as, ch->dest + n,
+ address_space_write(&s->downstream_as, ch->dest,
MEMTXATTRS_UNSPECIFIED, buff + n, dwidth);
if (ch->ctrl & PL080_CCTRL_DI)
- ch->dest += swidth;
+ ch->dest += dwidth;
}
- size--;
+ size -= xsize / swidth;
ch->ctrl = (ch->ctrl & 0xfffff000) | size;
if (size == 0) {
/* Transfer complete. */
--
2.43.0
On Mon, 23 Mar 2026 at 16:50, Tao Ding <dingtao0430@163.com> wrote: > > Configure swidth to 1 byte, width to 4 bytes, and transfer size to 4. > > Configuration > ../configure --target-list=arm-softmmu --enable-debug > Reproducer > ./qemu-system-arm -M versatilepb -m 128M -nographic -S \ > -device loader,addr=0x00000000,data=0x44332211,data-len=4 \ > -device loader,addr=0x00001000,data=0x00000000,data-len=4 \ > -device loader,addr=0x10130030,data=0x00000001,data-len=4 \ > -device loader,addr=0x10130100,data=0x00000000,data-len=4 \ > -device loader,addr=0x10130104,data=0x00001000,data-len=4 \ > -device loader,addr=0x10130108,data=0x00000000,data-len=4 \ > -device loader,addr=0x1013010C,data=0x9e47f004,data-len=4 \ > -device loader,addr=0x10130110,data=0x0000c001,data-len=4 > > Qemu monitor > (qemu) xp /1wx 0x00001000 > 00001000: 0x00002211 > > The result correctly after fixed > (qemu) xp /1wx 0x00001000 > 00001000: 0x44332211 > > Suggested-by: Peter Maydell <peter.maydell@linaro.org> > Signed-off-by: Tao Ding <dingtao0430@163.com> > --- > hw/dma/pl080.c | 17 +++++++++++------ Thanks for this patch; I've applied it to target-arm.next (with an expansion to the commit message to describe the bugs it is fixing). -- PMM
© 2016 - 2026 Red Hat, Inc.