[PATCH v1 0/5] PL011 and PL080 for dma transfer

Tao Ding posted 5 patches 20 hours ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/cover.1774099044.git.dingtao0430@163.com
Maintainers: Peter Maydell <peter.maydell@linaro.org>, Fabiano Rosas <farosas@suse.de>, Laurent Vivier <lvivier@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>
hw/char/pl011.c         |  72 ++++++++++++++++++++++---
hw/dma/pl080.c          | 116 ++++++++++++++++++++++++++++++++++------
include/hw/char/pl011.h |   1 +
include/hw/dma/pl080.h  |   7 +++
4 files changed, 174 insertions(+), 22 deletions(-)
[PATCH v1 0/5] PL011 and PL080 for dma transfer
Posted by Tao Ding 20 hours ago
The PL080 implementation in QEMU currently supports memory-to-memory data transfers 
but lacks support for peripheral-related data transfers. 

To enable this functionality, signal interfaces have been added for PL011 and PL080 to 
emulate the DMA request handshake, thereby completing the peripheral-related 
transfer features that were previously unimplemented in PL080. Specifically, 
memory-to-peripheral, peripheral-to-memory and periphral-to-periphral under dma controll.

Although PL011 and PL080 are not strongly coupled, for the convenience of verification, 
they are still placed in the same patch thread. Finally, unit testing was conducted using Qtest, 
and Linux system testing was conducted on "Versatilepb" machine in Qemu.

Tao Ding (5):
  Fixed the PL011 FIFO level bug
  PL011 generate receive timeout interrupt
  Implement the DMA interface of PL011
  Run pl080_run in main loop
  PL080 add transfer memory and peripheral

 hw/char/pl011.c         |  72 ++++++++++++++++++++++---
 hw/dma/pl080.c          | 116 ++++++++++++++++++++++++++++++++++------
 include/hw/char/pl011.h |   1 +
 include/hw/dma/pl080.h  |   7 +++
 4 files changed, 174 insertions(+), 22 deletions(-)


base-commit: 8e711856d7639cbffa51405f2cc2366e3d9e3a23

Testing:
========

Testing prepare:
	1. The changes in the patch need to be applied. 
	2. Then connect the DMA signals of PL011 and PL080 in the Versatilepb board.
		Channel0 is used to do UART's TX, memory to peripheral. Use 0 signal index.
		Channel1 is used to do UART's RX, peripheral to memory. Use 1 signal index.
		(The connection of the signal needs to consider the offset in the device.)

Change in Versatilepb.c :
-----------------------------------------
diff --git a/hw/arm/versatilepb.c b/hw/arm/versatilepb.c
index 254b1610b3..ee0056e831 100644
--- a/hw/arm/versatilepb.c
+++ b/hw/arm/versatilepb.c
@@ -281,7 +281,7 @@ static void versatile_init(MachineState *machine, int board_id)
     }
 
     pl011_create(0x101f1000, pic[12], serial_hd(0));
-    pl011_create(0x101f2000, pic[13], serial_hd(1));
+    DeviceState *pl011_dev = pl011_create(0x101f2000, pic[13], serial_hd(1));
     pl011_create(0x101f3000, pic[14], serial_hd(2));
     pl011_create(0x10009000, sic[6], serial_hd(3));
 
@@ -292,6 +292,15 @@ static void versatile_init(MachineState *machine, int board_id)
     sysbus_realize_and_unref(busdev, &error_fatal);
     sysbus_mmio_map(busdev, 0, 0x10130000);
     sysbus_connect_irq(busdev, 0, pic[17]);
+    sysbus_connect_irq(busdev, 3, qdev_get_gpio_in(pl011_dev, 0));
+    sysbus_connect_irq(busdev, 4, qdev_get_gpio_in(pl011_dev, 1));
+
+    sysbus_connect_irq(SYS_BUS_DEVICE(pl011_dev), 6, qdev_get_gpio_in(dev, 0));
+    sysbus_connect_irq(SYS_BUS_DEVICE(pl011_dev), 7, qdev_get_gpio_in(dev, 1));
+
+    sysbus_connect_irq(SYS_BUS_DEVICE(pl011_dev), 8, qdev_get_gpio_in(dev, 16));
+    sysbus_connect_irq(SYS_BUS_DEVICE(pl011_dev), 9, qdev_get_gpio_in(dev, 17));
+
 
     sysbus_create_simple("sp804", 0x101e2000, pic[4]);
     sysbus_create_simple("sp804", 0x101e3000, pic[5]);
-----------------------------------------

Unit Test by QTest:
	1. create pl080-test.c in tests/qtest directory.
------------ pl080-test.c ------------
#include "qemu/osdep.h"
#include "libqtest-single.h"

#define PL011_BASE   0x101f2000
#define PL080_BASE   0x10130000
#define RAM_BASE     0x0

/* ---------------- PL011 Registers ---------------- */
#define UARTDR     0x00
#define UARTFR     0x18
#define UARTCR     0x30
#define UARTLCR_H  0x2C
#define UARTDMACR  0x48
/* UARTFR bits */
#define FR_RXFE  (1 << 4)
#define FR_TXFF  (1 << 5)

/* UARTCR bits */
#define CR_UARTEN (1 << 0)
#define CR_TXE    (1 << 8)
#define CR_RXE    (1 << 9)
#define CR_LBE    (1 << 7)

#define LCRH_FEN (1 << 4)   // FIFO enable

/* ---------------- PL080 DMA Registers ---------------- */
#define DMAC_CONFIG       0x30

/* Channel offsets */
#define DMA_CH_SRC        0x00
#define DMA_CH_DST        0x04
#define DMA_CH_LLI        0x08
#define DMA_CH_CTRL       0x0C
#define DMA_CH_CFG        0x10

/* Control Register Fields */
#define CTRL_TRANSFER_SIZE(x)   ((x) & 0xFFF)
#define CTRL_SRC_WIDTH(x)       ((x) << 18)
#define CTRL_DST_WIDTH(x)       ((x) << 21)
#define CTRL_SRC_INC            (1 << 26)
#define CTRL_DST_INC            (1 << 27)

/* Width */
#define WIDTH_8BIT   0
#define WIDTH_16BIT  1
#define WIDTH_32BIT  2

/* Config Register Fields */
#define CFG_ENABLE           (1 << 0)

/* Flow control */
#define CFG_FLOW_MEM2MEM     (0 << 11)
#define CFG_FLOW_MEM2PER     (1 << 11)
#define CFG_FLOW_PER2MEM     (2 << 11)
#define CFG_FLOW_PER2PER     (3 << 11)

/* Peripheral IDs (VersatilePB, PL011 UART) */
#define PERIPH_UART_TX       0 
#define PERIPH_UART_RX       1

/* Macro to set peripheral in CFG */
#define CFG_DST_PERIPH(x)    ((x) << 6)   // DST peripheral ID
#define CFG_SRC_PERIPH(x)    ((x) << 1)   // SRC peripheral ID

static void test_pl080_pl011(void)
{
    QTestState *s;
    uint32_t cr;
    uint32_t i;

    /* --------------------------- */
    /* 1.  UART(loopback) */
    /* --------------------------- */
    s = qtest_start("-machine versatilepb -d guest_errors,trace:memory_region_ops_* -D qemu.log");

    qtest_writel(s, PL011_BASE + UARTCR, 0);
    qtest_writel(s, PL011_BASE + UARTLCR_H, LCRH_FEN);

    cr = CR_UARTEN | CR_TXE | CR_RXE | CR_LBE;
    qtest_writel(s, PL011_BASE + UARTCR, cr);

    qtest_writel(s, PL011_BASE + UARTDMACR, 0x3);

    /* --------------------------- */
    /* 2. data in memory */
    /* --------------------------- */
    uint8_t tx_buff[12] = {
        0x1,0x2,0x3,0x4,
        0x5,0x6,0x7,0x8,
        0x9,0xa,0xb,0xc
    };

    uint32_t src_addr = RAM_BASE + 0x1000;
    uint32_t dst_addr = RAM_BASE + 0x2000;
    int len = 12;

    for (i = 0; i < len; i++) {
        qtest_writeb(s, src_addr + i, tx_buff[i]);  
        qtest_writeb(s, dst_addr + i, 0);           
    }

    /* --------------------------- */
    /* 3. enable DMA controller */
    /* --------------------------- */
    qtest_writel(s, PL080_BASE + DMAC_CONFIG, 1);

    /* --------------------------- */
    /* 4. DMA channel 0: mem -> UART */
    /* --------------------------- */
    uint32_t ch0_base = PL080_BASE + 0x100;

    qtest_writel(s, ch0_base + DMA_CH_SRC, src_addr);
    qtest_writel(s, ch0_base + DMA_CH_DST, PL011_BASE + UARTDR);

    qtest_writel(s, ch0_base + DMA_CH_CTRL,
        CTRL_TRANSFER_SIZE(len/4) |
        CTRL_SRC_WIDTH(WIDTH_32BIT) |
        CTRL_DST_WIDTH(WIDTH_8BIT) |
        CTRL_SRC_INC
    );

    /* CFG: enable + flow + destination peripheral */
    qtest_writel(s, ch0_base + DMA_CH_CFG,
        CFG_ENABLE |
        CFG_FLOW_MEM2PER |
        CFG_DST_PERIPH(PERIPH_UART_TX)
    );

    /* --------------------------- */
    /* 5. DMA channel 1: UART -> mem */
    /* --------------------------- */
    uint32_t ch1_base = PL080_BASE + 0x120;

    qtest_writel(s, ch1_base + DMA_CH_SRC, PL011_BASE + UARTDR);
    qtest_writel(s, ch1_base + DMA_CH_DST, dst_addr);

    qtest_writel(s, ch1_base + DMA_CH_CTRL,
        CTRL_TRANSFER_SIZE(len) |
        CTRL_SRC_WIDTH(WIDTH_8BIT) |
        CTRL_DST_WIDTH(WIDTH_32BIT) |
        CTRL_DST_INC
    );

    /* CFG: enable + flow + source peripheral */
    qtest_writel(s, ch1_base + DMA_CH_CFG,
        CFG_ENABLE |
        CFG_FLOW_PER2MEM |
        CFG_SRC_PERIPH(PERIPH_UART_RX)
    );

    /* --------------------------- */
    /* 6. DMA run*/
    /* --------------------------- */
    clock_step(1000000);

    
    /* wait Channel 0 */
    while (true) {
        uint32_t cfg = qtest_readl(s, ch0_base + DMA_CH_CFG);
        if (!(cfg & CFG_ENABLE)) {
            printf("Channel 0 finish\n");
            break;
        }
        clock_step(1000000);  
    }

    /* wait Channel 1 */
    while (true) {
        uint32_t cfg = qtest_readl(s, ch1_base + DMA_CH_CFG);
        if (!(cfg & CFG_ENABLE)) {
            printf("Channel 1 finish\n");
            break;
        }
        clock_step(1000000);  
    }

    /* --------------------------- */
    /* 7. check data */
    /* --------------------------- */
    for (i = 0; i < len; i++) {
        uint8_t s_val = qtest_readb(s, src_addr + i);
        uint8_t d_val = qtest_readb(s, dst_addr + i);

        g_assert_cmpuint(s_val, ==, d_val);
    }

    qtest_end();
}

int main(int argc, char **argv)
{
    g_test_init(&argc, &argv, NULL);
    qtest_add_func("/pl080/test1", test_pl080_pl011);
    return g_test_run();
}
-----------------------------------------
	2. modify meson
-----------------------------------------
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index be4fa627b5..6ea187517f 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -249,7 +249,8 @@ qtests_arm = \
   (config_all_devices.has_key('CONFIG_STM32L4X5_SOC') and
    config_all_devices.has_key('CONFIG_DM163')? ['dm163-test'] : []) + \
   ['arm-cpu-features',
-   'boot-serial-test']
+   'boot-serial-test'] + \
+  ['pl080-test']
 
 # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional
 qtests_aarch64 = \
-----------------------------------------

Configuration:
	../configure --target-list=arm-softmmu --enable-debug --disable-werror
build:
	make
Run:
	QTEST_QEMU_BINARY=./qemu-system-arm ./tests/qtest/pl080-test
Result:
-----------------------------------------
	TAP version 13
	# random seed: R02Sf2f5311a926a969453439a52838a6fd0
	1..1
	# Start of arm tests
	# Start of pl080 tests
	# starting QEMU: exec ./qemu-system-arm -qtest unix:/tmp/qtest-23375.sock 
    -qtest-log /dev/null -chardev socket,path=/tmp/qtest-23375.qmp,id=char0 -mon chardev=char0,mode=control 
    -display none -audio none -run-with exit-with-parent=on -machine versatilepb -d guest_errors,trace:memory_region_ops_* 
    -D qemu.log -accel qtest
	Channel 0 finish
	Channel 1 finish
	ok 1 /arm/pl080/test1
	# End of pl080 tests
	# End of arm tests
-----------------------------------------

System Test:
	1. Refer to qemu/docs/system/arm/volatile.rst to boot Linux.
	2. modify versatile-pb.dts in linux to use dma.
		serial@101f2000 {
			compatible = "arm,pl011\0arm,primecell";
			reg = <0x101f2000 0x1000>;
			interrupts = <0x0d>;
			clocks = <0x05 0x06>;
			clock-names = "uartclk\0apb_pclk";
			auto-poll;
+			dmas = <&dma0 1 1>,
+					<&dma0 0 1>;
+			dma-names = "rx", "tx";
		};

Run:
	./build/qemu-system-arm -M versatilepb -nographic \
	-serial mon:stdio \
	-serial pty \
	-kernel zImage \
	-dtb versatile-pb.dtb \
	-drive if=scsi,driver=file,filename=rootfs.img \
	-append "console=ttyAMA0 rw root=/dev/sda" \
	-d guest_errors -D qemu.log

Send and receive data to the guest's UART in another console of the host by pty.

Test result:

guest:
	~ # echo "hello" > /dev/ttyAMA1
	uart-pl011 101f2000.serial: DMA channel TX dma1chan0
	uart-pl011 101f2000.serial: DMA channel RX dma1chan1
host:
	cat /dev/pts/1
	hello

-- 
2.43.0
[PATCH v1 1/5] Fixed the PL011 FIFO level bug
Posted by Tao Ding 20 hours ago
According to TRM, PL011 generate a burst or a single DMA request based on the FIFO level

The FlowCntrl field description is as follows:
     Receive interrupt FIFO level select. 
     b000 = Receive FIFO becomes >= 1/8 full
     b001 = Receive FIFO becomes >= 1/4 full
     b010 = Receive FIFO becomes >= 1/2 full
     b011 = Receive FIFO becomes >= 3/4 full
     b100 = Receive FIFO becomes >= 7/8 full
     b101-b111 = reserved

Only the read_trigger of rx has been changed, and there is no need to set a trigger for tx 
because write tx data always completes in a blocking manner. That means tx FIFO is empty.

Signed-off-by: Tao Ding <dingtao0430@163.com>
---
 hw/char/pl011.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/hw/char/pl011.c b/hw/char/pl011.c
index cb12c3e224..c96a706da1 100644
--- a/hw/char/pl011.c
+++ b/hw/char/pl011.c
@@ -129,6 +129,14 @@ static const uint32_t irqmask[] = {
     INT_E,
 };
 
+static const uint32_t fifo_level[] = {
+    PL011_FIFO_DEPTH / 8,
+    PL011_FIFO_DEPTH / 4,
+    PL011_FIFO_DEPTH / 2,
+    PL011_FIFO_DEPTH * 3 / 4,
+    PL011_FIFO_DEPTH * 7 / 8,
+};
+
 static void pl011_update(PL011State *s)
 {
     uint32_t flags;
@@ -344,17 +352,24 @@ static uint64_t pl011_read(void *opaque, hwaddr offset,
     return r;
 }
 
+static uint32_t pl011_decode_fifo_level(uint32_t val)
+{
+    if (val > 4) {
+        qemu_log_mask(LOG_GUEST_ERROR, "pl011: invalid fifo level field\n");
+        return 1;
+    }
+    return fifo_level[val];
+}
+
 static void pl011_set_read_trigger(PL011State *s)
 {
-#if 0
     /* The docs say the RX interrupt is triggered when the FIFO exceeds
        the threshold.  However linux only reads the FIFO in response to an
        interrupt.  Triggering the interrupt when the FIFO is non-empty seems
        to make things work.  */
     if (s->lcr & LCR_FEN)
-        s->read_trigger = (s->ifl >> 1) & 0x1c;
+        s->read_trigger = pl011_decode_fifo_level(extract32(s->ifl, 3, 3));
     else
-#endif
         s->read_trigger = 1;
 }
 
-- 
2.43.0
[PATCH v1 2/5] PL011 generate receive timeout interrupt
Posted by Tao Ding 20 hours ago
After changing the read_trigger, no interrupt will be generated when the data 
in the rx FIFO of pl011 does not reach the FIFO level. This will cause input 
in the console to have no effect in Linux, until data input exceeds the FIFO level.

But UART will generate an interrupt after a timeout period after RX receives the data. 
The relevant description of this in TRM : 
    *The receive timeout interrupt is asserted when the receive FIFO is not empty, and no 
    more data is received during a 32-bit period.

To simplify this operation, pl011 immediately generates an RT interrupt upon receiving data, 
which means the period is triggered instantly. (QEMUTimer triggering is also optional)

Signed-off-by: Tao Ding <dingtao0430@163.com>
---
 hw/char/pl011.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/hw/char/pl011.c b/hw/char/pl011.c
index c96a706da1..58803f0fc2 100644
--- a/hw/char/pl011.c
+++ b/hw/char/pl011.c
@@ -200,6 +200,9 @@ static void pl011_fifo_rx_put(void *opaque, uint32_t value)
     }
     if (s->read_count == s->read_trigger) {
         s->int_level |= INT_RX;
+    }
+    if (s->read_count) {
+        s->int_level |= INT_RT;
         pl011_update(s);
     }
 }
@@ -281,6 +284,7 @@ static uint32_t pl011_read_rxdata(PL011State *s)
     }
     if (s->read_count == 0) {
         s->flags |= PL011_FLAG_RXFE;
+        s->int_level &= ~INT_RT;
     }
     if (s->read_count == s->read_trigger - 1) {
         s->int_level &= ~INT_RX;
-- 
2.43.0
[PATCH v1 3/5] Implement the DMA interface of PL011
Posted by Tao Ding 20 hours ago
Use gpio_out to emulate DMA requests and gpio_in to emulate clear requests.
The DMA function of pl011 is affected by the register UARDMACR.

Signed-off-by: Tao Ding <dingtao0430@163.com>
---
 hw/char/pl011.c         | 47 ++++++++++++++++++++++++++++++++++++++---
 include/hw/char/pl011.h |  1 +
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/hw/char/pl011.c b/hw/char/pl011.c
index 58803f0fc2..d3fcc6a02b 100644
--- a/hw/char/pl011.c
+++ b/hw/char/pl011.c
@@ -16,6 +16,10 @@
  *  + sysbus IRQ 3: UARTRTINTR (receive timeout interrupt line)
  *  + sysbus IRQ 4: UARTMSINTR (momem status interrupt line)
  *  + sysbus IRQ 5: UARTEINTR (error interrupt line)
+ *  + sysbus IRQ 6: UARTTXDMASREQ (transmit DMA single request)
+ *  + sysbus IRQ 7: UARTRXDMASREQ (receive DMA single request)
+ *  + sysbus IRQ 8: UARTTXDMABREQ (transmit DMA burst request)
+ *  + sysbus IRQ 9: UARTRXDMABREQ (receive DMA burst request)
  */
 
 #include "qemu/osdep.h"
@@ -96,6 +100,12 @@ DeviceState *pl011_create(hwaddr addr, qemu_irq irq, Chardev *chr)
 /* Fractional Baud Rate Divider, UARTFBRD */
 #define FBRD_MASK 0x3f
 
+/* UART DMA interface*/
+#define DMA_TX_SREQ 0x0
+#define DMA_RX_SREQ 0x1
+#define DMA_TX_BREQ 0x2
+#define DMA_RX_BREQ 0x3
+
 static const unsigned char pl011_id_arm[8] =
   { 0x11, 0x10, 0x14, 0x00, 0x0d, 0xf0, 0x05, 0xb1 };
 static const unsigned char pl011_id_luminary[8] =
@@ -147,6 +157,25 @@ static void pl011_update(PL011State *s)
     for (i = 0; i < ARRAY_SIZE(s->irq); i++) {
         qemu_set_irq(s->irq[i], (flags & irqmask[i]) != 0);
     }
+    if (s->dmacr & (1 << 1)) {
+        /*data in tx fifo is always lower than fifo level*/
+        qemu_set_irq(s->dma_req_irq[DMA_TX_SREQ], 1);
+        qemu_set_irq(s->dma_req_irq[DMA_TX_BREQ], 1);
+    } else {
+        qemu_set_irq(s->dma_req_irq[DMA_TX_SREQ], 0);
+        qemu_set_irq(s->dma_req_irq[DMA_TX_BREQ], 0);
+    }
+
+    if ((s->dmacr & 1) && s->read_count) {
+        qemu_set_irq(s->dma_req_irq[DMA_RX_SREQ], 1);
+    } else {
+        qemu_set_irq(s->dma_req_irq[DMA_RX_SREQ], 0);
+    }
+    if ((s->dmacr & 1) && s->read_count >= s->read_trigger) {
+        qemu_set_irq(s->dma_req_irq[DMA_RX_BREQ], 1);
+    } else {
+        qemu_set_irq(s->dma_req_irq[DMA_RX_BREQ], 0);
+    }
 }
 
 static bool pl011_loopback_enabled(PL011State *s)
@@ -512,9 +541,7 @@ static void pl011_write(void *opaque, hwaddr offset,
         break;
     case 18: /* UARTDMACR */
         s->dmacr = value;
-        if (value & 3) {
-            qemu_log_mask(LOG_UNIMP, "pl011: DMA not implemented\n");
-        }
+        pl011_update(s);
         break;
     default:
         qemu_log_mask(LOG_GUEST_ERROR,
@@ -626,6 +653,16 @@ static int pl011_post_load(void *opaque, int version_id)
     return 0;
 }
 
+static void pl011_dma_clear_handler(void *opaque, int n, int level)
+{
+    if (!level) {
+        return;
+    }
+
+    PL011State *s = PL011(opaque);
+    pl011_update(s);
+}
+
 static const VMStateDescription vmstate_pl011 = {
     .name = "pl011",
     .version_id = 2,
@@ -672,6 +709,10 @@ static void pl011_init(Object *obj)
     for (i = 0; i < ARRAY_SIZE(s->irq); i++) {
         sysbus_init_irq(sbd, &s->irq[i]);
     }
+    for (i = 0; i < ARRAY_SIZE(s->dma_req_irq); i++) {
+        sysbus_init_irq(sbd, &s->dma_req_irq[i]);
+    }
+    qdev_init_gpio_in(DEVICE(obj), pl011_dma_clear_handler, 2);
 
     s->clk = qdev_init_clock_in(DEVICE(obj), "clk", pl011_clock_update, s,
                                 ClockUpdate);
diff --git a/include/hw/char/pl011.h b/include/hw/char/pl011.h
index 5695787650..6f46151777 100644
--- a/include/hw/char/pl011.h
+++ b/include/hw/char/pl011.h
@@ -49,6 +49,7 @@ struct PL011State {
     int read_trigger;
     CharFrontend chr;
     qemu_irq irq[6];
+    qemu_irq dma_req_irq[4];
     Clock *clk;
     bool migrate_clk;
     bool logged_disabled_uart;
-- 
2.43.0
[PATCH v1 4/5] Run pl080_run in main loop
Posted by Tao Ding 20 hours ago
Accessing MMIO in QEMU will trigger pl080_run. If it is a memory to memory data transfer, 
it is generally faster, so it will not significantly block vCPU. But when dealing with peripherals, 
time is uncontrollable because it is unknown when the peripherals will generate data. 
This will result in continuous blocking of vCPU. 

Due to the presence of BQL in qemu, mmio access and bh callbacks are safty.

Signed-off-by: Tao Ding <dingtao0430@163.com>
---
 hw/dma/pl080.c         | 11 +++++++++--
 include/hw/dma/pl080.h |  3 +++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/dma/pl080.c b/hw/dma/pl080.c
index 627ccbbd81..3596739854 100644
--- a/hw/dma/pl080.c
+++ b/hw/dma/pl080.c
@@ -232,6 +232,12 @@ again:
     pl080_update(s);
 }
 
+static void pl080_run_cb(void *opaque)
+{
+    PL080State *s = PL080(opaque);
+    pl080_run(s);
+}
+
 static uint64_t pl080_read(void *opaque, hwaddr offset,
                            unsigned size)
 {
@@ -326,7 +332,7 @@ static void pl080_write(void *opaque, hwaddr offset,
             break;
         case 4: /* Configuration */
             s->chan[i].conf = value;
-            pl080_run(s);
+            qemu_bh_schedule(s->bh);
             break;
         }
         return;
@@ -351,7 +357,7 @@ static void pl080_write(void *opaque, hwaddr offset,
             qemu_log_mask(LOG_UNIMP,
                           "pl080_write: Big-endian DMA not implemented\n");
         }
-        pl080_run(s);
+        qemu_bh_schedule(s->bh);
         break;
     case 13: /* Sync */
         s->sync = value;
@@ -405,6 +411,7 @@ static void pl080_init(Object *obj)
     sysbus_init_irq(sbd, &s->interr);
     sysbus_init_irq(sbd, &s->inttc);
     s->nchannels = 8;
+    s->bh = qemu_bh_new(pl080_run_cb, s);
 }
 
 static void pl080_realize(DeviceState *dev, Error **errp)
diff --git a/include/hw/dma/pl080.h b/include/hw/dma/pl080.h
index 6635cd8e93..397417aa06 100644
--- a/include/hw/dma/pl080.h
+++ b/include/hw/dma/pl080.h
@@ -31,6 +31,7 @@
 
 #include "hw/core/sysbus.h"
 #include "qom/object.h"
+#include "qemu/main-loop.h"
 
 #define PL080_MAX_CHANNELS 8
 
@@ -68,6 +69,8 @@ struct PL080State {
 
     MemoryRegion *downstream;
     AddressSpace downstream_as;
+
+    QEMUBH *bh;
 };
 
 #endif
-- 
2.43.0
[PATCH v1 5/5] PL080 add transfer memory and peripheral
Posted by Tao Ding 20 hours ago
On the interface, 0~15 represents single request, 16~31 represents burst request.
Access peripheral data based on the validity of the request. For the convenience, 
we only respond to single requests. The number of data read from the source and written 
to the destination is always equal, with a value of max(swidth, dwidth).

Due to the lack of data preparation by the peripheral, it is necessary to cache data separately 
for each channel until the peripheral is ready for processing.

The width of peripherals perhaps not possible to process all data at once, 
so continuous processing is required. Read from source is achieved through "continue", 
while writing to destination is achieved through "goto".

Signed-off-by: Tao Ding <dingtao0430@163.com>
---
 hw/dma/pl080.c         | 105 +++++++++++++++++++++++++++++++++++------
 include/hw/dma/pl080.h |   4 ++
 2 files changed, 95 insertions(+), 14 deletions(-)

diff --git a/hw/dma/pl080.c b/hw/dma/pl080.c
index 3596739854..c40364de44 100644
--- a/hw/dma/pl080.c
+++ b/hw/dma/pl080.c
@@ -88,6 +88,13 @@ static void pl080_update(PL080State *s)
     qemu_set_irq(s->irq, errlevel || tclevel);
 }
 
+static void pl080_clr_req(PL080State *s, int req_index)
+{
+    s->req_single &= ~(1 << req_index);
+    s->req_burst &= ~(1 << req_index);
+    qemu_set_irq(s->clear_req_irq[req_index], 1);
+}
+
 static void pl080_run(PL080State *s)
 {
     int c;
@@ -100,7 +107,7 @@ static void pl080_run(PL080State *s)
     int src_id;
     int dest_id;
     int size;
-    uint8_t buff[4];
+    uint8_t *buff;
     uint32_t req;
     uint32_t next_lli;
 
@@ -125,6 +132,7 @@ static void pl080_run(PL080State *s)
     while (s->running) {
         for (c = 0; c < s->nchannels; c++) {
             ch = &s->chan[c];
+            buff = ch->buff;
 again:
             /* Test if thiws channel has any pending DMA requests.  */
             if ((ch->conf & (PL080_CCONF_H | PL080_CCONF_E))
@@ -179,23 +187,64 @@ again:
                               c, extract32(ch->ctrl, 21, 3));
                 continue;
             }
+            xsize = MAX(swidth, dwidth);
+            if (ch->continue_write) {
+                goto continue_write;
+            }
 
-            for (n = 0; n < dwidth; n+= swidth) {
-                address_space_read(&s->downstream_as, ch->src,
-                                   MEMTXATTRS_UNSPECIFIED, buff + n, swidth);
-                if (ch->ctrl & PL080_CCTRL_SI)
-                    ch->src += swidth;
+            if (flow & (1 << 1)) { /*source is peripheral*/
+                if (s->req_single & (1 << src_id)) {
+                    address_space_read(&s->downstream_as, ch->src,
+                            MEMTXATTRS_UNSPECIFIED, buff + ch->buff_remain, swidth);
+                    if (ch->ctrl & PL080_CCTRL_SI) {
+                        ch->src += swidth;
+                    }
+                    ch->buff_remain += swidth;
+                    pl080_clr_req(s, src_id);
+                }
+            } else { /*source is memory*/
+                for (n = 0; n < xsize; n += swidth) {
+                    address_space_read(&s->downstream_as, ch->src,
+                            MEMTXATTRS_UNSPECIFIED, buff + n, swidth);
+                    if (ch->ctrl & PL080_CCTRL_SI) {
+                        ch->src += swidth;
+                    }
+                }
+                ch->buff_remain += xsize;
             }
-            xsize = (dwidth < swidth) ? swidth : dwidth;
-            /* ??? This may pad the value incorrectly for dwidth < 32.  */
-            for (n = 0; n < xsize; n += dwidth) {
-                address_space_write(&s->downstream_as, ch->dest + n,
-                                    MEMTXATTRS_UNSPECIFIED, buff + n, dwidth);
-                if (ch->ctrl & PL080_CCTRL_DI)
-                    ch->dest += swidth;
+            if (ch->buff_remain != xsize) {
+                continue;
+            }
+
+continue_write:
+            if (flow & 1) { /*destination is peripheral*/
+                if (s->req_single & (1 << dest_id)) {
+                    address_space_write(&s->downstream_as, ch->dest,
+                        MEMTXATTRS_UNSPECIFIED, buff + xsize - ch->buff_remain, dwidth);
+                    if (ch->ctrl & PL080_CCTRL_DI) {
+                        ch->dest += dwidth;
+                    }
+                    ch->buff_remain -= dwidth;
+                    pl080_clr_req(s, dest_id);
+                }
+            } else { /*destination is memory*/
+                for (n = 0; n < xsize; n += dwidth) {
+                    address_space_write(&s->downstream_as, ch->dest,
+                                        MEMTXATTRS_UNSPECIFIED, buff + n, dwidth);
+                    if (ch->ctrl & PL080_CCTRL_DI) {
+                        ch->dest += dwidth;
+                    }
+                }
+                ch->buff_remain -= xsize;
+            }
+            if (ch->buff_remain != 0) {
+                ch->continue_write = true;
+                continue;
+            } else {
+                ch->continue_write = false;
             }
 
-            size--;
+            size -= xsize / swidth;
             ch->ctrl = (ch->ctrl & 0xfffff000) | size;
             if (size == 0) {
                 /* Transfer complete.  */
@@ -370,6 +419,27 @@ static void pl080_write(void *opaque, hwaddr offset,
     pl080_update(s);
 }
 
+static void pl080_dma_request_handler(void *opaque, int n, int level)
+{
+    PL080State *s = PL080(opaque);
+    if (level == 0) {
+        if (n < 16) {
+            s->req_single &= ~(1 << n);
+        } else {
+            s->req_burst &= ~(1 << (n - 16));
+        }
+    } else {
+        if (n < 16 && (s->req_single & (1 << n)) == 0) {
+            s->req_single |= (1 << n);
+            qemu_bh_schedule(s->bh);
+        }
+        if (n >= 16 && (s->req_burst & (1 << (n - 16)))) {
+            s->req_burst |= (1 << (n - 16));
+            qemu_bh_schedule(s->bh);
+        }
+    }
+}
+
 static const MemoryRegionOps pl080_ops = {
     .read = pl080_read,
     .write = pl080_write,
@@ -397,6 +467,8 @@ static void pl080_reset(DeviceState *dev)
         s->chan[i].lli = 0;
         s->chan[i].ctrl = 0;
         s->chan[i].conf = 0;
+        s->chan[i].buff_remain = 0;
+        s->chan[i].continue_write = false;
     }
 }
 
@@ -404,12 +476,17 @@ static void pl080_init(Object *obj)
 {
     SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
     PL080State *s = PL080(obj);
+    int i;
 
     memory_region_init_io(&s->iomem, OBJECT(s), &pl080_ops, s, "pl080", 0x1000);
     sysbus_init_mmio(sbd, &s->iomem);
     sysbus_init_irq(sbd, &s->irq);
     sysbus_init_irq(sbd, &s->interr);
     sysbus_init_irq(sbd, &s->inttc);
+    for (i = 0; i < ARRAY_SIZE(s->clear_req_irq); i++) {
+        sysbus_init_irq(sbd, &s->clear_req_irq[i]);
+    }
+    qdev_init_gpio_in(DEVICE(obj), pl080_dma_request_handler, 32);
     s->nchannels = 8;
     s->bh = qemu_bh_new(pl080_run_cb, s);
 }
diff --git a/include/hw/dma/pl080.h b/include/hw/dma/pl080.h
index 397417aa06..4511b92900 100644
--- a/include/hw/dma/pl080.h
+++ b/include/hw/dma/pl080.h
@@ -41,6 +41,9 @@ typedef struct {
     uint32_t lli;
     uint32_t ctrl;
     uint32_t conf;
+    uint32_t buff_remain;
+    bool continue_write;
+    uint8_t buff[4];
 } pl080_channel;
 
 #define TYPE_PL080 "pl080"
@@ -66,6 +69,7 @@ struct PL080State {
     qemu_irq irq;
     qemu_irq interr;
     qemu_irq inttc;
+    qemu_irq clear_req_irq[16];
 
     MemoryRegion *downstream;
     AddressSpace downstream_as;
-- 
2.43.0