From nobody Fri May 17 04:59:51 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=member.fsf.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1654206105510378.93388504102427; Thu, 2 Jun 2022 14:41:45 -0700 (PDT) Received: from localhost ([::1]:41032 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1nwsZc-0001t0-4Q for importer@patchew.org; Thu, 02 Jun 2022 17:41:44 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:34648) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1nwsXU-0008Ib-G6; Thu, 02 Jun 2022 17:39:32 -0400 Received: from mout-u-107.mailbox.org ([2001:67c:2050:101:465::107]:35908) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_CHACHA20_POLY1305:256) (Exim 4.90_1) (envelope-from ) id 1nwsXS-00070y-Hi; Thu, 02 Jun 2022 17:39:32 -0400 Received: from smtp2.mailbox.org (smtp2.mailbox.org [10.196.197.2]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange ECDHE (P-384) server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by mout-u-107.mailbox.org (Postfix) with ESMTPS id 4LDfYC1S9Wz9sSr; Thu, 2 Jun 2022 23:39:27 +0200 (CEST) From: Lev Kujawski To: qemu-devel@nongnu.org Cc: Lev Kujawski , John Snow , qemu-block@nongnu.org (open list:IDE) Subject: [PATCH 1/1] hw/ide/core: Accumulate PIO output within io_buffer prior to pwritev Date: Thu, 2 Jun 2022 21:39:04 +0000 Message-Id: <20220602213904.19533-2-lkujaw@member.fsf.org> In-Reply-To: <20220602213904.19533-1-lkujaw@member.fsf.org> References: <20220602213904.19533-1-lkujaw@member.fsf.org> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass 
(zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=2001:67c:2050:101:465::107; envelope-from=lkujaw@member.fsf.org; helo=mout-u-107.mailbox.org X-Spam_score_int: -25 X-Spam_score: -2.6 X-Spam_bar: -- X-Spam_report: (-2.6 / 5.0 requ) BAYES_00=-1.9, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=unavailable autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1654206105903100001 Content-Type: text/plain; charset="utf-8" Delay writing PIO output until io_buffer is filled or the ATA command completes, rather than when interrupts are generated. As an example of the new behavior, issuing WRITE SECTOR(S) with a sector count of 256 will result in only a single call to blk_aio_pwritev rather than one call after each of the 256 sectors is transferred. Up to a 50% increase in PIO throughput can be achieved thanks to the reduction in system call overhead and the writing of larger blocks (up to 128 KiB, with the size limited by IDE_DMA_BUF_SECTORS). 
Signed-off-by: Lev Kujawski --- hw/ide/core.c | 62 ++++++++++++++++++++++++--------------- include/hw/ide/internal.h | 1 + 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 5a24547e49..b178584bc3 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1025,23 +1025,20 @@ static void ide_sector_write_cb(void *opaque, int r= et) =20 block_acct_done(blk_get_stats(s->blk), &s->acct); =20 - n =3D s->nsector; - if (n > s->req_nb_sectors) { - n =3D s->req_nb_sectors; - } - s->nsector -=3D n; - + n =3D (s->data_end - s->io_buffer) >> BDRV_SECTOR_BITS; ide_set_sector(s, ide_get_sector(s) + n); + n %=3D s->req_nb_sectors; + s->nsector -=3D n ? n : s->req_nb_sectors; + if (s->nsector =3D=3D 0) { /* no more sectors to write */ ide_transfer_stop(s); } else { - int n1 =3D s->nsector; - if (n1 > s->req_nb_sectors) { - n1 =3D s->req_nb_sectors; - } - ide_transfer_start(s, s->io_buffer, n1 * BDRV_SECTOR_SIZE, - ide_sector_write); + const int n1 =3D + (MIN(IDE_DMA_BUF_SECTORS, s->nsector)) << BDRV_SECTOR_BITS; + s->octets_until_irq =3D + (MIN(s->nsector, s->req_nb_sectors)) << BDRV_SECTOR_BITS; + ide_transfer_start(s, s->io_buffer, n1, ide_sector_write); } =20 if (win2k_install_hack && ((++s->irq_count % 16) =3D=3D 0)) { @@ -1063,14 +1060,21 @@ static void ide_sector_write(IDEState *s) int64_t sector_num; int n; =20 - s->status =3D READY_STAT | SEEK_STAT | BUSY_STAT; - sector_num =3D ide_get_sector(s); + assert(s->octets_until_irq =3D=3D 0); =20 - n =3D s->nsector; - if (n > s->req_nb_sectors) { - n =3D s->req_nb_sectors; + if (s->data_ptr < s->data_end) { + s->nsector -=3D s->req_nb_sectors; + s->octets_until_irq =3D + (MIN(s->nsector, s->req_nb_sectors)) << BDRV_SECTOR_BITS; + s->status =3D READY_STAT | SEEK_STAT | DRQ_STAT; + ide_set_irq(s->bus); + return; } =20 + s->status =3D READY_STAT | SEEK_STAT | BUSY_STAT; + sector_num =3D ide_get_sector(s); + n =3D (s->data_end - s->io_buffer) >> BDRV_SECTOR_BITS; + 
trace_ide_sector_write(sector_num, n); =20 if (!ide_sect_range_ok(s, sector_num, n)) { @@ -1378,6 +1382,7 @@ static void ide_reset(IDEState *s) /* ATA DMA state */ s->io_buffer_size =3D 0; s->req_nb_sectors =3D 0; + s->octets_until_irq =3D 0; =20 ide_set_signature(s); /* init the transfer handler so that 0xffff is returned on data @@ -1500,10 +1505,11 @@ static bool cmd_write_multiple(IDEState *s, uint8_t= cmd) ide_cmd_lba48_transform(s, lba48); =20 s->req_nb_sectors =3D s->mult_sectors; - n =3D MIN(s->nsector, s->req_nb_sectors); - + n =3D (MIN(IDE_DMA_BUF_SECTORS, s->nsector)) << BDRV_SECTOR_BITS; + s->octets_until_irq =3D + (MIN(s->nsector, s->req_nb_sectors)) << BDRV_SECTOR_BITS; s->status =3D SEEK_STAT | READY_STAT; - ide_transfer_start(s, s->io_buffer, 512 * n, ide_sector_write); + ide_transfer_start(s, s->io_buffer, n, ide_sector_write); =20 s->media_changed =3D 1; =20 @@ -1535,6 +1541,7 @@ static bool cmd_read_pio(IDEState *s, uint8_t cmd) static bool cmd_write_pio(IDEState *s, uint8_t cmd) { bool lba48 =3D (cmd =3D=3D WIN_WRITE_EXT); + int n; =20 if (!s->blk) { ide_abort_command(s); @@ -1544,8 +1551,10 @@ static bool cmd_write_pio(IDEState *s, uint8_t cmd) ide_cmd_lba48_transform(s, lba48); =20 s->req_nb_sectors =3D 1; + n =3D (MIN(IDE_DMA_BUF_SECTORS, s->nsector)) << BDRV_SECTOR_BITS; + s->octets_until_irq =3D BDRV_SECTOR_SIZE; s->status =3D SEEK_STAT | READY_STAT; - ide_transfer_start(s, s->io_buffer, 512, ide_sector_write); + ide_transfer_start(s, s->io_buffer, n, ide_sector_write); =20 s->media_changed =3D 1; =20 @@ -1699,7 +1708,7 @@ static bool cmd_identify_packet(IDEState *s, uint8_t = cmd) { ide_atapi_identify(s); s->status =3D READY_STAT | SEEK_STAT; - ide_transfer_start(s, s->io_buffer, 512, ide_transfer_stop); + ide_transfer_start(s, s->io_buffer, BDRV_SECTOR_SIZE, ide_transfer_sto= p); ide_set_irq(s->bus); return false; } @@ -1745,6 +1754,7 @@ static bool cmd_packet(IDEState *s, uint8_t cmd) s->dma_cmd =3D IDE_DMA_ATAPI; } s->nsector =3D 1; + 
s->octets_until_irq =3D ATAPI_PACKET_SIZE; ide_transfer_start(s, s->io_buffer, ATAPI_PACKET_SIZE, ide_atapi_cmd); return false; @@ -2358,7 +2368,9 @@ void ide_data_writew(void *opaque, uint32_t addr, uin= t32_t val) *(uint16_t *)p =3D le16_to_cpu(val); p +=3D 2; s->data_ptr =3D p; - if (p >=3D s->data_end) { + s->octets_until_irq -=3D 2; + + if (s->octets_until_irq =3D=3D 0) { s->status &=3D ~DRQ_STAT; s->end_transfer_func(s); } @@ -2416,7 +2428,9 @@ void ide_data_writel(void *opaque, uint32_t addr, uin= t32_t val) *(uint32_t *)p =3D le32_to_cpu(val); p +=3D 4; s->data_ptr =3D p; - if (p >=3D s->data_end) { + s->octets_until_irq -=3D 4; + + if (s->octets_until_irq =3D=3D 0) { s->status &=3D ~DRQ_STAT; s->end_transfer_func(s); } diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h index 97e7e59dc5..3f79fbaf32 100644 --- a/include/hw/ide/internal.h +++ b/include/hw/ide/internal.h @@ -428,6 +428,7 @@ struct IDEState { QEMUSGList sg; /* PIO transfer handling */ int req_nb_sectors; /* number of sectors per interrupt */ + int octets_until_irq; EndTransferFunc *end_transfer_func; uint8_t *data_ptr; uint8_t *data_end; --=20 2.34.1