block/qcow2-cluster.c | 17 +++++++++++++++++ block/qcow2.c | 12 ------------ 2 files changed, 17 insertions(+), 12 deletions(-)
Most qcow2 files are uncompressed so it is wasteful to allocate (32 + 1)
* cluster_size + 512 bytes upfront. Allocate s->cluster_cache and
s->cluster_data when the first read operation is performance on a
compressed cluster.
The buffers are freed in .bdrv_close(). .bdrv_open() no longer has any
code paths that can allocate these buffers, so remove the free functions
in the error code path.
Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
Alexey: Does this improve your memory profiling results?
block/qcow2-cluster.c | 17 +++++++++++++++++
block/qcow2.c | 12 ------------
2 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index f06c08f64c..c47600a44e 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1516,6 +1516,23 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
sector_offset = coffset & 511;
csize = nb_csectors * 512 - sector_offset;
+
+ /* Allocate buffers on first decompress operation, most images are
+ * uncompressed and the memory overhead can be avoided. The buffers
+ * are freed in .bdrv_close().
+ */
+ if (!s->cluster_data) {
+ /* one more sector for decompressed data alignment */
+ s->cluster_data = qemu_try_blockalign(bs->file->bs,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + 512);
+ if (!s->cluster_data) {
+ return -EIO;
+ }
+ }
+ if (!s->cluster_cache) {
+ s->cluster_cache = g_malloc(s->cluster_size);
+ }
+
BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data,
nb_csectors);
diff --git a/block/qcow2.c b/block/qcow2.c
index 40ba26c111..0ac201910a 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1360,16 +1360,6 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- s->cluster_cache = g_malloc(s->cluster_size);
- /* one more sector for decompressed data alignment */
- s->cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS
- * s->cluster_size + 512);
- if (s->cluster_data == NULL) {
- error_setg(errp, "Could not allocate temporary cluster buffer");
- ret = -ENOMEM;
- goto fail;
- }
-
s->cluster_cache_offset = -1;
s->flags = flags;
@@ -1507,8 +1497,6 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
if (s->refcount_block_cache) {
qcow2_cache_destroy(bs, s->refcount_block_cache);
}
- g_free(s->cluster_cache);
- qemu_vfree(s->cluster_data);
qcrypto_block_free(s->crypto);
qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
return ret;
--
2.13.4
On 08/18/2017 08:31 AM, Stefan Hajnoczi wrote:
> Most qcow2 files are uncompressed so it is wasteful to allocate (32 + 1)
> * cluster_size + 512 bytes upfront. Allocate s->cluster_cache and
> s->cluster_data when the first read operation is performance on a
> compressed cluster.
>
> The buffers are freed in .bdrv_close(). .bdrv_open() no longer has any
> code paths that can allocate these buffers, so remove the free functions
> in the error code path.
>
> Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> Cc: Kevin Wolf <kwolf@redhat.com>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> Alexey: Does this improve your memory profiling results?
Is this a regression from earlier versions? Likely, it is NOT -rc4
material, and thus can wait for 2.11; but it should be fine for -stable
as part of 2.10.1 down the road.
> +++ b/block/qcow2-cluster.c
> @@ -1516,6 +1516,23 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
> nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
> sector_offset = coffset & 511;
> csize = nb_csectors * 512 - sector_offset;
> +
> + /* Allocate buffers on first decompress operation, most images are
> + * uncompressed and the memory overhead can be avoided. The buffers
> + * are freed in .bdrv_close().
> + */
> + if (!s->cluster_data) {
> + /* one more sector for decompressed data alignment */
> + s->cluster_data = qemu_try_blockalign(bs->file->bs,
> + QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + 512);
> + if (!s->cluster_data) {
> + return -EIO;
Is -ENOMEM any better than -EIO here?
--
Eric Blake, Principal Software Engineer
Red Hat, Inc. +1-919-301-3266
Virtualization: qemu.org | libvirt.org
On 19/08/17 01:18, Eric Blake wrote:
> On 08/18/2017 08:31 AM, Stefan Hajnoczi wrote:
>> Most qcow2 files are uncompressed so it is wasteful to allocate (32 + 1)
>> * cluster_size + 512 bytes upfront. Allocate s->cluster_cache and
>> s->cluster_data when the first read operation is performance on a
>> compressed cluster.
>>
>> The buffers are freed in .bdrv_close(). .bdrv_open() no longer has any
>> code paths that can allocate these buffers, so remove the free functions
>> in the error code path.
>>
>> Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> Cc: Kevin Wolf <kwolf@redhat.com>
>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
>> ---
>> Alexey: Does this improve your memory profiling results?
>
> Is this a regression from earlier versions?
Hm, I have not thought about this.
So. I did bisect and this started happening from
9a4c0e220d8a4f82b5665d0ee95ef94d8e1509d5
"hw/virtio-pci: fix virtio behaviour"
Before that, the very same command line would take less than 1GB of
resident memory. That thing basically enforces virtio-1.0 for QEMU <=2.6
which means that upstream with "-machine pseries-2.6" works fine (less than
1GB), "-machine pseries-2.7" does not (close to 7GB, sometime even 9GB).
Then I tried bisecting again, with
"scsi=off,disable-modern=off,disable-legacy=on" on my 150 virtio-block
devices, started from
e266d421490e0 "virtio-pci: add flags to enable/disable legacy/modern" (it
added the disable-modern switch) which uses 2GB of memory.
I ended up with ada434cd0b44 "virtio-pci: implement cfg capability".
Then I removed proxy->modern_as on v2.10.0-rc3 (see below) and got 1.5GB of
used memory (yay!)
I do not really know how to reinterpret all of this, do you?
Note: 1GB..9GB numbers from below are the peak values from valgrind's
massif. This is pretty much resident memory used by QEMU process. In my
testing I did not enable KVM and I did not start the guest (i.e. used -S).
150 virtio-block devices, 2GB RAM for the guest.
Also, while bisecting, I only paid attention if it is 1..2GB or 6..9GB -
all tests did fit these 2 ranges, for any given sha1 the amount of memory
would be stable but among "good" commits it could change between 1GB and 2GB.
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 5d14bd6..7ad447a 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1783,6 +1783,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev,
Error **errp)
/* PCI BAR regions must be powers of 2 */
pow2ceil(proxy->notify.offset + proxy->notify.size));
+#if 0
memory_region_init_alias(&proxy->modern_cfg,
OBJECT(proxy),
"virtio-pci-cfg",
@@ -1791,7 +1792,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev,
Error **errp)
memory_region_size(&proxy->modern_bar));
address_space_init(&proxy->modern_as, &proxy->modern_cfg,
"virtio-pci-cfg-as");
-
+#endif
if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) {
proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
}
@@ -1860,10 +1861,10 @@ static void virtio_pci_realize(PCIDevice *pci_dev,
Error **errp)
static void virtio_pci_exit(PCIDevice *pci_dev)
{
- VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
+ //VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
msix_uninit_exclusive_bar(pci_dev);
- address_space_destroy(&proxy->modern_as);
+ //address_space_destroy(&proxy->modern_as);
}
static void virtio_pci_reset(DeviceState *qdev)
> Likely, it is NOT -rc4
> material, and thus can wait for 2.11; but it should be fine for -stable
> as part of 2.10.1 down the road.
>
>> +++ b/block/qcow2-cluster.c
>> @@ -1516,6 +1516,23 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
>> nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
>> sector_offset = coffset & 511;
>> csize = nb_csectors * 512 - sector_offset;
>> +
>> + /* Allocate buffers on first decompress operation, most images are
>> + * uncompressed and the memory overhead can be avoided. The buffers
>> + * are freed in .bdrv_close().
>> + */
>> + if (!s->cluster_data) {
>> + /* one more sector for decompressed data alignment */
>> + s->cluster_data = qemu_try_blockalign(bs->file->bs,
>> + QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + 512);
>> + if (!s->cluster_data) {
>> + return -EIO;
>
> Is -ENOMEM any better than -EIO here?
>
--
Alexey
On 19/08/17 12:46, Alexey Kardashevskiy wrote: > On 19/08/17 01:18, Eric Blake wrote: >> On 08/18/2017 08:31 AM, Stefan Hajnoczi wrote: >>> Most qcow2 files are uncompressed so it is wasteful to allocate (32 + 1) >>> * cluster_size + 512 bytes upfront. Allocate s->cluster_cache and >>> s->cluster_data when the first read operation is performance on a >>> compressed cluster. >>> >>> The buffers are freed in .bdrv_close(). .bdrv_open() no longer has any >>> code paths that can allocate these buffers, so remove the free functions >>> in the error code path. >>> >>> Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru> >>> Cc: Kevin Wolf <kwolf@redhat.com> >>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> >>> --- >>> Alexey: Does this improve your memory profiling results? >> >> Is this a regression from earlier versions? > > Hm, I have not thought about this. > > So. I did bisect and this started happening from > 9a4c0e220d8a4f82b5665d0ee95ef94d8e1509d5 > "hw/virtio-pci: fix virtio behaviour" > > Before that, the very same command line would take less than 1GB of > resident memory. That thing basically enforces virtio-1.0 for QEMU <=2.6 > which means that upstream with "-machine pseries-2.6" works fine (less than > 1GB), "-machine pseries-2.7" does not (close to 7GB, sometime even 9GB). > > Then I tried bisecting again, with > "scsi=off,disable-modern=off,disable-legacy=on" on my 150 virtio-block > devices, started from > e266d421490e0 "virtio-pci: add flags to enable/disable legacy/modern" (it > added the disable-modern switch) which uses 2GB of memory. > > I ended up with ada434cd0b44 "virtio-pci: implement cfg capability". > > Then I removed proxy->modern_as on v2.10.0-rc3 (see below) and got 1.5GB of > used memory (yay!) > > I do not really know how to reinterpret all of this, do you? > > > Note: 1GB..9GB numbers from below are the peak values from valgrind's s/from below/from above/ , sorry, bad cut-n-paste :) -- Alexey
On 19/08/17 12:53, Alexey Kardashevskiy wrote: > On 19/08/17 12:46, Alexey Kardashevskiy wrote: >> On 19/08/17 01:18, Eric Blake wrote: >>> On 08/18/2017 08:31 AM, Stefan Hajnoczi wrote: >>>> Most qcow2 files are uncompressed so it is wasteful to allocate (32 + 1) >>>> * cluster_size + 512 bytes upfront. Allocate s->cluster_cache and >>>> s->cluster_data when the first read operation is performance on a >>>> compressed cluster. >>>> >>>> The buffers are freed in .bdrv_close(). .bdrv_open() no longer has any >>>> code paths that can allocate these buffers, so remove the free functions >>>> in the error code path. >>>> >>>> Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru> >>>> Cc: Kevin Wolf <kwolf@redhat.com> >>>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> >>>> --- >>>> Alexey: Does this improve your memory profiling results? >>> >>> Is this a regression from earlier versions? >> >> Hm, I have not thought about this. >> >> So. I did bisect and this started happening from >> 9a4c0e220d8a4f82b5665d0ee95ef94d8e1509d5 >> "hw/virtio-pci: fix virtio behaviour" >> >> Before that, the very same command line would take less than 1GB of >> resident memory. That thing basically enforces virtio-1.0 for QEMU <=2.6 >> which means that upstream with "-machine pseries-2.6" works fine (less than >> 1GB), "-machine pseries-2.7" does not (close to 7GB, sometime even 9GB). >> >> Then I tried bisecting again, with >> "scsi=off,disable-modern=off,disable-legacy=on" on my 150 virtio-block >> devices, started from >> e266d421490e0 "virtio-pci: add flags to enable/disable legacy/modern" (it >> added the disable-modern switch) which uses 2GB of memory. >> >> I ended up with ada434cd0b44 "virtio-pci: implement cfg capability". >> >> Then I removed proxy->modern_as on v2.10.0-rc3 (see below) and got 1.5GB of >> used memory (yay!) >> >> I do not really know how to reinterpret all of this, do you? >> >> >> Note: 1GB..9GB numbers from below are the peak values from valgrind's > > s/from below/from above/ , sorry, bad cut-n-paste :) Just realized - I should have replied with this to "Memory use with >100 virtio devices" actually, sorry for the confusion. -- Alexey
On 19/08/17 12:46, Alexey Kardashevskiy wrote:
> On 19/08/17 01:18, Eric Blake wrote:
>> On 08/18/2017 08:31 AM, Stefan Hajnoczi wrote:
>>> Most qcow2 files are uncompressed so it is wasteful to allocate (32 + 1)
>>> * cluster_size + 512 bytes upfront. Allocate s->cluster_cache and
>>> s->cluster_data when the first read operation is performance on a
>>> compressed cluster.
>>>
>>> The buffers are freed in .bdrv_close(). .bdrv_open() no longer has any
>>> code paths that can allocate these buffers, so remove the free functions
>>> in the error code path.
>>>
>>> Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>> Cc: Kevin Wolf <kwolf@redhat.com>
>>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
>>> ---
>>> Alexey: Does this improve your memory profiling results?
>>
>> Is this a regression from earlier versions?
>
> Hm, I have not thought about this.
>
> So. I did bisect and this started happening from
> 9a4c0e220d8a4f82b5665d0ee95ef94d8e1509d5
> "hw/virtio-pci: fix virtio behaviour"
>
> Before that, the very same command line would take less than 1GB of
> resident memory. That thing basically enforces virtio-1.0 for QEMU <=2.6
> which means that upstream with "-machine pseries-2.6" works fine (less than
> 1GB), "-machine pseries-2.7" does not (close to 7GB, sometime even 9GB).
>
> Then I tried bisecting again, with
> "scsi=off,disable-modern=off,disable-legacy=on" on my 150 virtio-block
> devices, started from
> e266d421490e0 "virtio-pci: add flags to enable/disable legacy/modern" (it
> added the disable-modern switch) which uses 2GB of memory.
>
> I ended up with ada434cd0b44 "virtio-pci: implement cfg capability".
>
> Then I removed proxy->modern_as on v2.10.0-rc3 (see below) and got 1.5GB of
> used memory (yay!)
>
> I do not really know how to reinterpret all of this, do you?
Anyone, ping? Should I move the conversation to the original thread? Any
hacks to try with libc?
>
>
> Note: 1GB..9GB numbers from below are the peak values from valgrind's
> massif. This is pretty much resident memory used by QEMU process. In my
> testing I did not enable KVM and I did not start the guest (i.e. used -S).
> 150 virtio-block devices, 2GB RAM for the guest.
>
> Also, while bisecting, I only paid attention if it is 1..2GB or 6..9GB -
> all tests did fit these 2 ranges, for any given sha1 the amount of memory
> would be stable but among "good" commits it could change between 1GB and 2GB.
>
>
>
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 5d14bd6..7ad447a 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1783,6 +1783,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev,
> Error **errp)
> /* PCI BAR regions must be powers of 2 */
> pow2ceil(proxy->notify.offset + proxy->notify.size));
>
> +#if 0
> memory_region_init_alias(&proxy->modern_cfg,
> OBJECT(proxy),
> "virtio-pci-cfg",
> @@ -1791,7 +1792,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev,
> Error **errp)
> memory_region_size(&proxy->modern_bar));
>
> address_space_init(&proxy->modern_as, &proxy->modern_cfg,
> "virtio-pci-cfg-as");
> -
> +#endif
> if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) {
> proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
> }
> @@ -1860,10 +1861,10 @@ static void virtio_pci_realize(PCIDevice *pci_dev,
> Error **errp)
>
> static void virtio_pci_exit(PCIDevice *pci_dev)
> {
> - VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
> + //VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
>
> msix_uninit_exclusive_bar(pci_dev);
> - address_space_destroy(&proxy->modern_as);
> + //address_space_destroy(&proxy->modern_as);
> }
>
> static void virtio_pci_reset(DeviceState *qdev)
>
>
>
>
>
>> Likely, it is NOT -rc4
>> material, and thus can wait for 2.11; but it should be fine for -stable
>> as part of 2.10.1 down the road.
>>
>>> +++ b/block/qcow2-cluster.c
>>> @@ -1516,6 +1516,23 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
>>> nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
>>> sector_offset = coffset & 511;
>>> csize = nb_csectors * 512 - sector_offset;
>>> +
>>> + /* Allocate buffers on first decompress operation, most images are
>>> + * uncompressed and the memory overhead can be avoided. The buffers
>>> + * are freed in .bdrv_close().
>>> + */
>>> + if (!s->cluster_data) {
>>> + /* one more sector for decompressed data alignment */
>>> + s->cluster_data = qemu_try_blockalign(bs->file->bs,
>>> + QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + 512);
>>> + if (!s->cluster_data) {
>>> + return -EIO;
>>
>> Is -ENOMEM any better than -EIO here?
>>
>
>
--
Alexey
On Tue, Aug 22, 2017 at 02:56:00PM +1000, Alexey Kardashevskiy wrote: > On 19/08/17 12:46, Alexey Kardashevskiy wrote: > > On 19/08/17 01:18, Eric Blake wrote: > >> On 08/18/2017 08:31 AM, Stefan Hajnoczi wrote: > >>> Most qcow2 files are uncompressed so it is wasteful to allocate (32 + 1) > >>> * cluster_size + 512 bytes upfront. Allocate s->cluster_cache and > >>> s->cluster_data when the first read operation is performance on a > >>> compressed cluster. > >>> > >>> The buffers are freed in .bdrv_close(). .bdrv_open() no longer has any > >>> code paths that can allocate these buffers, so remove the free functions > >>> in the error code path. > >>> > >>> Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru> > >>> Cc: Kevin Wolf <kwolf@redhat.com> > >>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > >>> --- > >>> Alexey: Does this improve your memory profiling results? > >> > >> Is this a regression from earlier versions? > > > > Hm, I have not thought about this. > > > > So. I did bisect and this started happening from > > 9a4c0e220d8a4f82b5665d0ee95ef94d8e1509d5 > > "hw/virtio-pci: fix virtio behaviour" > > > > Before that, the very same command line would take less than 1GB of > > resident memory. That thing basically enforces virtio-1.0 for QEMU <=2.6 > > which means that upstream with "-machine pseries-2.6" works fine (less than > > 1GB), "-machine pseries-2.7" does not (close to 7GB, sometime even 9GB). > > > > Then I tried bisecting again, with > > "scsi=off,disable-modern=off,disable-legacy=on" on my 150 virtio-block > > devices, started from > > e266d421490e0 "virtio-pci: add flags to enable/disable legacy/modern" (it > > added the disable-modern switch) which uses 2GB of memory. > > > > I ended up with ada434cd0b44 "virtio-pci: implement cfg capability". > > > > Then I removed proxy->modern_as on v2.10.0-rc3 (see below) and got 1.5GB of > > used memory (yay!) > > > > I do not really know how to reinterpret all of this, do you? > > > Anyone, ping? Should I move the conversation to the original thread? Any > hacks to try with libc? I suggest a new top-level thread with Michael Tsirkin CCed. Stefan
On 31/08/17 03:20, Stefan Hajnoczi wrote: > On Tue, Aug 22, 2017 at 02:56:00PM +1000, Alexey Kardashevskiy wrote: >> On 19/08/17 12:46, Alexey Kardashevskiy wrote: >>> On 19/08/17 01:18, Eric Blake wrote: >>>> On 08/18/2017 08:31 AM, Stefan Hajnoczi wrote: >>>>> Most qcow2 files are uncompressed so it is wasteful to allocate (32 + 1) >>>>> * cluster_size + 512 bytes upfront. Allocate s->cluster_cache and >>>>> s->cluster_data when the first read operation is performance on a >>>>> compressed cluster. >>>>> >>>>> The buffers are freed in .bdrv_close(). .bdrv_open() no longer has any >>>>> code paths that can allocate these buffers, so remove the free functions >>>>> in the error code path. >>>>> >>>>> Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru> >>>>> Cc: Kevin Wolf <kwolf@redhat.com> >>>>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> >>>>> --- >>>>> Alexey: Does this improve your memory profiling results? >>>> >>>> Is this a regression from earlier versions? >>> >>> Hm, I have not thought about this. >>> >>> So. I did bisect and this started happening from >>> 9a4c0e220d8a4f82b5665d0ee95ef94d8e1509d5 >>> "hw/virtio-pci: fix virtio behaviour" >>> >>> Before that, the very same command line would take less than 1GB of >>> resident memory. That thing basically enforces virtio-1.0 for QEMU <=2.6 >>> which means that upstream with "-machine pseries-2.6" works fine (less than >>> 1GB), "-machine pseries-2.7" does not (close to 7GB, sometime even 9GB). >>> >>> Then I tried bisecting again, with >>> "scsi=off,disable-modern=off,disable-legacy=on" on my 150 virtio-block >>> devices, started from >>> e266d421490e0 "virtio-pci: add flags to enable/disable legacy/modern" (it >>> added the disable-modern switch) which uses 2GB of memory. >>> >>> I ended up with ada434cd0b44 "virtio-pci: implement cfg capability". >>> >>> Then I removed proxy->modern_as on v2.10.0-rc3 (see below) and got 1.5GB of >>> used memory (yay!) >>> >>> I do not really know how to reinterpret all of this, do you? >> >> >> Anyone, ping? Should I move the conversation to the original thread? Any >> hacks to try with libc? > > I suggest a new top-level thread with Michael Tsirkin CCed. I am continuing in the original "Memory use with >100 virtio devices" thread and the problem is more generic than virtio, it is just easier to reproduce it with virtio, that's all. -- Alexey
On Fri, Aug 18, 2017 at 10:18:37AM -0500, Eric Blake wrote:
> On 08/18/2017 08:31 AM, Stefan Hajnoczi wrote:
> > Most qcow2 files are uncompressed so it is wasteful to allocate (32 + 1)
> > * cluster_size + 512 bytes upfront. Allocate s->cluster_cache and
> > s->cluster_data when the first read operation is performance on a
> > compressed cluster.
> >
> > The buffers are freed in .bdrv_close(). .bdrv_open() no longer has any
> > code paths that can allocate these buffers, so remove the free functions
> > in the error code path.
> >
> > Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> > Cc: Kevin Wolf <kwolf@redhat.com>
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> > Alexey: Does this improve your memory profiling results?
>
> Is this a regression from earlier versions? Likely, it is NOT -rc4
> material, and thus can wait for 2.11; but it should be fine for -stable
> as part of 2.10.1 down the road.
It's not a regression. s->cluster_data was always allocated upfront in
.bdrv_open().
> > +++ b/block/qcow2-cluster.c
> > @@ -1516,6 +1516,23 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
> > nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
> > sector_offset = coffset & 511;
> > csize = nb_csectors * 512 - sector_offset;
> > +
> > + /* Allocate buffers on first decompress operation, most images are
> > + * uncompressed and the memory overhead can be avoided. The buffers
> > + * are freed in .bdrv_close().
> > + */
> > + if (!s->cluster_data) {
> > + /* one more sector for decompressed data alignment */
> > + s->cluster_data = qemu_try_blockalign(bs->file->bs,
> > + QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + 512);
> > + if (!s->cluster_data) {
> > + return -EIO;
>
> Is -ENOMEM any better than -EIO here?
There is another instance of -ENOMEM in qcow2.c so it seems reasonable
to use the more specific ENOMEM error code.
I'll resend the patch.
Stefan
On 18/08/17 23:31, Stefan Hajnoczi wrote:
> Most qcow2 files are uncompressed so it is wasteful to allocate (32 + 1)
> * cluster_size + 512 bytes upfront. Allocate s->cluster_cache and
> s->cluster_data when the first read operation is performance on a
> compressed cluster.
>
> The buffers are freed in .bdrv_close(). .bdrv_open() no longer has any
> code paths that can allocate these buffers, so remove the free functions
> in the error code path.
>
> Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> Cc: Kevin Wolf <kwolf@redhat.com>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> Alexey: Does this improve your memory profiling results?
Yes, it does:
was:
12.81% (1,023,193,088B)
now:
05.36% (393,893,888B)
Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> block/qcow2-cluster.c | 17 +++++++++++++++++
> block/qcow2.c | 12 ------------
> 2 files changed, 17 insertions(+), 12 deletions(-)
>
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index f06c08f64c..c47600a44e 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -1516,6 +1516,23 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
> nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
> sector_offset = coffset & 511;
> csize = nb_csectors * 512 - sector_offset;
> +
> + /* Allocate buffers on first decompress operation, most images are
> + * uncompressed and the memory overhead can be avoided. The buffers
> + * are freed in .bdrv_close().
> + */
> + if (!s->cluster_data) {
> + /* one more sector for decompressed data alignment */
> + s->cluster_data = qemu_try_blockalign(bs->file->bs,
> + QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + 512);
> + if (!s->cluster_data) {
> + return -EIO;
> + }
> + }
> + if (!s->cluster_cache) {
> + s->cluster_cache = g_malloc(s->cluster_size);
> + }
> +
> BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
> ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data,
> nb_csectors);
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 40ba26c111..0ac201910a 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -1360,16 +1360,6 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
> goto fail;
> }
>
> - s->cluster_cache = g_malloc(s->cluster_size);
> - /* one more sector for decompressed data alignment */
> - s->cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS
> - * s->cluster_size + 512);
> - if (s->cluster_data == NULL) {
> - error_setg(errp, "Could not allocate temporary cluster buffer");
> - ret = -ENOMEM;
> - goto fail;
> - }
> -
> s->cluster_cache_offset = -1;
> s->flags = flags;
>
> @@ -1507,8 +1497,6 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
> if (s->refcount_block_cache) {
> qcow2_cache_destroy(bs, s->refcount_block_cache);
> }
> - g_free(s->cluster_cache);
> - qemu_vfree(s->cluster_data);
> qcrypto_block_free(s->crypto);
> qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
> return ret;
>
--
Alexey
© 2016 - 2026 Red Hat, Inc.