src/conf/domain_conf.c | 25 +++++++++++++++++++++++++ src/conf/domain_conf.h | 4 ++++ src/qemu/qemu_command.c | 7 +++++++ src/qemu/qemu_domain.c | 4 ++++ 4 files changed, 40 insertions(+)
This patch adds support for an L2 table cache for qcow2 disks.
An L2 table cache can improve I/O read and write performance for qcow2 images.
Example: with random 4K read requests on a fully populated 100GB image (SSD backend and a VM using the directsync cache mode),
IOPS increased by a factor of 7.
---
src/conf/domain_conf.c | 25 +++++++++++++++++++++++++
src/conf/domain_conf.h | 4 ++++
src/qemu/qemu_command.c | 7 +++++++
src/qemu/qemu_domain.c | 4 ++++
4 files changed, 40 insertions(+)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index b8b5345..cb9fb05 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -9502,6 +9502,8 @@ virDomainDiskDefParseXML(virDomainXMLOptionPtr xmlopt,
char *vendor = NULL;
char *product = NULL;
char *domain_name = NULL;
+ char *disk_l2_cache_size = NULL;
+ char *disk_cache_clean_interval = NULL;
if (!(def = virDomainDiskDefNew(xmlopt)))
return NULL;
@@ -9701,6 +9703,27 @@ virDomainDiskDefParseXML(virDomainXMLOptionPtr xmlopt,
}
} else if (virXMLNodeNameEqual(cur, "boot")) {
/* boot is parsed as part of virDomainDeviceInfoParseXML */
+ } else if (virXMLNodeNameEqual(cur, "diskCache")) {
+ disk_l2_cache_size =
+ virXMLPropString(cur, "disk_l2_cache_size");
+ if (disk_l2_cache_size &&
+ virStrToLong_ui(disk_l2_cache_size, NULL, 0,
+ &def->disk_cache.disk_l2_cache_size) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("invalid disk L2 cache size '%s'"),
+ disk_l2_cache_size);
+ goto error;
+ }
+ disk_cache_clean_interval =
+ virXMLPropString(cur, "disk_cache_clean_interval");
+ if (disk_cache_clean_interval &&
+ virStrToLong_ui(disk_cache_clean_interval, NULL, 0,
+ &def->disk_cache.disk_cache_clean_interval) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("invalid disk cache clean interval '%s'"),
+ disk_cache_clean_interval);
+ goto error;
+ }
}
}
@@ -9903,6 +9926,8 @@ virDomainDiskDefParseXML(virDomainXMLOptionPtr xmlopt,
VIR_FREE(vendor);
VIR_FREE(product);
VIR_FREE(domain_name);
+ VIR_FREE(disk_l2_cache_size);
+ VIR_FREE(disk_cache_clean_interval);
ctxt->node = save_ctxt;
return def;
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 71437dc..6396475 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -647,6 +647,10 @@ struct _virDomainDiskDef {
unsigned int physical_block_size;
} blockio;
+ struct {
+ unsigned int disk_l2_cache_size;
+ unsigned int disk_cache_clean_interval;
+ } disk_cache;
virDomainBlockIoTuneInfo blkdeviotune;
char *driverName;
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 4fc3176..4bc9412 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -1637,6 +1637,13 @@ qemuBuildDriveStr(virDomainDiskDefPtr disk,
if (qemuBuildDriveSourceStr(disk, qemuCaps, &opt) < 0)
goto error;
+
+ if (disk->disk_cache.disk_l2_cache_size > 0)
+ virBufferAsprintf(&opt, "l2-cache-size=%u,",
+ disk->disk_cache.disk_l2_cache_size);
+ if (disk->disk_cache.disk_cache_clean_interval > 0)
+ virBufferAsprintf(&opt, "cache-clean-interval=%u,",
+ disk->disk_cache.disk_cache_clean_interval);
if (qemuDiskBusNeedsDeviceArg(disk->bus)) {
char *drivealias = qemuAliasDiskDriveFromDisk(disk);
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index fee4481..4896bf7 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -8627,6 +8627,10 @@ qemuDomainDiskChangeSupported(virDomainDiskDefPtr disk,
CHECK_EQ(ioeventfd, "ioeventfd", true);
CHECK_EQ(event_idx, "event_idx", true);
CHECK_EQ(copy_on_read, "copy_on_read", true);
+ CHECK_EQ(disk_cache.disk_l2_cache_size,
+ "diskCache disk_l2_cache_size", true);
+ CHECK_EQ(disk_cache.disk_cache_clean_interval,
+ "diskCache disk_cache_clean_interval", true);
/* "snapshot" is a libvirt internal field and thus can be changed */
/* startupPolicy is allowed to be updated. Therefore not checked here. */
CHECK_EQ(transient, "transient", true);
--
1.8.3.1
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
On 06/29/2018 05:46 AM, dujiancheng wrote: > The patch add support L2 table cache for qcow2 disk. > L2 table cache can improve IO read and write performance for qcow2 img. > Example: random 4K read requests on a fully populated 100GB image (SSD backend and vm with directsync cacha mode), > IOPS increased by 7 times. > This commit message has some very long lines and does not explain the new element you are introducing. Also, you are missing Signed-off-by line which is required. > --- > src/conf/domain_conf.c | 25 +++++++++++++++++++++++++ > src/conf/domain_conf.h | 4 ++++ > src/qemu/qemu_command.c | 7 +++++++ > src/qemu/qemu_domain.c | 4 ++++ > 4 files changed, 40 insertions(+) Any new element that is introduced must go hand in hand with RNG schema adjustment, documentation and a test case. I believe virsh was complaining when you edited your domain and added <diskCache/>: virsh # edit fedora error: Reconnected to the hypervisor error: XML document failed to validate against schema: Unable to validate doc against //docs/schemas/domain.rng Extra element devices in interleave Element domain failed to validate content Failed. Try again? [y,n,i,f,?]: > > diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c > index b8b5345..cb9fb05 100644 > --- a/src/conf/domain_conf.c > +++ b/src/conf/domain_conf.c > @@ -9502,6 +9502,8 @@ virDomainDiskDefParseXML(virDomainXMLOptionPtr xmlopt, > char *vendor = NULL; > char *product = NULL; > char *domain_name = NULL; > + char *disk_l2_cache_size = NULL; > + char *disk_cache_clean_interval = NULL; > > if (!(def = virDomainDiskDefNew(xmlopt))) > return NULL; > @@ -9701,6 +9703,27 @@ virDomainDiskDefParseXML(virDomainXMLOptionPtr xmlopt, > } > } else if (virXMLNodeNameEqual(cur, "boot")) { > /* boot is parsed as part of virDomainDeviceInfoParseXML */ > + } else if (virXMLNodeNameEqual(cur, "diskCache")) { > + disk_l2_cache_size = > + virXMLPropString(cur, "disk_l2_cache_size"); This is rather unfortunate name for attribute. 
Also, is there a reason you have not put the assignment onto one line? > + if (disk_l2_cache_size && > + virStrToLong_ui(disk_l2_cache_size, NULL, 0, > + &def->disk_cache.disk_l2_cache_size) < 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("invalid disk L2 cache size '%s'"), > + disk_l2_cache_size); > + goto error; > + } > + disk_cache_clean_interval = > + virXMLPropString(cur, "disk_cache_clean_interval"); > + if (disk_cache_clean_interval && > + virStrToLong_ui(disk_cache_clean_interval, NULL, 0, > + &def->disk_cache.disk_cache_clean_interval) < 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("invalid disk cache clean interval '%s'"), > + disk_cache_clean_interval); > + goto error; > + } > } > } So what this adds is: <disk type='file' device='disk'> <driver name='qemu' type='qcow2' discard='unmap'/> <source file='/var/lib/libvirt/images/fedora.qcow2'/> <diskCache disk_l2_cache_size='128' disk_cache_clean_interval='8'/> <!-- This is the new element ^^ --> <target dev='sda' bus='scsi'/> <boot order='1'/> <address type='drive' controller='0' bus='0' target='0' unit='0'/> </disk> Well, from my example, what units are "128" and "8" in? Also, in libvirt we give users possibility to specify other units than KiB (even though we report back size in KiB), for instance look at <memory/> numa <cell/>. Also, your proposal is not that future proof. My suggestion is: <cache> <cache level='2'> <size unit='KiB'>128</size> </cache> <clean interval='8'/> </cache> But I'm sure there's even better approach. 
> > @@ -9903,6 +9926,8 @@ virDomainDiskDefParseXML(virDomainXMLOptionPtr xmlopt, > VIR_FREE(vendor); > VIR_FREE(product); > VIR_FREE(domain_name); > + VIR_FREE(disk_l2_cache_size); > + VIR_FREE(disk_cache_clean_interval); > > ctxt->node = save_ctxt; > return def; > diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h > index 71437dc..6396475 100644 > --- a/src/conf/domain_conf.h > +++ b/src/conf/domain_conf.h > @@ -647,6 +647,10 @@ struct _virDomainDiskDef { > unsigned int physical_block_size; > } blockio; > > + struct { > + unsigned int disk_l2_cache_size; > + unsigned int disk_cache_clean_interval; > + } disk_cache; This would look differently then. > virDomainBlockIoTuneInfo blkdeviotune; > > char *driverName; > diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c > index 4fc3176..4bc9412 100644 > --- a/src/qemu/qemu_command.c > +++ b/src/qemu/qemu_command.c > @@ -1637,6 +1637,13 @@ qemuBuildDriveStr(virDomainDiskDefPtr disk, > > if (qemuBuildDriveSourceStr(disk, qemuCaps, &opt) < 0) > goto error; > + Whitespace at EOL. 
'make syntax-check' is your friend ;-) > + if (disk->disk_cache.disk_l2_cache_size > 0) > + virBufferAsprintf(&opt, "l2-cache-size=%u,", > + disk->disk_cache.disk_l2_cache_size); > + if (disk->disk_cache.disk_cache_clean_interval > 0) > + virBufferAsprintf(&opt, "cache-clean-interval=%u,", > + disk->disk_cache.disk_cache_clean_interval); > > if (qemuDiskBusNeedsDeviceArg(disk->bus)) { > char *drivealias = qemuAliasDiskDriveFromDisk(disk); > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > index fee4481..4896bf7 100644 > --- a/src/qemu/qemu_domain.c > +++ b/src/qemu/qemu_domain.c > @@ -8627,6 +8627,10 @@ qemuDomainDiskChangeSupported(virDomainDiskDefPtr disk, > CHECK_EQ(ioeventfd, "ioeventfd", true); > CHECK_EQ(event_idx, "event_idx", true); > CHECK_EQ(copy_on_read, "copy_on_read", true); > + CHECK_EQ(disk_cache.disk_l2_cache_size, > + "diskCache disk_l2_cache_size", true); > + CHECK_EQ(disk_cache.disk_cache_clean_interval, > + "diskCache disk_cache_clean_interval", true); > /* "snapshot" is a libvirt internal field and thus can be changed */ > /* startupPolicy is allowed to be updated. Therefore not checked here. */ > CHECK_EQ(transient, "transient", true); > As I said earlier, xml2xml test and xml2argv test cases are must. Michal -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
On Fri, Jun 29, 2018 at 09:02:17 +0200, Michal Privoznik wrote: > On 06/29/2018 05:46 AM, dujiancheng wrote: > > The patch add support L2 table cache for qcow2 disk. > > L2 table cache can improve IO read and write performance for qcow2 img. > > Example: random 4K read requests on a fully populated 100GB image (SSD backend and vm with directsync cacha mode), > > IOPS increased by 7 times. > > > > This commit message has some very long lines and does not explain the > new element you are introducing. Also, you are missing Signed-off-by > line which is required. > > > --- > > src/conf/domain_conf.c | 25 +++++++++++++++++++++++++ > > src/conf/domain_conf.h | 4 ++++ > > src/qemu/qemu_command.c | 7 +++++++ > > src/qemu/qemu_domain.c | 4 ++++ > > 4 files changed, 40 insertions(+) > > Any new element that is introduced must go hand in hand with RNG schema > adjustment, documentation and a test case. I believe virsh was > complaining when you edited your domain and added <diskCache/>: > > virsh # edit fedora > error: Reconnected to the hypervisor > error: XML document failed to validate against schema: Unable to validate doc against //docs/schemas/domain.rng > Extra element devices in interleave > Element domain failed to validate content > > Failed. Try again? 
[y,n,i,f,?]: > > > > > > diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c > > index b8b5345..cb9fb05 100644 > > --- a/src/conf/domain_conf.c > > +++ b/src/conf/domain_conf.c > > @@ -9502,6 +9502,8 @@ virDomainDiskDefParseXML(virDomainXMLOptionPtr xmlopt, > > char *vendor = NULL; > > char *product = NULL; > > char *domain_name = NULL; > > + char *disk_l2_cache_size = NULL; > > + char *disk_cache_clean_interval = NULL; > > > > if (!(def = virDomainDiskDefNew(xmlopt))) > > return NULL; > > @@ -9701,6 +9703,27 @@ virDomainDiskDefParseXML(virDomainXMLOptionPtr xmlopt, > > } > > } else if (virXMLNodeNameEqual(cur, "boot")) { > > /* boot is parsed as part of virDomainDeviceInfoParseXML */ > > + } else if (virXMLNodeNameEqual(cur, "diskCache")) { > > + disk_l2_cache_size = > > + virXMLPropString(cur, "disk_l2_cache_size"); > > This is rather unfortunate name for attribute. Also, is there a reason > you have not put the assignment onto one line? > > > + if (disk_l2_cache_size && > > + virStrToLong_ui(disk_l2_cache_size, NULL, 0, > > + &def->disk_cache.disk_l2_cache_size) < 0) { > > + virReportError(VIR_ERR_INTERNAL_ERROR, > > + _("invalid disk L2 cache size '%s'"), > > + disk_l2_cache_size); > > + goto error; > > + } > > + disk_cache_clean_interval = > > + virXMLPropString(cur, "disk_cache_clean_interval"); > > + if (disk_cache_clean_interval && > > + virStrToLong_ui(disk_cache_clean_interval, NULL, 0, > > + &def->disk_cache.disk_cache_clean_interval) < 0) { > > + virReportError(VIR_ERR_INTERNAL_ERROR, > > + _("invalid disk cache clean interval '%s'"), > > + disk_cache_clean_interval); > > + goto error; > > + } > > } > > } > > So what this adds is: > > <disk type='file' device='disk'> > <driver name='qemu' type='qcow2' discard='unmap'/> > <source file='/var/lib/libvirt/images/fedora.qcow2'/> > > <diskCache disk_l2_cache_size='128' disk_cache_clean_interval='8'/> > <!-- This is the new element ^^ --> > > <target dev='sda' bus='scsi'/> > <boot order='1'/> > 
<address type='drive' controller='0' bus='0' target='0' unit='0'/> > </disk> > > Well, from my example, what units are "128" and "8" in? Also, in libvirt > we give users possibility to specify other units than KiB (even though > we report back size in KiB), for instance look at <memory/> numa > <cell/>. Also, your proposal is not that future proof. My suggestion is: > > <cache> > <cache level='2'> > <size unit='KiB'>128</size> > </cache> > <clean interval='8'/> > </cache> > > But I'm sure there's even better approach. There were at least two attempts to implement this in the past which we've rejected as the configuration of this is more of a black magic than anything which users could do and there was no solid documentation how to tackle it or make it useful for higher level management apps. IIRC John provided a link to the latest discussion which also had patches. Those were much more complete and documented than this and had better naming of those. It may be worth reopening the discussion whether to include this but I'd certainly go with one of the older versions. -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[...] >> Well, from my example, what units are "128" and "8" in? Also, in libvirt >> we give users possibility to specify other units than KiB (even though >> we report back size in KiB), for instance look at <memory/> numa >> <cell/>. Also, your proposal is not that future proof. My suggestion is: >> >> <cache> >> <cache level='2'> >> <size unit='KiB'>128</size> >> </cache> >> <clean interval='8'/> >> </cache> >> >> But I'm sure there's even better approach. > > There were at least two attempts to implement this in the past which > we've rejected as the configuration of this is more of a black magic > than anything which users could do and there was no solid documentation > how to tackle it or make it useful for higher level management apps. There's some updated description in the qemu.git/docs/qcow2-cache.txt and in the last few months Berto has made changes to upstream qemu in this area, see: http://lists.nongnu.org/archive/html/qemu-devel/2018-04/msg02406.html which ends through commit id 52253998 http://lists.nongnu.org/archive/html/qemu-devel/2018-02/msg00911.html which ends through as commit id 03b1b6f02. IIRC this one provided even more knobs to turn, but if you follow the links back to the v2 and v1 patches - each has a decent cover letter summarizing the state of things at the qemu level at least. > > IIRC John provided a link to the latest discussion which also had > patches. Those were much more complete and documented than this and had > better naming of those. Yep, I believe this is the same author as earlier this week: https://www.redhat.com/archives/libvir-list/2018-June/msg01721.html Berto even dropped some details on libvir-list during the review of the proposed Nov 2017 changes by Lin Ma, see: https://www.redhat.com/archives/libvir-list/2017-November/msg00572.html > > It may be worth reopening the discussion whether to include this but I'd > certainly go with one of the older versions. 
> > For as many times as we see this particular area - it feels that way. Anyone know a good patch necromancer ;-). If we do take that option, then we may also need to reconsider the poll-max-ns for IOThreads (see https://bugzilla.redhat.com/show_bug.cgi?id=1545732) which points one at Pavel's series: https://www.redhat.com/archives/libvir-list/2017-February/msg01047.html John -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
© 2016 - 2025 Red Hat, Inc.