[libvirt] [RFCv2 PATCH 3/5] conf: rename cachetune to restune

bing.niu@intel.com posted 5 patches 7 years ago
There is a newer version of this series
[libvirt] [RFCv2 PATCH 3/5] conf: rename cachetune to restune
Posted by bing.niu@intel.com 7 years ago
From: Bing Niu <bing.niu@intel.com>

resctrl not only supports cache tuning, but also memory bandwidth
tuning. Rename cachetune to restune(resource tuning) to reflect
that.

Signed-off-by: Bing Niu <bing.niu@intel.com>
---
 src/conf/domain_conf.c  | 44 ++++++++++++++++++++++----------------------
 src/conf/domain_conf.h  | 10 +++++-----
 src/qemu/qemu_process.c | 18 +++++++++---------
 3 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 62c412e..b3543f3 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -2950,14 +2950,14 @@ virDomainLoaderDefFree(virDomainLoaderDefPtr loader)
 
 
 static void
-virDomainCachetuneDefFree(virDomainCachetuneDefPtr cachetune)
+virDomainRestuneDefFree(virDomainRestuneDefPtr restune)
 {
-    if (!cachetune)
+    if (!restune)
         return;
 
-    virObjectUnref(cachetune->alloc);
-    virBitmapFree(cachetune->vcpus);
-    VIR_FREE(cachetune);
+    virObjectUnref(restune->alloc);
+    virBitmapFree(restune->vcpus);
+    VIR_FREE(restune);
 }
 
 
@@ -3147,9 +3147,9 @@ void virDomainDefFree(virDomainDefPtr def)
         virDomainShmemDefFree(def->shmems[i]);
     VIR_FREE(def->shmems);
 
-    for (i = 0; i < def->ncachetunes; i++)
-        virDomainCachetuneDefFree(def->cachetunes[i]);
-    VIR_FREE(def->cachetunes);
+    for (i = 0; i < def->nrestunes; i++)
+        virDomainRestuneDefFree(def->restunes[i]);
+    VIR_FREE(def->restunes);
 
     VIR_FREE(def->keywrap);
 
@@ -18971,7 +18971,7 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
     xmlNodePtr *nodes = NULL;
     virBitmapPtr vcpus = NULL;
     virResctrlAllocPtr alloc = virResctrlAllocNew();
-    virDomainCachetuneDefPtr tmp_cachetune = NULL;
+    virDomainRestuneDefPtr tmp_restune = NULL;
     char *tmp = NULL;
     char *vcpus_str = NULL;
     char *alloc_id = NULL;
@@ -18984,7 +18984,7 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
     if (!alloc)
         goto cleanup;
 
-    if (VIR_ALLOC(tmp_cachetune) < 0)
+    if (VIR_ALLOC(tmp_restune) < 0)
         goto cleanup;
 
     vcpus_str = virXMLPropString(node, "vcpus");
@@ -19025,8 +19025,8 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
         goto cleanup;
     }
 
-    for (i = 0; i < def->ncachetunes; i++) {
-        if (virBitmapOverlaps(def->cachetunes[i]->vcpus, vcpus)) {
+    for (i = 0; i < def->nrestunes; i++) {
+        if (virBitmapOverlaps(def->restunes[i]->vcpus, vcpus)) {
             virReportError(VIR_ERR_XML_ERROR, "%s",
                            _("Overlapping vcpus in cachetunes"));
             goto cleanup;
@@ -19056,16 +19056,16 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
     if (virResctrlAllocSetID(alloc, alloc_id) < 0)
         goto cleanup;
 
-    VIR_STEAL_PTR(tmp_cachetune->vcpus, vcpus);
-    VIR_STEAL_PTR(tmp_cachetune->alloc, alloc);
+    VIR_STEAL_PTR(tmp_restune->vcpus, vcpus);
+    VIR_STEAL_PTR(tmp_restune->alloc, alloc);
 
-    if (VIR_APPEND_ELEMENT(def->cachetunes, def->ncachetunes, tmp_cachetune) < 0)
+    if (VIR_APPEND_ELEMENT(def->restunes, def->nrestunes, tmp_restune) < 0)
         goto cleanup;
 
     ret = 0;
  cleanup:
     ctxt->node = oldnode;
-    virDomainCachetuneDefFree(tmp_cachetune);
+    virDomainRestuneDefFree(tmp_restune);
     virObjectUnref(alloc);
     virBitmapFree(vcpus);
     VIR_FREE(alloc_id);
@@ -26874,7 +26874,7 @@ virDomainCachetuneDefFormatHelper(unsigned int level,
 
 static int
 virDomainCachetuneDefFormat(virBufferPtr buf,
-                            virDomainCachetuneDefPtr cachetune,
+                            virDomainRestuneDefPtr restune,
                             unsigned int flags)
 {
     virBuffer childrenBuf = VIR_BUFFER_INITIALIZER;
@@ -26882,7 +26882,7 @@ virDomainCachetuneDefFormat(virBufferPtr buf,
     int ret = -1;
 
     virBufferSetChildIndent(&childrenBuf, buf);
-    virResctrlAllocForeachCache(cachetune->alloc,
+    virResctrlAllocForeachCache(restune->alloc,
                                virDomainCachetuneDefFormatHelper,
                                &childrenBuf);
 
@@ -26895,14 +26895,14 @@ virDomainCachetuneDefFormat(virBufferPtr buf,
         goto cleanup;
     }
 
-    vcpus = virBitmapFormat(cachetune->vcpus);
+    vcpus = virBitmapFormat(restune->vcpus);
     if (!vcpus)
         goto cleanup;
 
     virBufferAsprintf(buf, "<cachetune vcpus='%s'", vcpus);
 
     if (!(flags & VIR_DOMAIN_DEF_FORMAT_INACTIVE)) {
-        const char *alloc_id = virResctrlAllocGetID(cachetune->alloc);
+        const char *alloc_id = virResctrlAllocGetID(restune->alloc);
         if (!alloc_id)
             goto cleanup;
 
@@ -27023,8 +27023,8 @@ virDomainCputuneDefFormat(virBufferPtr buf,
                                  def->iothreadids[i]->iothread_id);
     }
 
-    for (i = 0; i < def->ncachetunes; i++)
-        virDomainCachetuneDefFormat(&childrenBuf, def->cachetunes[i], flags);
+    for (i = 0; i < def->nrestunes; i++)
+        virDomainCachetuneDefFormat(&childrenBuf, def->restunes[i], flags);
 
     if (virBufferCheckError(&childrenBuf) < 0)
         return -1;
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 6344c02..9e71a0b 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -2228,10 +2228,10 @@ struct _virDomainCputune {
 };
 
 
-typedef struct _virDomainCachetuneDef virDomainCachetuneDef;
-typedef virDomainCachetuneDef *virDomainCachetuneDefPtr;
+typedef struct _virDomainRestuneDef virDomainRestuneDef;
+typedef virDomainRestuneDef *virDomainRestuneDefPtr;
 
-struct _virDomainCachetuneDef {
+struct _virDomainRestuneDef {
     virBitmapPtr vcpus;
     virResctrlAllocPtr alloc;
 };
@@ -2409,8 +2409,8 @@ struct _virDomainDef {
 
     virDomainCputune cputune;
 
-    virDomainCachetuneDefPtr *cachetunes;
-    size_t ncachetunes;
+    virDomainRestuneDefPtr *restunes;
+    size_t nrestunes;
 
     virDomainNumaPtr numa;
     virDomainResourceDefPtr resource;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 93fd6ba..0659c06 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -2447,7 +2447,7 @@ qemuProcessResctrlCreate(virQEMUDriverPtr driver,
     virCapsPtr caps = NULL;
     qemuDomainObjPrivatePtr priv = vm->privateData;
 
-    if (!vm->def->ncachetunes)
+    if (!vm->def->nrestunes)
         return 0;
 
     /* Force capability refresh since resctrl info can change
@@ -2456,9 +2456,9 @@ qemuProcessResctrlCreate(virQEMUDriverPtr driver,
     if (!caps)
         return -1;
 
-    for (i = 0; i < vm->def->ncachetunes; i++) {
+    for (i = 0; i < vm->def->nrestunes; i++) {
         if (virResctrlAllocCreate(caps->host.resctrl,
-                                  vm->def->cachetunes[i]->alloc,
+                                  vm->def->restunes[i]->alloc,
                                   priv->machineName) < 0)
             goto cleanup;
     }
@@ -5259,8 +5259,8 @@ qemuProcessSetupVcpu(virDomainObjPtr vm,
                             &vcpu->sched) < 0)
         return -1;
 
-    for (i = 0; i < vm->def->ncachetunes; i++) {
-        virDomainCachetuneDefPtr ct = vm->def->cachetunes[i];
+    for (i = 0; i < vm->def->nrestunes; i++) {
+        virDomainRestuneDefPtr ct = vm->def->restunes[i];
 
         if (virBitmapIsBitSet(ct->vcpus, vcpuid)) {
             if (virResctrlAllocAddPID(ct->alloc, vcpupid) < 0)
@@ -6955,8 +6955,8 @@ void qemuProcessStop(virQEMUDriverPtr driver,
     /* Remove resctrl allocation after cgroups are cleaned up which makes it
      * kind of safer (although removing the allocation should work even with
      * pids in tasks file */
-    for (i = 0; i < vm->def->ncachetunes; i++)
-        virResctrlAllocRemove(vm->def->cachetunes[i]->alloc);
+    for (i = 0; i < vm->def->nrestunes; i++)
+        virResctrlAllocRemove(vm->def->restunes[i]->alloc);
 
     qemuProcessRemoveDomainStatus(driver, vm);
 
@@ -7676,8 +7676,8 @@ qemuProcessReconnect(void *opaque)
     if (qemuConnectAgent(driver, obj) < 0)
         goto error;
 
-    for (i = 0; i < obj->def->ncachetunes; i++) {
-        if (virResctrlAllocDeterminePath(obj->def->cachetunes[i]->alloc,
+    for (i = 0; i < obj->def->nrestunes; i++) {
+        if (virResctrlAllocDeterminePath(obj->def->restunes[i]->alloc,
                                          priv->machineName) < 0)
             goto error;
     }
-- 
2.7.4

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [RFCv2 PATCH 3/5] conf: rename cachetune to restune
Posted by John Ferlan 7 years ago

On 06/15/2018 05:29 AM, bing.niu@intel.com wrote:
> From: Bing Niu <bing.niu@intel.com>
> 
> resctrl not only supports cache tuning, but also memory bandwidth
> tuning. Rename cachetune to restune(resource tuning) to reflect
> that.
> 
> Signed-off-by: Bing Niu <bing.niu@intel.com>
> ---
>  src/conf/domain_conf.c  | 44 ++++++++++++++++++++++----------------------
>  src/conf/domain_conf.h  | 10 +++++-----
>  src/qemu/qemu_process.c | 18 +++++++++---------
>  3 files changed, 36 insertions(+), 36 deletions(-)
> 

Not so sure I'm liking [R|r]estune[s]... Still @cachetunes is an array
of a structure that contains a vCPU bitmap and a virResctrlAllocPtr for
the /cputune/cachetunes data.  The virResctrlAllocPtr was changed to add
a the bandwidth allocation, so does this really have to change?

I think the question is - if both cachetunes and memtunes exist in
domain XML, then for which does the @vcpus relate to. That is, from the
cover there's:

    <memorytune vcpus='0-1'>
      <node id='0' bandwidth='20'/>
    </memorytune>

and then there's a

    <cachetune vcpus='0-3'>
      <cache id='0' level='3' type='both' size='3' unit='MiB'/>
      <cache id='1' level='3' type='both' size='3' unit='MiB'/>
    </cachetune>

Now what?  I haven't looked at patch 4 yet to even know how this "works".

I think what you want to do is create a virDomainMemtuneDef that mimics
virDomainCachetuneDef, but I'm not 100% sure.

Of course that still leaves me wondering how much of virResctrlAllocPtr
then becomes wasted. You chose an integration point, but essentially
have separate technologies. I'm trying to make sense of it all and am
starting to get lost in the process of doing that.

If @bandwidth can not be > 100... Can we duplicate the @id somehow? So
how does that free buffer that got 100% and then gets subtracted from
really work in this model? I probably need to study the code some more,
but it's not clear it requires using the "same" sort of logic that
cachetune uses where there could be different types (instructs, data, or
both) that would be drawing from the size, right? I think that's how
that worked.  So what's the corollary for memtunes? You set a range from
0 to 100, right and that's it.  OK, too many questions and it's late so
I'll stop here...  Hopefully I didn't leave any stray thoughts in the
review of patch 1 or 2.

John

> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> index 62c412e..b3543f3 100644
> --- a/src/conf/domain_conf.c
> +++ b/src/conf/domain_conf.c
> @@ -2950,14 +2950,14 @@ virDomainLoaderDefFree(virDomainLoaderDefPtr loader)
>  
>  
>  static void
> -virDomainCachetuneDefFree(virDomainCachetuneDefPtr cachetune)
> +virDomainRestuneDefFree(virDomainRestuneDefPtr restune)
>  {
> -    if (!cachetune)
> +    if (!restune)
>          return;
>  
> -    virObjectUnref(cachetune->alloc);
> -    virBitmapFree(cachetune->vcpus);
> -    VIR_FREE(cachetune);
> +    virObjectUnref(restune->alloc);
> +    virBitmapFree(restune->vcpus);
> +    VIR_FREE(restune);
>  }
>  
>  
> @@ -3147,9 +3147,9 @@ void virDomainDefFree(virDomainDefPtr def)
>          virDomainShmemDefFree(def->shmems[i]);
>      VIR_FREE(def->shmems);
>  
> -    for (i = 0; i < def->ncachetunes; i++)
> -        virDomainCachetuneDefFree(def->cachetunes[i]);
> -    VIR_FREE(def->cachetunes);
> +    for (i = 0; i < def->nrestunes; i++)
> +        virDomainRestuneDefFree(def->restunes[i]);
> +    VIR_FREE(def->restunes);
>  
>      VIR_FREE(def->keywrap);
>  
> @@ -18971,7 +18971,7 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
>      xmlNodePtr *nodes = NULL;
>      virBitmapPtr vcpus = NULL;
>      virResctrlAllocPtr alloc = virResctrlAllocNew();
> -    virDomainCachetuneDefPtr tmp_cachetune = NULL;
> +    virDomainRestuneDefPtr tmp_restune = NULL;
>      char *tmp = NULL;
>      char *vcpus_str = NULL;
>      char *alloc_id = NULL;
> @@ -18984,7 +18984,7 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
>      if (!alloc)
>          goto cleanup;
>  
> -    if (VIR_ALLOC(tmp_cachetune) < 0)
> +    if (VIR_ALLOC(tmp_restune) < 0)
>          goto cleanup;
>  
>      vcpus_str = virXMLPropString(node, "vcpus");
> @@ -19025,8 +19025,8 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
>          goto cleanup;
>      }
>  
> -    for (i = 0; i < def->ncachetunes; i++) {
> -        if (virBitmapOverlaps(def->cachetunes[i]->vcpus, vcpus)) {
> +    for (i = 0; i < def->nrestunes; i++) {
> +        if (virBitmapOverlaps(def->restunes[i]->vcpus, vcpus)) {
>              virReportError(VIR_ERR_XML_ERROR, "%s",
>                             _("Overlapping vcpus in cachetunes"));
>              goto cleanup;
> @@ -19056,16 +19056,16 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
>      if (virResctrlAllocSetID(alloc, alloc_id) < 0)
>          goto cleanup;
>  
> -    VIR_STEAL_PTR(tmp_cachetune->vcpus, vcpus);
> -    VIR_STEAL_PTR(tmp_cachetune->alloc, alloc);
> +    VIR_STEAL_PTR(tmp_restune->vcpus, vcpus);
> +    VIR_STEAL_PTR(tmp_restune->alloc, alloc);
>  
> -    if (VIR_APPEND_ELEMENT(def->cachetunes, def->ncachetunes, tmp_cachetune) < 0)
> +    if (VIR_APPEND_ELEMENT(def->restunes, def->nrestunes, tmp_restune) < 0)
>          goto cleanup;
>  
>      ret = 0;
>   cleanup:
>      ctxt->node = oldnode;
> -    virDomainCachetuneDefFree(tmp_cachetune);
> +    virDomainRestuneDefFree(tmp_restune);
>      virObjectUnref(alloc);
>      virBitmapFree(vcpus);
>      VIR_FREE(alloc_id);
> @@ -26874,7 +26874,7 @@ virDomainCachetuneDefFormatHelper(unsigned int level,
>  
>  static int
>  virDomainCachetuneDefFormat(virBufferPtr buf,
> -                            virDomainCachetuneDefPtr cachetune,
> +                            virDomainRestuneDefPtr restune,
>                              unsigned int flags)
>  {
>      virBuffer childrenBuf = VIR_BUFFER_INITIALIZER;
> @@ -26882,7 +26882,7 @@ virDomainCachetuneDefFormat(virBufferPtr buf,
>      int ret = -1;
>  
>      virBufferSetChildIndent(&childrenBuf, buf);
> -    virResctrlAllocForeachCache(cachetune->alloc,
> +    virResctrlAllocForeachCache(restune->alloc,
>                                 virDomainCachetuneDefFormatHelper,
>                                 &childrenBuf);
>  
> @@ -26895,14 +26895,14 @@ virDomainCachetuneDefFormat(virBufferPtr buf,
>          goto cleanup;
>      }
>  
> -    vcpus = virBitmapFormat(cachetune->vcpus);
> +    vcpus = virBitmapFormat(restune->vcpus);
>      if (!vcpus)
>          goto cleanup;
>  
>      virBufferAsprintf(buf, "<cachetune vcpus='%s'", vcpus);
>  
>      if (!(flags & VIR_DOMAIN_DEF_FORMAT_INACTIVE)) {
> -        const char *alloc_id = virResctrlAllocGetID(cachetune->alloc);
> +        const char *alloc_id = virResctrlAllocGetID(restune->alloc);
>          if (!alloc_id)
>              goto cleanup;
>  
> @@ -27023,8 +27023,8 @@ virDomainCputuneDefFormat(virBufferPtr buf,
>                                   def->iothreadids[i]->iothread_id);
>      }
>  
> -    for (i = 0; i < def->ncachetunes; i++)
> -        virDomainCachetuneDefFormat(&childrenBuf, def->cachetunes[i], flags);
> +    for (i = 0; i < def->nrestunes; i++)
> +        virDomainCachetuneDefFormat(&childrenBuf, def->restunes[i], flags);
>  
>      if (virBufferCheckError(&childrenBuf) < 0)
>          return -1;
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index 6344c02..9e71a0b 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -2228,10 +2228,10 @@ struct _virDomainCputune {
>  };
>  
>  
> -typedef struct _virDomainCachetuneDef virDomainCachetuneDef;
> -typedef virDomainCachetuneDef *virDomainCachetuneDefPtr;
> +typedef struct _virDomainRestuneDef virDomainRestuneDef;
> +typedef virDomainRestuneDef *virDomainRestuneDefPtr;
>  
> -struct _virDomainCachetuneDef {
> +struct _virDomainRestuneDef {
>      virBitmapPtr vcpus;
>      virResctrlAllocPtr alloc;
>  };
> @@ -2409,8 +2409,8 @@ struct _virDomainDef {
>  
>      virDomainCputune cputune;
>  
> -    virDomainCachetuneDefPtr *cachetunes;
> -    size_t ncachetunes;
> +    virDomainRestuneDefPtr *restunes;
> +    size_t nrestunes;
>  
>      virDomainNumaPtr numa;
>      virDomainResourceDefPtr resource;
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 93fd6ba..0659c06 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -2447,7 +2447,7 @@ qemuProcessResctrlCreate(virQEMUDriverPtr driver,
>      virCapsPtr caps = NULL;
>      qemuDomainObjPrivatePtr priv = vm->privateData;
>  
> -    if (!vm->def->ncachetunes)
> +    if (!vm->def->nrestunes)
>          return 0;
>  
>      /* Force capability refresh since resctrl info can change
> @@ -2456,9 +2456,9 @@ qemuProcessResctrlCreate(virQEMUDriverPtr driver,
>      if (!caps)
>          return -1;
>  
> -    for (i = 0; i < vm->def->ncachetunes; i++) {
> +    for (i = 0; i < vm->def->nrestunes; i++) {
>          if (virResctrlAllocCreate(caps->host.resctrl,
> -                                  vm->def->cachetunes[i]->alloc,
> +                                  vm->def->restunes[i]->alloc,
>                                    priv->machineName) < 0)
>              goto cleanup;
>      }
> @@ -5259,8 +5259,8 @@ qemuProcessSetupVcpu(virDomainObjPtr vm,
>                              &vcpu->sched) < 0)
>          return -1;
>  
> -    for (i = 0; i < vm->def->ncachetunes; i++) {
> -        virDomainCachetuneDefPtr ct = vm->def->cachetunes[i];
> +    for (i = 0; i < vm->def->nrestunes; i++) {
> +        virDomainRestuneDefPtr ct = vm->def->restunes[i];
>  
>          if (virBitmapIsBitSet(ct->vcpus, vcpuid)) {
>              if (virResctrlAllocAddPID(ct->alloc, vcpupid) < 0)
> @@ -6955,8 +6955,8 @@ void qemuProcessStop(virQEMUDriverPtr driver,
>      /* Remove resctrl allocation after cgroups are cleaned up which makes it
>       * kind of safer (although removing the allocation should work even with
>       * pids in tasks file */
> -    for (i = 0; i < vm->def->ncachetunes; i++)
> -        virResctrlAllocRemove(vm->def->cachetunes[i]->alloc);
> +    for (i = 0; i < vm->def->nrestunes; i++)
> +        virResctrlAllocRemove(vm->def->restunes[i]->alloc);
>  
>      qemuProcessRemoveDomainStatus(driver, vm);
>  
> @@ -7676,8 +7676,8 @@ qemuProcessReconnect(void *opaque)
>      if (qemuConnectAgent(driver, obj) < 0)
>          goto error;
>  
> -    for (i = 0; i < obj->def->ncachetunes; i++) {
> -        if (virResctrlAllocDeterminePath(obj->def->cachetunes[i]->alloc,
> +    for (i = 0; i < obj->def->nrestunes; i++) {
> +        if (virResctrlAllocDeterminePath(obj->def->restunes[i]->alloc,
>                                           priv->machineName) < 0)
>              goto error;
>      }
> 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [RFCv2 PATCH 3/5] conf: rename cachetune to restune
Posted by bing.niu 7 years ago
Hi John,
Thanks for reviewing! Since major questions are from this thread, so I 
think we can start from this.

On 2018年06月30日 06:47, John Ferlan wrote:
> 
> 
> On 06/15/2018 05:29 AM, bing.niu@intel.com wrote:
>> From: Bing Niu <bing.niu@intel.com>
>>
>> resctrl not only supports cache tuning, but also memory bandwidth
>> tuning. Rename cachetune to restune(resource tuning) to reflect
>> that.
>>
>> Signed-off-by: Bing Niu <bing.niu@intel.com>
>> ---
>>   src/conf/domain_conf.c  | 44 ++++++++++++++++++++++----------------------
>>   src/conf/domain_conf.h  | 10 +++++-----
>>   src/qemu/qemu_process.c | 18 +++++++++---------
>>   3 files changed, 36 insertions(+), 36 deletions(-)
>>
> 
> Not so sure I'm liking [R|r]estune[s]... Still @cachetunes is an array
> of a structure that contains a vCPU bitmap and a virResctrlAllocPtr for
> the /cputune/cachetunes data.  The virResctrlAllocPtr was changed to add
> a the bandwidth allocation, so does this really have to change?
> 
The cachetunes from Cache Allocation Technology(CAT) and memorytunes 
from Memory Bandwidth Allocation(MBA) are both features from Resource 
Directory Technology. RDT is a collection of resource distribution 
technology, right now it includes CAT and MBA. Details information can 
be found 17.18.1 of Intel's sdm.
https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf

The RDT capability and configuration methods is exposed to user land in 
form of resctrl file system by kernel. The basic programming model for 
this resctrl fs interface like this:

1. create a folder under /sys/fs/resctrl. And this folder is called one 
resctrl group.

2. writing user desired CAT and MBA config to the file 
/sys/fs/resctrl/<YOUR_GROUP_NAME>/schemata to allocate the cache or 
memory bandwidth. you can set CAT and MBA together or any of them.

3. moving the threads you want to that resctrl group by writing threads' 
PID to /sys/fs/resctrl/<YOUR_GROUP_NAME>/task file. So that those 
threads can be assign with the resource allocated in step 2. And those 
resources are shared by those threads.

*NOTE*: A thread only can be put in one resctrl group. This requirement 
is from how RDT and resctrl works.
Each resctrl group has a closid. The resource configuration in above 
step2 is set to that closid's msr(IA32_L?_QoS) to describe the resource 
allocation for this closid.
And thread use this closid to claim the allocated resource during 
context switch(scheduled_in()) in kernel by writing the closid to the 
msr IA32_PQR_ASSOC.
With closid wrote in IA32_PQR_ASSOC, RDT hardware knows current running 
closid and allocated resource basing on this closid's IA32_L?_QoS msr.
That why a thread can be put into one restrcl group only.

Details description of resctrl can be found:
https://github.com/torvalds/linux/blob/v4.17/Documentation/x86/intel_rdt_ui.txt

:)

Basing on above information, my understanding is that virResctrlAllocPtr 
is not only bind to cachetunes. It should be a backing object to 
describe a resctrl group. Especially current virresctrl class already 
works as that.
Libvirt will create one resctrl group for each virResctrlAllocPtr by 
virResctrlAllocCreate() interface.

So from components view, the big picture is something like below.


                <memorytune ^cpus='0-3'>
                    +---------+
                              |    <cachetune vcpus='0-3'>
        XML                   |           +
       parser                 +-----------+
                              |
                              |
                 +------------------------------+
                              |
                              |
internal resctrl      +------v--------------+
backing object        |  virResctrlAllocPtr |
                       +------+--------------+
                              |
                              |
                 +------------------------------+
                              |
                           +--v-------+
                           |          |
                           | schemata |
          /sys/fs/resctrl  | tasks    |
                           |   .      |
                           |   .      |
                           |          |
                           +----------+

Does this make sense? :)

> I think the question is - if both cachetunes and memtunes exist in
> domain XML, then for which does the @vcpus relate to. That is, from the
> cover there's:
> 
>      <memorytune vcpus='0-1'>
>        <node id='0' bandwidth='20'/>
>      </memorytune>
> 
> and then there's a
> 
>      <cachetune vcpus='0-3'>
>        <cache id='0' level='3' type='both' size='3' unit='MiB'/>
>        <cache id='1' level='3' type='both' size='3' unit='MiB'/>
>      </cachetune>
> 
> Now what?  I haven't looked at patch 4 yet to even know how this "works".

I also raised this concern in the first round review. Ideally, if we can 
have a domain XML like this

<resourcetune vcpu='0-3'>
      <memorytune>
         <node id='0' bandwidth='20'/>
      </memorytune>
       <cachetune>
         <cache id='0' level='3' type='both' size='3' unit='MiB'/>
         <cache id='1' level='3' type='both' size='3' unit='MiB'/>
       </cachetune>
<resourcetune>

That will be more clear. Above domain XML means: one resctrl group with 
memory bandwidth 20 @node 0, L3 cache 3MB @ node0 and node 1. Put vcpu 
'0-3' to this resctrl group. So those resources are shared among vcpus.
However, Pavel pointed out that we must keep backward compatible. And we 
need keep <cachetune vcpus='0-3'>.

In RFC v1, I also proposed to put memory bandwidth into cachetunes 
section like:
   <cachetune vcpus='0-3'>
     <cache id='0' level='3' type='both' size='3' unit='MiB'/>
     <node id='0' bandwidth='20'/>
   <cachetune vcpus>

Maintainer also comments on this "it's not a clear XML definition", 
describing memory bandwidth inside a cachetune section. That's why we 
come with a separated memorytune section.
In your example, the current implement will stop creating resctrl group 
and throw an error. vcpu list cannot overlap between memorytune and 
cachetune, but they can be same. Since vcpu can be put into one resctrl 
group only.

My understanding is that we can set using <resourcetune> to describe 
cachetune and memorytune together as a long term goal. Since that needs 
to deprecate cachetune's vcpu list. I am not sure about procedure to 
remove existing XML elements. Maybe you can point out.
For the short term goal, we can use this separated <memorytune 
vcpus='0-1'> section. How do you think? :)


> I think what you want to do is create a virDomainMemtuneDef that mimics
> virDomainCachetuneDef, but I'm not 100% sure.
> 
> Of course that still leaves me wondering how much of virResctrlAllocPtr
> then becomes wasted. You chose an integration point, but essentially
> have separate technologies. I'm trying to make sense of it all and am
> starting to get lost in the process of doing that.

 From resctrl group perspective, I don't think extend memory bandwidth 
information in virResctrlAllocPtr is a waste. I think this is a feature 
extending.  :)

> 
> If @bandwidth can not be > 100... Can we duplicate the @id somehow? So
Something like
  <cachetune vcpus='0-3'>
         <cache id='0' level='3' type='both' size='3' unit='MiB'/>
         <node id='0' bandwidth='20'/>
  </cachetune>

or

  <cachetune vcpus='0-3'>
         <cache id='0' level='3' type='both' size='3' unit='MiB' 
bandwidth='20'/>
  </cachetune>
???
That also works. This is something I did in RFC v1. But there is a 
comment said it's better not to mix memory bandwidth with cachetune. 
That may confuse people. :(

> how does that free buffer that got 100% and then gets subtracted from
> really work in this model? I probably need to study the code some more,
> but it's not clear it requires using the "same" sort of logic that
> cachetune uses where there could be different types (instructs, data, or
> both) that would be drawing from the size, right? I think that's how
> that worked.  So what's the corollary for memtunes? You set a range from
> 0 to 100, right and that's it.  OK, too many questions and it's late so
> I'll stop here...  Hopefully I didn't leave any stray thoughts in the
> review of patch 1 or 2.

The policy we used for memory bandwidth is consistent with the existing 
cachetune. Current cache allocation logic model forbids cache allocation 
overlap. This is to prevent resource contention I think. Same strategy 
on memory allocation part. Since total bandwidth beyond to 100 will lead 
to bandwidth competition.

Existing cache allocation logic achieves this by first populating 
virResctrlInfoPtr structure. Libvirt will fill the virResctrlInfoPtr 
structure during traverse host's cache hierarchy. When allocating cache 
by virResctrlAllocCreate, libvirt will first create a virResctrlAllocPtr 
basing on information from virResctrlInfoPtr. This virResctrlAllocPtr 
represents the maximum resource available(I mean no any allocation 
previously). Then libvirt traverse all resctrl groups (in 
/sys/fs/resctrl/)created already, subtract the resource they claimed. So 
the left resource is the resource free. Libvirt compares this free 
resource with resource requested in virResctrlAllocCreate. If enough 
free resource available, then new resctrl group will be created.
Memory bandwidth part follows the same idea, only the difference is 
memory bandwidth use percentage number and cache use bitmap.

Bing
> 
> John
> 
>> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
>> index 62c412e..b3543f3 100644
>> --- a/src/conf/domain_conf.c
>> +++ b/src/conf/domain_conf.c
>> @@ -2950,14 +2950,14 @@ virDomainLoaderDefFree(virDomainLoaderDefPtr loader)
>>   
>>   
>>   static void
>> -virDomainCachetuneDefFree(virDomainCachetuneDefPtr cachetune)
>> +virDomainRestuneDefFree(virDomainRestuneDefPtr restune)
>>   {
>> -    if (!cachetune)
>> +    if (!restune)
>>           return;
>>   
>> -    virObjectUnref(cachetune->alloc);
>> -    virBitmapFree(cachetune->vcpus);
>> -    VIR_FREE(cachetune);
>> +    virObjectUnref(restune->alloc);
>> +    virBitmapFree(restune->vcpus);
>> +    VIR_FREE(restune);
>>   }
>>   
>>   
>> @@ -3147,9 +3147,9 @@ void virDomainDefFree(virDomainDefPtr def)
>>           virDomainShmemDefFree(def->shmems[i]);
>>       VIR_FREE(def->shmems);
>>   
>> -    for (i = 0; i < def->ncachetunes; i++)
>> -        virDomainCachetuneDefFree(def->cachetunes[i]);
>> -    VIR_FREE(def->cachetunes);
>> +    for (i = 0; i < def->nrestunes; i++)
>> +        virDomainRestuneDefFree(def->restunes[i]);
>> +    VIR_FREE(def->restunes);
>>   
>>       VIR_FREE(def->keywrap);
>>   
>> @@ -18971,7 +18971,7 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
>>       xmlNodePtr *nodes = NULL;
>>       virBitmapPtr vcpus = NULL;
>>       virResctrlAllocPtr alloc = virResctrlAllocNew();
>> -    virDomainCachetuneDefPtr tmp_cachetune = NULL;
>> +    virDomainRestuneDefPtr tmp_restune = NULL;
>>       char *tmp = NULL;
>>       char *vcpus_str = NULL;
>>       char *alloc_id = NULL;
>> @@ -18984,7 +18984,7 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
>>       if (!alloc)
>>           goto cleanup;
>>   
>> -    if (VIR_ALLOC(tmp_cachetune) < 0)
>> +    if (VIR_ALLOC(tmp_restune) < 0)
>>           goto cleanup;
>>   
>>       vcpus_str = virXMLPropString(node, "vcpus");
>> @@ -19025,8 +19025,8 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
>>           goto cleanup;
>>       }
>>   
>> -    for (i = 0; i < def->ncachetunes; i++) {
>> -        if (virBitmapOverlaps(def->cachetunes[i]->vcpus, vcpus)) {
>> +    for (i = 0; i < def->nrestunes; i++) {
>> +        if (virBitmapOverlaps(def->restunes[i]->vcpus, vcpus)) {
>>               virReportError(VIR_ERR_XML_ERROR, "%s",
>>                              _("Overlapping vcpus in cachetunes"));
>>               goto cleanup;
>> @@ -19056,16 +19056,16 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
>>       if (virResctrlAllocSetID(alloc, alloc_id) < 0)
>>           goto cleanup;
>>   
>> -    VIR_STEAL_PTR(tmp_cachetune->vcpus, vcpus);
>> -    VIR_STEAL_PTR(tmp_cachetune->alloc, alloc);
>> +    VIR_STEAL_PTR(tmp_restune->vcpus, vcpus);
>> +    VIR_STEAL_PTR(tmp_restune->alloc, alloc);
>>   
>> -    if (VIR_APPEND_ELEMENT(def->cachetunes, def->ncachetunes, tmp_cachetune) < 0)
>> +    if (VIR_APPEND_ELEMENT(def->restunes, def->nrestunes, tmp_restune) < 0)
>>           goto cleanup;
>>   
>>       ret = 0;
>>    cleanup:
>>       ctxt->node = oldnode;
>> -    virDomainCachetuneDefFree(tmp_cachetune);
>> +    virDomainRestuneDefFree(tmp_restune);
>>       virObjectUnref(alloc);
>>       virBitmapFree(vcpus);
>>       VIR_FREE(alloc_id);
>> @@ -26874,7 +26874,7 @@ virDomainCachetuneDefFormatHelper(unsigned int level,
>>   
>>   static int
>>   virDomainCachetuneDefFormat(virBufferPtr buf,
>> -                            virDomainCachetuneDefPtr cachetune,
>> +                            virDomainRestuneDefPtr restune,
>>                               unsigned int flags)
>>   {
>>       virBuffer childrenBuf = VIR_BUFFER_INITIALIZER;
>> @@ -26882,7 +26882,7 @@ virDomainCachetuneDefFormat(virBufferPtr buf,
>>       int ret = -1;
>>   
>>       virBufferSetChildIndent(&childrenBuf, buf);
>> -    virResctrlAllocForeachCache(cachetune->alloc,
>> +    virResctrlAllocForeachCache(restune->alloc,
>>                                  virDomainCachetuneDefFormatHelper,
>>                                  &childrenBuf);
>>   
>> @@ -26895,14 +26895,14 @@ virDomainCachetuneDefFormat(virBufferPtr buf,
>>           goto cleanup;
>>       }
>>   
>> -    vcpus = virBitmapFormat(cachetune->vcpus);
>> +    vcpus = virBitmapFormat(restune->vcpus);
>>       if (!vcpus)
>>           goto cleanup;
>>   
>>       virBufferAsprintf(buf, "<cachetune vcpus='%s'", vcpus);
>>   
>>       if (!(flags & VIR_DOMAIN_DEF_FORMAT_INACTIVE)) {
>> -        const char *alloc_id = virResctrlAllocGetID(cachetune->alloc);
>> +        const char *alloc_id = virResctrlAllocGetID(restune->alloc);
>>           if (!alloc_id)
>>               goto cleanup;
>>   
>> @@ -27023,8 +27023,8 @@ virDomainCputuneDefFormat(virBufferPtr buf,
>>                                    def->iothreadids[i]->iothread_id);
>>       }
>>   
>> -    for (i = 0; i < def->ncachetunes; i++)
>> -        virDomainCachetuneDefFormat(&childrenBuf, def->cachetunes[i], flags);
>> +    for (i = 0; i < def->nrestunes; i++)
>> +        virDomainCachetuneDefFormat(&childrenBuf, def->restunes[i], flags);
>>   
>>       if (virBufferCheckError(&childrenBuf) < 0)
>>           return -1;
>> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
>> index 6344c02..9e71a0b 100644
>> --- a/src/conf/domain_conf.h
>> +++ b/src/conf/domain_conf.h
>> @@ -2228,10 +2228,10 @@ struct _virDomainCputune {
>>   };
>>   
>>   
>> -typedef struct _virDomainCachetuneDef virDomainCachetuneDef;
>> -typedef virDomainCachetuneDef *virDomainCachetuneDefPtr;
>> +typedef struct _virDomainRestuneDef virDomainRestuneDef;
>> +typedef virDomainRestuneDef *virDomainRestuneDefPtr;
>>   
>> -struct _virDomainCachetuneDef {
>> +struct _virDomainRestuneDef {
>>       virBitmapPtr vcpus;
>>       virResctrlAllocPtr alloc;
>>   };
>> @@ -2409,8 +2409,8 @@ struct _virDomainDef {
>>   
>>       virDomainCputune cputune;
>>   
>> -    virDomainCachetuneDefPtr *cachetunes;
>> -    size_t ncachetunes;
>> +    virDomainRestuneDefPtr *restunes;
>> +    size_t nrestunes;
>>   
>>       virDomainNumaPtr numa;
>>       virDomainResourceDefPtr resource;
>> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
>> index 93fd6ba..0659c06 100644
>> --- a/src/qemu/qemu_process.c
>> +++ b/src/qemu/qemu_process.c
>> @@ -2447,7 +2447,7 @@ qemuProcessResctrlCreate(virQEMUDriverPtr driver,
>>       virCapsPtr caps = NULL;
>>       qemuDomainObjPrivatePtr priv = vm->privateData;
>>   
>> -    if (!vm->def->ncachetunes)
>> +    if (!vm->def->nrestunes)
>>           return 0;
>>   
>>       /* Force capability refresh since resctrl info can change
>> @@ -2456,9 +2456,9 @@ qemuProcessResctrlCreate(virQEMUDriverPtr driver,
>>       if (!caps)
>>           return -1;
>>   
>> -    for (i = 0; i < vm->def->ncachetunes; i++) {
>> +    for (i = 0; i < vm->def->nrestunes; i++) {
>>           if (virResctrlAllocCreate(caps->host.resctrl,
>> -                                  vm->def->cachetunes[i]->alloc,
>> +                                  vm->def->restunes[i]->alloc,
>>                                     priv->machineName) < 0)
>>               goto cleanup;
>>       }
>> @@ -5259,8 +5259,8 @@ qemuProcessSetupVcpu(virDomainObjPtr vm,
>>                               &vcpu->sched) < 0)
>>           return -1;
>>   
>> -    for (i = 0; i < vm->def->ncachetunes; i++) {
>> -        virDomainCachetuneDefPtr ct = vm->def->cachetunes[i];
>> +    for (i = 0; i < vm->def->nrestunes; i++) {
>> +        virDomainRestuneDefPtr ct = vm->def->restunes[i];
>>   
>>           if (virBitmapIsBitSet(ct->vcpus, vcpuid)) {
>>               if (virResctrlAllocAddPID(ct->alloc, vcpupid) < 0)
>> @@ -6955,8 +6955,8 @@ void qemuProcessStop(virQEMUDriverPtr driver,
>>       /* Remove resctrl allocation after cgroups are cleaned up which makes it
>>        * kind of safer (although removing the allocation should work even with
>>        * pids in tasks file */
>> -    for (i = 0; i < vm->def->ncachetunes; i++)
>> -        virResctrlAllocRemove(vm->def->cachetunes[i]->alloc);
>> +    for (i = 0; i < vm->def->nrestunes; i++)
>> +        virResctrlAllocRemove(vm->def->restunes[i]->alloc);
>>   
>>       qemuProcessRemoveDomainStatus(driver, vm);
>>   
>> @@ -7676,8 +7676,8 @@ qemuProcessReconnect(void *opaque)
>>       if (qemuConnectAgent(driver, obj) < 0)
>>           goto error;
>>   
>> -    for (i = 0; i < obj->def->ncachetunes; i++) {
>> -        if (virResctrlAllocDeterminePath(obj->def->cachetunes[i]->alloc,
>> +    for (i = 0; i < obj->def->nrestunes; i++) {
>> +        if (virResctrlAllocDeterminePath(obj->def->restunes[i]->alloc,
>>                                            priv->machineName) < 0)
>>               goto error;
>>       }
>>
> 
> --
> libvir-list mailing list
> libvir-list@redhat.com
> https://www.redhat.com/mailman/listinfo/libvir-list
> 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [RFCv2 PATCH 3/5] conf: rename cachetune to restune
Posted by John Ferlan 6 years, 12 months ago

On 07/03/2018 12:10 AM, bing.niu wrote:
> Hi John,
> Thanks for reviewing! Since major questions are from this thread, so I
> think we can start from this.
> 
> On 2018年06月30日 06:47, John Ferlan wrote:
>>
>>
>> On 06/15/2018 05:29 AM, bing.niu@intel.com wrote:
>>> From: Bing Niu <bing.niu@intel.com>
>>>
>>> resctrl not only supports cache tuning, but also memory bandwidth
>>> tuning. Rename cachetune to restune(resource tuning) to reflect
>>> that.
>>>
>>> Signed-off-by: Bing Niu <bing.niu@intel.com>
>>> ---
>>>   src/conf/domain_conf.c  | 44
>>> ++++++++++++++++++++++----------------------
>>>   src/conf/domain_conf.h  | 10 +++++-----
>>>   src/qemu/qemu_process.c | 18 +++++++++---------
>>>   3 files changed, 36 insertions(+), 36 deletions(-)
>>>
>>
>> Not so sure I'm liking [R|r]estune[s]... Still @cachetunes is an array
>> of a structure that contains a vCPU bitmap and a virResctrlAllocPtr for
>> the /cputune/cachetunes data.  The virResctrlAllocPtr was changed to add
>> a the bandwidth allocation, so does this really have to change?
>>
> The cachetunes from Cache Allocation Technology(CAT) and memorytunes
> from Memory Bandwidth Allocation(MBA) are both features from Resource
> Directory Technology. RDT is a collection of resource distribution
> technology, right now it includes CAT and MBA. Details information can
> be found 17.18.1 of Intel's sdm.
> https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf
> 

tl;dr :-)

So some day soon it'll be more complicated and have even more rules?!?!

> 
> The RDT capability and configuration methods is exposed to user land in
> form of resctrl file system by kernel. The basic programming model for
> this resctrl fs interface like this:
> 
> 1. create a folder under /sys/fs/resctrl. And this folder is called one
> resctrl group.
> 
> 2. writing user desired CAT and MBA config to the file
> /sys/fs/resctrl/<YOUR_GROUP_NAME>/schemata to allocate the cache or
> memory bandwidth. you can set CAT and MBA together or any of them.
> 
> 3. moving the threads you want to that resctrl group by writing threads'
> PID to /sys/fs/resctrl/<YOUR_GROUP_NAME>/task file. So that those
> threads can be assign with the resource allocated in step 2. And those
> resources are shared by those threads.
> 
> *NOTE*: A thread only can be put in one resctrl group. This requirement
> is from how RDT and resctrl works.

IOW: A vCPU or the vCPUs being used by CAT and MBA would need to be the
same because in "this world" the group *is* the single vCPU or the range
of vCPU's.

> Each resctrl group has a closid. The resource configuration in above
> step2 is set to that closid's msr(IA32_L?_QoS) to describe the resource
> allocation for this closid.
> And thread use this closid to claim the allocated resource during
> context switch(scheduled_in()) in kernel by writing the closid to the
> msr IA32_PQR_ASSOC.
> With closid wrote in IA32_PQR_ASSOC, RDT hardware knows current running
> closid and allocated resource basing on this closid's IA32_L?_QoS msr.
> That why a thread can be put into one restrcl group only.
> 
> Details description of resctrl can be found:
> https://github.com/torvalds/linux/blob/v4.17/Documentation/x86/intel_rdt_ui.txt
> 
> 
> :)

More lengthy things that I didn't read...  Thanks for the pointers. I
think keeping track of all the rules and relationships is going to be a
real "challenge".

> 
> Basing on above information, my understanding is that virResctrlAllocPtr
> is not only bind to cachetunes. It should be a backing object to
> describe a resctrl group. Especially current virresctrl class already
> works as that.
> Libvirt will create one resctrl group for each virResctrlAllocPtr by
> virResctrlAllocCreate() interface.
> 
> So from components view, the big picture is something like below.

Yay, pictures!  Thanks for taking the time.

> 
> 
>                <memorytune ^cpus='0-3'>
>                    +---------+
>                              |    <cachetune vcpus='0-3'>
>        XML                   |           +
>       parser                 +-----------+
>                              |
>                              |
>                 +------------------------------+
>                              |
>                              |
> internal resctrl      +------v--------------+
> backing object        |  virResctrlAllocPtr |
>                       +------+--------------+
>                              |
>                              |
>                 +------------------------------+
>                              |
>                           +--v-------+
>                           |          |
>                           | schemata |
>          /sys/fs/resctrl  | tasks    |
>                           |   .      |
>                           |   .      |
>                           |          |
>                           +----------+
> 
> Does this make sense? :)
> 

Yes and I think further has me believe that the vCPUs in this case are
the groups and perhaps becomes the internal "key" for all this. I didn't
go look, but have to assume multiple cachetune entries create multiple
groups into which the vCPU's for the cachetune entries are placed. So
far it's been easy because there's no way to overlap cachetune vCPU
values (my assumption, I didn't look at the code).

Now we add memtune and dictate that if it uses a vCPU or set of vCPUs
that it either matches the existing cachetune entries or is itself
unique and doesn't overlap any cachetune.

Assume the following:

  <cachetune vcpus='0-1'>
  <cachetune vcpus='2-4'>
  <cachetune vcpus='5-7'>

A valid memtune must also be '0-1', '2-4', 5-7', or any new grouping
from 8->maxvcpus, such as '8-11'.

Conversely, if <memtune vcpus='0-4'> was used, that's invalid. Similarly
<memtune vcpus='3'> would be invalid. For the former, it's crossing two
boundaries and for the latter it's "part of" some other boundary.

Hopefully that's right...

>> I think the question is - if both cachetunes and memtunes exist in
>> domain XML, then for which does the @vcpus relate to. That is, from the
>> cover there's:
>>
>>      <memorytune vcpus='0-1'>
>>        <node id='0' bandwidth='20'/>
>>      </memorytune>
>>
>> and then there's a
>>
>>      <cachetune vcpus='0-3'>
>>        <cache id='0' level='3' type='both' size='3' unit='MiB'/>
>>        <cache id='1' level='3' type='both' size='3' unit='MiB'/>
>>      </cachetune>
>>
>> Now what?  I haven't looked at patch 4 yet to even know how this "works".
> 
> I also raised this concern in the first round review. Ideally, if we can
> have a domain XML like this
> 
> <resourcetune vcpu='0-3'>
>      <memorytune>
>         <node id='0' bandwidth='20'/>
>      </memorytune>
>       <cachetune>
>         <cache id='0' level='3' type='both' size='3' unit='MiB'/>
>         <cache id='1' level='3' type='both' size='3' unit='MiB'/>
>       </cachetune>
> <resourcetune>
> 
> That will be more clear. Above domain XML means: one resctrl group with
> memory bandwidth 20 @node 0, L3 cache 3MB @ node0 and node 1. Put vcpu
> '0-3' to this resctrl group. So those resources are shared among vcpus.
> However, Pavel pointed out that we must keep backward compatible. And we
> need keep <cachetune vcpus='0-3'>.

I think I was right above based on this part...

Unfortunately forward thinking about memorytune didn't happen, so what
you propose above is what I think "internally" once we've read the
<cachetune> and <memorytune> elements we end up with. And yes, it makes
sense why you renamed things now and why the vcpus are essentially
reused between the two.

Not a fan of restunes, RsrcDir is closer, but not a perfect shorter name
the ResourceDirectory. Also rather than essentially cut-copy-paste for
the vcpus logic into memtunes from cachetunes, that should be extracted
into it's own helper.


> 
> In RFC v1, I also proposed to put memory bandwidth into cachetunes
> section like:
>   <cachetune vcpus='0-3'>
>     <cache id='0' level='3' type='both' size='3' unit='MiB'/>
>     <node id='0' bandwidth='20'/>
>   <cachetune vcpus>
> 
> Maintainer also comments on this "it's not a clear XML definition",
> describing memory bandwidth inside a cachetune section. That's why we
> come with a separated memorytune section.

I didn't go back to the RFC when I reviewed. I tried to have a fresh
look. After scanning Pavel's comments now though, I do agree having a
<node> inside a <cachetune> to go along with a <cache> element is a bit
confusing. But I do now see where the "math" is for the total bandwidth
that can be used - it's the per node value.  Something that I didn't see
during my first pass.  Of course I may have realized that had the
example in the commit had more than 1 vCPU and 1 node.

> In your example, the current implement will stop creating resctrl group
> and throw an error. vcpu list cannot overlap between memorytune and
> cachetune, but they can be same. Since vcpu can be put into one resctrl
> group only.
> 
> My understanding is that we can set using <resourcetune> to describe
> cachetune and memorytune together as a long term goal. Since that needs
> to deprecate cachetune's vcpu list. I am not sure about procedure to
> remove existing XML elements. Maybe you can point out.
> For the short term goal, we can use this separated <memorytune
> vcpus='0-1'> section. How do you think? :)
> 

There is no deprecation possible.  You will always have to support the
cachetune as a direct child of cputune. If you "fix" things up such that
you introduce some other means to have a cputune/resourcetune/cachetune
working before you introduce memtune as a child of resourcetune, then
that works. But you'll have to know whether you read cputune/cachetune
and then format out in that manner.  See how the <auth> element has been
handled as a "child" of the disk element originally, but now possible as
a child of the source element of the disk.  It's possible, but messy.

> 
>> I think what you want to do is create a virDomainMemtuneDef that mimics
>> virDomainCachetuneDef, but I'm not 100% sure.
>>
>> Of course that still leaves me wondering how much of virResctrlAllocPtr
>> then becomes wasted. You chose an integration point, but essentially
>> have separate technologies. I'm trying to make sense of it all and am
>> starting to get lost in the process of doing that.
> 
> From resctrl group perspective, I don't think extend memory bandwidth
> information in virResctrlAllocPtr is a waste. I think this is a feature
> extending.  :)
> 

Now that the picture is becoming clearer, I can see your point.

>>
>> If @bandwidth can not be > 100... Can we duplicate the @id somehow? So
> Something like
>  <cachetune vcpus='0-3'>
>         <cache id='0' level='3' type='both' size='3' unit='MiB'/>
>         <node id='0' bandwidth='20'/>
>  </cachetune>
> 
> or
> 
>  <cachetune vcpus='0-3'>
>         <cache id='0' level='3' type='both' size='3' unit='MiB'
> bandwidth='20'/>
>  </cachetune>
> ???
> That also works. This is something I did in RFC v1. But there is a
> comment said it's better not to mix memory bandwidth with cachetune.
> That may confuse people. :(
> 

Right - as noted. I didn't read that originally. I jumped into a review
for this series mainly because no one else had started reviewing it.

>> how does that free buffer that got 100% and then gets subtracted from
>> really work in this model? I probably need to study the code some more,
>> but it's not clear it requires using the "same" sort of logic that
>> cachetune uses where there could be different types (instructs, data, or
>> both) that would be drawing from the size, right? I think that's how
>> that worked.  So what's the corollary for memtunes? You set a range from
>> 0 to 100, right and that's it.  OK, too many questions and it's late so
>> I'll stop here...  Hopefully I didn't leave any stray thoughts in the
>> review of patch 1 or 2.
> 
> The policy we used for memory bandwidth is consistent with the existing
> cachetune. Current cache allocation logic model forbids cache allocation
> overlap. This is to prevent resource contention I think. Same strategy
> on memory allocation part. Since total bandwidth beyond to 100 will lead
> to bandwidth competition.

I think I've explained what I missed above... Essentially your example
had 1 vCPU and thus 1 node and so it didn't make sense about the 100%.
Of course if there were 3 vCPU's and 3 nodes each having some
percentage, yeah then it would have been more obvious.

My suggestion, let's try to get through reworking how the cachetunes
data is stored. Some sort of common API to "add" a new vCPU's value that
can be used by cachetune now and eventually memtune later. The vCPUs
being the obvious key rather than cachetune.

Fair warning - I have some days out of the office coming up over the
next week or so.

John

> 
> Existing cache allocation logic achieves this by first populating
> virResctrlInfoPtr structure. Libvirt will fill the virResctrlInfoPtr
> structure during traverse host's cache hierarchy. When allocating cache
> by virResctrlAllocCreate, libvirt will first create a virResctrlAllocPtr
> basing on information from virResctrlInfoPtr. This virResctrlAllocPtr
> represents the maximum resource available(I mean no any allocation
> previously). Then libvirt traverse all resctrl groups (in
> /sys/fs/resctrl/)created already, subtract the resource they claimed. So
> the left resource is the resource free. Libvirt compares this free
> resource with resource requested in virResctrlAllocCreate. If enough
> free resource available, then new resctrl group will be created.
> Memory bandwidth part follows the same idea, only the difference is
> memory bandwidth use percentage number and cache use bitmap.
> 
> Bing
>>
>> John
>>
>>> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
>>> index 62c412e..b3543f3 100644
>>> --- a/src/conf/domain_conf.c
>>> +++ b/src/conf/domain_conf.c
>>> @@ -2950,14 +2950,14 @@ virDomainLoaderDefFree(virDomainLoaderDefPtr
>>> loader)
>>>       static void
>>> -virDomainCachetuneDefFree(virDomainCachetuneDefPtr cachetune)
>>> +virDomainRestuneDefFree(virDomainRestuneDefPtr restune)
>>>   {
>>> -    if (!cachetune)
>>> +    if (!restune)
>>>           return;
>>>   -    virObjectUnref(cachetune->alloc);
>>> -    virBitmapFree(cachetune->vcpus);
>>> -    VIR_FREE(cachetune);
>>> +    virObjectUnref(restune->alloc);
>>> +    virBitmapFree(restune->vcpus);
>>> +    VIR_FREE(restune);
>>>   }
>>>     @@ -3147,9 +3147,9 @@ void virDomainDefFree(virDomainDefPtr def)
>>>           virDomainShmemDefFree(def->shmems[i]);
>>>       VIR_FREE(def->shmems);
>>>   -    for (i = 0; i < def->ncachetunes; i++)
>>> -        virDomainCachetuneDefFree(def->cachetunes[i]);
>>> -    VIR_FREE(def->cachetunes);
>>> +    for (i = 0; i < def->nrestunes; i++)
>>> +        virDomainRestuneDefFree(def->restunes[i]);
>>> +    VIR_FREE(def->restunes);
>>>         VIR_FREE(def->keywrap);
>>>   @@ -18971,7 +18971,7 @@ virDomainCachetuneDefParse(virDomainDefPtr
>>> def,
>>>       xmlNodePtr *nodes = NULL;
>>>       virBitmapPtr vcpus = NULL;
>>>       virResctrlAllocPtr alloc = virResctrlAllocNew();
>>> -    virDomainCachetuneDefPtr tmp_cachetune = NULL;
>>> +    virDomainRestuneDefPtr tmp_restune = NULL;
>>>       char *tmp = NULL;
>>>       char *vcpus_str = NULL;
>>>       char *alloc_id = NULL;
>>> @@ -18984,7 +18984,7 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
>>>       if (!alloc)
>>>           goto cleanup;
>>>   -    if (VIR_ALLOC(tmp_cachetune) < 0)
>>> +    if (VIR_ALLOC(tmp_restune) < 0)
>>>           goto cleanup;
>>>         vcpus_str = virXMLPropString(node, "vcpus");
>>> @@ -19025,8 +19025,8 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
>>>           goto cleanup;
>>>       }
>>>   -    for (i = 0; i < def->ncachetunes; i++) {
>>> -        if (virBitmapOverlaps(def->cachetunes[i]->vcpus, vcpus)) {
>>> +    for (i = 0; i < def->nrestunes; i++) {
>>> +        if (virBitmapOverlaps(def->restunes[i]->vcpus, vcpus)) {
>>>               virReportError(VIR_ERR_XML_ERROR, "%s",
>>>                              _("Overlapping vcpus in cachetunes"));
>>>               goto cleanup;
>>> @@ -19056,16 +19056,16 @@ virDomainCachetuneDefParse(virDomainDefPtr
>>> def,
>>>       if (virResctrlAllocSetID(alloc, alloc_id) < 0)
>>>           goto cleanup;
>>>   -    VIR_STEAL_PTR(tmp_cachetune->vcpus, vcpus);
>>> -    VIR_STEAL_PTR(tmp_cachetune->alloc, alloc);
>>> +    VIR_STEAL_PTR(tmp_restune->vcpus, vcpus);
>>> +    VIR_STEAL_PTR(tmp_restune->alloc, alloc);
>>>   -    if (VIR_APPEND_ELEMENT(def->cachetunes, def->ncachetunes,
>>> tmp_cachetune) < 0)
>>> +    if (VIR_APPEND_ELEMENT(def->restunes, def->nrestunes,
>>> tmp_restune) < 0)
>>>           goto cleanup;
>>>         ret = 0;
>>>    cleanup:
>>>       ctxt->node = oldnode;
>>> -    virDomainCachetuneDefFree(tmp_cachetune);
>>> +    virDomainRestuneDefFree(tmp_restune);
>>>       virObjectUnref(alloc);
>>>       virBitmapFree(vcpus);
>>>       VIR_FREE(alloc_id);
>>> @@ -26874,7 +26874,7 @@ virDomainCachetuneDefFormatHelper(unsigned
>>> int level,
>>>     static int
>>>   virDomainCachetuneDefFormat(virBufferPtr buf,
>>> -                            virDomainCachetuneDefPtr cachetune,
>>> +                            virDomainRestuneDefPtr restune,
>>>                               unsigned int flags)
>>>   {
>>>       virBuffer childrenBuf = VIR_BUFFER_INITIALIZER;
>>> @@ -26882,7 +26882,7 @@ virDomainCachetuneDefFormat(virBufferPtr buf,
>>>       int ret = -1;
>>>         virBufferSetChildIndent(&childrenBuf, buf);
>>> -    virResctrlAllocForeachCache(cachetune->alloc,
>>> +    virResctrlAllocForeachCache(restune->alloc,
>>>                                  virDomainCachetuneDefFormatHelper,
>>>                                  &childrenBuf);
>>>   @@ -26895,14 +26895,14 @@ virDomainCachetuneDefFormat(virBufferPtr
>>> buf,
>>>           goto cleanup;
>>>       }
>>>   -    vcpus = virBitmapFormat(cachetune->vcpus);
>>> +    vcpus = virBitmapFormat(restune->vcpus);
>>>       if (!vcpus)
>>>           goto cleanup;
>>>         virBufferAsprintf(buf, "<cachetune vcpus='%s'", vcpus);
>>>         if (!(flags & VIR_DOMAIN_DEF_FORMAT_INACTIVE)) {
>>> -        const char *alloc_id = virResctrlAllocGetID(cachetune->alloc);
>>> +        const char *alloc_id = virResctrlAllocGetID(restune->alloc);
>>>           if (!alloc_id)
>>>               goto cleanup;
>>>   @@ -27023,8 +27023,8 @@ virDomainCputuneDefFormat(virBufferPtr buf,
>>>                                    def->iothreadids[i]->iothread_id);
>>>       }
>>>   -    for (i = 0; i < def->ncachetunes; i++)
>>> -        virDomainCachetuneDefFormat(&childrenBuf,
>>> def->cachetunes[i], flags);
>>> +    for (i = 0; i < def->nrestunes; i++)
>>> +        virDomainCachetuneDefFormat(&childrenBuf, def->restunes[i],
>>> flags);
>>>         if (virBufferCheckError(&childrenBuf) < 0)
>>>           return -1;
>>> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
>>> index 6344c02..9e71a0b 100644
>>> --- a/src/conf/domain_conf.h
>>> +++ b/src/conf/domain_conf.h
>>> @@ -2228,10 +2228,10 @@ struct _virDomainCputune {
>>>   };
>>>     -typedef struct _virDomainCachetuneDef virDomainCachetuneDef;
>>> -typedef virDomainCachetuneDef *virDomainCachetuneDefPtr;
>>> +typedef struct _virDomainRestuneDef virDomainRestuneDef;
>>> +typedef virDomainRestuneDef *virDomainRestuneDefPtr;
>>>   -struct _virDomainCachetuneDef {
>>> +struct _virDomainRestuneDef {
>>>       virBitmapPtr vcpus;
>>>       virResctrlAllocPtr alloc;
>>>   };
>>> @@ -2409,8 +2409,8 @@ struct _virDomainDef {
>>>         virDomainCputune cputune;
>>>   -    virDomainCachetuneDefPtr *cachetunes;
>>> -    size_t ncachetunes;
>>> +    virDomainRestuneDefPtr *restunes;
>>> +    size_t nrestunes;
>>>         virDomainNumaPtr numa;
>>>       virDomainResourceDefPtr resource;
>>> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
>>> index 93fd6ba..0659c06 100644
>>> --- a/src/qemu/qemu_process.c
>>> +++ b/src/qemu/qemu_process.c
>>> @@ -2447,7 +2447,7 @@ qemuProcessResctrlCreate(virQEMUDriverPtr driver,
>>>       virCapsPtr caps = NULL;
>>>       qemuDomainObjPrivatePtr priv = vm->privateData;
>>>   -    if (!vm->def->ncachetunes)
>>> +    if (!vm->def->nrestunes)
>>>           return 0;
>>>         /* Force capability refresh since resctrl info can change
>>> @@ -2456,9 +2456,9 @@ qemuProcessResctrlCreate(virQEMUDriverPtr driver,
>>>       if (!caps)
>>>           return -1;
>>>   -    for (i = 0; i < vm->def->ncachetunes; i++) {
>>> +    for (i = 0; i < vm->def->nrestunes; i++) {
>>>           if (virResctrlAllocCreate(caps->host.resctrl,
>>> -                                  vm->def->cachetunes[i]->alloc,
>>> +                                  vm->def->restunes[i]->alloc,
>>>                                     priv->machineName) < 0)
>>>               goto cleanup;
>>>       }
>>> @@ -5259,8 +5259,8 @@ qemuProcessSetupVcpu(virDomainObjPtr vm,
>>>                               &vcpu->sched) < 0)
>>>           return -1;
>>>   -    for (i = 0; i < vm->def->ncachetunes; i++) {
>>> -        virDomainCachetuneDefPtr ct = vm->def->cachetunes[i];
>>> +    for (i = 0; i < vm->def->nrestunes; i++) {
>>> +        virDomainRestuneDefPtr ct = vm->def->restunes[i];
>>>             if (virBitmapIsBitSet(ct->vcpus, vcpuid)) {
>>>               if (virResctrlAllocAddPID(ct->alloc, vcpupid) < 0)
>>> @@ -6955,8 +6955,8 @@ void qemuProcessStop(virQEMUDriverPtr driver,
>>>       /* Remove resctrl allocation after cgroups are cleaned up which
>>> makes it
>>>        * kind of safer (although removing the allocation should work
>>> even with
>>>        * pids in tasks file */
>>> -    for (i = 0; i < vm->def->ncachetunes; i++)
>>> -        virResctrlAllocRemove(vm->def->cachetunes[i]->alloc);
>>> +    for (i = 0; i < vm->def->nrestunes; i++)
>>> +        virResctrlAllocRemove(vm->def->restunes[i]->alloc);
>>>         qemuProcessRemoveDomainStatus(driver, vm);
>>>   @@ -7676,8 +7676,8 @@ qemuProcessReconnect(void *opaque)
>>>       if (qemuConnectAgent(driver, obj) < 0)
>>>           goto error;
>>>   -    for (i = 0; i < obj->def->ncachetunes; i++) {
>>> -        if
>>> (virResctrlAllocDeterminePath(obj->def->cachetunes[i]->alloc,
>>> +    for (i = 0; i < obj->def->nrestunes; i++) {
>>> +        if (virResctrlAllocDeterminePath(obj->def->restunes[i]->alloc,
>>>                                            priv->machineName) < 0)
>>>               goto error;
>>>       }
>>>
>>
>> -- 
>> libvir-list mailing list
>> libvir-list@redhat.com
>> https://www.redhat.com/mailman/listinfo/libvir-list
>>

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [RFCv2 PATCH 3/5] conf: rename cachetune to restune
Posted by bing.niu 6 years, 12 months ago

On 2018年07月06日 06:40, John Ferlan wrote:
> 
> 
> On 07/03/2018 12:10 AM, bing.niu wrote:
>> Hi John,
>> Thanks for reviewing! Since major questions are from this thread, so I
>> think we can start from this.
>>
>> On 2018年06月30日 06:47, John Ferlan wrote:
>>>
>>>
>>> On 06/15/2018 05:29 AM, bing.niu@intel.com wrote:
>>>> From: Bing Niu <bing.niu@intel.com>
>>>>
>>>> resctrl not only supports cache tuning, but also memory bandwidth
>>>> tuning. Rename cachetune to restune(resource tuning) to reflect
>>>> that.
>>>>
>>>> Signed-off-by: Bing Niu <bing.niu@intel.com>
>>>> ---
>>>>    src/conf/domain_conf.c  | 44
>>>> ++++++++++++++++++++++----------------------
>>>>    src/conf/domain_conf.h  | 10 +++++-----
>>>>    src/qemu/qemu_process.c | 18 +++++++++---------
>>>>    3 files changed, 36 insertions(+), 36 deletions(-)
>>>>
>>>
>>> Not so sure I'm liking [R|r]estune[s]... Still @cachetunes is an array
>>> of a structure that contains a vCPU bitmap and a virResctrlAllocPtr for
>>> the /cputune/cachetunes data.  The virResctrlAllocPtr was changed to add
>>> a the bandwidth allocation, so does this really have to change?
>>>
>> The cachetunes from Cache Allocation Technology(CAT) and memorytunes
>> from Memory Bandwidth Allocation(MBA) are both features from Resource
>> Directory Technology. RDT is a collection of resource distribution
>> technology, right now it includes CAT and MBA. Details information can
>> be found 17.18.1 of Intel's sdm.
>> https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf
>>
> 
> tl;dr :-)
> 
> So some day soon it'll be more complicated and have even more rules?!?!
Hopefully not, and Intel do backward compatible well.:)
So overall programing module should be steady.

> 
>>
>> The RDT capability and configuration methods is exposed to user land in
>> form of resctrl file system by kernel. The basic programming model for
>> this resctrl fs interface like this:
>>
>> 1. create a folder under /sys/fs/resctrl. And this folder is called one
>> resctrl group.
>>
>> 2. writing user desired CAT and MBA config to the file
>> /sys/fs/resctrl/<YOUR_GROUP_NAME>/schemata to allocate the cache or
>> memory bandwidth. you can set CAT and MBA together or any of them.
>>
>> 3. moving the threads you want to that resctrl group by writing threads'
>> PID to /sys/fs/resctrl/<YOUR_GROUP_NAME>/task file. So that those
>> threads can be assign with the resource allocated in step 2. And those
>> resources are shared by those threads.
>>
>> *NOTE*: A thread only can be put in one resctrl group. This requirement
>> is from how RDT and resctrl works.
> 
> IOW: A vCPU or the vCPUs being used by CAT and MBA would need to be the
> same because in "this world" the group *is* the single vCPU or the range
> of vCPU's.

Yup! vCPU actually is a thread of QEMU from host view.
> 
>> Each resctrl group has a closid. The resource configuration in above
>> step2 is set to that closid's msr(IA32_L?_QoS) to describe the resource
>> allocation for this closid.
>> And thread use this closid to claim the allocated resource during
>> context switch(scheduled_in()) in kernel by writing the closid to the
>> msr IA32_PQR_ASSOC.
>> With closid wrote in IA32_PQR_ASSOC, RDT hardware knows current running
>> closid and allocated resource basing on this closid's IA32_L?_QoS msr.
>> That why a thread can be put into one restrcl group only.
>>
>> Details description of resctrl can be found:
>> https://github.com/torvalds/linux/blob/v4.17/Documentation/x86/intel_rdt_ui.txt
>>
>>
>> :)
> 
> More lengthy things that I didn't read...  Thanks for the pointers. I
> think keeping track of all the rules and relationships is going to be a
> real "challenge".

Yes, that is a real "challenge". However, kernel modules usually give a 
consistent and unified interface. That will help a lot. We should be 
able to support new features by extending the existing logic.
> 
>>
>> Basing on above information, my understanding is that virResctrlAllocPtr
>> is not only bind to cachetunes. It should be a backing object to
>> describe a resctrl group. Especially current virresctrl class already
>> works as that.
>> Libvirt will create one resctrl group for each virResctrlAllocPtr by
>> virResctrlAllocCreate() interface.
>>
>> So from components view, the big picture is something like below.
> 
> Yay, pictures!  Thanks for taking the time.
> 
>>
>>
>>                 <memorytune ^cpus='0-3'>
>>                     +---------+
>>                               |    <cachetune vcpus='0-3'>
>>         XML                   |           +
>>        parser                 +-----------+
>>                               |
>>                               |
>>                  +------------------------------+
>>                               |
>>                               |
>> internal resctrl      +------v--------------+
>> backing object        |  virResctrlAllocPtr |
>>                        +------+--------------+
>>                               |
>>                               |
>>                  +------------------------------+
>>                               |
>>                            +--v-------+
>>                            |          |
>>                            | schemata |
>>           /sys/fs/resctrl  | tasks    |
>>                            |   .      |
>>                            |   .      |
>>                            |          |
>>                            +----------+
>>
>> Does this make sense? :)
>>
> 
> Yes and I think further has me believe that the vCPUs in this case are
> the groups and perhaps becomes the internal "key" for all this. I didn't
> go look, but have to assume multiple cachetune entries create multiple
> groups into which the vCPU's for the cachetune entries are placed. So
> far it's been easy because there's no way to overlap cachetune vCPU
> values (my assumption, I didn't look at the code).
> 
> Now we add memtune and dictate that if it uses a vCPU or set of vCPUs
> that it either matches the existing cachetune entries or is itself
> unique and doesn't overlap any cachetune.
> 
> Assume the following:
> 
>    <cachetune vcpus='0-1'>
>    <cachetune vcpus='2-4'>
>    <cachetune vcpus='5-7'>
> 
> A valid memtune must also be '0-1', '2-4', 5-7', or any new grouping
> from 8->maxvcpus, such as '8-11'.
> 
> Conversely, if <memtune vcpus='0-4'> was used, that's invalid. Similarly
> <memtune vcpus='3'> would be invalid. For the former, it's crossing two
> boundaries and for the latter it's "part of" some other boundary.
> 
> Hopefully that's right...
You are definitely right! That is how we implement in 
virDomainMemorytuneDefParse() of [RFCv2 PATCH 4/5] like below.

+    for (i = 0; i < def->nrestunes; i++) {
+        /* vcpus group has been created, directly use the existing one.
+         * Just updating memory allocation information of that group
+         * */
+        if (virBitmapEqual(def->restunes[i]->vcpus, vcpus)) {
+            alloc = def->restunes[i]->alloc;
+            break;
+        }
+
+        /* vcpus can not overlapping */
+        if (virBitmapOverlaps(def->restunes[i]->vcpus, vcpus)) {
+            virReportError(VIR_ERR_XML_ERROR, "%s",
+                           _("Overlapping vcpus in memorytunes"));
+            goto cleanup;
+        }
+    }
+    if (!alloc) {
+        alloc = virResctrlAllocNew();
+        new_alloc = true;
+    }

> 
>>> I think the question is - if both cachetunes and memtunes exist in
>>> domain XML, then for which does the @vcpus relate to. That is, from the
>>> cover there's:
>>>
>>>       <memorytune vcpus='0-1'>
>>>         <node id='0' bandwidth='20'/>
>>>       </memorytune>
>>>
>>> and then there's a
>>>
>>>       <cachetune vcpus='0-3'>
>>>         <cache id='0' level='3' type='both' size='3' unit='MiB'/>
>>>         <cache id='1' level='3' type='both' size='3' unit='MiB'/>
>>>       </cachetune>
>>>
>>> Now what?  I haven't looked at patch 4 yet to even know how this "works".
>>
>> I also raised this concern in the first round review. Ideally, if we can
>> have a domain XML like this
>>
>> <resourcetune vcpu='0-3'>
>>       <memorytune>
>>          <node id='0' bandwidth='20'/>
>>       </memorytune>
>>        <cachetune>
>>          <cache id='0' level='3' type='both' size='3' unit='MiB'/>
>>          <cache id='1' level='3' type='both' size='3' unit='MiB'/>
>>        </cachetune>
>> <resourcetune>
>>
>> That will be more clear. Above domain XML means: one resctrl group with
>> memory bandwidth 20 @node 0, L3 cache 3MB @ node0 and node 1. Put vcpu
>> '0-3' to this resctrl group. So those resources are shared among vcpus.
>> However, Pavel pointed out that we must keep backward compatible. And we
>> need keep <cachetune vcpus='0-3'>.
> 
> I think I was right above based on this part...
> 
> Unfortunately forward thinking about memorytune didn't happen, so what
> you propose above is what I think "internally" once we've read the
> <cachetune> and <memorytune> elements we end up with. And yes, it makes
> sense why you renamed things now and why the vcpus are essentially
> reused between the two.
Indeed. It's hard to keep space for feature extension without enough 
information. Introducing a parent section <resourcetune> for the single 
child <cachetune> without <memorytune> also confuses people.

> 
> Not a fan of restunes, RsrcDir is closer, but not a perfect shorter name
> the ResourceDirectory. Also rather than essentially cut-copy-paste for
> the vcpus logic into memtunes from cachetunes, that should be extracted
> into it's own helper.
> 
OK. I will pack vcpus logic into a helper function in next version.

>>
>> In RFC v1, I also proposed to put memory bandwidth into cachetunes
>> section like:
>>    <cachetune vcpus='0-3'>
>>      <cache id='0' level='3' type='both' size='3' unit='MiB'/>
>>      <node id='0' bandwidth='20'/>
>>    <cachetune vcpus>
>>
>> Maintainer also comments on this "it's not a clear XML definition",
>> describing memory bandwidth inside a cachetune section. That's why we
>> come with a separated memorytune section.
> 
> I didn't go back to the RFC when I reviewed. I tried to have a fresh
> look. After scanning Pavel's comments now though, I do agree having a
> <node> inside a <cachetune> to go along with a <cache> element is a bit
> confusing. But I do now see where the "math" is for the total bandwidth
> that can be used - it's the per node value.  Something that I didn't see
> during my first pass.  Of course I may have realized that had the
> example in the commit had more than 1 vCPU and 1 node.
Great! So we all agree to make <memtune> a separated session.
Beside more than 1 vCPU and 1 node, there is also the other possibility: 
put the host supporting threads (QEMU I/O thread and monitor thread) to 
the other group, So that memory bandwidth can be distributed among vCPUs 
and host supporting threads.
I will put the previous review link in the cover letter for easy 
reference in future.
> 
>> In your example, the current implement will stop creating resctrl group
>> and throw an error. vcpu list cannot overlap between memorytune and
>> cachetune, but they can be same. Since vcpu can be put into one resctrl
>> group only.
>>
>> My understanding is that we can set using <resourcetune> to describe
>> cachetune and memorytune together as a long term goal. Since that needs
>> to deprecate cachetune's vcpu list. I am not sure about procedure to
>> remove existing XML elements. Maybe you can point out. 
>> For the short term goal, we can use this separated <memorytune
>> vcpus='0-1'> section. How do you think? :)
>>
> 
> There is no deprecation possible.  You will always have to support the
> cachetune as a direct child of cputune. If you "fix" things up such that
> you introduce some other means to have a cputune/resourcetune/cachetune
> working before you introduce memtune as a child of resourcetune, then
> that works. But you'll have to know whether you read cputune/cachetune
> and then format out in that manner.  See how the <auth> element has been
> handled as a "child" of the disk element originally, but now possible as
> a child of the source element of the disk.  It's possible, but messy.
hmmm..., just learn the <auth> part. That cross reference between 
disk/auth and source/auth does make code complex and not so 
straightforward.
So better we keep cputune/cachetune and introduce cputune/memtune.

> 
>>
>>> I think what you want to do is create a virDomainMemtuneDef that mimics
>>> virDomainCachetuneDef, but I'm not 100% sure.
>>>
>>> Of course that still leaves me wondering how much of virResctrlAllocPtr
>>> then becomes wasted. You chose an integration point, but essentially
>>> have separate technologies. I'm trying to make sense of it all and am
>>> starting to get lost in the process of doing that.
>>
>>  From resctrl group perspective, I don't think extend memory bandwidth
>> information in virResctrlAllocPtr is a waste. I think this is a feature
>> extending.  :)
>>
> 
> Now that the picture is becoming clearer, I can see your point.
Great~ :)
> 
>>>
>>> If @bandwidth can not be > 100... Can we duplicate the @id somehow? So
>> Something like
>>   <cachetune vcpus='0-3'>
>>          <cache id='0' level='3' type='both' size='3' unit='MiB'/>
>>          <node id='0' bandwidth='20'/>
>>   </cachetune>
>>
>> or
>>
>>   <cachetune vcpus='0-3'>
>>          <cache id='0' level='3' type='both' size='3' unit='MiB'
>> bandwidth='20'/>
>>   </cachetune>
>> ???
>> That also works. This is something I did in RFC v1. But there is a
>> comment said it's better not to mix memory bandwidth with cachetune.
>> That may confuse people. :(
>>
> 
> Right - as noted. I didn't read that originally. I jumped into a review
> for this series mainly because no one else had started reviewing it.
Thanks for jumping in. I wait too long a time. :(
Your comments are absolutely helpful.

> 
>>> how does that free buffer that got 100% and then gets subtracted from
>>> really work in this model? I probably need to study the code some more,
>>> but it's not clear it requires using the "same" sort of logic that
>>> cachetune uses where there could be different types (instructs, data, or
>>> both) that would be drawing from the size, right? I think that's how
>>> that worked.  So what's the corollary for memtunes? You set a range from
>>> 0 to 100, right and that's it.  OK, too many questions and it's late so
>>> I'll stop here...  Hopefully I didn't leave any stray thoughts in the
>>> review of patch 1 or 2.
>>
>> The policy we used for memory bandwidth is consistent with the existing
>> cachetune. Current cache allocation logic model forbids cache allocation
>> overlap. This is to prevent resource contention I think. Same strategy
>> on memory allocation part. Since total bandwidth beyond to 100 will lead
>> to bandwidth competition.
> 
> I think I've explained what I missed above... Essentially your example
> had 1 vCPU and thus 1 node and so it didn't make sense about the 100%.
> Of course if there were 3 vCPU's and 3 nodes each having some
> percentage, yeah then it would have been more obvious.
> 
> My suggestion, let's try to get through reworking how the cachetunes
> data is stored. Some sort of common API to "add" a new vCPU's value that
> can be used by cachetune now and eventually memtune later. The vCPUs
> being the obvious key rather than cachetune.
The current cachetunes data is stored in a pointer based fashion. The 
advantage of that is NULL pointer means no cachetuen information for 
particular level/type cache. And vCPU is stored in virDomainCachetuneDef 
of domain_conf. And overlapping testing happens in domain_conf part.The 
reason vCPU not part of virresctrl is properly because domain_conf need 
this information to formate domain's XML to store.
As your suggested, I changed this virDomainCachetuneDef --> 
virDomainRestuneDef in this patch. Since it will hold information both 
cachetune and memtune. And yes, we can revisit virresctrl to extract 
common part of cachetune and memtune.
> 
> Fair warning - I have some days out of the office coming up over the
> next week or so.
Thanks for let me know this. I think we align together basing on above 
discuss. I will reply your comment in the PATH1 PATH2 and PATH4, and 
revise. Especially docs/schemas/*.rng, you pointed out. At the same 
time, we can see whether Pavel will give comment also.  ;)

Bing
> 
> John
> 
>>
>> Existing cache allocation logic achieves this by first populating
>> virResctrlInfoPtr structure. Libvirt will fill the virResctrlInfoPtr
>> structure during traverse host's cache hierarchy. When allocating cache
>> by virResctrlAllocCreate, libvirt will first create a virResctrlAllocPtr
>> basing on information from virResctrlInfoPtr. This virResctrlAllocPtr
>> represents the maximum resource available(I mean no any allocation
>> previously). Then libvirt traverse all resctrl groups (in
>> /sys/fs/resctrl/)created already, subtract the resource they claimed. So
>> the left resource is the resource free. Libvirt compares this free
>> resource with resource requested in virResctrlAllocCreate. If enough
>> free resource available, then new resctrl group will be created.
>> Memory bandwidth part follows the same idea, only the difference is
>> memory bandwidth use percentage number and cache use bitmap.
>>
>> Bing
>>>
>>> John
>>>
[.....]

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list