Currently the QEMU driver has three ways of setting up cgroups. It either
skips them entirely (if non-root), or uses systemd-machined, or uses
cgroups directly.
It is further possible to register directly with systemd and bypass
machined. We don't support this by systemd-nsspawn does and we ought
to.
This change adds ability to configure the mechanism for registering
resources between all these options explicitly. via
<resource register="none|direct|machined|systemd"/>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
---
src/conf/domain_conf.c | 42 ++++++++++++++++++++++-------
src/conf/domain_conf.h | 12 +++++++++
src/libvirt_private.syms | 2 ++
src/lxc/lxc_cgroup.c | 34 ++++++++++++++++++++++++
src/lxc/lxc_cgroup.h | 3 +++
src/lxc/lxc_process.c | 11 ++++----
src/qemu/qemu_cgroup.c | 69 +++++++++++++++++++++++++++++++++++++++++-------
src/util/vircgroup.c | 55 ++++++++++++++++++++++++--------------
src/util/vircgroup.h | 10 ++++++-
9 files changed, 194 insertions(+), 44 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 9a62bc472c..fb8e7a0ec7 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -910,6 +910,14 @@ VIR_ENUM_IMPL(virDomainHPTResizing,
"required",
);
+VIR_ENUM_IMPL(virDomainResourceRegister,
+ VIR_DOMAIN_RESOURCE_REGISTER_LAST,
+ "default",
+ "none",
+ "cgroup",
+ "machined",
+ "systemd");
+
/* Internal mapping: subset of block job types that can be present in
* <mirror> XML (remaining types are not two-phase). */
VIR_ENUM_DECL(virDomainBlockJob)
@@ -17698,16 +17706,20 @@ virDomainResourceDefParse(xmlNodePtr node,
{
virDomainResourceDefPtr def = NULL;
xmlNodePtr tmp = ctxt->node;
+ char *reg;
ctxt->node = node;
if (VIR_ALLOC(def) < 0)
goto error;
- /* Find out what type of virtualization to use */
- if (!(def->partition = virXPathString("string(./partition)", ctxt))) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- "%s", _("missing resource partition attribute"));
+ def->partition = virXPathString("string(./partition)", ctxt);
+
+ reg = virXMLPropString(node, "register");
+ if (reg != NULL &&
+ (def->reg = virDomainResourceRegisterTypeFromString(reg)) <= 0) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ "%s", _("Invalid register attribute"));
goto error;
}
@@ -25568,11 +25580,23 @@ static void
virDomainResourceDefFormat(virBufferPtr buf,
virDomainResourceDefPtr def)
{
- virBufferAddLit(buf, "<resource>\n");
- virBufferAdjustIndent(buf, 2);
- virBufferEscapeString(buf, "<partition>%s</partition>\n", def->partition);
- virBufferAdjustIndent(buf, -2);
- virBufferAddLit(buf, "</resource>\n");
+ if (def->reg == VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT &&
+ def->partition == NULL)
+ return;
+
+ virBufferAddLit(buf, "<resource");
+ if (def->reg != VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT)
+ virBufferAsprintf(buf, " register='%s'", virDomainResourceRegisterTypeToString(def->reg));
+
+ if (def->partition) {
+ virBufferAddLit(buf, ">\n");
+ virBufferAdjustIndent(buf, 2);
+ virBufferEscapeString(buf, "<partition>%s</partition>\n", def->partition);
+ virBufferAdjustIndent(buf, -2);
+ virBufferAddLit(buf, "</resource>\n");
+ } else {
+ virBufferAddLit(buf, "/>\n");
+ }
}
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 6f7f96b3dd..a7a6628a36 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -2145,9 +2145,20 @@ struct _virDomainPanicDef {
void virBlkioDeviceArrayClear(virBlkioDevicePtr deviceWeights,
int ndevices);
+typedef enum {
+ VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT,
+ VIR_DOMAIN_RESOURCE_REGISTER_NONE,
+ VIR_DOMAIN_RESOURCE_REGISTER_CGROUP,
+ VIR_DOMAIN_RESOURCE_REGISTER_MACHINED,
+ VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD,
+
+ VIR_DOMAIN_RESOURCE_REGISTER_LAST,
+} virDomainResourceRegister;
+
typedef struct _virDomainResourceDef virDomainResourceDef;
typedef virDomainResourceDef *virDomainResourceDefPtr;
struct _virDomainResourceDef {
+ int reg; /* enum virDomainResourceRegister */
char *partition;
};
@@ -3325,6 +3336,7 @@ VIR_ENUM_DECL(virDomainMemorySource)
VIR_ENUM_DECL(virDomainMemoryAllocation)
VIR_ENUM_DECL(virDomainIOMMUModel)
VIR_ENUM_DECL(virDomainShmemModel)
+VIR_ENUM_DECL(virDomainResourceRegister)
/* from libvirt.h */
VIR_ENUM_DECL(virDomainState)
VIR_ENUM_DECL(virDomainNostateReason)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index d5c3b9abb5..a0fde65dba 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -489,6 +489,8 @@ virDomainRedirdevBusTypeToString;
virDomainRedirdevDefFind;
virDomainRedirdevDefFree;
virDomainRedirdevDefRemove;
+virDomainResourceRegisterTypeFromString;
+virDomainResourceRegisterTypeToString;
virDomainRNGBackendTypeToString;
virDomainRNGDefFree;
virDomainRNGFind;
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 3369801870..7bd479df1b 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -478,6 +478,35 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
return ret;
}
+int virLXCCgroupMode(virDomainResourceRegister reg,
+ virCgroupRegister *cgreg)
+{
+ switch (reg) {
+ case VIR_DOMAIN_RESOURCE_REGISTER_NONE:
+ goto unsupported;
+ case VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT:
+ *cgreg = VIR_CGROUP_REGISTER_DEFAULT;
+ break;
+ case VIR_DOMAIN_RESOURCE_REGISTER_MACHINED:
+ *cgreg = VIR_CGROUP_REGISTER_MACHINED;
+ break;
+ case VIR_DOMAIN_RESOURCE_REGISTER_CGROUP:
+ *cgreg = VIR_CGROUP_REGISTER_DIRECT;
+ break;
+ case VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD:
+ default:
+ goto unsupported;
+ }
+
+ return 0;
+
+ unsupported:
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Resource register '%s' not available"),
+ virDomainResourceRegisterTypeToString(reg));
+ return -1;
+}
+
virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def,
pid_t initpid,
@@ -485,11 +514,15 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def,
int *nicindexes)
{
virCgroupPtr cgroup = NULL;
+ virCgroupRegister reg;
char *machineName = virLXCDomainGetMachineName(def, 0);
if (!machineName)
goto cleanup;
+ if (virLXCCgroupMode(def->resource->reg, ®) < 0)
+ goto cleanup;
+
if (def->resource->partition[0] != '/') {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("Resource partition '%s' must start with '/'"),
@@ -504,6 +537,7 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def,
initpid,
true,
nnicindexes, nicindexes,
+ ®,
def->resource->partition,
-1,
&cgroup) < 0)
diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h
index e85f21c47d..979d6a154b 100644
--- a/src/lxc/lxc_cgroup.h
+++ b/src/lxc/lxc_cgroup.h
@@ -27,6 +27,9 @@
# include "lxc_fuse.h"
# include "virusb.h"
+int virLXCCgroupMode(virDomainResourceRegister reg,
+ virCgroupRegister *cgreg);
+
virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def,
pid_t initpid,
size_t nnicindexes,
diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c
index efd8a69000..24aa0cb0bf 100644
--- a/src/lxc/lxc_process.c
+++ b/src/lxc/lxc_process.c
@@ -1166,6 +1166,7 @@ virLXCProcessEnsureRootFS(virDomainObjPtr vm)
return -1;
}
+
/**
* virLXCProcessStart:
* @conn: pointer to connection
@@ -1260,13 +1261,13 @@ int virLXCProcessStart(virConnectPtr conn,
if (VIR_ALLOC(res) < 0)
goto cleanup;
- if (VIR_STRDUP(res->partition, "/machine") < 0) {
- VIR_FREE(res);
- goto cleanup;
- }
-
vm->def->resource = res;
}
+ if (vm->def->resource->reg != VIR_DOMAIN_RESOURCE_REGISTER_NONE &&
+ !vm->def->resource->partition) {
+ if (VIR_STRDUP(vm->def->resource->partition, "/machine") < 0)
+ goto cleanup;
+ }
if (virAsprintf(&logfile, "%s/%s.log",
cfg->logDir, vm->def->name) < 0)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 19252ea239..5167d7fee1 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -834,6 +834,46 @@ qemuSetupCpuCgroup(virDomainObjPtr vm)
}
+static int qemuGetCgroupMode(virDomainObjPtr vm,
+ virDomainResourceRegister reg,
+ virCgroupRegister *cgreg)
+{
+ qemuDomainObjPrivatePtr priv = vm->privateData;
+ bool avail = virQEMUDriverIsPrivileged(priv->driver) &&
+ virCgroupAvailable();
+
+ switch (reg) {
+ case VIR_DOMAIN_RESOURCE_REGISTER_NONE:
+ return 0;
+ case VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT:
+ if (!avail)
+ return 0;
+ *cgreg = VIR_CGROUP_REGISTER_DEFAULT;
+ break;
+ case VIR_DOMAIN_RESOURCE_REGISTER_MACHINED:
+ if (!avail)
+ goto unsupported;
+ *cgreg = VIR_CGROUP_REGISTER_MACHINED;
+ break;
+ case VIR_DOMAIN_RESOURCE_REGISTER_CGROUP:
+ if (!avail)
+ goto unsupported;
+ *cgreg = VIR_CGROUP_REGISTER_DIRECT;
+ break;
+ case VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD:
+ default:
+ goto unsupported;
+ }
+
+ return 1;
+
+ unsupported:
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Resource register '%s' not available"),
+ virDomainResourceRegisterTypeToString(reg));
+ return -1;
+}
+
static int
qemuInitCgroup(virDomainObjPtr vm,
size_t nnicindexes,
@@ -842,11 +882,17 @@ qemuInitCgroup(virDomainObjPtr vm,
int ret = -1;
qemuDomainObjPrivatePtr priv = vm->privateData;
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(priv->driver);
+ virCgroupRegister reg;
+ int rv;
- if (!virQEMUDriverIsPrivileged(priv->driver))
- goto done;
-
- if (!virCgroupAvailable())
+ rv = qemuGetCgroupMode(vm,
+ vm->def->resource ?
+ vm->def->resource->reg :
+ VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT,
+ ®);
+ if (rv < 0)
+ goto cleanup;
+ if (rv == 0)
goto done;
virCgroupFree(&priv->cgroup);
@@ -857,13 +903,12 @@ qemuInitCgroup(virDomainObjPtr vm,
if (VIR_ALLOC(res) < 0)
goto cleanup;
- if (VIR_STRDUP(res->partition, "/machine") < 0) {
- VIR_FREE(res);
- goto cleanup;
- }
-
vm->def->resource = res;
}
+ if (!vm->def->resource->partition) {
+ if (VIR_STRDUP(vm->def->resource->partition, "/machine") < 0)
+ goto cleanup;
+ }
if (vm->def->resource->partition[0] != '/') {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
@@ -879,6 +924,7 @@ qemuInitCgroup(virDomainObjPtr vm,
vm->pid,
false,
nnicindexes, nicindexes,
+ ®,
vm->def->resource->partition,
cfg->cgroupControllers,
&priv->cgroup) < 0) {
@@ -980,6 +1026,11 @@ qemuConnectCgroup(virDomainObjPtr vm)
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(priv->driver);
int ret = -1;
+ if (vm->def->resource &&
+ vm->def->resource->reg == VIR_DOMAIN_RESOURCE_REGISTER_NONE) {
+ goto done;
+ }
+
if (!virQEMUDriverIsPrivileged(priv->driver))
goto done;
diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
index 0a31947b0d..07ffb78c78 100644
--- a/src/util/vircgroup.c
+++ b/src/util/vircgroup.c
@@ -1733,6 +1733,7 @@ virCgroupNewMachine(const char *name,
bool isContainer,
size_t nnicindexes,
int *nicindexes,
+ virCgroupRegister *reg,
const char *partition,
int controllers,
virCgroupPtr *group)
@@ -1741,28 +1742,42 @@ virCgroupNewMachine(const char *name,
*group = NULL;
- if ((rv = virCgroupNewMachineSystemd(name,
- drivername,
- uuid,
- rootdir,
- pidleader,
- isContainer,
- nnicindexes,
- nicindexes,
- partition,
- controllers,
- group)) == 0)
- return 0;
+ if (*reg == VIR_CGROUP_REGISTER_DEFAULT ||
+ *reg == VIR_CGROUP_REGISTER_MACHINED) {
+ if ((rv = virCgroupNewMachineSystemd(name,
+ drivername,
+ uuid,
+ rootdir,
+ pidleader,
+ isContainer,
+ nnicindexes,
+ nicindexes,
+ partition,
+ controllers,
+ group)) == 0) {
+ *reg = VIR_CGROUP_REGISTER_MACHINED;
+ return 0;
+ }
- if (rv == -1)
- return -1;
+ if (rv == -1)
+ return -1;
+
+ if (*reg == VIR_CGROUP_REGISTER_MACHINED) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ "%s", _("Systemd machined requested, but not available"));
+ return -1;
+ }
+ }
- return virCgroupNewMachineManual(name,
- drivername,
- pidleader,
- partition,
- controllers,
- group);
+ rv = virCgroupNewMachineManual(name,
+ drivername,
+ pidleader,
+ partition,
+ controllers,
+ group);
+ if (rv == 0)
+ *reg = VIR_CGROUP_REGISTER_DIRECT;
+ return rv;
}
diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h
index d833927678..63ee1aba5c 100644
--- a/src/util/vircgroup.h
+++ b/src/util/vircgroup.h
@@ -46,7 +46,8 @@ enum {
VIR_CGROUP_CONTROLLER_LAST
};
-VIR_ENUM_DECL(virCgroupController);
+VIR_ENUM_DECL(virCgroupController)
+
/* Items of this enum are used later in virCgroupNew to create
* bit array stored in int. Like this:
* 1 << VIR_CGROUP_CONTROLLER_CPU
@@ -103,6 +104,12 @@ virCgroupNewDetectMachine(const char *name,
virCgroupPtr *group)
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
+typedef enum {
+ VIR_CGROUP_REGISTER_DEFAULT,
+ VIR_CGROUP_REGISTER_DIRECT,
+ VIR_CGROUP_REGISTER_MACHINED,
+} virCgroupRegister;
+
int virCgroupNewMachine(const char *name,
const char *drivername,
const unsigned char *uuid,
@@ -111,6 +118,7 @@ int virCgroupNewMachine(const char *name,
bool isContainer,
size_t nnicindexes,
int *nicindexes,
+ virCgroupRegister *reg,
const char *partition,
int controllers,
virCgroupPtr *group)
--
2.14.3
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
I know it's a POC type series, but figured I'd take a look primarily at the first 4 patches, learn a bit in the 5th one, and provide some review feedback in order to help move things along... There's perhaps a few adjustments in here that could be made into multiple patches. On 12/20/2017 11:47 AM, Daniel P. Berrange wrote: > Currently the QEMU driver has three ways of setting up cgroups. It either > skips them entirely (if non-root), or uses systemd-machined, or uses > cgroups directly. > > It is further possible to register directly with systemd and bypass > machined. We don't support this by systemd-nsspawn does and we ought s/by/but/ (I think) Still a bit odd to add an option that isn't supported unless there was a plan to add the support when formulating the non POC patches... > to. > > This change adds ability to configure the mechanism for registering > resources between all these options explicitly. via s/. via/ via:/ (e.g. remove the '.' and add ':') > > <resource register="none|direct|machined|systemd"/> > > Signed-off-by: Daniel P. Berrange <berrange@redhat.com> > --- > src/conf/domain_conf.c | 42 ++++++++++++++++++++++------- > src/conf/domain_conf.h | 12 +++++++++ > src/libvirt_private.syms | 2 ++ > src/lxc/lxc_cgroup.c | 34 ++++++++++++++++++++++++ > src/lxc/lxc_cgroup.h | 3 +++ > src/lxc/lxc_process.c | 11 ++++---- > src/qemu/qemu_cgroup.c | 69 +++++++++++++++++++++++++++++++++++++++++------- > src/util/vircgroup.c | 55 ++++++++++++++++++++++++-------------- > src/util/vircgroup.h | 10 ++++++- > 9 files changed, 194 insertions(+), 44 deletions(-) > formatdomain.html.in would need to be augmented to describe the new attribute. domaincommon.rng modified to add the new attribute. A tests either added or existing one adjusted to exhibit the new attribute option(s). > diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c > index 9a62bc472c..fb8e7a0ec7 100644 > --- a/src/conf/domain_conf.c > +++ b/src/conf/domain_conf.c > @@ -910,6 +910,14 @@ VIR_ENUM_IMPL(virDomainHPTResizing, > "required", > ); > > +VIR_ENUM_IMPL(virDomainResourceRegister, > + VIR_DOMAIN_RESOURCE_REGISTER_LAST, > + "default", > + "none", > + "cgroup", > + "machined", > + "systemd"); > + > /* Internal mapping: subset of block job types that can be present in > * <mirror> XML (remaining types are not two-phase). */ > VIR_ENUM_DECL(virDomainBlockJob) > @@ -17698,16 +17706,20 @@ virDomainResourceDefParse(xmlNodePtr node, > { > virDomainResourceDefPtr def = NULL; > xmlNodePtr tmp = ctxt->node; > + char *reg; = NULL; > > ctxt->node = node; > > if (VIR_ALLOC(def) < 0) > goto error; > > - /* Find out what type of virtualization to use */ > - if (!(def->partition = virXPathString("string(./partition)", ctxt))) { > - virReportError(VIR_ERR_INTERNAL_ERROR, > - "%s", _("missing resource partition attribute")); > + def->partition = virXPathString("string(./partition)", ctxt); It seems partition is now optional, although formatdomain seems to have already indicated that... However, that's not a perfect match to domaincommon.rng where only "respartition" is optional. (This type of change could perhaps be it's own patch). > + > + reg = virXMLPropString(node, "register"); > + if (reg != NULL && > + (def->reg = virDomainResourceRegisterTypeFromString(reg)) <= 0) { > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, > + "%s", _("Invalid register attribute")); > goto error; > } Need a VIR_FREE(reg); since we haven't GO-ified yet in both error and normal paths unless this is rewritten a bit... > > @@ -25568,11 +25580,23 @@ static void > virDomainResourceDefFormat(virBufferPtr buf, > virDomainResourceDefPtr def) > { > - virBufferAddLit(buf, "<resource>\n"); > - virBufferAdjustIndent(buf, 2); > - virBufferEscapeString(buf, "<partition>%s</partition>\n", def->partition); > - virBufferAdjustIndent(buf, -2); > - virBufferAddLit(buf, "</resource>\n"); > + if (def->reg == VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT && > + def->partition == NULL) > + return; > + > + virBufferAddLit(buf, "<resource"); > + if (def->reg != VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT) > + virBufferAsprintf(buf, " register='%s'", virDomainResourceRegisterTypeToString(def->reg)); > + > + if (def->partition) { > + virBufferAddLit(buf, ">\n"); > + virBufferAdjustIndent(buf, 2); > + virBufferEscapeString(buf, "<partition>%s</partition>\n", def->partition); > + virBufferAdjustIndent(buf, -2); > + virBufferAddLit(buf, "</resource>\n"); > + } else { > + virBufferAddLit(buf, "/>\n"); > + } > } > > > diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h > index 6f7f96b3dd..a7a6628a36 100644 > --- a/src/conf/domain_conf.h > +++ b/src/conf/domain_conf.h > @@ -2145,9 +2145,20 @@ struct _virDomainPanicDef { > void virBlkioDeviceArrayClear(virBlkioDevicePtr deviceWeights, > int ndevices); > > +typedef enum { > + VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT, > + VIR_DOMAIN_RESOURCE_REGISTER_NONE, > + VIR_DOMAIN_RESOURCE_REGISTER_CGROUP, > + VIR_DOMAIN_RESOURCE_REGISTER_MACHINED, > + VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD, > + > + VIR_DOMAIN_RESOURCE_REGISTER_LAST, > +} virDomainResourceRegister; > + > typedef struct _virDomainResourceDef virDomainResourceDef; > typedef virDomainResourceDef *virDomainResourceDefPtr; > struct _virDomainResourceDef { > + int reg; /* enum virDomainResourceRegister */ > char *partition; > }; > > @@ -3325,6 +3336,7 @@ VIR_ENUM_DECL(virDomainMemorySource) > VIR_ENUM_DECL(virDomainMemoryAllocation) > VIR_ENUM_DECL(virDomainIOMMUModel) > VIR_ENUM_DECL(virDomainShmemModel) > +VIR_ENUM_DECL(virDomainResourceRegister) > /* from libvirt.h */ > VIR_ENUM_DECL(virDomainState) > VIR_ENUM_DECL(virDomainNostateReason) > diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms > index d5c3b9abb5..a0fde65dba 100644 > --- a/src/libvirt_private.syms > +++ b/src/libvirt_private.syms > @@ -489,6 +489,8 @@ virDomainRedirdevBusTypeToString; > virDomainRedirdevDefFind; > virDomainRedirdevDefFree; > virDomainRedirdevDefRemove; > +virDomainResourceRegisterTypeFromString; > +virDomainResourceRegisterTypeToString; > virDomainRNGBackendTypeToString; > virDomainRNGDefFree; > virDomainRNGFind; > diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c > index 3369801870..7bd479df1b 100644 > --- a/src/lxc/lxc_cgroup.c > +++ b/src/lxc/lxc_cgroup.c > @@ -478,6 +478,35 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def, > return ret; > } > > +int virLXCCgroupMode(virDomainResourceRegister reg, > + virCgroupRegister *cgreg) > +{ > + switch (reg) { > + case VIR_DOMAIN_RESOURCE_REGISTER_NONE: > + goto unsupported; > + case VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT: > + *cgreg = VIR_CGROUP_REGISTER_DEFAULT; > + break; > + case VIR_DOMAIN_RESOURCE_REGISTER_MACHINED: > + *cgreg = VIR_CGROUP_REGISTER_MACHINED; > + break; > + case VIR_DOMAIN_RESOURCE_REGISTER_CGROUP: > + *cgreg = VIR_CGROUP_REGISTER_DIRECT; > + break; > + case VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD: > + default: Use VIR_DOMAIN_RESOURCE_REGISTER_LAST instead of default > + goto unsupported; > + } > + > + return 0; > + > + unsupported: > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, > + _("Resource register '%s' not available"), > + virDomainResourceRegisterTypeToString(reg)); > + return -1; > +} > + > > virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, > pid_t initpid, > @@ -485,11 +514,15 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, > int *nicindexes) > { > virCgroupPtr cgroup = NULL; > + virCgroupRegister reg; > char *machineName = virLXCDomainGetMachineName(def, 0); > > if (!machineName) > goto cleanup; > > + if (virLXCCgroupMode(def->resource->reg, ®) < 0) > + goto cleanup; > + > if (def->resource->partition[0] != '/') { > virReportError(VIR_ERR_CONFIG_UNSUPPORTED, > _("Resource partition '%s' must start with '/'"), > @@ -504,6 +537,7 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, > initpid, > true, > nnicindexes, nicindexes, > + ®, > def->resource->partition, > -1, > &cgroup) < 0) > diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h > index e85f21c47d..979d6a154b 100644 > --- a/src/lxc/lxc_cgroup.h > +++ b/src/lxc/lxc_cgroup.h > @@ -27,6 +27,9 @@ > # include "lxc_fuse.h" > # include "virusb.h" > > +int virLXCCgroupMode(virDomainResourceRegister reg, > + virCgroupRegister *cgreg); > + > virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, > pid_t initpid, > size_t nnicindexes, > diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c > index efd8a69000..24aa0cb0bf 100644 > --- a/src/lxc/lxc_process.c > +++ b/src/lxc/lxc_process.c > @@ -1166,6 +1166,7 @@ virLXCProcessEnsureRootFS(virDomainObjPtr vm) > return -1; > } > > + > /** > * virLXCProcessStart: > * @conn: pointer to connection > @@ -1260,13 +1261,13 @@ int virLXCProcessStart(virConnectPtr conn, > if (VIR_ALLOC(res) < 0) > goto cleanup; > > - if (VIR_STRDUP(res->partition, "/machine") < 0) { > - VIR_FREE(res); > - goto cleanup; > - } > - > vm->def->resource = res; > } So the above could be shortened to just: if (!vm->def->resource && VIR_ALLOC(vm->def->resource) < 0) goto cleanup; > + if (vm->def->resource->reg != VIR_DOMAIN_RESOURCE_REGISTER_NONE && > + !vm->def->resource->partition) { > + if (VIR_STRDUP(vm->def->resource->partition, "/machine") < 0) > + goto cleanup; > + } This could be similar too w/r/t single if condition... > > if (virAsprintf(&logfile, "%s/%s.log", > cfg->logDir, vm->def->name) < 0) > diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c > index 19252ea239..5167d7fee1 100644 > --- a/src/qemu/qemu_cgroup.c > +++ b/src/qemu/qemu_cgroup.c > @@ -834,6 +834,46 @@ qemuSetupCpuCgroup(virDomainObjPtr vm) > } > > There could be a helper here that would be callable by qemuConnectCgroup that would return the @avail value. > +static int qemuGetCgroupMode(virDomainObjPtr vm, > + virDomainResourceRegister reg, > + virCgroupRegister *cgreg) > +{ > + qemuDomainObjPrivatePtr priv = vm->privateData; > + bool avail = virQEMUDriverIsPrivileged(priv->driver) && > + virCgroupAvailable(); > + > + switch (reg) { > + case VIR_DOMAIN_RESOURCE_REGISTER_NONE: > + return 0; > + case VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT: > + if (!avail) > + return 0; > + *cgreg = VIR_CGROUP_REGISTER_DEFAULT; > + break; > + case VIR_DOMAIN_RESOURCE_REGISTER_MACHINED: > + if (!avail) > + goto unsupported; > + *cgreg = VIR_CGROUP_REGISTER_MACHINED; > + break; > + case VIR_DOMAIN_RESOURCE_REGISTER_CGROUP: > + if (!avail) > + goto unsupported; > + *cgreg = VIR_CGROUP_REGISTER_DIRECT; > + break; > + case VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD: > + default: Use VIR_DOMAIN_RESOURCE_REGISTER_LAST instead of default > + goto unsupported; > + } > + > + return 1; > + > + unsupported: > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, > + _("Resource register '%s' not available"), > + virDomainResourceRegisterTypeToString(reg)); > + return -1; > +} > + > static int > qemuInitCgroup(virDomainObjPtr vm, > size_t nnicindexes, > @@ -842,11 +882,17 @@ qemuInitCgroup(virDomainObjPtr vm, > int ret = -1; > qemuDomainObjPrivatePtr priv = vm->privateData; > virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(priv->driver); > + virCgroupRegister reg; > + int rv; > > - if (!virQEMUDriverIsPrivileged(priv->driver)) > - goto done; > - > - if (!virCgroupAvailable()) > + rv = qemuGetCgroupMode(vm, > + vm->def->resource ? > + vm->def->resource->reg : > + VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT, > + ®); > + if (rv < 0) > + goto cleanup; > + if (rv == 0) > goto done; > > virCgroupFree(&priv->cgroup); > @@ -857,13 +903,12 @@ qemuInitCgroup(virDomainObjPtr vm, > if (VIR_ALLOC(res) < 0) > goto cleanup; > > - if (VIR_STRDUP(res->partition, "/machine") < 0) { > - VIR_FREE(res); > - goto cleanup; > - } > - > vm->def->resource = res; > } Similar to above, we now have: if (!vm->def->resource && VIR_ALLOC(vm->def->resource) < 0) goto cleanup; > + if (!vm->def->resource->partition) { > + if (VIR_STRDUP(vm->def->resource->partition, "/machine") < 0) > + goto cleanup; > + } > and similar construct if desired. > if (vm->def->resource->partition[0] != '/') { > virReportError(VIR_ERR_CONFIG_UNSUPPORTED, > @@ -879,6 +924,7 @@ qemuInitCgroup(virDomainObjPtr vm, > vm->pid, > false, > nnicindexes, nicindexes, > + ®, > vm->def->resource->partition, > cfg->cgroupControllers, > &priv->cgroup) < 0) { > @@ -980,6 +1026,11 @@ qemuConnectCgroup(virDomainObjPtr vm) > virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(priv->driver); > int ret = -1; > > + if (vm->def->resource && > + vm->def->resource->reg == VIR_DOMAIN_RESOURCE_REGISTER_NONE) { > + goto done; > + } > + The subsequent checks could use the @avail helper mentioned above... John > if (!virQEMUDriverIsPrivileged(priv->driver)) > goto done; > > diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c > index 0a31947b0d..07ffb78c78 100644 > --- a/src/util/vircgroup.c > +++ b/src/util/vircgroup.c > @@ -1733,6 +1733,7 @@ virCgroupNewMachine(const char *name, > bool isContainer, > size_t nnicindexes, > int *nicindexes, > + virCgroupRegister *reg, > const char *partition, > int controllers, > virCgroupPtr *group) > @@ -1741,28 +1742,42 @@ virCgroupNewMachine(const char *name, > > *group = NULL; > > - if ((rv = virCgroupNewMachineSystemd(name, > - drivername, > - uuid, > - rootdir, > - pidleader, > - isContainer, > - nnicindexes, > - nicindexes, > - partition, > - controllers, > - group)) == 0) > - return 0; > + if (*reg == VIR_CGROUP_REGISTER_DEFAULT || > + *reg == VIR_CGROUP_REGISTER_MACHINED) { > + if ((rv = virCgroupNewMachineSystemd(name, > + drivername, > + uuid, > + rootdir, > + pidleader, > + isContainer, > + nnicindexes, > + nicindexes, > + partition, > + controllers, > + group)) == 0) { > + *reg = VIR_CGROUP_REGISTER_MACHINED; > + return 0; > + } > > - if (rv == -1) > - return -1; > + if (rv == -1) > + return -1; > + > + if (*reg == VIR_CGROUP_REGISTER_MACHINED) { > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, > + "%s", _("Systemd machined requested, but not available")); > + return -1; > + } > + } > > - return virCgroupNewMachineManual(name, > - drivername, > - pidleader, > - partition, > - controllers, > - group); > + rv = virCgroupNewMachineManual(name, > + drivername, > + pidleader, > + partition, > + controllers, > + group); > + if (rv == 0) > + *reg = VIR_CGROUP_REGISTER_DIRECT; > + return rv; > } > > > diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h > index d833927678..63ee1aba5c 100644 > --- a/src/util/vircgroup.h > +++ b/src/util/vircgroup.h > @@ -46,7 +46,8 @@ enum { > VIR_CGROUP_CONTROLLER_LAST > }; > > -VIR_ENUM_DECL(virCgroupController); > +VIR_ENUM_DECL(virCgroupController) > + > /* Items of this enum are used later in virCgroupNew to create > * bit array stored in int. Like this: > * 1 << VIR_CGROUP_CONTROLLER_CPU > @@ -103,6 +104,12 @@ virCgroupNewDetectMachine(const char *name, > virCgroupPtr *group) > ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); > > +typedef enum { > + VIR_CGROUP_REGISTER_DEFAULT, > + VIR_CGROUP_REGISTER_DIRECT, > + VIR_CGROUP_REGISTER_MACHINED, > +} virCgroupRegister; > + > int virCgroupNewMachine(const char *name, > const char *drivername, > const unsigned char *uuid, > @@ -111,6 +118,7 @@ int virCgroupNewMachine(const char *name, > bool isContainer, > size_t nnicindexes, > int *nicindexes, > + virCgroupRegister *reg, > const char *partition, > int controllers, > virCgroupPtr *group) > -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
I'm so sorry for not getting to this earlier. I though I'll get to this over the holidays, but they were very busy with no free time for me. On Wed, Dec 20, 2017 at 04:47:46PM +0000, Daniel P. Berrange wrote: >Currently the QEMU driver has three ways of setting up cgroups. It either >skips them entirely (if non-root), or uses systemd-machined, or uses >cgroups directly. > So what we are trying to fix here is that all of the variations don't create the same structure. So it needs to be clear for the mgmt app to guess^Wknow correctly where the domain is in the cgroup hierarchy. >It is further possible to register directly with systemd and bypass >machined. We don't support this by systemd-nsspawn does and we ought >to. > But what's the benefit of that? >This change adds ability to configure the mechanism for registering >resources between all these options explicitly. via > > <resource register="none|direct|machined|systemd"/> > I understand what you are trying to fix, but I don't quite follow why we should expose that. Can't we guess some of them easily? Or are you making this part of the PoC, but then removing it later? For now I am OK with that, but I would rather see that as a part of qemu.conf (or libvirtd.conf), not really in the XML. OK for the PoC, though. -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
On Tue, Jan 09, 2018 at 01:43:39PM +0100, Martin Kletzander wrote: > I'm so sorry for not getting to this earlier. I though I'll get to this over > the holidays, but they were very busy with no free time for me. > > On Wed, Dec 20, 2017 at 04:47:46PM +0000, Daniel P. Berrange wrote: > > Currently the QEMU driver has three ways of setting up cgroups. It either > > skips them entirely (if non-root), or uses systemd-machined, or uses > > cgroups directly. > > > > So what we are trying to fix here is that all of the variations don't create the > same structure. So it needs to be clear for the mgmt app to guess^Wknow > correctly where the domain is in the cgroup hierarchy. > > > It is further possible to register directly with systemd and bypass > > machined. We don't support this by systemd-nsspawn does and we ought > > to. > > But what's the benefit of that? Systemd recommends that you only register with machined, if you are running a full OS install in the machine. IOW, if you are using LXC / QEMU for sandboxing individual applications, instead of full OS, then you should not register with machined. So this is something libvirt sandbox ought to be able to request, for exmaple. > > > This change adds ability to configure the mechanism for registering > > resources between all these options explicitly. via > > > > <resource register="none|direct|machined|systemd"/> > > > > I understand what you are trying to fix, but I don't quite follow why we should > expose that. Can't we guess some of them easily? Or are you making this part > of the PoC, but then removing it later? No, I intend this bit to be fully supported long term. * none - use this to just inherit current placement of the caller. This is akin to turning off all use of cgroups in qemu.conf if launching from libvirtd, causing currently placement of libvirtd in cgroups to be inherited by VMs. This is what we'll also need with the shim for KubeVirt's needs * machined - register with machined - this is what we do today, if we detect that machined is available * direct - create cgroups directly - this is what we do if machined is not installed, or we have no dbus connection available. * systemd - register with systemd only, not with machined - see above rationale It is unlikely that apps would use 'direct' if running on a systemd based host. Likewise using machined/systemd on a non-systemd host is not possible. But that still leaves a choice of 2/3 options that are viable and cannot be automatically determined, and are reasonable to vary per-VM. > For now I am OK with that, but I would rather see that as a part of qemu.conf > (or libvirtd.conf), not really in the XML. OK for the PoC, though. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :| -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
On Thu, Jan 11, 2018 at 12:14:21PM +0000, Daniel P. Berrange wrote: > On Tue, Jan 09, 2018 at 01:43:39PM +0100, Martin Kletzander wrote: > > I'm so sorry for not getting to this earlier. I though I'll get to this over > > the holidays, but they were very busy with no free time for me. > > > > On Wed, Dec 20, 2017 at 04:47:46PM +0000, Daniel P. Berrange wrote: > > > Currently the QEMU driver has three ways of setting up cgroups. It either > > > skips them entirely (if non-root), or uses systemd-machined, or uses > > > cgroups directly. > > > > > > > So what we are trying to fix here is that all of the variations don't create the > > same structure. So it needs to be clear for the mgmt app to guess^Wknow > > correctly where the domain is in the cgroup hierarchy. > > > > > It is further possible to register directly with systemd and bypass > > > machined. We don't support this by systemd-nsspawn does and we ought > > > to. > > > > But what's the benefit of that? > > Systemd recommends that you only register with machined, if you are running a > full OS install in the machine. IOW, if you are using LXC / QEMU for sandboxing > individual applications, instead of full OS, then you should not register with > machined. So this is something libvirt sandbox ought to be able to request, for > exmaple. > > > > > > This change adds ability to configure the mechanism for registering > > > resources between all these options explicitly. via > > > > > > <resource register="none|direct|machined|systemd"/> > > > > > > > I understand what you are trying to fix, but I don't quite follow why we should > > expose that. Can't we guess some of them easily? Or are you making this part > > of the PoC, but then removing it later? > > No, I intend this bit to be fully supported long term. > > * none - use this to just inherit current placement of the caller. > This is akin to turning off all use of cgroups in qemu.conf > if launching from libvirtd, causing currently placement of > libvirtd in cgroups to be inherited by VMs. > > This is what we'll also need with the shim for KubeVirt's > needs > > * machined - register with machined - this is what we do today, if we detect > that machined is available > > * direct - create cgroups directly - this is what we do if machined is not > installed, or we have no dbus connection available. > > * systemd - register with systemd only, not with machined - see above > rationale > > It is unlikely that apps would use 'direct' if running on a systemd based > host. Likewise using machined/systemd on a non-systemd host is not possible. > > But that still leaves a choice of 2/3 options that are viable and cannot be > automatically determined, and are reasonable to vary per-VM. Oh, I also meant to say that I would expect us to update this in the live XML, if the user/app left it unspecified. This would allow the app to determine whether libvirt has actually activated cgroup support or not, and thus whether resource mgmt apis/config will work Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :| -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
© 2016 - 2025 Red Hat, Inc.