[libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible

Marc Hartmayer posted 1 patch 5 years, 5 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20181029173458.15533-1-mhartmay@linux.ibm.com
src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
1 file changed, 52 insertions(+), 2 deletions(-)
[libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Marc Hartmayer 5 years, 5 months ago
Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
group. This reduces the overhead of the QEMU capabilities cache
lookup. Before this patch there were many fork() calls used for
checking whether /dev/kvm is accessible. Now we store the result
whether /dev/kvm is accessible or not and we only need to re-run the
virFileAccessibleAs check if the ctime of /dev/kvm has changed.

Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
---
 src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
index e228f52ec0bb..85516954149b 100644
--- a/src/qemu/qemu_capabilities.c
+++ b/src/qemu/qemu_capabilities.c
@@ -3238,6 +3238,10 @@ struct _virQEMUCapsCachePriv {
     virArch hostArch;
     unsigned int microcodeVersion;
     char *kernelVersion;
+
+    /* cache whether /dev/kvm is usable as runUid:runGuid */
+    virTristateBool kvmUsable;
+    time_t kvmCtime;
 };
 typedef struct _virQEMUCapsCachePriv virQEMUCapsCachePriv;
 typedef virQEMUCapsCachePriv *virQEMUCapsCachePrivPtr;
@@ -3824,6 +3828,52 @@ virQEMUCapsSaveFile(void *data,
 }
 
 
+/* Determine whether '/dev/kvm' is usable as QEMU user:QEMU group. */
+static bool
+virQEMUCapsKVMUsable(virQEMUCapsCachePrivPtr priv)
+{
+    struct stat sb;
+    static const char *kvm_device = "/dev/kvm";
+    virTristateBool value;
+    virTristateBool cached_value = priv->kvmUsable;
+    time_t kvm_ctime;
+    time_t cached_kvm_ctime = priv->kvmCtime;
+
+    if (stat(kvm_device, &sb) < 0) {
+        virReportSystemError(errno,
+                             _("Failed to stat %s"), kvm_device);
+        return false;
+    }
+    kvm_ctime = sb.st_ctime;
+
+    if (kvm_ctime != cached_kvm_ctime) {
+        VIR_DEBUG("%s has changed (%lld vs %lld)", kvm_device,
+                  (long long)kvm_ctime, (long long)cached_kvm_ctime);
+        cached_value = VIR_TRISTATE_BOOL_ABSENT;
+    }
+
+    if (cached_value != VIR_TRISTATE_BOOL_ABSENT)
+        return cached_value == VIR_TRISTATE_BOOL_YES;
+
+    if (virFileAccessibleAs(kvm_device, R_OK | W_OK,
+                            priv->runUid, priv->runGid) == 0) {
+        value = VIR_TRISTATE_BOOL_YES;
+    } else {
+        value = VIR_TRISTATE_BOOL_NO;
+    }
+
+    /* There is a race window between 'stat' and
+     * 'virFileAccessibleAs'. However, since we're only interested in
+     * detecting changes *after* the virFileAccessibleAs check, we can
+     * neglect this here.
+     */
+    priv->kvmCtime = kvm_ctime;
+    priv->kvmUsable = value;
+
+    return value == VIR_TRISTATE_BOOL_YES;
+}
+
+
 static bool
 virQEMUCapsIsValid(void *data,
                    void *privData)
@@ -3872,8 +3922,7 @@ virQEMUCapsIsValid(void *data,
         return true;
     }
 
-    kvmUsable = virFileAccessibleAs("/dev/kvm", R_OK | W_OK,
-                                    priv->runUid, priv->runGid) == 0;
+    kvmUsable = virQEMUCapsKVMUsable(priv);
 
     if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_KVM) &&
         kvmUsable) {
@@ -4684,6 +4733,7 @@ virQEMUCapsCacheNew(const char *libDir,
     priv->runUid = runUid;
     priv->runGid = runGid;
     priv->microcodeVersion = microcodeVersion;
+    priv->kvmUsable = VIR_TRISTATE_BOOL_ABSENT;
 
     if (uname(&uts) == 0 &&
         virAsprintf(&priv->kernelVersion, "%s %s", uts.release, uts.version) < 0)
-- 
2.17.0

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Michal Privoznik 5 years, 5 months ago
On 10/29/2018 06:34 PM, Marc Hartmayer wrote:
> Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
> group. This reduces the overhead of the QEMU capabilities cache
> lookup. Before this patch there were many fork() calls used for
> checking whether /dev/kvm is accessible. Now we store the result
> whether /dev/kvm is accessible or not and we only need to re-run the
> virFileAccessibleAs check if the ctime of /dev/kvm has changed.
> 
> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
> ---
>  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 52 insertions(+), 2 deletions(-)
> 
> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
> index e228f52ec0bb..85516954149b 100644
> --- a/src/qemu/qemu_capabilities.c
> +++ b/src/qemu/qemu_capabilities.c
> @@ -3238,6 +3238,10 @@ struct _virQEMUCapsCachePriv {
>      virArch hostArch;
>      unsigned int microcodeVersion;
>      char *kernelVersion;
> +
> +    /* cache whether /dev/kvm is usable as runUid:runGuid */
> +    virTristateBool kvmUsable;
> +    time_t kvmCtime;
>  };
>  typedef struct _virQEMUCapsCachePriv virQEMUCapsCachePriv;
>  typedef virQEMUCapsCachePriv *virQEMUCapsCachePrivPtr;
> @@ -3824,6 +3828,52 @@ virQEMUCapsSaveFile(void *data,
>  }
>  
>  
> +/* Determine whether '/dev/kvm' is usable as QEMU user:QEMU group. */
> +static bool
> +virQEMUCapsKVMUsable(virQEMUCapsCachePrivPtr priv)
> +{
> +    struct stat sb;
> +    static const char *kvm_device = "/dev/kvm";
> +    virTristateBool value;
> +    virTristateBool cached_value = priv->kvmUsable;
> +    time_t kvm_ctime;
> +    time_t cached_kvm_ctime = priv->kvmCtime;
> +
> +    if (stat(kvm_device, &sb) < 0) {
> +        virReportSystemError(errno,
> +                             _("Failed to stat %s"), kvm_device);
> +        return false;
> +    }
> +    kvm_ctime = sb.st_ctime;

This doesn't feel right. /dev/kvm ctime is changed every time qemu is
started or powered off (try running stat over it before and after a
domain is started/shut off). So effectively we will fork more often than
we would think. Should we cache inode number instead? Because for all
that we care is simply if the file is there.

> +
> +    if (kvm_ctime != cached_kvm_ctime) {
> +        VIR_DEBUG("%s has changed (%lld vs %lld)", kvm_device,
> +                  (long long)kvm_ctime, (long long)cached_kvm_ctime);
> +        cached_value = VIR_TRISTATE_BOOL_ABSENT;
> +    }
> +
> +    if (cached_value != VIR_TRISTATE_BOOL_ABSENT)
> +        return cached_value == VIR_TRISTATE_BOOL_YES;
> +
> +    if (virFileAccessibleAs(kvm_device, R_OK | W_OK,
> +                            priv->runUid, priv->runGid) == 0) {
> +        value = VIR_TRISTATE_BOOL_YES;
> +    } else {
> +        value = VIR_TRISTATE_BOOL_NO;
> +    }
> +
> +    /* There is a race window between 'stat' and
> +     * 'virFileAccessibleAs'. However, since we're only interested in
> +     * detecting changes *after* the virFileAccessibleAs check, we can
> +     * neglect this here.
> +     */
> +    priv->kvmCtime = kvm_ctime;
> +    priv->kvmUsable = value;
> +
> +    return value == VIR_TRISTATE_BOOL_YES;
> +}
> +
> +
>  static bool
>  virQEMUCapsIsValid(void *data,
>                     void *privData)
> @@ -3872,8 +3922,7 @@ virQEMUCapsIsValid(void *data,
>          return true;
>      }
>  
> -    kvmUsable = virFileAccessibleAs("/dev/kvm", R_OK | W_OK,
> -                                    priv->runUid, priv->runGid) == 0;
> +    kvmUsable = virQEMUCapsKVMUsable(priv);
>  
>      if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_KVM) &&
>          kvmUsable) {
> @@ -4684,6 +4733,7 @@ virQEMUCapsCacheNew(const char *libDir,
>      priv->runUid = runUid;
>      priv->runGid = runGid;
>      priv->microcodeVersion = microcodeVersion;
> +    priv->kvmUsable = VIR_TRISTATE_BOOL_ABSENT;
>  
>      if (uname(&uts) == 0 &&
>          virAsprintf(&priv->kernelVersion, "%s %s", uts.release, uts.version) < 0)
> 

Otherwise looking good.

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Daniel P. Berrangé 5 years, 5 months ago
On Tue, Oct 30, 2018 at 09:13:50AM +0100, Michal Privoznik wrote:
> On 10/29/2018 06:34 PM, Marc Hartmayer wrote:
> > Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
> > group. This reduces the overhead of the QEMU capabilities cache
> > lookup. Before this patch there were many fork() calls used for
> > checking whether /dev/kvm is accessible. Now we store the result
> > whether /dev/kvm is accessible or not and we only need to re-run the
> > virFileAccessibleAs check if the ctime of /dev/kvm has changed.
> > 
> > Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
> > Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
> > ---
> >  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
> >  1 file changed, 52 insertions(+), 2 deletions(-)
> > 
> > diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
> > index e228f52ec0bb..85516954149b 100644
> > --- a/src/qemu/qemu_capabilities.c
> > +++ b/src/qemu/qemu_capabilities.c
> > @@ -3238,6 +3238,10 @@ struct _virQEMUCapsCachePriv {
> >      virArch hostArch;
> >      unsigned int microcodeVersion;
> >      char *kernelVersion;
> > +
> > +    /* cache whether /dev/kvm is usable as runUid:runGuid */
> > +    virTristateBool kvmUsable;
> > +    time_t kvmCtime;
> >  };
> >  typedef struct _virQEMUCapsCachePriv virQEMUCapsCachePriv;
> >  typedef virQEMUCapsCachePriv *virQEMUCapsCachePrivPtr;
> > @@ -3824,6 +3828,52 @@ virQEMUCapsSaveFile(void *data,
> >  }
> >  
> >  
> > +/* Determine whether '/dev/kvm' is usable as QEMU user:QEMU group. */
> > +static bool
> > +virQEMUCapsKVMUsable(virQEMUCapsCachePrivPtr priv)
> > +{
> > +    struct stat sb;
> > +    static const char *kvm_device = "/dev/kvm";
> > +    virTristateBool value;
> > +    virTristateBool cached_value = priv->kvmUsable;
> > +    time_t kvm_ctime;
> > +    time_t cached_kvm_ctime = priv->kvmCtime;
> > +
> > +    if (stat(kvm_device, &sb) < 0) {
> > +        virReportSystemError(errno,
> > +                             _("Failed to stat %s"), kvm_device);
> > +        return false;
> > +    }
> > +    kvm_ctime = sb.st_ctime;
> 
> This doesn't feel right. /dev/kvm ctime is changed every time qemu is
> started or powered off (try running stat over it before and after a
> domain is started/shut off). So effectively we will fork more often than
> we would think. Should we cache inode number instead? Because for all
> that we care is simply if the file is there.

Urgh, that is a bit strange and not the usual semantics for timestamps :-(

We can't stat the inode - the code was explicitly trying to cope with the
way /dev/kvm can change permissions when udev rules get applied. We would
have to compare the user, group, permissions mask and even ACL, or a hash
of those.

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Michal Privoznik 5 years, 5 months ago
On 10/30/2018 10:35 AM, Daniel P. Berrangé wrote:
> On Tue, Oct 30, 2018 at 09:13:50AM +0100, Michal Privoznik wrote:
>> On 10/29/2018 06:34 PM, Marc Hartmayer wrote:
>>> Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
>>> group. This reduces the overhead of the QEMU capabilities cache
>>> lookup. Before this patch there were many fork() calls used for
>>> checking whether /dev/kvm is accessible. Now we store the result
>>> whether /dev/kvm is accessible or not and we only need to re-run the
>>> virFileAccessibleAs check if the ctime of /dev/kvm has changed.
>>>
>>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
>>> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
>>> ---
>>>  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
>>>  1 file changed, 52 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
>>> index e228f52ec0bb..85516954149b 100644
>>> --- a/src/qemu/qemu_capabilities.c
>>> +++ b/src/qemu/qemu_capabilities.c
>>> @@ -3238,6 +3238,10 @@ struct _virQEMUCapsCachePriv {
>>>      virArch hostArch;
>>>      unsigned int microcodeVersion;
>>>      char *kernelVersion;
>>> +
>>> +    /* cache whether /dev/kvm is usable as runUid:runGuid */
>>> +    virTristateBool kvmUsable;
>>> +    time_t kvmCtime;
>>>  };
>>>  typedef struct _virQEMUCapsCachePriv virQEMUCapsCachePriv;
>>>  typedef virQEMUCapsCachePriv *virQEMUCapsCachePrivPtr;
>>> @@ -3824,6 +3828,52 @@ virQEMUCapsSaveFile(void *data,
>>>  }
>>>  
>>>  
>>> +/* Determine whether '/dev/kvm' is usable as QEMU user:QEMU group. */
>>> +static bool
>>> +virQEMUCapsKVMUsable(virQEMUCapsCachePrivPtr priv)
>>> +{
>>> +    struct stat sb;
>>> +    static const char *kvm_device = "/dev/kvm";
>>> +    virTristateBool value;
>>> +    virTristateBool cached_value = priv->kvmUsable;
>>> +    time_t kvm_ctime;
>>> +    time_t cached_kvm_ctime = priv->kvmCtime;
>>> +
>>> +    if (stat(kvm_device, &sb) < 0) {
>>> +        virReportSystemError(errno,
>>> +                             _("Failed to stat %s"), kvm_device);
>>> +        return false;
>>> +    }
>>> +    kvm_ctime = sb.st_ctime;
>>
>> This doesn't feel right. /dev/kvm ctime is changed every time qemu is
>> started or powered off (try running stat over it before and after a
>> domain is started/shut off). So effectively we will fork more often than
>> we would think. Should we cache inode number instead? Because for all
>> that we care is simply if the file is there.
> 
> Urgh, that is a bit strange and not the usual semantics for timestamps :-(

Indeed.

> 
> We can't stat the inode - the code was explicitly trying to cope with the
> way /dev/kvm can change permissions when udev rules get applied. We would
> have to compare the user, group, permissions mask and even ACL, or a hash
> of those.

Well, we can use ctime as suggested and post a patch for kernel to fix
ctime behaviour. Until the patch is merged our behaviour would be
suboptimal, but still better than it is now.

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Daniel P. Berrangé 5 years, 5 months ago
On Tue, Oct 30, 2018 at 11:08:45AM +0100, Michal Privoznik wrote:
> On 10/30/2018 10:35 AM, Daniel P. Berrangé wrote:
> > On Tue, Oct 30, 2018 at 09:13:50AM +0100, Michal Privoznik wrote:
> >> On 10/29/2018 06:34 PM, Marc Hartmayer wrote:
> >>> Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
> >>> group. This reduces the overhead of the QEMU capabilities cache
> >>> lookup. Before this patch there were many fork() calls used for
> >>> checking whether /dev/kvm is accessible. Now we store the result
> >>> whether /dev/kvm is accessible or not and we only need to re-run the
> >>> virFileAccessibleAs check if the ctime of /dev/kvm has changed.
> >>>
> >>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
> >>> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
> >>> ---
> >>>  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
> >>>  1 file changed, 52 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
> >>> index e228f52ec0bb..85516954149b 100644
> >>> --- a/src/qemu/qemu_capabilities.c
> >>> +++ b/src/qemu/qemu_capabilities.c
> >>> @@ -3238,6 +3238,10 @@ struct _virQEMUCapsCachePriv {
> >>>      virArch hostArch;
> >>>      unsigned int microcodeVersion;
> >>>      char *kernelVersion;
> >>> +
> >>> +    /* cache whether /dev/kvm is usable as runUid:runGuid */
> >>> +    virTristateBool kvmUsable;
> >>> +    time_t kvmCtime;
> >>>  };
> >>>  typedef struct _virQEMUCapsCachePriv virQEMUCapsCachePriv;
> >>>  typedef virQEMUCapsCachePriv *virQEMUCapsCachePrivPtr;
> >>> @@ -3824,6 +3828,52 @@ virQEMUCapsSaveFile(void *data,
> >>>  }
> >>>  
> >>>  
> >>> +/* Determine whether '/dev/kvm' is usable as QEMU user:QEMU group. */
> >>> +static bool
> >>> +virQEMUCapsKVMUsable(virQEMUCapsCachePrivPtr priv)
> >>> +{
> >>> +    struct stat sb;
> >>> +    static const char *kvm_device = "/dev/kvm";
> >>> +    virTristateBool value;
> >>> +    virTristateBool cached_value = priv->kvmUsable;
> >>> +    time_t kvm_ctime;
> >>> +    time_t cached_kvm_ctime = priv->kvmCtime;
> >>> +
> >>> +    if (stat(kvm_device, &sb) < 0) {
> >>> +        virReportSystemError(errno,
> >>> +                             _("Failed to stat %s"), kvm_device);
> >>> +        return false;
> >>> +    }
> >>> +    kvm_ctime = sb.st_ctime;
> >>
> >> This doesn't feel right. /dev/kvm ctime is changed every time qemu is
> >> started or powered off (try running stat over it before and after a
> >> domain is started/shut off). So effectively we will fork more often than
> >> we would think. Should we cache inode number instead? Because for all
> >> that we care is simply if the file is there.
> > 
> > Urgh, that is a bit strange and not the usual semantics for timestamps :-(
> 
> Indeed.
> 
> > 
> > We can't stat the inode - the code was explicitly trying to cope with the
> > way /dev/kvm can change permissions when udev rules get applied. We would
> > have to compare the user, group, permissions mask and even ACL, or a hash
> > of those.
> 
> Well, we can use ctime as suggested and post a patch for kernel to fix
> ctime behaviour. Until the patch is merged our behaviour would be
> suboptimal, but still better than it is now.

I guess lets talk to KVM team for their input on this and then decide
what todo.


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Daniel P. Berrangé 5 years, 5 months ago
On Tue, Oct 30, 2018 at 10:32:08AM +0000, Daniel P. Berrangé wrote:
> On Tue, Oct 30, 2018 at 11:08:45AM +0100, Michal Privoznik wrote:
> > On 10/30/2018 10:35 AM, Daniel P. Berrangé wrote:
> > > On Tue, Oct 30, 2018 at 09:13:50AM +0100, Michal Privoznik wrote:
> > >> On 10/29/2018 06:34 PM, Marc Hartmayer wrote:
> > >>> Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
> > >>> group. This reduces the overhead of the QEMU capabilities cache
> > >>> lookup. Before this patch there were many fork() calls used for
> > >>> checking whether /dev/kvm is accessible. Now we store the result
> > >>> whether /dev/kvm is accessible or not and we only need to re-run the
> > >>> virFileAccessibleAs check if the ctime of /dev/kvm has changed.
> > >>>
> > >>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
> > >>> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
> > >>> ---
> > >>>  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
> > >>>  1 file changed, 52 insertions(+), 2 deletions(-)
> > >>>
> > >>> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
> > >>> index e228f52ec0bb..85516954149b 100644
> > >>> --- a/src/qemu/qemu_capabilities.c
> > >>> +++ b/src/qemu/qemu_capabilities.c
> > >>> @@ -3238,6 +3238,10 @@ struct _virQEMUCapsCachePriv {
> > >>>      virArch hostArch;
> > >>>      unsigned int microcodeVersion;
> > >>>      char *kernelVersion;
> > >>> +
> > >>> +    /* cache whether /dev/kvm is usable as runUid:runGuid */
> > >>> +    virTristateBool kvmUsable;
> > >>> +    time_t kvmCtime;
> > >>>  };
> > >>>  typedef struct _virQEMUCapsCachePriv virQEMUCapsCachePriv;
> > >>>  typedef virQEMUCapsCachePriv *virQEMUCapsCachePrivPtr;
> > >>> @@ -3824,6 +3828,52 @@ virQEMUCapsSaveFile(void *data,
> > >>>  }
> > >>>  
> > >>>  
> > >>> +/* Determine whether '/dev/kvm' is usable as QEMU user:QEMU group. */
> > >>> +static bool
> > >>> +virQEMUCapsKVMUsable(virQEMUCapsCachePrivPtr priv)
> > >>> +{
> > >>> +    struct stat sb;
> > >>> +    static const char *kvm_device = "/dev/kvm";
> > >>> +    virTristateBool value;
> > >>> +    virTristateBool cached_value = priv->kvmUsable;
> > >>> +    time_t kvm_ctime;
> > >>> +    time_t cached_kvm_ctime = priv->kvmCtime;
> > >>> +
> > >>> +    if (stat(kvm_device, &sb) < 0) {
> > >>> +        virReportSystemError(errno,
> > >>> +                             _("Failed to stat %s"), kvm_device);
> > >>> +        return false;
> > >>> +    }
> > >>> +    kvm_ctime = sb.st_ctime;
> > >>
> > >> This doesn't feel right. /dev/kvm ctime is changed every time qemu is
> > >> started or powered off (try running stat over it before and after a
> > >> domain is started/shut off). So effectively we will fork more often than
> > >> we would think. Should we cache inode number instead? Because for all
> > >> that we care is simply if the file is there.
> > > 
> > > Urgh, that is a bit strange and not the usual semantics for timestamps :-(
> > 
> > Indeed.
> > 
> > > 
> > > We can't stat the inode - the code was explicitly trying to cope with the
> > > way /dev/kvm can change permissions when udev rules get applied. We would
> > > have to compare the user, group, permissions mask and even ACL, or a hash
> > > of those.
> > 
> > Well, we can use ctime as suggested and post a patch for kernel to fix
> > ctime behaviour. Until the patch is merged our behaviour would be
> > suboptimal, but still better than it is now.
> 
> I guess lets talk to KVM team for their input on this and then decide
> what todo.

Hmm, I wonder if it is not actually a kernel problem, but rather something
in userspace genuinely touching the device in a way that caues these
timestamps to be updated.

eg I vaguely recall a udev rule that resets permissions on device nodes
whenever an FD is closed, which might cause this kind of behaviour

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Michal Privoznik 5 years, 5 months ago
On 10/30/2018 01:55 PM, Daniel P. Berrangé wrote:
> On Tue, Oct 30, 2018 at 10:32:08AM +0000, Daniel P. Berrangé wrote:
>> On Tue, Oct 30, 2018 at 11:08:45AM +0100, Michal Privoznik wrote:
>>> On 10/30/2018 10:35 AM, Daniel P. Berrangé wrote:
>>>> On Tue, Oct 30, 2018 at 09:13:50AM +0100, Michal Privoznik wrote:
>>>>> On 10/29/2018 06:34 PM, Marc Hartmayer wrote:
>>>>>> Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
>>>>>> group. This reduces the overhead of the QEMU capabilities cache
>>>>>> lookup. Before this patch there were many fork() calls used for
>>>>>> checking whether /dev/kvm is accessible. Now we store the result
>>>>>> whether /dev/kvm is accessible or not and we only need to re-run the
>>>>>> virFileAccessibleAs check if the ctime of /dev/kvm has changed.
>>>>>>
>>>>>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
>>>>>> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
>>>>>> ---
>>>>>>  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
>>>>>>  1 file changed, 52 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
>>>>>> index e228f52ec0bb..85516954149b 100644
>>>>>> --- a/src/qemu/qemu_capabilities.c
>>>>>> +++ b/src/qemu/qemu_capabilities.c
>>>>>> @@ -3238,6 +3238,10 @@ struct _virQEMUCapsCachePriv {
>>>>>>      virArch hostArch;
>>>>>>      unsigned int microcodeVersion;
>>>>>>      char *kernelVersion;
>>>>>> +
>>>>>> +    /* cache whether /dev/kvm is usable as runUid:runGuid */
>>>>>> +    virTristateBool kvmUsable;
>>>>>> +    time_t kvmCtime;
>>>>>>  };
>>>>>>  typedef struct _virQEMUCapsCachePriv virQEMUCapsCachePriv;
>>>>>>  typedef virQEMUCapsCachePriv *virQEMUCapsCachePrivPtr;
>>>>>> @@ -3824,6 +3828,52 @@ virQEMUCapsSaveFile(void *data,
>>>>>>  }
>>>>>>  
>>>>>>  
>>>>>> +/* Determine whether '/dev/kvm' is usable as QEMU user:QEMU group. */
>>>>>> +static bool
>>>>>> +virQEMUCapsKVMUsable(virQEMUCapsCachePrivPtr priv)
>>>>>> +{
>>>>>> +    struct stat sb;
>>>>>> +    static const char *kvm_device = "/dev/kvm";
>>>>>> +    virTristateBool value;
>>>>>> +    virTristateBool cached_value = priv->kvmUsable;
>>>>>> +    time_t kvm_ctime;
>>>>>> +    time_t cached_kvm_ctime = priv->kvmCtime;
>>>>>> +
>>>>>> +    if (stat(kvm_device, &sb) < 0) {
>>>>>> +        virReportSystemError(errno,
>>>>>> +                             _("Failed to stat %s"), kvm_device);
>>>>>> +        return false;
>>>>>> +    }
>>>>>> +    kvm_ctime = sb.st_ctime;
>>>>>
>>>>> This doesn't feel right. /dev/kvm ctime is changed every time qemu is
>>>>> started or powered off (try running stat over it before and after a
>>>>> domain is started/shut off). So effectively we will fork more often than
>>>>> we would think. Should we cache inode number instead? Because for all
>>>>> that we care is simply if the file is there.
>>>>
>>>> Urgh, that is a bit strange and not the usual semantics for timestamps :-(
>>>
>>> Indeed.
>>>
>>>>
>>>> We can't stat the inode - the code was explicitly trying to cope with the
>>>> way /dev/kvm can change permissions when udev rules get applied. We would
>>>> have to compare the user, group, permissions mask and even ACL, or a hash
>>>> of those.
>>>
>>> Well, we can use ctime as suggested and post a patch for kernel to fix
>>> ctime behaviour. Until the patch is merged our behaviour would be
>>> suboptimal, but still better than it is now.
>>
>> I guess lets talk to KVM team for their input on this and then decide
>> what todo.
> 
> Hmm, I wonder if it is not actually a kernel problem, but rather something
> in userspace genuinely touching the device in a way that caues these
> timestamps to be updated.
> 

It is kernel problem. In my testing, the moment I call:

 ioctl(kvm, KVM_CREATE_VM, 0);

all three times (atime, mtime, ctime) are changed. However, prior to
that I'm calling some 'query' ioctls (KVM_GET_API_VERSION,
KVM_CHECK_EXTENSION) but none of the Xtime is changed.


> eg I vaguely recall a udev rule that resets permissions on device nodes
> whenever an FD is closed, which might cause this kind of behaviour

Huh, I did not know that udev can hook on close(2).

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Michal Privoznik 5 years, 5 months ago
On 10/30/2018 02:46 PM, Michal Privoznik wrote:
> On 10/30/2018 01:55 PM, Daniel P. Berrangé wrote:
>> On Tue, Oct 30, 2018 at 10:32:08AM +0000, Daniel P. Berrangé wrote:
>>> On Tue, Oct 30, 2018 at 11:08:45AM +0100, Michal Privoznik wrote:
>>>> On 10/30/2018 10:35 AM, Daniel P. Berrangé wrote:
>>>>> On Tue, Oct 30, 2018 at 09:13:50AM +0100, Michal Privoznik wrote:
>>>>>> On 10/29/2018 06:34 PM, Marc Hartmayer wrote:
>>>>>>> Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
>>>>>>> group. This reduces the overhead of the QEMU capabilities cache
>>>>>>> lookup. Before this patch there were many fork() calls used for
>>>>>>> checking whether /dev/kvm is accessible. Now we store the result
>>>>>>> whether /dev/kvm is accessible or not and we only need to re-run the
>>>>>>> virFileAccessibleAs check if the ctime of /dev/kvm has changed.
>>>>>>>
>>>>>>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
>>>>>>> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
>>>>>>> ---
>>>>>>>  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
>>>>>>>  1 file changed, 52 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
>>>>>>> index e228f52ec0bb..85516954149b 100644
>>>>>>> --- a/src/qemu/qemu_capabilities.c
>>>>>>> +++ b/src/qemu/qemu_capabilities.c
>>>>>>> @@ -3238,6 +3238,10 @@ struct _virQEMUCapsCachePriv {
>>>>>>>      virArch hostArch;
>>>>>>>      unsigned int microcodeVersion;
>>>>>>>      char *kernelVersion;
>>>>>>> +
>>>>>>> +    /* cache whether /dev/kvm is usable as runUid:runGuid */
>>>>>>> +    virTristateBool kvmUsable;
>>>>>>> +    time_t kvmCtime;
>>>>>>>  };
>>>>>>>  typedef struct _virQEMUCapsCachePriv virQEMUCapsCachePriv;
>>>>>>>  typedef virQEMUCapsCachePriv *virQEMUCapsCachePrivPtr;
>>>>>>> @@ -3824,6 +3828,52 @@ virQEMUCapsSaveFile(void *data,
>>>>>>>  }
>>>>>>>  
>>>>>>>  
>>>>>>> +/* Determine whether '/dev/kvm' is usable as QEMU user:QEMU group. */
>>>>>>> +static bool
>>>>>>> +virQEMUCapsKVMUsable(virQEMUCapsCachePrivPtr priv)
>>>>>>> +{
>>>>>>> +    struct stat sb;
>>>>>>> +    static const char *kvm_device = "/dev/kvm";
>>>>>>> +    virTristateBool value;
>>>>>>> +    virTristateBool cached_value = priv->kvmUsable;
>>>>>>> +    time_t kvm_ctime;
>>>>>>> +    time_t cached_kvm_ctime = priv->kvmCtime;
>>>>>>> +
>>>>>>> +    if (stat(kvm_device, &sb) < 0) {
>>>>>>> +        virReportSystemError(errno,
>>>>>>> +                             _("Failed to stat %s"), kvm_device);
>>>>>>> +        return false;
>>>>>>> +    }
>>>>>>> +    kvm_ctime = sb.st_ctime;
>>>>>>
>>>>>> This doesn't feel right. /dev/kvm ctime is changed every time qemu is
>>>>>> started or powered off (try running stat over it before and after a
>>>>>> domain is started/shut off). So effectively we will fork more often than
>>>>>> we would think. Should we cache inode number instead? Because for all
>>>>>> that we care is simply if the file is there.
>>>>>
>>>>> Urgh, that is a bit strange and not the usual semantics for timestamps :-(
>>>>
>>>> Indeed.
>>>>
>>>>>
>>>>> We can't stat the inode - the code was explicitly trying to cope with the
>>>>> way /dev/kvm can change permissions when udev rules get applied. We would
>>>>> have to compare the user, group, permissions mask and even ACL, or a hash
>>>>> of those.
>>>>
>>>> Well, we can use ctime as suggested and post a patch for kernel to fix
>>>> ctime behaviour. Until the patch is merged our behaviour would be
>>>> suboptimal, but still better than it is now.
>>>
>>> I guess lets talk to KVM team for their input on this and then decide
>>> what todo.
>>
>> Hmm, I wonder if it is not actually a kernel problem, but rather something
>> in userspace genuinely touching the device in a way that caues these
>> timestamps to be updated.
>>
> 
> It is kernel problem. In my testing, the moment I call:
> 
>  ioctl(kvm, KVM_CREATE_VM, 0);

Okay, I have to retract this claim. 'udevadm monitor' shows some events:

KERNEL[3631.129645] change   /devices/virtual/misc/kvm (misc)
UDEV  [3631.130816] change   /devices/virtual/misc/kvm (misc)

and stopping udevd leaves all three times untouched. So it is udev after
all. I just don't know how to find the rule that is causing the issue.
Anyway, as for this patch, I think we can merge it in the end, can't we?

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Daniel P. Berrangé 5 years, 5 months ago
On Tue, Oct 30, 2018 at 03:45:36PM +0100, Michal Privoznik wrote:
> On 10/30/2018 02:46 PM, Michal Privoznik wrote:
> > On 10/30/2018 01:55 PM, Daniel P. Berrangé wrote:
> >> On Tue, Oct 30, 2018 at 10:32:08AM +0000, Daniel P. Berrangé wrote:
> >>> On Tue, Oct 30, 2018 at 11:08:45AM +0100, Michal Privoznik wrote:
> >>>> On 10/30/2018 10:35 AM, Daniel P. Berrangé wrote:
> >>>>> On Tue, Oct 30, 2018 at 09:13:50AM +0100, Michal Privoznik wrote:
> >>>>>> On 10/29/2018 06:34 PM, Marc Hartmayer wrote:
> >>>>>>> Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
> >>>>>>> group. This reduces the overhead of the QEMU capabilities cache
> >>>>>>> lookup. Before this patch there were many fork() calls used for
> >>>>>>> checking whether /dev/kvm is accessible. Now we store the result
> >>>>>>> whether /dev/kvm is accessible or not and we only need to re-run the
> >>>>>>> virFileAccessibleAs check if the ctime of /dev/kvm has changed.
> >>>>>>>
> >>>>>>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
> >>>>>>> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
> >>>>>>> ---
> >>>>>>>  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
> >>>>>>>  1 file changed, 52 insertions(+), 2 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
> >>>>>>> index e228f52ec0bb..85516954149b 100644
> >>>>>>> --- a/src/qemu/qemu_capabilities.c
> >>>>>>> +++ b/src/qemu/qemu_capabilities.c
> >>>>>>> @@ -3238,6 +3238,10 @@ struct _virQEMUCapsCachePriv {
> >>>>>>>      virArch hostArch;
> >>>>>>>      unsigned int microcodeVersion;
> >>>>>>>      char *kernelVersion;
> >>>>>>> +
> >>>>>>> +    /* cache whether /dev/kvm is usable as runUid:runGuid */
> >>>>>>> +    virTristateBool kvmUsable;
> >>>>>>> +    time_t kvmCtime;
> >>>>>>>  };
> >>>>>>>  typedef struct _virQEMUCapsCachePriv virQEMUCapsCachePriv;
> >>>>>>>  typedef virQEMUCapsCachePriv *virQEMUCapsCachePrivPtr;
> >>>>>>> @@ -3824,6 +3828,52 @@ virQEMUCapsSaveFile(void *data,
> >>>>>>>  }
> >>>>>>>  
> >>>>>>>  
> >>>>>>> +/* Determine whether '/dev/kvm' is usable as QEMU user:QEMU group. */
> >>>>>>> +static bool
> >>>>>>> +virQEMUCapsKVMUsable(virQEMUCapsCachePrivPtr priv)
> >>>>>>> +{
> >>>>>>> +    struct stat sb;
> >>>>>>> +    static const char *kvm_device = "/dev/kvm";
> >>>>>>> +    virTristateBool value;
> >>>>>>> +    virTristateBool cached_value = priv->kvmUsable;
> >>>>>>> +    time_t kvm_ctime;
> >>>>>>> +    time_t cached_kvm_ctime = priv->kvmCtime;
> >>>>>>> +
> >>>>>>> +    if (stat(kvm_device, &sb) < 0) {
> >>>>>>> +        virReportSystemError(errno,
> >>>>>>> +                             _("Failed to stat %s"), kvm_device);
> >>>>>>> +        return false;
> >>>>>>> +    }
> >>>>>>> +    kvm_ctime = sb.st_ctime;
> >>>>>>
> >>>>>> This doesn't feel right. /dev/kvm ctime is changed every time qemu is
> >>>>>> started or powered off (try running stat over it before and after a
> >>>>>> domain is started/shut off). So effectively we will fork more often than
> >>>>>> we would think. Should we cache inode number instead? Because for all
> >>>>>> that we care is simply if the file is there.
> >>>>>
> >>>>> Urgh, that is a bit strange and not the usual semantics for timestamps :-(
> >>>>
> >>>> Indeed.
> >>>>
> >>>>>
> >>>>> We can't stat the inode - the code was explicitly trying to cope with the
> >>>>> way /dev/kvm can change permissions when udev rules get applied. We would
> >>>>> have to compare the user, group, permissions mask and even ACL, or a hash
> >>>>> of those.
> >>>>
> >>>> Well, we can use ctime as suggested and post a patch for kernel to fix
> >>>> ctime behaviour. Until the patch is merged our behaviour would be
> >>>> suboptimal, but still better than it is now.
> >>>
> >>> I guess lets talk to KVM team for their input on this and then decide
> >>> what todo.
> >>
> >> Hmm, I wonder if it is not actually a kernel problem, but rather something
> >> in userspace genuinely touching the device in a way that caues these
> >> timestamps to be updated.
> >>
> > 
> > It is kernel problem. In my testing, the moment I call:
> > 
> >  ioctl(kvm, KVM_CREATE_VM, 0);
> 
> Okay, I have to retract this claim. 'udevadm monitor' shows some events:
> 
> KERNEL[3631.129645] change   /devices/virtual/misc/kvm (misc)
> UDEV  [3631.130816] change   /devices/virtual/misc/kvm (misc)
> 
> and stopping udevd leaves all three times untouched. So it is udev after
> all. I just don't know how to find the rule that is causing the issue.
> Anyway, as for this patch, I think we can merge it in the end, can't we?

Not really. Udev is in use everywhere, so this behaviour makes the
patch useless in practice, even though it is technically right in
theory :-(

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Michal Privoznik 5 years, 5 months ago
On 10/30/2018 04:07 PM, Daniel P. Berrangé wrote:
> On Tue, Oct 30, 2018 at 03:45:36PM +0100, Michal Privoznik wrote:
>> On 10/30/2018 02:46 PM, Michal Privoznik wrote:
>>> On 10/30/2018 01:55 PM, Daniel P. Berrangé wrote:
>>>> On Tue, Oct 30, 2018 at 10:32:08AM +0000, Daniel P. Berrangé wrote:
>>>>> On Tue, Oct 30, 2018 at 11:08:45AM +0100, Michal Privoznik wrote:
>>>>>> On 10/30/2018 10:35 AM, Daniel P. Berrangé wrote:
>>>>>>> On Tue, Oct 30, 2018 at 09:13:50AM +0100, Michal Privoznik wrote:
>>>>>>>> On 10/29/2018 06:34 PM, Marc Hartmayer wrote:
>>>>>>>>> Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
>>>>>>>>> group. This reduces the overhead of the QEMU capabilities cache
>>>>>>>>> lookup. Before this patch there were many fork() calls used for
>>>>>>>>> checking whether /dev/kvm is accessible. Now we store the result
>>>>>>>>> whether /dev/kvm is accessible or not and we only need to re-run the
>>>>>>>>> virFileAccessibleAs check if the ctime of /dev/kvm has changed.
>>>>>>>>>
>>>>>>>>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
>>>>>>>>> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
>>>>>>>>> ---
>>>>>>>>>  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
>>>>>>>>>  1 file changed, 52 insertions(+), 2 deletions(-)
>>>>>>>>>


> Not really. Udev is in use everywhere, so this behaviour makes the
> patch useless in practice, even though it is technically right in
> theory :-(
> 

Well, caching owner + seclabels + ACLs won't help either. What if user
loads some profile into AppArmor or something that denies previously
allowed access to /dev/kvm (or vice versa)? What I am saying is that
there are some security models which base their decisions on something
else than file attributes.

Honestly, I don't have solution.

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Daniel P. Berrangé 5 years, 5 months ago
On Tue, Oct 30, 2018 at 04:47:32PM +0100, Michal Privoznik wrote:
> On 10/30/2018 04:07 PM, Daniel P. Berrangé wrote:
> > On Tue, Oct 30, 2018 at 03:45:36PM +0100, Michal Privoznik wrote:
> >> On 10/30/2018 02:46 PM, Michal Privoznik wrote:
> >>> On 10/30/2018 01:55 PM, Daniel P. Berrangé wrote:
> >>>> On Tue, Oct 30, 2018 at 10:32:08AM +0000, Daniel P. Berrangé wrote:
> >>>>> On Tue, Oct 30, 2018 at 11:08:45AM +0100, Michal Privoznik wrote:
> >>>>>> On 10/30/2018 10:35 AM, Daniel P. Berrangé wrote:
> >>>>>>> On Tue, Oct 30, 2018 at 09:13:50AM +0100, Michal Privoznik wrote:
> >>>>>>>> On 10/29/2018 06:34 PM, Marc Hartmayer wrote:
> >>>>>>>>> Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
> >>>>>>>>> group. This reduces the overhead of the QEMU capabilities cache
> >>>>>>>>> lookup. Before this patch there were many fork() calls used for
> >>>>>>>>> checking whether /dev/kvm is accessible. Now we store the result
> >>>>>>>>> whether /dev/kvm is accessible or not and we only need to re-run the
> >>>>>>>>> virFileAccessibleAs check if the ctime of /dev/kvm has changed.
> >>>>>>>>>
> >>>>>>>>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
> >>>>>>>>> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
> >>>>>>>>> ---
> >>>>>>>>>  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
> >>>>>>>>>  1 file changed, 52 insertions(+), 2 deletions(-)
> >>>>>>>>>
> 
> 
> > Not really. Udev is in use everywhere, so this behaviour makes the
> > patch useless in practice, even though it is technically right in
> > theory :-(
> > 
> 
> Well, caching owner + seclabels + ACLs won't help either. What if user
> loads some profile into AppArmor or something that denies previously
> allowed access to /dev/kvm (or vice versa)? What I am saying is that
> there are some security models which base their decisions on something
> else than file attributes.

The virFileAccessibleAs check for /dev/kvm was put in their to ensure
that we correctly report usability of KVM in the capabilities XML
according to file permissions/ownership. Essentially KVM is not usable
until the udev rule is applied to change permissions to world accessible
or to set the kvm group.

I don't think we need to care aout selinux/apparmor restrictions - just
need to be no worse than what we cope with today, which is just perms
and owner/group.

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Ján Tomko 5 years, 5 months ago
On Tue, Oct 30, 2018 at 03:53:59PM +0000, Daniel P. Berrangé wrote:
>On Tue, Oct 30, 2018 at 04:47:32PM +0100, Michal Privoznik wrote:
>> Well, caching owner + seclabels + ACLs won't help either. What if user
>> loads some profile into AppArmor or something that denies previously
>> allowed access to /dev/kvm (or vice versa)? What I am saying is that
>> there are some security models which base their decisions on something
>> else than file attributes.
>
>The virFileAccessibleAs check for /dev/kvm was put in their to ensure
>that we correctly report usability of KVM in the capabilities XML
>according to file permissions/ownership. Essentially KVM is not usable
>until the udev rule is applied to change permissions to world accessible
>or to set the kvm group.
>

Can libvirt be run sooner than the permissions are set up during regular
start-up, or this is just for the case of the user randomly touching the
permissions?

IIRC the problem was that users with vmx disabled in BIOS setup needed
to delete libvirt's qemuCaps cache manually after enabling it even after
restarting the system.
To catch that, all we'd have to do is run the check once per daemon
lifetime,

>I don't think we need to care aout selinux/apparmor restrictions - just
>need to be no worse than what we cope with today, which is just perms
>and owner/group.

but that could be considered worse according to this metric.

Jano
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Michal Privoznik 5 years, 5 months ago
On 11/01/2018 05:11 AM, Ján Tomko wrote:
> On Tue, Oct 30, 2018 at 03:53:59PM +0000, Daniel P. Berrangé wrote:
>> On Tue, Oct 30, 2018 at 04:47:32PM +0100, Michal Privoznik wrote:
>>> Well, caching owner + seclabels + ACLs won't help either. What if user
>>> loads some profile into AppArmor or something that denies previously
>>> allowed access to /dev/kvm (or vice versa)? What I am saying is that
>>> there are some security models which base their decisions on something
>>> else than file attributes.
>>
>> The virFileAccessibleAs check for /dev/kvm was put in their to ensure
>> that we correctly report usability of KVM in the capabilities XML
>> according to file permissions/ownership. Essentially KVM is not usable
>> until the udev rule is applied to change permissions to world accessible
>> or to set the kvm group.
>>
> 
> Can libvirt be run sooner than the permissions are set up during regular
> start-up, or this is just for the case of the user randomly touching the
> permissions?

Sure it can. And it is. For instance, I have ConsoleKit installed on my
system and when I log in as a regular user into my system, it adds an
ACL entry to /dev/kvm to allow access to the user. Libvirtd is already
running at that point (well, except not really because I don't run it as
a service since I'm running it from git all the time. But you get the
picture).

> 
> IIRC the problem was that users with vmx disabled in BIOS setup needed
> to delete libvirt's qemuCaps cache manually after enabling it even after
> restarting the system.

Sounds like it. The commit that introduced the check is d87df9bd39b.

Honestly, I don't think we need to care about perms - we can assume
those are set properly (as they don't change often). What we have to
care about is inserting/removing kvm module (even though it's not
necessary these days since virtualbox has learned how to co-exist with
KVM).

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Daniel P. Berrangé 5 years, 5 months ago
On Thu, Nov 01, 2018 at 09:07:05AM +0100, Michal Privoznik wrote:
> On 11/01/2018 05:11 AM, Ján Tomko wrote:
> > IIRC the problem was that users with vmx disabled in BIOS setup needed
> > to delete libvirt's qemuCaps cache manually after enabling it even after
> > restarting the system.
> 
> Sounds like it. The commit that introduced the check is d87df9bd39b.
> 
> Honestly, I don't think we need to care about perms - we can assume
> those are set properly (as they don't change often). What we have to

We can't assume permissions are correct. One of the core issues we hit
was that the udev rule which sets the permissions has been in the QEMU
RPM historically. So you can have the kernel module loaded automatically
by systemd when it detects VMX, but its permissions will be 0600 until
the qemu-kvm RPM is installed and udev is triggered to fix the permissions
to 0666.

> care about is inserting/removing kvm module (even though it's not
> necessary these days since virtualbox has learned how to co-exist with
> KVM).

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Martin Kletzander 5 years, 5 months ago
On Tue, Oct 30, 2018 at 03:07:59PM +0000, Daniel P. Berrangé wrote:
>On Tue, Oct 30, 2018 at 03:45:36PM +0100, Michal Privoznik wrote:
>> On 10/30/2018 02:46 PM, Michal Privoznik wrote:
>> > On 10/30/2018 01:55 PM, Daniel P. Berrangé wrote:
>> >> On Tue, Oct 30, 2018 at 10:32:08AM +0000, Daniel P. Berrangé wrote:
>> >>> On Tue, Oct 30, 2018 at 11:08:45AM +0100, Michal Privoznik wrote:
>> >>>> On 10/30/2018 10:35 AM, Daniel P. Berrangé wrote:
>> >>>>> On Tue, Oct 30, 2018 at 09:13:50AM +0100, Michal Privoznik wrote:
>> >>>>>> On 10/29/2018 06:34 PM, Marc Hartmayer wrote:
>> >>>>>>> Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
>> >>>>>>> group. This reduces the overhead of the QEMU capabilities cache
>> >>>>>>> lookup. Before this patch there were many fork() calls used for
>> >>>>>>> checking whether /dev/kvm is accessible. Now we store the result
>> >>>>>>> whether /dev/kvm is accessible or not and we only need to re-run the
>> >>>>>>> virFileAccessibleAs check if the ctime of /dev/kvm has changed.
>> >>>>>>>
>> >>>>>>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
>> >>>>>>> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
>> >>>>>>> ---
>> >>>>>>>  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
>> >>>>>>>  1 file changed, 52 insertions(+), 2 deletions(-)
>> >>>>>>>
>> >>>>>>> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
>> >>>>>>> index e228f52ec0bb..85516954149b 100644
>> >>>>>>> --- a/src/qemu/qemu_capabilities.c
>> >>>>>>> +++ b/src/qemu/qemu_capabilities.c
>> >>>>>>> @@ -3238,6 +3238,10 @@ struct _virQEMUCapsCachePriv {
>> >>>>>>>      virArch hostArch;
>> >>>>>>>      unsigned int microcodeVersion;
>> >>>>>>>      char *kernelVersion;
>> >>>>>>> +
>> >>>>>>> +    /* cache whether /dev/kvm is usable as runUid:runGuid */
>> >>>>>>> +    virTristateBool kvmUsable;
>> >>>>>>> +    time_t kvmCtime;
>> >>>>>>>  };
>> >>>>>>>  typedef struct _virQEMUCapsCachePriv virQEMUCapsCachePriv;
>> >>>>>>>  typedef virQEMUCapsCachePriv *virQEMUCapsCachePrivPtr;
>> >>>>>>> @@ -3824,6 +3828,52 @@ virQEMUCapsSaveFile(void *data,
>> >>>>>>>  }
>> >>>>>>>
>> >>>>>>>
>> >>>>>>> +/* Determine whether '/dev/kvm' is usable as QEMU user:QEMU group. */
>> >>>>>>> +static bool
>> >>>>>>> +virQEMUCapsKVMUsable(virQEMUCapsCachePrivPtr priv)
>> >>>>>>> +{
>> >>>>>>> +    struct stat sb;
>> >>>>>>> +    static const char *kvm_device = "/dev/kvm";
>> >>>>>>> +    virTristateBool value;
>> >>>>>>> +    virTristateBool cached_value = priv->kvmUsable;
>> >>>>>>> +    time_t kvm_ctime;
>> >>>>>>> +    time_t cached_kvm_ctime = priv->kvmCtime;
>> >>>>>>> +
>> >>>>>>> +    if (stat(kvm_device, &sb) < 0) {
>> >>>>>>> +        virReportSystemError(errno,
>> >>>>>>> +                             _("Failed to stat %s"), kvm_device);
>> >>>>>>> +        return false;
>> >>>>>>> +    }
>> >>>>>>> +    kvm_ctime = sb.st_ctime;
>> >>>>>>
>> >>>>>> This doesn't feel right. /dev/kvm ctime is changed every time qemu is
>> >>>>>> started or powered off (try running stat over it before and after a
>> >>>>>> domain is started/shut off). So effectively we will fork more often than
>> >>>>>> we would think. Should we cache inode number instead? Because for all
>> >>>>>> that we care is simply if the file is there.
>> >>>>>
>> >>>>> Urgh, that is a bit strange and not the usual semantics for timestamps :-(
>> >>>>
>> >>>> Indeed.
>> >>>>
>> >>>>>
>> >>>>> We can't stat the inode - the code was explicitly trying to cope with the
>> >>>>> way /dev/kvm can change permissions when udev rules get applied. We would
>> >>>>> have to compare the user, group, permissions mask and even ACL, or a hash
>> >>>>> of those.
>> >>>>
>> >>>> Well, we can use ctime as suggested and post a patch for kernel to fix
>> >>>> ctime behaviour. Until the patch is merged our behaviour would be
>> >>>> suboptimal, but still better than it is now.
>> >>>
>> >>> I guess lets talk to KVM team for their input on this and then decide
>> >>> what todo.
>> >>
>> >> Hmm, I wonder if it is not actually a kernel problem, but rather something
>> >> in userspace genuinely touching the device in a way that caues these
>> >> timestamps to be updated.
>> >>
>> >
>> > It is kernel problem. In my testing, the moment I call:
>> >
>> >  ioctl(kvm, KVM_CREATE_VM, 0);
>>
>> Okay, I have to retract this claim. 'udevadm monitor' shows some events:
>>
>> KERNEL[3631.129645] change   /devices/virtual/misc/kvm (misc)
>> UDEV  [3631.130816] change   /devices/virtual/misc/kvm (misc)
>>
>> and stopping udevd leaves all three times untouched. So it is udev after
>> all. I just don't know how to find the rule that is causing the issue.
>> Anyway, as for this patch, I think we can merge it in the end, can't we?
>
>Not really. Udev is in use everywhere, so this behaviour makes the
>patch useless in practice, even though it is technically right in
>theory :-(
>

Does it?  With this behaviour we still do the "expensive" work after any machine
has started.  But for one machine starting it still has the effect of running it
only once.  And we *need* to run it for each machine unless we also cache the
result per (at least) user:group of that machine as every machine can run under
different user:group.

I'll go through the patch again (just skimmed it the first time), but I think
this actually still makes it better (although not perfect).  I wonder why the
udev rule does not fire only on change (why doesn't it say ACTION=="change").

>Regards,
>Daniel
>-- 
>|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
>|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
>|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
>
>--
>libvir-list mailing list
>libvir-list@redhat.com
>https://www.redhat.com/mailman/listinfo/libvir-list
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Martin Kletzander 5 years, 5 months ago
On Thu, Nov 01, 2018 at 09:31:50AM +0100, Martin Kletzander wrote:
>On Tue, Oct 30, 2018 at 03:07:59PM +0000, Daniel P. Berrangé wrote:
>>On Tue, Oct 30, 2018 at 03:45:36PM +0100, Michal Privoznik wrote:
>>> On 10/30/2018 02:46 PM, Michal Privoznik wrote:
>>> > On 10/30/2018 01:55 PM, Daniel P. Berrangé wrote:
>>> >> On Tue, Oct 30, 2018 at 10:32:08AM +0000, Daniel P. Berrangé wrote:
>>> >>> On Tue, Oct 30, 2018 at 11:08:45AM +0100, Michal Privoznik wrote:
>>> >>>> On 10/30/2018 10:35 AM, Daniel P. Berrangé wrote:
>>> >>>>> On Tue, Oct 30, 2018 at 09:13:50AM +0100, Michal Privoznik wrote:
>>> >>>>>> On 10/29/2018 06:34 PM, Marc Hartmayer wrote:
>>> >>>>>>> Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
>>> >>>>>>> group. This reduces the overhead of the QEMU capabilities cache
>>> >>>>>>> lookup. Before this patch there were many fork() calls used for
>>> >>>>>>> checking whether /dev/kvm is accessible. Now we store the result
>>> >>>>>>> whether /dev/kvm is accessible or not and we only need to re-run the
>>> >>>>>>> virFileAccessibleAs check if the ctime of /dev/kvm has changed.
>>> >>>>>>>
>>> >>>>>>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
>>> >>>>>>> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
>>> >>>>>>> ---
>>> >>>>>>>  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
>>> >>>>>>>  1 file changed, 52 insertions(+), 2 deletions(-)
>>> >>>>>>>
>>> >>>>>>> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
>>> >>>>>>> index e228f52ec0bb..85516954149b 100644
>>> >>>>>>> --- a/src/qemu/qemu_capabilities.c
>>> >>>>>>> +++ b/src/qemu/qemu_capabilities.c
>>> >>>>>>> @@ -3238,6 +3238,10 @@ struct _virQEMUCapsCachePriv {
>>> >>>>>>>      virArch hostArch;
>>> >>>>>>>      unsigned int microcodeVersion;
>>> >>>>>>>      char *kernelVersion;
>>> >>>>>>> +
>>> >>>>>>> +    /* cache whether /dev/kvm is usable as runUid:runGuid */
>>> >>>>>>> +    virTristateBool kvmUsable;
>>> >>>>>>> +    time_t kvmCtime;
>>> >>>>>>>  };
>>> >>>>>>>  typedef struct _virQEMUCapsCachePriv virQEMUCapsCachePriv;
>>> >>>>>>>  typedef virQEMUCapsCachePriv *virQEMUCapsCachePrivPtr;
>>> >>>>>>> @@ -3824,6 +3828,52 @@ virQEMUCapsSaveFile(void *data,
>>> >>>>>>>  }
>>> >>>>>>>
>>> >>>>>>>
>>> >>>>>>> +/* Determine whether '/dev/kvm' is usable as QEMU user:QEMU group. */
>>> >>>>>>> +static bool
>>> >>>>>>> +virQEMUCapsKVMUsable(virQEMUCapsCachePrivPtr priv)
>>> >>>>>>> +{
>>> >>>>>>> +    struct stat sb;
>>> >>>>>>> +    static const char *kvm_device = "/dev/kvm";
>>> >>>>>>> +    virTristateBool value;
>>> >>>>>>> +    virTristateBool cached_value = priv->kvmUsable;
>>> >>>>>>> +    time_t kvm_ctime;
>>> >>>>>>> +    time_t cached_kvm_ctime = priv->kvmCtime;
>>> >>>>>>> +
>>> >>>>>>> +    if (stat(kvm_device, &sb) < 0) {
>>> >>>>>>> +        virReportSystemError(errno,
>>> >>>>>>> +                             _("Failed to stat %s"), kvm_device);
>>> >>>>>>> +        return false;
>>> >>>>>>> +    }
>>> >>>>>>> +    kvm_ctime = sb.st_ctime;
>>> >>>>>>
>>> >>>>>> This doesn't feel right. /dev/kvm ctime is changed every time qemu is
>>> >>>>>> started or powered off (try running stat over it before and after a
>>> >>>>>> domain is started/shut off). So effectively we will fork more often than
>>> >>>>>> we would think. Should we cache inode number instead? Because for all
>>> >>>>>> that we care is simply if the file is there.
>>> >>>>>
>>> >>>>> Urgh, that is a bit strange and not the usual semantics for timestamps :-(
>>> >>>>
>>> >>>> Indeed.
>>> >>>>
>>> >>>>>
>>> >>>>> We can't stat the inode - the code was explicitly trying to cope with the
>>> >>>>> way /dev/kvm can change permissions when udev rules get applied. We would
>>> >>>>> have to compare the user, group, permissions mask and even ACL, or a hash
>>> >>>>> of those.
>>> >>>>
>>> >>>> Well, we can use ctime as suggested and post a patch for kernel to fix
>>> >>>> ctime behaviour. Until the patch is merged our behaviour would be
>>> >>>> suboptimal, but still better than it is now.
>>> >>>
>>> >>> I guess lets talk to KVM team for their input on this and then decide
>>> >>> what todo.
>>> >>
>>> >> Hmm, I wonder if it is not actually a kernel problem, but rather something
>>> >> in userspace genuinely touching the device in a way that caues these
>>> >> timestamps to be updated.
>>> >>
>>> >
>>> > It is kernel problem. In my testing, the moment I call:
>>> >
>>> >  ioctl(kvm, KVM_CREATE_VM, 0);
>>>
>>> Okay, I have to retract this claim. 'udevadm monitor' shows some events:
>>>
>>> KERNEL[3631.129645] change   /devices/virtual/misc/kvm (misc)
>>> UDEV  [3631.130816] change   /devices/virtual/misc/kvm (misc)
>>>
>>> and stopping udevd leaves all three times untouched. So it is udev after
>>> all. I just don't know how to find the rule that is causing the issue.
>>> Anyway, as for this patch, I think we can merge it in the end, can't we?
>>
>>Not really. Udev is in use everywhere, so this behaviour makes the
>>patch useless in practice, even though it is technically right in
>>theory :-(
>>
>
>Does it?  With this behaviour we still do the "expensive" work after any machine
>has started.  But for one machine starting it still has the effect of running it
>only once.  And we *need* to run it for each machine unless we also cache the
>result per (at least) user:group of that machine as every machine can run under
>different user:group.
>
>I'll go through the patch again (just skimmed it the first time), but I think
>this actually still makes it better (although not perfect).  I wonder why the
>udev rule does not fire only on change (why doesn't it say ACTION=="change").
>

Of course I meant to write ACTION="add".

>>Regards,
>>Daniel
>>--
>>|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
>>|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
>>|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
>>
>>--
>>libvir-list mailing list
>>libvir-list@redhat.com
>>https://www.redhat.com/mailman/listinfo/libvir-list



>--
>libvir-list mailing list
>libvir-list@redhat.com
>https://www.redhat.com/mailman/listinfo/libvir-list

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Daniel P. Berrangé 5 years, 5 months ago
On Thu, Nov 01, 2018 at 09:31:50AM +0100, Martin Kletzander wrote:
> On Tue, Oct 30, 2018 at 03:07:59PM +0000, Daniel P. Berrangé wrote:
> > On Tue, Oct 30, 2018 at 03:45:36PM +0100, Michal Privoznik wrote:
> > > On 10/30/2018 02:46 PM, Michal Privoznik wrote:
> > > >
> > > > It is kernel problem. In my testing, the moment I call:
> > > >
> > > >  ioctl(kvm, KVM_CREATE_VM, 0);
> > > 
> > > Okay, I have to retract this claim. 'udevadm monitor' shows some events:
> > > 
> > > KERNEL[3631.129645] change   /devices/virtual/misc/kvm (misc)
> > > UDEV  [3631.130816] change   /devices/virtual/misc/kvm (misc)
> > > 
> > > and stopping udevd leaves all three times untouched. So it is udev after
> > > all. I just don't know how to find the rule that is causing the issue.
> > > Anyway, as for this patch, I think we can merge it in the end, can't we?
> > 
> > Not really. Udev is in use everywhere, so this behaviour makes the
> > patch useless in practice, even though it is technically right in
> > theory :-(
> > 
> 
> Does it?  With this behaviour we still do the "expensive" work after any machine
> has started.  But for one machine starting it still has the effect of running it
> only once.  And we *need* to run it for each machine unless we also cache the
> result per (at least) user:group of that machine as every machine can run under
> different user:group.

Yes, with the current patch it still rechecks it for each VM startup, but
I was going to suggest the caching of this is simply put in a global static
variable as there's no reason to record this device permissions state in
the per VM caps cache.


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Michal Privoznik 5 years, 5 months ago
On 10/30/2018 01:55 PM, Daniel P. Berrangé wrote:
> On Tue, Oct 30, 2018 at 10:32:08AM +0000, Daniel P. Berrangé wrote:
>> On Tue, Oct 30, 2018 at 11:08:45AM +0100, Michal Privoznik wrote:
>>> On 10/30/2018 10:35 AM, Daniel P. Berrangé wrote:
>>>> On Tue, Oct 30, 2018 at 09:13:50AM +0100, Michal Privoznik wrote:
>>>>> On 10/29/2018 06:34 PM, Marc Hartmayer wrote:
>>>>>> Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
>>>>>> group. This reduces the overhead of the QEMU capabilities cache
>>>>>> lookup. Before this patch there were many fork() calls used for
>>>>>> checking whether /dev/kvm is accessible. Now we store the result
>>>>>> whether /dev/kvm is accessible or not and we only need to re-run the
>>>>>> virFileAccessibleAs check if the ctime of /dev/kvm has changed.
>>>>>>
>>>>>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
>>>>>> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
>>>>>> ---
>>>>>>  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
>>>>>>  1 file changed, 52 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
>>>>>> index e228f52ec0bb..85516954149b 100644
>>>>>> --- a/src/qemu/qemu_capabilities.c
>>>>>> +++ b/src/qemu/qemu_capabilities.c
>>>>>> @@ -3238,6 +3238,10 @@ struct _virQEMUCapsCachePriv {
>>>>>>      virArch hostArch;
>>>>>>      unsigned int microcodeVersion;
>>>>>>      char *kernelVersion;
>>>>>> +
>>>>>> +    /* cache whether /dev/kvm is usable as runUid:runGuid */
>>>>>> +    virTristateBool kvmUsable;
>>>>>> +    time_t kvmCtime;
>>>>>>  };
>>>>>>  typedef struct _virQEMUCapsCachePriv virQEMUCapsCachePriv;
>>>>>>  typedef virQEMUCapsCachePriv *virQEMUCapsCachePrivPtr;
>>>>>> @@ -3824,6 +3828,52 @@ virQEMUCapsSaveFile(void *data,
>>>>>>  }
>>>>>>  
>>>>>>  
>>>>>> +/* Determine whether '/dev/kvm' is usable as QEMU user:QEMU group. */
>>>>>> +static bool
>>>>>> +virQEMUCapsKVMUsable(virQEMUCapsCachePrivPtr priv)
>>>>>> +{
>>>>>> +    struct stat sb;
>>>>>> +    static const char *kvm_device = "/dev/kvm";
>>>>>> +    virTristateBool value;
>>>>>> +    virTristateBool cached_value = priv->kvmUsable;
>>>>>> +    time_t kvm_ctime;
>>>>>> +    time_t cached_kvm_ctime = priv->kvmCtime;
>>>>>> +
>>>>>> +    if (stat(kvm_device, &sb) < 0) {
>>>>>> +        virReportSystemError(errno,
>>>>>> +                             _("Failed to stat %s"), kvm_device);
>>>>>> +        return false;
>>>>>> +    }
>>>>>> +    kvm_ctime = sb.st_ctime;
>>>>>
>>>>> This doesn't feel right. /dev/kvm ctime is changed every time qemu is
>>>>> started or powered off (try running stat over it before and after a
>>>>> domain is started/shut off). So effectively we will fork more often than
>>>>> we would think. Should we cache inode number instead? Because for all
>>>>> that we care is simply if the file is there.
>>>>
>>>> Urgh, that is a bit strange and not the usual semantics for timestamps :-(
>>>
>>> Indeed.
>>>
>>>>
>>>> We can't stat the inode - the code was explicitly trying to cope with the
>>>> way /dev/kvm can change permissions when udev rules get applied. We would
>>>> have to compare the user, group, permissions mask and even ACL, or a hash
>>>> of those.
>>>
>>> Well, we can use ctime as suggested and post a patch for kernel to fix
>>> ctime behaviour. Until the patch is merged our behaviour would be
>>> suboptimal, but still better than it is now.
>>
>> I guess lets talk to KVM team for their input on this and then decide
>> what todo.
> 
> Hmm, I wonder if it is not actually a kernel problem, but rather something
> in userspace genuinely touching the device in a way that caues these
> timestamps to be updated.
> 
> eg I vaguely recall a udev rule that resets permissions on device nodes
> whenever an FD is closed, which might cause this kind of behaviour

After trying hard to find the rule that mangles the ctime I've found the
following bug in udev:

https://github.com/zippy2/systemd/commit/51915e4e6a64c581da321c25448d80626d8e8408?diff=unified

I've send that as a pull request to systemd. So after all, ctime might
be usable again.

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Martin Kletzander 5 years, 5 months ago
On Mon, Oct 29, 2018 at 06:34:58PM +0100, Marc Hartmayer wrote:
>Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
>group. This reduces the overhead of the QEMU capabilities cache
>lookup. Before this patch there were many fork() calls used for
>checking whether /dev/kvm is accessible. Now we store the result
>whether /dev/kvm is accessible or not and we only need to re-run the
>virFileAccessibleAs check if the ctime of /dev/kvm has changed.
>
>Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
>Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
>---
> src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
> 1 file changed, 52 insertions(+), 2 deletions(-)
>
>diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
>index e228f52ec0bb..85516954149b 100644
>--- a/src/qemu/qemu_capabilities.c
>+++ b/src/qemu/qemu_capabilities.c
>@@ -3238,6 +3238,10 @@ struct _virQEMUCapsCachePriv {
>     virArch hostArch;
>     unsigned int microcodeVersion;
>     char *kernelVersion;
>+
>+    /* cache whether /dev/kvm is usable as runUid:runGuid */

Even though this doesn't solve all of it (the ctime change with udev etc.) it
makes it better.  However this needs to be checked (or cached) per seclabel of
the domain as it can run under non-default user.

>+    virTristateBool kvmUsable;
>+    time_t kvmCtime;
> };
> typedef struct _virQEMUCapsCachePriv virQEMUCapsCachePriv;
> typedef virQEMUCapsCachePriv *virQEMUCapsCachePrivPtr;
>@@ -3824,6 +3828,52 @@ virQEMUCapsSaveFile(void *data,
> }
>
>
>+/* Determine whether '/dev/kvm' is usable as QEMU user:QEMU group. */
>+static bool
>+virQEMUCapsKVMUsable(virQEMUCapsCachePrivPtr priv)
>+{
>+    struct stat sb;
>+    static const char *kvm_device = "/dev/kvm";
>+    virTristateBool value;
>+    virTristateBool cached_value = priv->kvmUsable;
>+    time_t kvm_ctime;
>+    time_t cached_kvm_ctime = priv->kvmCtime;
>+
>+    if (stat(kvm_device, &sb) < 0) {
>+        virReportSystemError(errno,
>+                             _("Failed to stat %s"), kvm_device);
>+        return false;
>+    }
>+    kvm_ctime = sb.st_ctime;
>+
>+    if (kvm_ctime != cached_kvm_ctime) {
>+        VIR_DEBUG("%s has changed (%lld vs %lld)", kvm_device,
>+                  (long long)kvm_ctime, (long long)cached_kvm_ctime);
>+        cached_value = VIR_TRISTATE_BOOL_ABSENT;
>+    }
>+
>+    if (cached_value != VIR_TRISTATE_BOOL_ABSENT)
>+        return cached_value == VIR_TRISTATE_BOOL_YES;
>+
>+    if (virFileAccessibleAs(kvm_device, R_OK | W_OK,
>+                            priv->runUid, priv->runGid) == 0) {
>+        value = VIR_TRISTATE_BOOL_YES;
>+    } else {
>+        value = VIR_TRISTATE_BOOL_NO;
>+    }
>+
>+    /* There is a race window between 'stat' and
>+     * 'virFileAccessibleAs'. However, since we're only interested in
>+     * detecting changes *after* the virFileAccessibleAs check, we can
>+     * neglect this here.
>+     */
>+    priv->kvmCtime = kvm_ctime;
>+    priv->kvmUsable = value;
>+
>+    return value == VIR_TRISTATE_BOOL_YES;
>+}
>+
>+
> static bool
> virQEMUCapsIsValid(void *data,
>                    void *privData)
>@@ -3872,8 +3922,7 @@ virQEMUCapsIsValid(void *data,
>         return true;
>     }
>
>-    kvmUsable = virFileAccessibleAs("/dev/kvm", R_OK | W_OK,
>-                                    priv->runUid, priv->runGid) == 0;
>+    kvmUsable = virQEMUCapsKVMUsable(priv);
>
>     if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_KVM) &&
>         kvmUsable) {
>@@ -4684,6 +4733,7 @@ virQEMUCapsCacheNew(const char *libDir,
>     priv->runUid = runUid;
>     priv->runGid = runGid;
>     priv->microcodeVersion = microcodeVersion;
>+    priv->kvmUsable = VIR_TRISTATE_BOOL_ABSENT;
>
>     if (uname(&uts) == 0 &&
>         virAsprintf(&priv->kernelVersion, "%s %s", uts.release, uts.version) < 0)
>-- 
>2.17.0
>
>--
>libvir-list mailing list
>libvir-list@redhat.com
>https://www.redhat.com/mailman/listinfo/libvir-list
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Introduce caching whether /dev/kvm is accessible
Posted by Daniel P. Berrangé 5 years, 5 months ago
On Mon, Oct 29, 2018 at 06:34:58PM +0100, Marc Hartmayer wrote:
> Introduce caching whether /dev/kvm is usable as the QEMU user:QEMU
> group. This reduces the overhead of the QEMU capabilities cache
> lookup. Before this patch there were many fork() calls used for
> checking whether /dev/kvm is accessible. Now we store the result
> whether /dev/kvm is accessible or not and we only need to re-run the
> virFileAccessibleAs check if the ctime of /dev/kvm has changed.
> 
> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
> ---
>  src/qemu/qemu_capabilities.c | 54 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 52 insertions(+), 2 deletions(-)

For benefit of those who were not at KVM Forum last week, this patch
is addressing the performance problem described starting here:

   https://youtu.be/H3lw50IKqGo?t=1055


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list