From: Marc-André Lureau <marcandre.lureau@redhat.com>
Check anonymous memory is backed by memfd if qemu is capable.
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
tests/qemuxml2argvdata/memfd-memory-numa.args | 28 +++++++++++++++
tests/qemuxml2argvdata/memfd-memory-numa.xml | 36 +++++++++++++++++++
tests/qemuxml2argvtest.c | 5 +++
3 files changed, 69 insertions(+)
create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.args
create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.xml
diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.args b/tests/qemuxml2argvdata/memfd-memory-numa.args
new file mode 100644
index 0000000000..b26c476196
--- /dev/null
+++ b/tests/qemuxml2argvdata/memfd-memory-numa.args
@@ -0,0 +1,28 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/home/test \
+USER=test \
+LOGNAME=test \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-x86_64 \
+-name instance-00000092 \
+-S \
+-machine pc-i440fx-wily,accel=kvm,usb=off,dump-guest-core=off \
+-m 14336 \
+-mem-prealloc \
+-smp 20,sockets=1,cores=8,threads=1 \
+-object memory-backend-memfd,id=ram-node0,hugetlb=yes,hugetlbsize=2097152,share=yes,\
+size=15032385536,host-nodes=3,policy=preferred \
+-numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \
+-uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \
+-display none \
+-no-user-config \
+-nodefaults \
+-chardev socket,id=charmonitor,\
+path=/tmp/lib/domain--1-instance-00000092/monitor.sock,server,nowait \
+-mon chardev=charmonitor,id=monitor,mode=control \
+-rtc base=utc \
+-no-shutdown \
+-no-acpi \
+-usb \
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3
diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.xml b/tests/qemuxml2argvdata/memfd-memory-numa.xml
new file mode 100644
index 0000000000..abe93e8c4b
--- /dev/null
+++ b/tests/qemuxml2argvdata/memfd-memory-numa.xml
@@ -0,0 +1,36 @@
+ <domain type='kvm' id='56'>
+ <name>instance-00000092</name>
+ <uuid>126f2720-6f8e-45ab-a886-ec9277079a67</uuid>
+ <memory unit='KiB'>14680064</memory>
+ <currentMemory unit='KiB'>14680064</currentMemory>
+ <memoryBacking>
+ <hugepages>
+ <page size="2" unit="M"/>
+ </hugepages>
+ <source type='anonymous'/>
+ <access mode='shared'/>
+ <allocation mode='immediate'/>
+ </memoryBacking>
+ <numatune>
+ <memnode cellid='0' mode='preferred' nodeset='3'/>
+ </numatune>
+ <vcpu placement='static'>20</vcpu>
+ <os>
+ <type arch='x86_64' machine='pc-i440fx-wily'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <cpu>
+ <topology sockets='1' cores='8' threads='1'/>
+ <numa>
+ <cell id='0' cpus='0-7' memory='14680064' unit='KiB'/>
+ </numa>
+ </cpu>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-x86_64</emulator>
+ <memballoon model='virtio'/>
+ </devices>
+ </domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index 35df63b2ac..76008a8d07 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -2928,6 +2928,11 @@ mymain(void)
DO_TEST("fd-memory-no-numa-topology", QEMU_CAPS_OBJECT_MEMORY_FILE,
QEMU_CAPS_KVM);
+ DO_TEST("memfd-memory-numa",
+ QEMU_CAPS_OBJECT_MEMORY_MEMFD,
+ QEMU_CAPS_OBJECT_MEMORY_MEMFD_HUGETLB,
+ QEMU_CAPS_KVM);
+
DO_TEST("cpu-check-none", QEMU_CAPS_KVM);
DO_TEST("cpu-check-partial", QEMU_CAPS_KVM);
DO_TEST("cpu-check-full", QEMU_CAPS_KVM);
--
2.19.0.rc1
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
On 09/07/2018 07:32 AM, marcandre.lureau@redhat.com wrote:
> From: Marc-André Lureau <marcandre.lureau@redhat.com>
>
> Check anonymous memory is backed by memfd if qemu is capable.
>
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---
> tests/qemuxml2argvdata/memfd-memory-numa.args | 28 +++++++++++++++
> tests/qemuxml2argvdata/memfd-memory-numa.xml | 36 +++++++++++++++++++
> tests/qemuxml2argvtest.c | 5 +++
> 3 files changed, 69 insertions(+)
> create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.args
> create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.xml
>
> diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.args b/tests/qemuxml2argvdata/memfd-memory-numa.args
> new file mode 100644
> index 0000000000..b26c476196
> --- /dev/null
> +++ b/tests/qemuxml2argvdata/memfd-memory-numa.args
> @@ -0,0 +1,28 @@
> +LC_ALL=C \
> +PATH=/bin \
> +HOME=/home/test \
> +USER=test \
> +LOGNAME=test \
> +QEMU_AUDIO_DRV=none \
> +/usr/bin/qemu-system-x86_64 \
> +-name instance-00000092 \
> +-S \
> +-machine pc-i440fx-wily,accel=kvm,usb=off,dump-guest-core=off \
> +-m 14336 \
> +-mem-prealloc \
> +-smp 20,sockets=1,cores=8,threads=1 \
> +-object memory-backend-memfd,id=ram-node0,hugetlb=yes,hugetlbsize=2097152,share=yes,\
> +size=15032385536,host-nodes=3,policy=preferred \
Another syntax-check error here, needed to move the "share=yes," to the
subsequent line.
> +-numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \
> +-uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \
> +-display none \
> +-no-user-config \
> +-nodefaults \
> +-chardev socket,id=charmonitor,\
> +path=/tmp/lib/domain--1-instance-00000092/monitor.sock,server,nowait \
> +-mon chardev=charmonitor,id=monitor,mode=control \
> +-rtc base=utc \
> +-no-shutdown \
> +-no-acpi \
> +-usb \
> +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3
> diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.xml b/tests/qemuxml2argvdata/memfd-memory-numa.xml
> new file mode 100644
> index 0000000000..abe93e8c4b
> --- /dev/null
> +++ b/tests/qemuxml2argvdata/memfd-memory-numa.xml
> @@ -0,0 +1,36 @@
> + <domain type='kvm' id='56'>
> + <name>instance-00000092</name>
> + <uuid>126f2720-6f8e-45ab-a886-ec9277079a67</uuid>
> + <memory unit='KiB'>14680064</memory>
> + <currentMemory unit='KiB'>14680064</currentMemory>
> + <memoryBacking>
> + <hugepages>
> + <page size="2" unit="M"/>
> + </hugepages>
> + <source type='anonymous'/>
> + <access mode='shared'/>
> + <allocation mode='immediate'/>
> + </memoryBacking>
> + <numatune>
> + <memnode cellid='0' mode='preferred' nodeset='3'/>
> + </numatune>
> + <vcpu placement='static'>20</vcpu>
> + <os>
> + <type arch='x86_64' machine='pc-i440fx-wily'>hvm</type>
> + <boot dev='hd'/>
> + </os>
> + <cpu>
> + <topology sockets='1' cores='8' threads='1'/>
> + <numa>
> + <cell id='0' cpus='0-7' memory='14680064' unit='KiB'/>
> + </numa>
> + </cpu>
> + <clock offset='utc'/>
> + <on_poweroff>destroy</on_poweroff>
> + <on_reboot>restart</on_reboot>
> + <on_crash>destroy</on_crash>
> + <devices>
> + <emulator>/usr/bin/qemu-system-x86_64</emulator>
> + <memballoon model='virtio'/>
> + </devices>
> + </domain>
> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
> index 35df63b2ac..76008a8d07 100644
> --- a/tests/qemuxml2argvtest.c
> +++ b/tests/qemuxml2argvtest.c
> @@ -2928,6 +2928,11 @@ mymain(void)
> DO_TEST("fd-memory-no-numa-topology", QEMU_CAPS_OBJECT_MEMORY_FILE,
> QEMU_CAPS_KVM);
>
> + DO_TEST("memfd-memory-numa",
> + QEMU_CAPS_OBJECT_MEMORY_MEMFD,
> + QEMU_CAPS_OBJECT_MEMORY_MEMFD_HUGETLB,
> + QEMU_CAPS_KVM);
> +
Theoretically, if we have 3.1 capabilities to test against, then this
would use a DO_TEST_CAPS_LATEST, while a "pre-3.1" would still be using
-ramfd, right? That is, using DO_TEST_CAPS_VER w/ "3.0.0" would
generate different results.
I'm conflicted if we should wait for someone to generate the 3.1 caps or
not. For whatever reason, when I post them they're not quite right for
someone else's tastes...
Let's see if anyone else has strong feelings one way or another.
John
> DO_TEST("cpu-check-none", QEMU_CAPS_KVM);
> DO_TEST("cpu-check-partial", QEMU_CAPS_KVM);
> DO_TEST("cpu-check-full", QEMU_CAPS_KVM);
>
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Hi
On Tue, Sep 11, 2018 at 2:57 AM, John Ferlan <jferlan@redhat.com> wrote:
>
>
> On 09/07/2018 07:32 AM, marcandre.lureau@redhat.com wrote:
>> From: Marc-André Lureau <marcandre.lureau@redhat.com>
>>
>> Check anonymous memory is backed by memfd if qemu is capable.
>>
>> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
>> ---
>> tests/qemuxml2argvdata/memfd-memory-numa.args | 28 +++++++++++++++
>> tests/qemuxml2argvdata/memfd-memory-numa.xml | 36 +++++++++++++++++++
>> tests/qemuxml2argvtest.c | 5 +++
>> 3 files changed, 69 insertions(+)
>> create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.args
>> create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.xml
>>
>> diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.args b/tests/qemuxml2argvdata/memfd-memory-numa.args
>> new file mode 100644
>> index 0000000000..b26c476196
>> --- /dev/null
>> +++ b/tests/qemuxml2argvdata/memfd-memory-numa.args
>> @@ -0,0 +1,28 @@
>> +LC_ALL=C \
>> +PATH=/bin \
>> +HOME=/home/test \
>> +USER=test \
>> +LOGNAME=test \
>> +QEMU_AUDIO_DRV=none \
>> +/usr/bin/qemu-system-x86_64 \
>> +-name instance-00000092 \
>> +-S \
>> +-machine pc-i440fx-wily,accel=kvm,usb=off,dump-guest-core=off \
>> +-m 14336 \
>> +-mem-prealloc \
>> +-smp 20,sockets=1,cores=8,threads=1 \
>> +-object memory-backend-memfd,id=ram-node0,hugetlb=yes,hugetlbsize=2097152,share=yes,\
>> +size=15032385536,host-nodes=3,policy=preferred \
>
> Another syntax-check error here, needed to move the "share=yes," to the
> subsequent line.
>
ok
>> +-numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \
>> +-uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \
>> +-display none \
>> +-no-user-config \
>> +-nodefaults \
>> +-chardev socket,id=charmonitor,\
>> +path=/tmp/lib/domain--1-instance-00000092/monitor.sock,server,nowait \
>> +-mon chardev=charmonitor,id=monitor,mode=control \
>> +-rtc base=utc \
>> +-no-shutdown \
>> +-no-acpi \
>> +-usb \
>> +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3
>> diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.xml b/tests/qemuxml2argvdata/memfd-memory-numa.xml
>> new file mode 100644
>> index 0000000000..abe93e8c4b
>> --- /dev/null
>> +++ b/tests/qemuxml2argvdata/memfd-memory-numa.xml
>> @@ -0,0 +1,36 @@
>> + <domain type='kvm' id='56'>
>> + <name>instance-00000092</name>
>> + <uuid>126f2720-6f8e-45ab-a886-ec9277079a67</uuid>
>> + <memory unit='KiB'>14680064</memory>
>> + <currentMemory unit='KiB'>14680064</currentMemory>
>> + <memoryBacking>
>> + <hugepages>
>> + <page size="2" unit="M"/>
>> + </hugepages>
>> + <source type='anonymous'/>
>> + <access mode='shared'/>
>> + <allocation mode='immediate'/>
>> + </memoryBacking>
>> + <numatune>
>> + <memnode cellid='0' mode='preferred' nodeset='3'/>
>> + </numatune>
>> + <vcpu placement='static'>20</vcpu>
>> + <os>
>> + <type arch='x86_64' machine='pc-i440fx-wily'>hvm</type>
>> + <boot dev='hd'/>
>> + </os>
>> + <cpu>
>> + <topology sockets='1' cores='8' threads='1'/>
>> + <numa>
>> + <cell id='0' cpus='0-7' memory='14680064' unit='KiB'/>
>> + </numa>
>> + </cpu>
>> + <clock offset='utc'/>
>> + <on_poweroff>destroy</on_poweroff>
>> + <on_reboot>restart</on_reboot>
>> + <on_crash>destroy</on_crash>
>> + <devices>
>> + <emulator>/usr/bin/qemu-system-x86_64</emulator>
>> + <memballoon model='virtio'/>
>> + </devices>
>> + </domain>
>> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
>> index 35df63b2ac..76008a8d07 100644
>> --- a/tests/qemuxml2argvtest.c
>> +++ b/tests/qemuxml2argvtest.c
>> @@ -2928,6 +2928,11 @@ mymain(void)
>> DO_TEST("fd-memory-no-numa-topology", QEMU_CAPS_OBJECT_MEMORY_FILE,
>> QEMU_CAPS_KVM);
>>
>> + DO_TEST("memfd-memory-numa",
>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD,
>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD_HUGETLB,
>> + QEMU_CAPS_KVM);
>> +
>
> Theoretically, if we have 3.1 capabilties to test against, then this
> would use a DO_TEST_CAPS_LATEST, while a "pre-3.1" would still be using
> -ramfd, right? That is, using DO_TEST_CAPS_VER w/ "3.0.0" would
> generate different results.
>
> I'm conflicted if we should wait for someone to generate the 3.1 caps or
> not. For whatever reason, when I post them they're not quite right for
> someone else's tastes...
>
> Let's see if anyone else has strong feelings one way or another.
>
-memfd is available since 2.12. After patch 1 & 2 are applied, we
should probably switch to use DO_TEST_CAPS_LATEST.
Before 2.12 (or if the capabilities are not exposed by the host qemu)
the argv will use -file. This is already covered by existing tests,
like hugepages-shared.
thanks
> John
>
>> DO_TEST("cpu-check-none", QEMU_CAPS_KVM);
>> DO_TEST("cpu-check-partial", QEMU_CAPS_KVM);
>> DO_TEST("cpu-check-full", QEMU_CAPS_KVM);
>>
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
[...]
>>> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
>>> index 35df63b2ac..76008a8d07 100644
>>> --- a/tests/qemuxml2argvtest.c
>>> +++ b/tests/qemuxml2argvtest.c
>>> @@ -2928,6 +2928,11 @@ mymain(void)
>>> DO_TEST("fd-memory-no-numa-topology", QEMU_CAPS_OBJECT_MEMORY_FILE,
>>> QEMU_CAPS_KVM);
>>>
>>> + DO_TEST("memfd-memory-numa",
>>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD,
>>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD_HUGETLB,
>>> + QEMU_CAPS_KVM);
>>> +
>>
>> Theoretically, if we have 3.1 capabilties to test against, then this
>> would use a DO_TEST_CAPS_LATEST, while a "pre-3.1" would still be using
>> -ramfd, right? That is, using DO_TEST_CAPS_VER w/ "3.0.0" would
>> generate different results.
>>
>> I'm conflicted if we should wait for someone to generate the 3.1 caps or
>> not. For whatever reason, when I post them they're not quite right for
>> someone else's tastes...
>>
>> Let's see if anyone else has strong feelings one way or another.
>>
>
> -memfd is available since 2.12. After patch 1 & 2 are applied, we
> should probably switch to use DO_TEST_CAPS_LATEST.
>
Theoretically patches 3, 4, and 5 could be one patch, but having
separate also works well for review purposes!
While MEMFD is there, it is the HUGETLB and the comment in patch 2 about
QEMU 3.1 that I was concerned with, especially since 2.12 and 3.0 find
the value...
Looking at the QEMU sources, I see you added the field in commit
dbb9e0f40, which is 2.12 based.
Still reading deeper into the comments in patch 2, it just seems that
@hugetlbsize has some sort of run-time issue that gets fixed by 3.1. Harder
for libvirt to detect that an issue exists unless something was added in
3.1 that libvirt could test on for a capability. I'm not sure what the
issue is, but maybe that's something document-able at least with respect
to what values are provided in the XML for memoryBacking.
John
> Before 2.12 (or if the capabilities are not exposed by the host qemu)
> the argv will use -file. This is already covered by existing tests,
> like hugepages-shared.
>
> thanks
>
>> John
>>
>>> DO_TEST("cpu-check-none", QEMU_CAPS_KVM);
>>> DO_TEST("cpu-check-partial", QEMU_CAPS_KVM);
>>> DO_TEST("cpu-check-full", QEMU_CAPS_KVM);
>>>
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Hi
On Tue, Sep 11, 2018 at 5:21 PM, John Ferlan <jferlan@redhat.com> wrote:
>
> [...]
>
>>>> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
>>>> index 35df63b2ac..76008a8d07 100644
>>>> --- a/tests/qemuxml2argvtest.c
>>>> +++ b/tests/qemuxml2argvtest.c
>>>> @@ -2928,6 +2928,11 @@ mymain(void)
>>>> DO_TEST("fd-memory-no-numa-topology", QEMU_CAPS_OBJECT_MEMORY_FILE,
>>>> QEMU_CAPS_KVM);
>>>>
>>>> + DO_TEST("memfd-memory-numa",
>>>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD,
>>>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD_HUGETLB,
>>>> + QEMU_CAPS_KVM);
>>>> +
>>>
>>> Theoretically, if we have 3.1 capabilties to test against, then this
>>> would use a DO_TEST_CAPS_LATEST, while a "pre-3.1" would still be using
>>> -ramfd, right? That is, using DO_TEST_CAPS_VER w/ "3.0.0" would
>>> generate different results.
>>>
>>> I'm conflicted if we should wait for someone to generate the 3.1 caps or
>>> not. For whatever reason, when I post them they're not quite right for
>>> someone else's tastes...
>>>
>>> Let's see if anyone else has strong feelings one way or another.
>>>
>>
>> -memfd is available since 2.12. After patch 1 & 2 are applied, we
>> should probably switch to use DO_TEST_CAPS_LATEST.
>>
>
> Theoretically patches 3, 4, and 5 could be one patch, but having
> separate also works well for review purposes!
>
> While MEMFD is there is the HUGETLB and comment in page 2 about QEMU 3.1
> that is what I was concerned with, especially since 2.12 and 3.0 find
> the value...
>
> Looking at the QEMU sources, I see you added the field in commit
> dbb9e0f40, which is 2.12 based.
It's added in 2.12:
git describe --contains --match=v2* dbb9e0f40
v2.12.0-rc0~107^2~8
However, only with upcoming patch for 3.1 (queued by Paolo today) will
the hugetlb properties be run-time checked/exposed.
> Still reading deeper into the comments in patch 2, it just seems that
> @hugetlbsize has some sort run-time issue that gets fixed by 3.1. Harder
It's not an issue, but it will help libvirt to figure out before
starting qemu if anonymous memfd hugetlb is supported.
> for libvirt to detect that an issue exists unless something was added in
> 3.1 that libvirt could test on for a capability. I'm not sure what the
> issue is, but maybe that's something document-able at least with respect
> to what values are provided in the XML for memoryBacking.
If you request anonymous memory & hugetlb today, you have a libvirt
error. With the series, if the host/qemu doesn't support it, you will
get an error.
https://libvirt.org/formatdomain.html#elementsMemoryBacking
There is no documentation about the file memory backing requirement
today (it seems). We could explain it and add that a
memfd-hugetlb-capable qemu doesn't need it (when there is no numa
assignment). Is this what you are asking?
>
>
> John
>
>> Before 2.12 (or if the capabilities are not exposed by the host qemu)
>> the argv will use -file. This is already covered by existing tests,
>> like hugepages-shared.
>>
>> thanks
>>
>>> John
>>>
>>>> DO_TEST("cpu-check-none", QEMU_CAPS_KVM);
>>>> DO_TEST("cpu-check-partial", QEMU_CAPS_KVM);
>>>> DO_TEST("cpu-check-full", QEMU_CAPS_KVM);
>>>>
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
On 09/11/2018 09:45 AM, Marc-André Lureau wrote:
> Hi
>
> On Tue, Sep 11, 2018 at 5:21 PM, John Ferlan <jferlan@redhat.com> wrote:
>>
>> [...]
>>
>>>>> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
>>>>> index 35df63b2ac..76008a8d07 100644
>>>>> --- a/tests/qemuxml2argvtest.c
>>>>> +++ b/tests/qemuxml2argvtest.c
>>>>> @@ -2928,6 +2928,11 @@ mymain(void)
>>>>> DO_TEST("fd-memory-no-numa-topology", QEMU_CAPS_OBJECT_MEMORY_FILE,
>>>>> QEMU_CAPS_KVM);
>>>>>
>>>>> + DO_TEST("memfd-memory-numa",
>>>>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD,
>>>>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD_HUGETLB,
>>>>> + QEMU_CAPS_KVM);
>>>>> +
>>>>
>>>> Theoretically, if we have 3.1 capabilties to test against, then this
>>>> would use a DO_TEST_CAPS_LATEST, while a "pre-3.1" would still be using
>>>> -ramfd, right? That is, using DO_TEST_CAPS_VER w/ "3.0.0" would
>>>> generate different results.
>>>>
>>>> I'm conflicted if we should wait for someone to generate the 3.1 caps or
>>>> not. For whatever reason, when I post them they're not quite right for
>>>> someone else's tastes...
>>>>
>>>> Let's see if anyone else has strong feelings one way or another.
>>>>
>>>
>>> -memfd is available since 2.12. After patch 1 & 2 are applied, we
>>> should probably switch to use DO_TEST_CAPS_LATEST.
>>>
>>
>> Theoretically patches 3, 4, and 5 could be one patch, but having
>> separate also works well for review purposes!
>>
>> While MEMFD is there is the HUGETLB and comment in page 2 about QEMU 3.1
>> that is what I was concerned with, especially since 2.12 and 3.0 find
>> the value...
>>
>> Looking at the QEMU sources, I see you added the field in commit
>> dbb9e0f40, which is 2.12 based.
>
> It's added in 2.12:
> git describe --contains --match=v2* dbb9e0f40
> v2.12.0-rc0~107^2~8
>
> However, only with upcoming patch for 3.1 (queued by Paolo today) will
> the hugetlb properties be run-time checked/exposed.
>
>> Still reading deeper into the comments in patch 2, it just seems that
>> @hugetlbsize has some sort run-time issue that gets fixed by 3.1. Harder
>
> It's not an issue, but it will help libvirt to figure out before
> starting qemu if anonymous memfd hugetlb is supported.
>
>> for libvirt to detect that an issue exists unless something was added in
>> 3.1 that libvirt could test on for a capability. I'm not sure what the
>> issue is, but maybe that's something document-able at least with respect
>> to what values are provided in the XML for memoryBacking.
>
> If you request anonymous memory & hugetlb today, you have a libvirt
> error. With the series, if the host/qemu doesn't support it, you will
> get an error.
Now I'm getting more confused. With this patch series applied, but
without the 3.1 changes, if the anonymous memfd hugetlb is used there
will be a run time issue?
IOW: Does it really only work in 3.1? If so, then we need to figure out
a mechanism for determining that as there's no reason to "default to"
-memfd then for 2.12 and 3.0, right?
>
> https://libvirt.org/formatdomain.html#elementsMemoryBacking
>
> There is no documentation about the file memory backing requirement
> today (it seems). We could explain it and add that a
> memfd-hugetlb-capable doesn't need it (when there is no numa
> assignment). Is this what you are asking?
>
Essentially - I'm sure we'd have to carefully word things to take into
account Michal's position that we don't want to describe the conditions
related to what backend is being used "by default" and for "which
version". Still I think the whether to document or not is related to
what the hugetlb problem is. Tough to say don't use this unless you
have qemu 3.1 installed even though it's supported back to 2.12. I don't
even want to think about describing the migration discussion...
John
>>
>>
>> John
>>
>>> Before 2.12 (or if the capabilities are not exposed by the host qemu)
>>> the argv will use -file. This is already covered by existing tests,
>>> like hugepages-shared.
>>>
>>> thanks
>>>
>>>> John
>>>>
>>>>> DO_TEST("cpu-check-none", QEMU_CAPS_KVM);
>>>>> DO_TEST("cpu-check-partial", QEMU_CAPS_KVM);
>>>>> DO_TEST("cpu-check-full", QEMU_CAPS_KVM);
>>>>>
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Hi
On Tue, Sep 11, 2018 at 7:39 PM, John Ferlan <jferlan@redhat.com> wrote:
>
>
> On 09/11/2018 09:45 AM, Marc-André Lureau wrote:
>> Hi
>>
>> On Tue, Sep 11, 2018 at 5:21 PM, John Ferlan <jferlan@redhat.com> wrote:
>>>
>>> [...]
>>>
>>>>>> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
>>>>>> index 35df63b2ac..76008a8d07 100644
>>>>>> --- a/tests/qemuxml2argvtest.c
>>>>>> +++ b/tests/qemuxml2argvtest.c
>>>>>> @@ -2928,6 +2928,11 @@ mymain(void)
>>>>>> DO_TEST("fd-memory-no-numa-topology", QEMU_CAPS_OBJECT_MEMORY_FILE,
>>>>>> QEMU_CAPS_KVM);
>>>>>>
>>>>>> + DO_TEST("memfd-memory-numa",
>>>>>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD,
>>>>>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD_HUGETLB,
>>>>>> + QEMU_CAPS_KVM);
>>>>>> +
>>>>>
>>>>> Theoretically, if we have 3.1 capabilties to test against, then this
>>>>> would use a DO_TEST_CAPS_LATEST, while a "pre-3.1" would still be using
>>>>> -ramfd, right? That is, using DO_TEST_CAPS_VER w/ "3.0.0" would
>>>>> generate different results.
>>>>>
>>>>> I'm conflicted if we should wait for someone to generate the 3.1 caps or
>>>>> not. For whatever reason, when I post them they're not quite right for
>>>>> someone else's tastes...
>>>>>
>>>>> Let's see if anyone else has strong feelings one way or another.
>>>>>
>>>>
>>>> -memfd is available since 2.12. After patch 1 & 2 are applied, we
>>>> should probably switch to use DO_TEST_CAPS_LATEST.
>>>>
>>>
>>> Theoretically patches 3, 4, and 5 could be one patch, but having
>>> separate also works well for review purposes!
>>>
>>> While MEMFD is there is the HUGETLB and comment in page 2 about QEMU 3.1
>>> that is what I was concerned with, especially since 2.12 and 3.0 find
>>> the value...
>>>
>>> Looking at the QEMU sources, I see you added the field in commit
>>> dbb9e0f40, which is 2.12 based.
>>
>> It's added in 2.12:
>> git describe --contains --match=v2* dbb9e0f40
>> v2.12.0-rc0~107^2~8
>>
>> However, only with upcoming patch for 3.1 (queued by Paolo today) will
>> the hugetlb properties be run-time checked/exposed.
>>
>>> Still reading deeper into the comments in patch 2, it just seems that
>>> @hugetlbsize has some sort run-time issue that gets fixed by 3.1. Harder
>>
>> It's not an issue, but it will help libvirt to figure out before
>> starting qemu if anonymous memfd hugetlb is supported.
>>
>>> for libvirt to detect that an issue exists unless something was added in
>>> 3.1 that libvirt could test on for a capability. I'm not sure what the
>>> issue is, but maybe that's something document-able at least with respect
>>> to what values are provided in the XML for memoryBacking.
>>
>> If you request anonymous memory & hugetlb today, you have a libvirt
>> error. With the series, if the host/qemu doesn't support it, you will
>> get an error.
>
> Now I'm getting more confused. With this patch series applied, but
> without the 3.1 changes, if the anonymous memfd hugetlb is used there
> will be a run time issue?
>
> IOW: Does it really only work in 3.1? If so, then we need to figure out
> a mechanism for determining that as there's no reason to "default to"
> -memfd then for 2.12 and 3.0, right?
No, it will work with 2.12, 3.0 or 3.1 as long as the host is capable.
What qemu will do in 3.1 is probe a bit the host to check if
hugetlb-memfd is supported by the host.
In all cases, hugetlb allocation (or allocation in general) can still
fail at run time due to an unsatisfiable request (limits, page size, etc.).
>
>>
>> https://libvirt.org/formatdomain.html#elementsMemoryBacking
>>
>> There is no documentation about the file memory backing requirement
>> today (it seems). We could explain it and add that a
>> memfd-hugetlb-capable doesn't need it (when there is no numa
>> assignment). Is this what you are asking?
>>
>
> Essentially - I'm sure we'd have to carefully word things to take into
> account Michal's position of we don't want to describe the conditions
> related to what backend is being used "by default" and for "which
> version". Still I think the whether to document or not is related to
> what the hugetlb problem is. Tough to say don't use this unless you
> have qemu 3.1 installed even though it's supported back to 2.12. I don't
> even want to think about describing the migration discussion...
ok, I think it's not worth documenting at this point if we want and
can make things transparent to the user.
>
> John
>
>>>
>>>
>>> John
>>>
>>>> Before 2.12 (or if the capabilities are not exposed by the host qemu)
>>>> the argv will use -file. This is already covered by existing tests,
>>>> like hugepages-shared.
>>>>
>>>> thanks
>>>>
>>>>> John
>>>>>
>>>>>> DO_TEST("cpu-check-none", QEMU_CAPS_KVM);
>>>>>> DO_TEST("cpu-check-partial", QEMU_CAPS_KVM);
>>>>>> DO_TEST("cpu-check-full", QEMU_CAPS_KVM);
>>>>>>
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
On 09/11/2018 04:48 AM, Marc-André Lureau wrote:
> Hi
>
> On Tue, Sep 11, 2018 at 2:57 AM, John Ferlan <jferlan@redhat.com> wrote:
>>
>>
>> On 09/07/2018 07:32 AM, marcandre.lureau@redhat.com wrote:
>>> From: Marc-André Lureau <marcandre.lureau@redhat.com>
>>>
>>> Check anonymous memory is backed by memfd if qemu is capable.
>>>
>>> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
>>> ---
>>> tests/qemuxml2argvdata/memfd-memory-numa.args | 28 +++++++++++++++
>>> tests/qemuxml2argvdata/memfd-memory-numa.xml | 36 +++++++++++++++++++
>>> tests/qemuxml2argvtest.c | 5 +++
>>> 3 files changed, 69 insertions(+)
>>> create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.args
>>> create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.xml
>>>
>>> diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.args b/tests/qemuxml2argvdata/memfd-memory-numa.args
>>> new file mode 100644
>>> index 0000000000..b26c476196
>>> --- /dev/null
>>> +++ b/tests/qemuxml2argvdata/memfd-memory-numa.args
>>> @@ -0,0 +1,28 @@
>>> +LC_ALL=C \
>>> +PATH=/bin \
>>> +HOME=/home/test \
>>> +USER=test \
>>> +LOGNAME=test \
>>> +QEMU_AUDIO_DRV=none \
>>> +/usr/bin/qemu-system-x86_64 \
>>> +-name instance-00000092 \
>>> +-S \
>>> +-machine pc-i440fx-wily,accel=kvm,usb=off,dump-guest-core=off \
>>> +-m 14336 \
>>> +-mem-prealloc \
>>> +-smp 20,sockets=1,cores=8,threads=1 \
>>> +-object memory-backend-memfd,id=ram-node0,hugetlb=yes,hugetlbsize=2097152,share=yes,\
>>> +size=15032385536,host-nodes=3,policy=preferred \
>>
>> Another syntax-check error here, needed to move the "share=yes," to the
>> subsequent line.
>>
>
> ok
>
>>> +-numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \
>>> +-uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \
>>> +-display none \
>>> +-no-user-config \
>>> +-nodefaults \
>>> +-chardev socket,id=charmonitor,\
>>> +path=/tmp/lib/domain--1-instance-00000092/monitor.sock,server,nowait \
>>> +-mon chardev=charmonitor,id=monitor,mode=control \
>>> +-rtc base=utc \
>>> +-no-shutdown \
>>> +-no-acpi \
>>> +-usb \
>>> +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3
>>> diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.xml b/tests/qemuxml2argvdata/memfd-memory-numa.xml
>>> new file mode 100644
>>> index 0000000000..abe93e8c4b
>>> --- /dev/null
>>> +++ b/tests/qemuxml2argvdata/memfd-memory-numa.xml
>>> @@ -0,0 +1,36 @@
>>> + <domain type='kvm' id='56'>
>>> + <name>instance-00000092</name>
>>> + <uuid>126f2720-6f8e-45ab-a886-ec9277079a67</uuid>
>>> + <memory unit='KiB'>14680064</memory>
>>> + <currentMemory unit='KiB'>14680064</currentMemory>
>>> + <memoryBacking>
>>> + <hugepages>
>>> + <page size="2" unit="M"/>
>>> + </hugepages>
>>> + <source type='anonymous'/>
>>> + <access mode='shared'/>
>>> + <allocation mode='immediate'/>
>>> + </memoryBacking>
>>> + <numatune>
>>> + <memnode cellid='0' mode='preferred' nodeset='3'/>
>>> + </numatune>
>>> + <vcpu placement='static'>20</vcpu>
>>> + <os>
>>> + <type arch='x86_64' machine='pc-i440fx-wily'>hvm</type>
>>> + <boot dev='hd'/>
>>> + </os>
>>> + <cpu>
>>> + <topology sockets='1' cores='8' threads='1'/>
>>> + <numa>
>>> + <cell id='0' cpus='0-7' memory='14680064' unit='KiB'/>
>>> + </numa>
>>> + </cpu>
>>> + <clock offset='utc'/>
>>> + <on_poweroff>destroy</on_poweroff>
>>> + <on_reboot>restart</on_reboot>
>>> + <on_crash>destroy</on_crash>
>>> + <devices>
>>> + <emulator>/usr/bin/qemu-system-x86_64</emulator>
>>> + <memballoon model='virtio'/>
>>> + </devices>
>>> + </domain>
>>> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
>>> index 35df63b2ac..76008a8d07 100644
>>> --- a/tests/qemuxml2argvtest.c
>>> +++ b/tests/qemuxml2argvtest.c
>>> @@ -2928,6 +2928,11 @@ mymain(void)
>>> DO_TEST("fd-memory-no-numa-topology", QEMU_CAPS_OBJECT_MEMORY_FILE,
>>> QEMU_CAPS_KVM);
>>>
>>> + DO_TEST("memfd-memory-numa",
>>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD,
>>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD_HUGETLB,
>>> + QEMU_CAPS_KVM);
>>> +
>>
>> Theoretically, if we have 3.1 capabilties to test against, then this
>> would use a DO_TEST_CAPS_LATEST, while a "pre-3.1" would still be using
>> -ramfd, right? That is, using DO_TEST_CAPS_VER w/ "3.0.0" would
>> generate different results.
>>
>> I'm conflicted if we should wait for someone to generate the 3.1 caps or
>> not. For whatever reason, when I post them they're not quite right for
>> someone else's tastes...
>>
>> Let's see if anyone else has strong feelings one way or another.
>>
>
> -memfd is available since 2.12. After patch 1 & 2 are applied, we
> should probably switch to use DO_TEST_CAPS_LATEST.
>
hrmph - tried using CAPS_LATEST, and got the error
"CPU topology doesn't match maximum vcpu count"
well *that's* helpful /-|...
The only libvirt test that cares about it currently is
cpu-hotplug-startup and yes, the maxvcpus matches the cpu topology
calculation...
So, as long as I change vcpu count from 20 to 8, rename the
tests/qemuxml2argvdata/memfd-memory-numa.args to
memfd-memory-numa.x86_64-latest.args, and regenerate the output to:
LC_ALL=C \
PATH=/bin \
HOME=/home/test \
USER=test \
LOGNAME=test \
QEMU_AUDIO_DRV=none \
/usr/bin/qemu-system-x86_64 \
-name guest=instance-00000092,debug-threads=on \
-S \
-object secret,id=masterKey0,format=raw,\
file=/tmp/lib/domain--1-instance-00000092/master-key.aes \
-machine pc-i440fx-wily,accel=kvm,usb=off,dump-guest-core=off \
-m 14336 \
-mem-prealloc \
-realtime mlock=off \
-smp 8,sockets=1,cores=8,threads=1 \
-object memory-backend-memfd,id=ram-node0,hugetlb=yes,hugetlbsize=2097152,\
share=yes,size=15032385536,host-nodes=3,policy=preferred \
-numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \
-uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \
-display none \
-no-user-config \
-nodefaults \
-chardev socket,id=charmonitor,fd=1729,server,nowait \
-mon chardev=charmonitor,id=monitor,mode=control \
-rtc base=utc \
-no-shutdown \
-no-acpi \
-boot strict=on \
-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x2 \
-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\
resourcecontrol=deny \
-msg timestamp=on
Then, the test is happy. The memory-backend-memfd object doesn't change.
So all that's "left":
1. "Add" a check in qemuDomainABIStabilityCheck to ensure we're not
changing from memory-backend-ram to memory-backend-memfd. We already
check that "(src->mem.source != dst->mem.source)" - so we know we're
already anonymous or not.
Any suggestions? If source is anonymous, then what? I think we can use
the qemuDomainObjPrivatePtr in some way to determine that we were
started with -memfd (or not started that way).
2. Get the patches I posted today to cleanup/move the memory backing
checks from domain_conf to qemu_domain:
https://www.redhat.com/archives/libvir-list/2018-September/msg00463.html
reviewed and pushed so that patch4 can use the qemu_domain API to alter
its hugepages check.
John
> Before 2.12 (or if the capabilities are not exposed by the host qemu)
> the argv will use -file. This is already covered by existing tests,
> like hugepages-shared.
>
> thanks
>
>> John
>>
>>> DO_TEST("cpu-check-none", QEMU_CAPS_KVM);
>>> DO_TEST("cpu-check-partial", QEMU_CAPS_KVM);
>>> DO_TEST("cpu-check-full", QEMU_CAPS_KVM);
>>>
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Hi
On Wed, Sep 12, 2018 at 4:01 AM, John Ferlan <jferlan@redhat.com> wrote:
>
>
> On 09/11/2018 04:48 AM, Marc-André Lureau wrote:
>> Hi
>>
>> On Tue, Sep 11, 2018 at 2:57 AM, John Ferlan <jferlan@redhat.com> wrote:
>>>
>>>
>>> On 09/07/2018 07:32 AM, marcandre.lureau@redhat.com wrote:
>>>> From: Marc-André Lureau <marcandre.lureau@redhat.com>
>>>>
>>>> Check anonymous memory is backed by memfd if qemu is capable.
>>>>
>>>> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
>>>> ---
>>>> tests/qemuxml2argvdata/memfd-memory-numa.args | 28 +++++++++++++++
>>>> tests/qemuxml2argvdata/memfd-memory-numa.xml | 36 +++++++++++++++++++
>>>> tests/qemuxml2argvtest.c | 5 +++
>>>> 3 files changed, 69 insertions(+)
>>>> create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.args
>>>> create mode 100644 tests/qemuxml2argvdata/memfd-memory-numa.xml
>>>>
>>>> diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.args b/tests/qemuxml2argvdata/memfd-memory-numa.args
>>>> new file mode 100644
>>>> index 0000000000..b26c476196
>>>> --- /dev/null
>>>> +++ b/tests/qemuxml2argvdata/memfd-memory-numa.args
>>>> @@ -0,0 +1,28 @@
>>>> +LC_ALL=C \
>>>> +PATH=/bin \
>>>> +HOME=/home/test \
>>>> +USER=test \
>>>> +LOGNAME=test \
>>>> +QEMU_AUDIO_DRV=none \
>>>> +/usr/bin/qemu-system-x86_64 \
>>>> +-name instance-00000092 \
>>>> +-S \
>>>> +-machine pc-i440fx-wily,accel=kvm,usb=off,dump-guest-core=off \
>>>> +-m 14336 \
>>>> +-mem-prealloc \
>>>> +-smp 20,sockets=1,cores=8,threads=1 \
>>>> +-object memory-backend-memfd,id=ram-node0,hugetlb=yes,hugetlbsize=2097152,share=yes,\
>>>> +size=15032385536,host-nodes=3,policy=preferred \
>>>
>>> Another syntax-check error here, needed to move the "share=yes," to the
>>> subsequent line.
>>>
>>
>> ok
>>
>>>> +-numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \
>>>> +-uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \
>>>> +-display none \
>>>> +-no-user-config \
>>>> +-nodefaults \
>>>> +-chardev socket,id=charmonitor,\
>>>> +path=/tmp/lib/domain--1-instance-00000092/monitor.sock,server,nowait \
>>>> +-mon chardev=charmonitor,id=monitor,mode=control \
>>>> +-rtc base=utc \
>>>> +-no-shutdown \
>>>> +-no-acpi \
>>>> +-usb \
>>>> +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3
>>>> diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.xml b/tests/qemuxml2argvdata/memfd-memory-numa.xml
>>>> new file mode 100644
>>>> index 0000000000..abe93e8c4b
>>>> --- /dev/null
>>>> +++ b/tests/qemuxml2argvdata/memfd-memory-numa.xml
>>>> @@ -0,0 +1,36 @@
>>>> + <domain type='kvm' id='56'>
>>>> + <name>instance-00000092</name>
>>>> + <uuid>126f2720-6f8e-45ab-a886-ec9277079a67</uuid>
>>>> + <memory unit='KiB'>14680064</memory>
>>>> + <currentMemory unit='KiB'>14680064</currentMemory>
>>>> + <memoryBacking>
>>>> + <hugepages>
>>>> + <page size="2" unit="M"/>
>>>> + </hugepages>
>>>> + <source type='anonymous'/>
>>>> + <access mode='shared'/>
>>>> + <allocation mode='immediate'/>
>>>> + </memoryBacking>
>>>> + <numatune>
>>>> + <memnode cellid='0' mode='preferred' nodeset='3'/>
>>>> + </numatune>
>>>> + <vcpu placement='static'>20</vcpu>
>>>> + <os>
>>>> + <type arch='x86_64' machine='pc-i440fx-wily'>hvm</type>
>>>> + <boot dev='hd'/>
>>>> + </os>
>>>> + <cpu>
>>>> + <topology sockets='1' cores='8' threads='1'/>
>>>> + <numa>
>>>> + <cell id='0' cpus='0-7' memory='14680064' unit='KiB'/>
>>>> + </numa>
>>>> + </cpu>
>>>> + <clock offset='utc'/>
>>>> + <on_poweroff>destroy</on_poweroff>
>>>> + <on_reboot>restart</on_reboot>
>>>> + <on_crash>destroy</on_crash>
>>>> + <devices>
>>>> + <emulator>/usr/bin/qemu-system-x86_64</emulator>
>>>> + <memballoon model='virtio'/>
>>>> + </devices>
>>>> + </domain>
>>>> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
>>>> index 35df63b2ac..76008a8d07 100644
>>>> --- a/tests/qemuxml2argvtest.c
>>>> +++ b/tests/qemuxml2argvtest.c
>>>> @@ -2928,6 +2928,11 @@ mymain(void)
>>>> DO_TEST("fd-memory-no-numa-topology", QEMU_CAPS_OBJECT_MEMORY_FILE,
>>>> QEMU_CAPS_KVM);
>>>>
>>>> + DO_TEST("memfd-memory-numa",
>>>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD,
>>>> + QEMU_CAPS_OBJECT_MEMORY_MEMFD_HUGETLB,
>>>> + QEMU_CAPS_KVM);
>>>> +
>>>
>>> Theoretically, if we have 3.1 capabilities to test against, then this
>>> would use a DO_TEST_CAPS_LATEST, while a "pre-3.1" would still be using
>>> -ramfd, right? That is, using DO_TEST_CAPS_VER w/ "3.0.0" would
>>> generate different results.
>>>
>>> I'm conflicted if we should wait for someone to generate the 3.1 caps or
>>> not. For whatever reason, when I post them they're not quite right for
>>> someone else's tastes...
>>>
>>> Let's see if anyone else has strong feelings one way or another.
>>>
>>
>> -memfd is available since 2.12. After patch 1 & 2 are applied, we
>> should probably switch to use DO_TEST_CAPS_LATEST.
>>
>
> hrmph - tried using CAPS_LATEST, and got the error
>
> "CPU topology doesn't match maximum vcpu count"
>
> well *that's* helpful /-|...
>
> The only libvirt test that cares about it currently is
> cpu-hotplug-startup and yes, the maxvcpus matches the cpu topology
> calculation...
>
> So, as long as I change vcpu count from 20 to 8, rename the
> tests/qemuxml2argvdata/memfd-memory-numa.args to
> memfd-memory-numa.x86_64-latest.args, and regenerate the output to:
>
> LC_ALL=C \
> PATH=/bin \
> HOME=/home/test \
> USER=test \
> LOGNAME=test \
> QEMU_AUDIO_DRV=none \
> /usr/bin/qemu-system-x86_64 \
> -name guest=instance-00000092,debug-threads=on \
> -S \
> -object secret,id=masterKey0,format=raw,\
> file=/tmp/lib/domain--1-instance-00000092/master-key.aes \
> -machine pc-i440fx-wily,accel=kvm,usb=off,dump-guest-core=off \
> -m 14336 \
> -mem-prealloc \
> -realtime mlock=off \
> -smp 8,sockets=1,cores=8,threads=1 \
> -object memory-backend-memfd,id=ram-node0,hugetlb=yes,hugetlbsize=2097152,\
> share=yes,size=15032385536,host-nodes=3,policy=preferred \
> -numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \
> -uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \
> -display none \
> -no-user-config \
> -nodefaults \
> -chardev socket,id=charmonitor,fd=1729,server,nowait \
> -mon chardev=charmonitor,id=monitor,mode=control \
> -rtc base=utc \
> -no-shutdown \
> -no-acpi \
> -boot strict=on \
> -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
> -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x2 \
> -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\
> resourcecontrol=deny \
> -msg timestamp=on
>
> Then, the test is happy. The memory-backend-memfd object doesn't change.
ok
>
> So all that's "left":
>
> 1. "Add" a check in qemuDomainABIStabilityCheck to ensure we're not
> changing from memory-backend-ram to memory-backend-memfd. We already
> check that "(src->mem.source != dst->mem.source)" - so we know we're
> already anonymous or not.
>
> Any suggestions? If source is anonymous, then what? I think we can use
> the qemuDomainObjPrivatePtr in some way to determine that we were
> started with -memfd (or not started that way).
No idea how we could save that information across various restarts /
version changes.
Tbh, I would try to migrate, and let qemu fail if something is
incompatible (such as incompatible memory backends or memory region
name mismatch). See also my qemu series "[PATCH 0/9] hostmem-ram: use
whole path for region name with >= 3.1". It feels like libvirt
duplicates some qemu logic/error otherwise.
>
> 2. Get the patches I posted today to cleanup/move the memory backing
> checks from domain_conf to qemu_domain:
>
> https://www.redhat.com/archives/libvir-list/2018-September/msg00463.html
>
> reviewed and pushed so that patch4 can use the qemu_domain API to alter
> its hugepages check.
done
feel free to update & resend my series, or else I will rebase and resend it
thanks
>
> John
>
>> Before 2.12 (or if the capabilities are not exposed by the host qemu)
>> the argv will use -file. This is already covered by existing tests,
>> like hugepages-shared.
>>
>> thanks
>>
>>> John
>>>
>>>> DO_TEST("cpu-check-none", QEMU_CAPS_KVM);
>>>> DO_TEST("cpu-check-partial", QEMU_CAPS_KVM);
>>>> DO_TEST("cpu-check-full", QEMU_CAPS_KVM);
>>>>
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
[...] >> >> So all that's "left": >> >> 1. "Add" a check in qemuDomainABIStabilityCheck to ensure we're not >> changing from memory-backend-ram to memory-backend-memfd. We already >> check that "(src->mem.source != dst->mem.source)" - so we know we're >> already anonymous or not. >> >> Any suggestions? If source is anonymous, then what? I think we can use >> the qemuDomainObjPrivatePtr in some way to determine that we were >> started with -memfd (or not started that way). > > No idea how we could save that information across various restarts / > version changes. I think it'd be ugly... I think migration cookies would have to be used... I considered other mechanisms, but each wouldn't quite work. Without writing the code, if we cared to do this, then we'd have: 1. Add a field to qemuDomainObjPrivatePtr that indicates what got started (none, memfd, file, or ram). Add a typedef enum that has unknown, none, memfd, file, and ram. Add the Parse/Format code to handle the field. 2. Modify the qemu_command code to set the field in priv based on what got started, if something got started. The value would be > 0... 3. Mess with the migration cookie logic to add checks for what the source started. On the destination side of that cookie if we had the "right capabilities", then check the source cookie to see what it has. If it didn't have that field, then I think one could assume the source with anonymous memory backing would be using -ram. We'd already fail the src/dst mem.source check if one used -file. I'm not all the versed in the cookies, but I think that'd work "logically thinking" at least. The devil would be in the details. Assuming your 3.1 patches do something to handle the condition, I guess it comes does to how much of a problem it's believed this could be in 2.12 and 3.0 if someone is running -ram and migrates to a host that would default to -memfd. 
> > Tbh, I would try to migrate, and let qemu fail if something is > incompatible (such as incompatible memory backends or memory region > name mismatch). See also my qemu series "[PATCH 0/9] hostmem-ram: use > whole path for region name with >= 3.1". It feels like libvirt > duplicates some qemu logic/error otherwise. > I'm sure there's lots of duplication, but generally doing the checks in libvirt allow for a bit "easier" (in least in terms of libvirt) backout logic. Once the qemu process starts - if the process eventually dies because of something, then the logging only goes to libvirt log files. If the process fails to start, libvirt does capture and give that information back to the consumer. So call it preventative duplication. I think historically some qemu error messages have been a bit too vague to figure out why something didn't work. >> >> 2. Get the patches I posted today to cleanup/move the memory backing >> checks from domain_conf to qemu_domain: >> >> https://www.redhat.com/archives/libvir-list/2018-September/msg00463.html >> >> reviewed and pushed so that patch4 can use the qemu_domain API to alter >> it's hugepages check. > > done > Thanks - I pushed that... > feel free to update & resend my series, or else I will rebase and resend it > > thanks > OK - I adjusted your changes to handle the previously agreed upon "issues" and was ready to push the series when it dawned on me that the MEMFD and MEMFD_HUGETLB capabilities both use the 2.12 release - so realistically would the latter really be necessary? Again if something doesn't quite work in 2.12 and 3.0 for hugetlb, then perhaps there's something in 3.1 that can be checked. I can remove or keep patch 2. If removed, then just use MEMFD as the basis. Your call. John -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Hi On Thu, Sep 13, 2018 at 2:25 AM, John Ferlan <jferlan@redhat.com> wrote: > > [...] > >>> >>> So all that's "left": >>> >>> 1. "Add" a check in qemuDomainABIStabilityCheck to ensure we're not >>> changing from memory-backend-ram to memory-backend-memfd. We already >>> check that "(src->mem.source != dst->mem.source)" - so we know we're >>> already anonymous or not. >>> >>> Any suggestions? If source is anonymous, then what? I think we can use >>> the qemuDomainObjPrivatePtr in some way to determine that we were >>> started with -memfd (or not started that way). >> >> No idea how we could save that information across various restarts / >> version changes. > > I think it'd be ugly... I think migration cookies would have to be > used... I considered other mechanisms, but each wouldn't quite work. > Without writing the code, if we cared to do this, then we'd have: > > 1. Add a field to qemuDomainObjPrivatePtr that indicates what got > started (none, memfd, file, or ram). Add a typedef enum that has > unknown, none, memfd, file, and ram. Add the Parse/Format code to handle > the field. > > 2. Modify the qemu_command code to set the field in priv based on what > got started, if something got started. The value would be > 0... > > 3. Mess with the migration cookie logic to add checks for what the > source started. On the destination side of that cookie if we had the > "right capabilities", then check the source cookie to see what it has. > If it didn't have that field, then I think one could assume the source > with anonymous memory backing would be using -ram. We'd already fail the > src/dst mem.source check if one used -file. I'm not all the versed in > the cookies, but I think that'd work "logically thinking" at least. The > devil would be in the details. 
> > Assuming your 3.1 patches do something to handle the condition, I guess > it comes does to how much of a problem it's believed this could be in > 2.12 and 3.0 if someone is running -ram and migrates to a host that > would default to -memfd. I am afraid we will need to do it to handle transparent -memfd usage. I'll look at it with your help. > >> >> Tbh, I would try to migrate, and let qemu fail if something is >> incompatible (such as incompatible memory backends or memory region >> name mismatch). See also my qemu series "[PATCH 0/9] hostmem-ram: use >> whole path for region name with >= 3.1". It feels like libvirt >> duplicates some qemu logic/error otherwise. >> > > I'm sure there's lots of duplication, but generally doing the checks in > libvirt allow for a bit "easier" (in least in terms of libvirt) backout > logic. Once the qemu process starts - if the process eventually dies > because of something, then the logging only goes to libvirt log files. > If the process fails to start, libvirt does capture and give that > information back to the consumer. So call it preventative duplication. I > think historically some qemu error messages have been a bit too vague to > figure out why something didn't work. > >>> >>> 2. Get the patches I posted today to cleanup/move the memory backing >>> checks from domain_conf to qemu_domain: >>> >>> https://www.redhat.com/archives/libvir-list/2018-September/msg00463.html >>> >>> reviewed and pushed so that patch4 can use the qemu_domain API to alter >>> it's hugepages check. >> >> done >> > > Thanks - I pushed that... > >> feel free to update & resend my series, or else I will rebase and resend it >> >> thanks >> > > OK - I adjusted your changes to handle the previously agreed upon > "issues" and was ready to push the series when it dawned on me that the > MEMFD and MEMFD_HUGETLB capabilities both use the 2.12 release - so > realistically would the latter really be necessary? 
> > Again if something doesn't quite work in 2.12 and 3.0 for hugetlb, then > perhaps there's something in 3.1 that can be checked. > > I can remove or keep patch 2. If removed, then just use MEMFD as the > basis. Your call. I'd keep the MEMFD_HUGETLB check, even with <3.1. -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
On 09/13/2018 03:39 AM, Marc-André Lureau wrote: > Hi > > On Thu, Sep 13, 2018 at 2:25 AM, John Ferlan <jferlan@redhat.com> wrote: >> >> [...] >> >>>> >>>> So all that's "left": >>>> >>>> 1. "Add" a check in qemuDomainABIStabilityCheck to ensure we're not >>>> changing from memory-backend-ram to memory-backend-memfd. We already >>>> check that "(src->mem.source != dst->mem.source)" - so we know we're >>>> already anonymous or not. >>>> >>>> Any suggestions? If source is anonymous, then what? I think we can use >>>> the qemuDomainObjPrivatePtr in some way to determine that we were >>>> started with -memfd (or not started that way). >>> >>> No idea how we could save that information across various restarts / >>> version changes. >> >> I think it'd be ugly... I think migration cookies would have to be >> used... I considered other mechanisms, but each wouldn't quite work. >> Without writing the code, if we cared to do this, then we'd have: >> >> 1. Add a field to qemuDomainObjPrivatePtr that indicates what got >> started (none, memfd, file, or ram). Add a typedef enum that has >> unknown, none, memfd, file, and ram. Add the Parse/Format code to handle >> the field. >> >> 2. Modify the qemu_command code to set the field in priv based on what >> got started, if something got started. The value would be > 0... >> >> 3. Mess with the migration cookie logic to add checks for what the >> source started. On the destination side of that cookie if we had the >> "right capabilities", then check the source cookie to see what it has. >> If it didn't have that field, then I think one could assume the source >> with anonymous memory backing would be using -ram. We'd already fail the >> src/dst mem.source check if one used -file. I'm not all the versed in >> the cookies, but I think that'd work "logically thinking" at least. The >> devil would be in the details. 
>> >> Assuming your 3.1 patches do something to handle the condition, I guess >> it comes does to how much of a problem it's believed this could be in >> 2.12 and 3.0 if someone is running -ram and migrates to a host that >> would default to -memfd. > > I am afraid we will need to do it to handle transparent -memfd usage. > I'll look at it with your help. > Let's see what I can cobble together. I'll repost the series a bit later today hopefully. John >> >>> >>> Tbh, I would try to migrate, and let qemu fail if something is >>> incompatible (such as incompatible memory backends or memory region >>> name mismatch). See also my qemu series "[PATCH 0/9] hostmem-ram: use >>> whole path for region name with >= 3.1". It feels like libvirt >>> duplicates some qemu logic/error otherwise. >>> >> >> I'm sure there's lots of duplication, but generally doing the checks in >> libvirt allow for a bit "easier" (in least in terms of libvirt) backout >> logic. Once the qemu process starts - if the process eventually dies >> because of something, then the logging only goes to libvirt log files. >> If the process fails to start, libvirt does capture and give that >> information back to the consumer. So call it preventative duplication. I >> think historically some qemu error messages have been a bit too vague to >> figure out why something didn't work. >> >>>> >>>> 2. Get the patches I posted today to cleanup/move the memory backing >>>> checks from domain_conf to qemu_domain: >>>> >>>> https://www.redhat.com/archives/libvir-list/2018-September/msg00463.html >>>> >>>> reviewed and pushed so that patch4 can use the qemu_domain API to alter >>>> it's hugepages check. >>> >>> done >>> >> >> Thanks - I pushed that... 
>> >>> feel free to update & resend my series, or else I will rebase and resend it >>> >>> thanks >>> >> >> OK - I adjusted your changes to handle the previously agreed upon >> "issues" and was ready to push the series when it dawned on me that the >> MEMFD and MEMFD_HUGETLB capabilities both use the 2.12 release - so >> realistically would the latter really be necessary? >> >> Again if something doesn't quite work in 2.12 and 3.0 for hugetlb, then >> perhaps there's something in 3.1 that can be checked. >> >> I can remove or keep patch 2. If removed, then just use MEMFD as the >> basis. Your call. > > I'd keep the MEMFD_HUGETLB check, even with <3.1. > -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
On 09/13/2018 10:09 AM, John Ferlan wrote:
>
>
> On 09/13/2018 03:39 AM, Marc-André Lureau wrote:
>> Hi
>>
>> On Thu, Sep 13, 2018 at 2:25 AM, John Ferlan <jferlan@redhat.com> wrote:
>>>
>>> [...]
>>>
>>>>>
>>>>> So all that's "left":
>>>>>
>>>>> 1. "Add" a check in qemuDomainABIStabilityCheck to ensure we're not
>>>>> changing from memory-backend-ram to memory-backend-memfd. We already
>>>>> check that "(src->mem.source != dst->mem.source)" - so we know we're
>>>>> already anonymous or not.
>>>>>
>>>>> Any suggestions? If source is anonymous, then what? I think we can use
>>>>> the qemuDomainObjPrivatePtr in some way to determine that we were
>>>>> started with -memfd (or not started that way).
>>>>
>>>> No idea how we could save that information across various restarts /
>>>> version changes.
>>>
>>> I think it'd be ugly... I think migration cookies would have to be
>>> used... I considered other mechanisms, but each wouldn't quite work.
>>> Without writing the code, if we cared to do this, then we'd have:
>>>
>>> 1. Add a field to qemuDomainObjPrivatePtr that indicates what got
>>> started (none, memfd, file, or ram). Add a typedef enum that has
>>> unknown, none, memfd, file, and ram. Add the Parse/Format code to handle
>>> the field.
>>>
>>> 2. Modify the qemu_command code to set the field in priv based on what
>>> got started, if something got started. The value would be > 0...
>>>
>>> 3. Mess with the migration cookie logic to add checks for what the
>>> source started. On the destination side of that cookie if we had the
>>> "right capabilities", then check the source cookie to see what it has.
>>> If it didn't have that field, then I think one could assume the source
>>> with anonymous memory backing would be using -ram. We'd already fail the
>>> src/dst mem.source check if one used -file. I'm not all that versed in
>>> the cookies, but I think that'd work "logically thinking" at least. The
>>> devil would be in the details.
>>>
>>> Assuming your 3.1 patches do something to handle the condition, I guess
>>> it comes down to how much of a problem it's believed this could be in
>>> 2.12 and 3.0 if someone is running -ram and migrates to a host that
>>> would default to -memfd.
>>
>> I am afraid we will need to do it to handle transparent -memfd usage.
>> I'll look at it with your help.
>>
>
> Let's see what I can cobble together. I'll repost the series a bit later
> today hopefully.
>
After spending a few hours on this, the cookies just don't help enough
or I don't know/understand enough about their usage.
I keep coming back to the problem of how do we disallow a migration from
a host that has/knows about and uses anonymous memfd to one that doesn't
know about it. Similarly, if a domain source w/ "file" or "ram" (whether
at startup time or via hotplug) is migrated to a target host that would
generate memfd - we have no mechanism to stop the migration because we
have no way to tell what it was running, especially since what gets
started isn't just based off the source type - hugepages have a
tangential role. Lots of logic stuffed into qemu_command that probably
should have been in some qemuDomainPrepareMemtune API.
So unfortunately, I think the only safe way is to create a new source
type ("anonmem", "anonfile", "anonmemfd", ??) and describe it as lightly
as the other entries are described (ironically the documented default of
"anonymous" could be "file" or it could be "ram" based on 3 other factors
not described in the docs). At least with a new type name/value we can
guarantee that someone selects it by name rather than the multipurpose
"anonymous" type. I think it would mean moving the caps checks to a bit
later in the code, search for "otherwise check the required capability".
Unless someone still brave enough to keep reading this stream has an
idea to try. I'm tapped out!
John
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
On 09/13/2018 11:51 PM, John Ferlan wrote:
>
>
> On 09/13/2018 10:09 AM, John Ferlan wrote:
>>
>>
>> On 09/13/2018 03:39 AM, Marc-André Lureau wrote:
>>> Hi
>>>
>>> On Thu, Sep 13, 2018 at 2:25 AM, John Ferlan <jferlan@redhat.com> wrote:
>>>>
>>>> [...]
>>>>
>>>>>>
>>>>>> So all that's "left":
>>>>>>
>>>>>> 1. "Add" a check in qemuDomainABIStabilityCheck to ensure we're not
>>>>>> changing from memory-backend-ram to memory-backend-memfd. We already
>>>>>> check that "(src->mem.source != dst->mem.source)" - so we know we're
>>>>>> already anonymous or not.
>>>>>>
>>>>>> Any suggestions? If source is anonymous, then what? I think we can use
>>>>>> the qemuDomainObjPrivatePtr in some way to determine that we were
>>>>>> started with -memfd (or not started that way).
>>>>>
>>>>> No idea how we could save that information across various restarts /
>>>>> version changes.
>>>>
>>>> I think it'd be ugly... I think migration cookies would have to be
>>>> used... I considered other mechanisms, but each wouldn't quite work.
>>>> Without writing the code, if we cared to do this, then we'd have:
>>>>
>>>> 1. Add a field to qemuDomainObjPrivatePtr that indicates what got
>>>> started (none, memfd, file, or ram). Add a typedef enum that has
>>>> unknown, none, memfd, file, and ram. Add the Parse/Format code to handle
>>>> the field.
>>>>
>>>> 2. Modify the qemu_command code to set the field in priv based on what
>>>> got started, if something got started. The value would be > 0...
>>>>
>>>> 3. Mess with the migration cookie logic to add checks for what the
>>>> source started. On the destination side of that cookie if we had the
>>>> "right capabilities", then check the source cookie to see what it has.
>>>> If it didn't have that field, then I think one could assume the source
>>>> with anonymous memory backing would be using -ram. We'd already fail the
>>>> src/dst mem.source check if one used -file. I'm not all the versed in
>>>> the cookies, but I think that'd work "logically thinking" at least. The
>>>> devil would be in the details.
>>>>
>>>> Assuming your 3.1 patches do something to handle the condition, I guess
>>>> it comes does to how much of a problem it's believed this could be in
>>>> 2.12 and 3.0 if someone is running -ram and migrates to a host that
>>>> would default to -memfd.
>>>
>>> I am afraid we will need to do it to handle transparent -memfd usage.
>>> I'll look at it with your help.
>>>
>>
>> Let's see what I can cobble together. I'll repost the series a bit later
>> today hopefully.
>>
>
> After spending a few hours on this, the cookies just don't help enough
> or I don't know/understand enough about their usage.
>
> I keep coming back to the problem of how do we disallow a migration from
> a host that has/knows about and uses anonymous memfd to one that doesn't
> know about it. Similarly, if a domain source w/ "file" or "ram" (whether
> at startup time or via hotplug) is migrated to a target host that would
> generate memfd - we have no mechanism to stop the migration because we
> have no way to tell what it was running, especially since what gets
> started isn't just based off the source type - hugepages have a
> tangential role. Lots of logic stuffed into qemu_command that probably
> should have been in some qemuDomainPrepareMemtune API.
>
> So unfortunately, I think the only safe way is to create a new source
> type ("anonmem", "anonfile", "anonmemfd", ??) and describe it as lightly
> as the other entries are described (ironically the document default of
> "anonymous" could be "file" or it could be "ram" based 3 other factors
> not described in the docs). At least with a new type name/value we can
> guarantee that someone selects it by name rather than the multipurpose
> "anonymous" type. I think it would mean moving the caps checks to a bit
> later in the code, search for "otherwise check the required capability".
>
> Unless someone still brave enough to keep reading this stream has an
> idea to try. I'm tapped out!
We can have an element/attribute in status XML/migration XML saying
which backend we've used. This is slightly tricky because we have more
places than one where users can tune configuration such that we use
different backends. My personal favorite is:
<memoryBacking>
<hugepages>
<page size='2048' unit='KiB' nodeset='1'/>
</hugepages>
</memoryBacking>
<cpu>
<numa>
<cell id='0' cpus='0' memory='1048576' unit='KiB'/>
<cell id='1' cpus='1' memory='1048576' unit='KiB' memAccess='shared'/>
<cell id='2' cpus='2' memory='1048576' unit='KiB' memAccess='private'/>
<cell id='3' cpus='3' memory='1048576' unit='KiB'/>
</numa>
</cpu>
<devices>
<memory model='dimm'>
<target>
<size unit='KiB'>524288</size>
<node>1</node>
</target>
<address type='dimm' slot='0' base='0x100000000'/>
</memory>
</devices>
So what we can have is:
<hugepages>
<page size=.... backend='memory-backend-file'/>
</hugepages>
<cell id='0' cpus='0' memory='1048576' unit='KiB' backend='memory-backend-ram'/>
<cell id='1' cpus='1' memory='1048576' unit='KiB' memAccess='shared' backend='memory-backend-file'/>
<cell id='2' cpus='2' memory='1048576' unit='KiB' memAccess='private' backend='memory-backend-file'/>
<cell id='3' cpus='3' memory='1048576' unit='KiB' backend='memory-backend-ram'/>
<devices>
<memory model='dimm' backend='memory-backend-ram'/>
..
</devices>
This way we know what backend was used on the source (in saved state)
and the only thing we need to know on dst (on restore) is to check if
given backend is available.
I don't think putting anything in migration cookies is going to help.
It might help migration if anything but it will definitely keep
save/restore broken as there are no migration cookies.
Michal
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Hi
On Fri, Sep 14, 2018 at 11:44 AM, Michal Prívozník <mprivozn@redhat.com> wrote:
> On 09/13/2018 11:51 PM, John Ferlan wrote:
>>
>>
>> On 09/13/2018 10:09 AM, John Ferlan wrote:
>>>
>>>
>>> On 09/13/2018 03:39 AM, Marc-André Lureau wrote:
>>>> Hi
>>>>
>>>> On Thu, Sep 13, 2018 at 2:25 AM, John Ferlan <jferlan@redhat.com> wrote:
>>>>>
>>>>> [...]
>>>>>
>>>>>>>
>>>>>>> So all that's "left":
>>>>>>>
>>>>>>> 1. "Add" a check in qemuDomainABIStabilityCheck to ensure we're not
>>>>>>> changing from memory-backend-ram to memory-backend-memfd. We already
>>>>>>> check that "(src->mem.source != dst->mem.source)" - so we know we're
>>>>>>> already anonymous or not.
>>>>>>>
>>>>>>> Any suggestions? If source is anonymous, then what? I think we can use
>>>>>>> the qemuDomainObjPrivatePtr in some way to determine that we were
>>>>>>> started with -memfd (or not started that way).
>>>>>>
>>>>>> No idea how we could save that information across various restarts /
>>>>>> version changes.
>>>>>
>>>>> I think it'd be ugly... I think migration cookies would have to be
>>>>> used... I considered other mechanisms, but each wouldn't quite work.
>>>>> Without writing the code, if we cared to do this, then we'd have:
>>>>>
>>>>> 1. Add a field to qemuDomainObjPrivatePtr that indicates what got
>>>>> started (none, memfd, file, or ram). Add a typedef enum that has
>>>>> unknown, none, memfd, file, and ram. Add the Parse/Format code to handle
>>>>> the field.
>>>>>
>>>>> 2. Modify the qemu_command code to set the field in priv based on what
>>>>> got started, if something got started. The value would be > 0...
>>>>>
>>>>> 3. Mess with the migration cookie logic to add checks for what the
>>>>> source started. On the destination side of that cookie if we had the
>>>>> "right capabilities", then check the source cookie to see what it has.
>>>>> If it didn't have that field, then I think one could assume the source
>>>>> with anonymous memory backing would be using -ram. We'd already fail the
>>>>> src/dst mem.source check if one used -file. I'm not all the versed in
>>>>> the cookies, but I think that'd work "logically thinking" at least. The
>>>>> devil would be in the details.
>>>>>
>>>>> Assuming your 3.1 patches do something to handle the condition, I guess
>>>>> it comes does to how much of a problem it's believed this could be in
>>>>> 2.12 and 3.0 if someone is running -ram and migrates to a host that
>>>>> would default to -memfd.
>>>>
>>>> I am afraid we will need to do it to handle transparent -memfd usage.
>>>> I'll look at it with your help.
>>>>
>>>
>>> Let's see what I can cobble together. I'll repost the series a bit later
>>> today hopefully.
>>>
>>
>> After spending a few hours on this, the cookies just don't help enough
>> or I don't know/understand enough about their usage.
>>
>> I keep coming back to the problem of how do we disallow a migration from
>> a host that has/knows about and uses anonymous memfd to one that doesn't
>> know about it. Similarly, if a domain source w/ "file" or "ram" (whether
>> at startup time or via hotplug) is migrated to a target host that would
>> generate memfd - we have no mechanism to stop the migration because we
>> have no way to tell what it was running, especially since what gets
>> started isn't just based off the source type - hugepages have a
>> tangential role. Lots of logic stuffed into qemu_command that probably
>> should have been in some qemuDomainPrepareMemtune API.
>>
>> So unfortunately, I think the only safe way is to create a new source
>> type ("anonmem", "anonfile", "anonmemfd", ??) and describe it as lightly
>> as the other entries are described (ironically the document default of
>> "anonymous" could be "file" or it could be "ram" based 3 other factors
>> not described in the docs). At least with a new type name/value we can
>> guarantee that someone selects it by name rather than the multipurpose
>> "anonymous" type. I think it would mean moving the caps checks to a bit
>> later in the code, search for "otherwise check the required capability".
>>
>> Unless someone still brave enough to keep reading this stream has an
>> idea to try. I'm tapped out!
>
> We can have an element/attribute in status XML/migration XML saying
> which backend we've used. This is slightly tricky because we have more
> places then one where users can tune confuguration such that we use
> different backends. My personal favorite is:
>
> <memoryBacking>
> <hugepages>
> <page size='2048' unit='KiB' nodeset='1'/>
> </hugepages>
> </memoryBacking>
>
> <cpu>
> <numa>
> <cell id='0' cpus='0' memory='1048576' unit='KiB'/>
> <cell id='1' cpus='1' memory='1048576' unit='KiB' memAccess='shared'/>
> <cell id='2' cpus='2' memory='1048576' unit='KiB' memAccess='private'/>
> <cell id='3' cpus='3' memory='1048576' unit='KiB'/>
> </numa>
> </cpu>
>
> <devices>
> <memory model='dimm'>
> <target>
> <size unit='KiB'>524288</size>
> <node>1</node>
> </target>
> <address type='dimm' slot='0' base='0x100000000'/>
> </memory>
> </devices>
>
>
> So what we can have is:
>
> <hugepages>
> <page size=.... backend='memory-backend-file'/>
> </hugepages>
>
> <cell id='0' cpus='0' memory='1048576' unit='KiB' backend='memory-backend-ram'/>
> <cell id='1' cpus='1' memory='1048576' unit='KiB' memAccess='shared' backend='memory-backend-file'/>
> <cell id='2' cpus='2' memory='1048576' unit='KiB' memAccess='private' backend='memory-backend-file/>
> <cell id='3' cpus='3' memory='1048576' unit='KiB' backend='memory-backend-ram'/>
>
> <devices>
> <memory model='dimm' backend='memory-backend-ram'/>
That's a bit overkill to me, since we don't have (yet) the capacity
for a user to select the memory backend, and the value is a
qemu-specific detail.
> ..
> </devices>
>
>
> This way we know what backend was used on the source (in saved state)
> and the only thing we need to know on dst (on restore) is to check if
> given backend is available.
>
> I don't think putting anything in migration cookies is going to help.
> It might help migration if anything but it will definitely keep
> save/restore broken as there are no migration cookies.
Ah, too bad. I am not familiar enough with migration and save/restore
in libvirt. But I started to imagine how the migration cookie could
have been used.
Is the domain XML the only place where we can save information?
If yes, then either we go with your proposal (although I wonder if it
should be qemu: namespaced) or can we introduce libvirt capabilites?
(something as simple as
<capabilities><qemu-memorybackend-memfd/></capabilities>) ?
thanks!
>
> Michal
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
On 09/17/2018 11:30 AM, Marc-André Lureau wrote:
> Hi
>
> On Fri, Sep 14, 2018 at 11:44 AM, Michal Prívozník <mprivozn@redhat.com> wrote:
>> On 09/13/2018 11:51 PM, John Ferlan wrote:
>>>
>>>
>>> On 09/13/2018 10:09 AM, John Ferlan wrote:
>>>>
>>>>
>>>> On 09/13/2018 03:39 AM, Marc-André Lureau wrote:
>>>>> Hi
>>>>>
>>>>> On Thu, Sep 13, 2018 at 2:25 AM, John Ferlan <jferlan@redhat.com> wrote:
>>>>>>
>>>>>> [...]
>>>>>>
>>>>>>>>
>>>>>>>> So all that's "left":
>>>>>>>>
>>>>>>>> 1. "Add" a check in qemuDomainABIStabilityCheck to ensure we're not
>>>>>>>> changing from memory-backend-ram to memory-backend-memfd. We already
>>>>>>>> check that "(src->mem.source != dst->mem.source)" - so we know we're
>>>>>>>> already anonymous or not.
>>>>>>>>
>>>>>>>> Any suggestions? If source is anonymous, then what? I think we can use
>>>>>>>> the qemuDomainObjPrivatePtr in some way to determine that we were
>>>>>>>> started with -memfd (or not started that way).
>>>>>>>
>>>>>>> No idea how we could save that information across various restarts /
>>>>>>> version changes.
>>>>>>
>>>>>> I think it'd be ugly... I think migration cookies would have to be
>>>>>> used... I considered other mechanisms, but each wouldn't quite work.
>>>>>> Without writing the code, if we cared to do this, then we'd have:
>>>>>>
>>>>>> 1. Add a field to qemuDomainObjPrivatePtr that indicates what got
>>>>>> started (none, memfd, file, or ram). Add a typedef enum that has
>>>>>> unknown, none, memfd, file, and ram. Add the Parse/Format code to handle
>>>>>> the field.
>>>>>>
>>>>>> 2. Modify the qemu_command code to set the field in priv based on what
>>>>>> got started, if something got started. The value would be > 0...
>>>>>>
>>>>>> 3. Mess with the migration cookie logic to add checks for what the
>>>>>> source started. On the destination side of that cookie if we had the
>>>>>> "right capabilities", then check the source cookie to see what it has.
>>>>>> If it didn't have that field, then I think one could assume the source
>>>>>> with anonymous memory backing would be using -ram. We'd already fail the
>>>>>> src/dst mem.source check if one used -file. I'm not all the versed in
>>>>>> the cookies, but I think that'd work "logically thinking" at least. The
>>>>>> devil would be in the details.
>>>>>>
>>>>>> Assuming your 3.1 patches do something to handle the condition, I guess
>>>>>> it comes does to how much of a problem it's believed this could be in
>>>>>> 2.12 and 3.0 if someone is running -ram and migrates to a host that
>>>>>> would default to -memfd.
>>>>>
>>>>> I am afraid we will need to do it to handle transparent -memfd usage.
>>>>> I'll look at it with your help.
>>>>>
>>>>
>>>> Let's see what I can cobble together. I'll repost the series a bit later
>>>> today hopefully.
>>>>
>>>
>>> After spending a few hours on this, the cookies just don't help enough
>>> or I don't know/understand enough about their usage.
>>>
>>> I keep coming back to the problem of how do we disallow a migration from
>>> a host that has/knows about and uses anonymous memfd to one that doesn't
>>> know about it. Similarly, if a domain source w/ "file" or "ram" (whether
>>> at startup time or via hotplug) is migrated to a target host that would
>>> generate memfd - we have no mechanism to stop the migration because we
>>> have no way to tell what it was running, especially since what gets
>>> started isn't just based off the source type - hugepages have a
>>> tangential role. Lots of logic stuffed into qemu_command that probably
>>> should have been in some qemuDomainPrepareMemtune API.
>>>
>>> So unfortunately, I think the only safe way is to create a new source
>>> type ("anonmem", "anonfile", "anonmemfd", ??) and describe it as lightly
>>> as the other entries are described (ironically the document default of
>>> "anonymous" could be "file" or it could be "ram" based 3 other factors
>>> not described in the docs). At least with a new type name/value we can
>>> guarantee that someone selects it by name rather than the multipurpose
>>> "anonymous" type. I think it would mean moving the caps checks to a bit
>>> later in the code, search for "otherwise check the required capability".
>>>
>>> Unless someone still brave enough to keep reading this stream has an
>>> idea to try. I'm tapped out!
>>
>> We can have an element/attribute in status XML/migration XML saying
>> which backend we've used. This is slightly tricky because we have more
>> places then one where users can tune confuguration such that we use
>> different backends. My personal favorite is:
>>
>> <memoryBacking>
>> <hugepages>
>> <page size='2048' unit='KiB' nodeset='1'/>
>> </hugepages>
>> </memoryBacking>
>>
>> <cpu>
>> <numa>
>> <cell id='0' cpus='0' memory='1048576' unit='KiB'/>
>> <cell id='1' cpus='1' memory='1048576' unit='KiB' memAccess='shared'/>
>> <cell id='2' cpus='2' memory='1048576' unit='KiB' memAccess='private'/>
>> <cell id='3' cpus='3' memory='1048576' unit='KiB'/>
>> </numa>
>> </cpu>
>>
>> <devices>
>> <memory model='dimm'>
>> <target>
>> <size unit='KiB'>524288</size>
>> <node>1</node>
>> </target>
>> <address type='dimm' slot='0' base='0x100000000'/>
>> </memory>
>> </devices>
>>
>>
>> So what we can have is:
>>
>> <hugepages>
>> <page size=.... backend='memory-backend-file'/>
>> </hugepages>
>>
>> <cell id='0' cpus='0' memory='1048576' unit='KiB' backend='memory-backend-ram'/>
>> <cell id='1' cpus='1' memory='1048576' unit='KiB' memAccess='shared' backend='memory-backend-file'/>
>> <cell id='2' cpus='2' memory='1048576' unit='KiB' memAccess='private' backend='memory-backend-file/>
>> <cell id='3' cpus='3' memory='1048576' unit='KiB' backend='memory-backend-ram'/>
>>
>> <devices>
>> <memory model='dimm' backend='memory-backend-ram'/>
>
> That's a bit overkill to me, since we don't have (yet) the capacity
> for a user to select the memory backend, and the value is a
> qemu-specific detail.
So status XML is not something we parse from user. It's produced by
libvirt and it's a superset of user provided XML and some runtime
information. For instance, look around the lines where
VIR_DOMAIN_DEF_PARSE_STATUS flag occurs.
>
>> ..
>> </devices>
>>
>>
>> This way we know what backend was used on the source (in saved state)
>> and the only thing we need to know on dst (on restore) is to check if
>> given backend is available.
>>
>> I don't think putting anything in migration cookies is going to help.
>> It might help migration if anything but it will definitely keep
>> save/restore broken as there are no migration cookies.
>
> Ah, too bad. I am not familar enough with migration and save/restore
> in libvirt. But I started to imagine how the migration cookie could
> have been used.
From qemu POV, there's no difference between migration and save/restore.
All of them is a migration except save/restore is migration to/from a
file (FD actually).
>
> Is there only in the domain XML we can save information?
Yes, status XML. That's where libvirt keeps its runtime information (and
which backend was used falls exactly into this category) so that it is
preserved on the daemon restart.
>
> If yes, then either we go with your proposal (although I wonder if it
> should be qemu: namespaced) or can we introduce libvirt capabilites?
> (something as simple as
> <capabilities><qemu-memorybackend-memfd</capabilities>) ?
No need. Once again, this is not something that users will ever see, nor
libvirt would parse it when parsing input from user.
The same applies for migration XML. These two are different in some
aspects, but that is not critical for this feature. It's sufficient to
say for now that status XML preserves runtime data between daemon
restarts (we want freshly restarted libvirt to remember what backend was
used) and migration XML preserves runtime data on migration (we want the
destination to know what backend is used).
Michal
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Hi
On Mon, Sep 17, 2018 at 3:07 PM, Michal Privoznik <mprivozn@redhat.com> wrote:
> On 09/17/2018 11:30 AM, Marc-André Lureau wrote:
>> Hi
>>
>> On Fri, Sep 14, 2018 at 11:44 AM, Michal Prívozník <mprivozn@redhat.com> wrote:
>>> On 09/13/2018 11:51 PM, John Ferlan wrote:
>>>>
>>>>
>>>> On 09/13/2018 10:09 AM, John Ferlan wrote:
>>>>>
>>>>>
>>>>> On 09/13/2018 03:39 AM, Marc-André Lureau wrote:
>>>>>> Hi
>>>>>>
>>>>>> On Thu, Sep 13, 2018 at 2:25 AM, John Ferlan <jferlan@redhat.com> wrote:
>>>>>>>
>>>>>>> [...]
>>>>>>>
>>>>>>>>>
>>>>>>>>> So all that's "left":
>>>>>>>>>
>>>>>>>>> 1. "Add" a check in qemuDomainABIStabilityCheck to ensure we're not
>>>>>>>>> changing from memory-backend-ram to memory-backend-memfd. We already
>>>>>>>>> check that "(src->mem.source != dst->mem.source)" - so we know we're
>>>>>>>>> already anonymous or not.
>>>>>>>>>
>>>>>>>>> Any suggestions? If source is anonymous, then what? I think we can use
>>>>>>>>> the qemuDomainObjPrivatePtr in some way to determine that we were
>>>>>>>>> started with -memfd (or not started that way).
>>>>>>>>
>>>>>>>> No idea how we could save that information across various restarts /
>>>>>>>> version changes.
>>>>>>>
>>>>>>> I think it'd be ugly... I think migration cookies would have to be
>>>>>>> used... I considered other mechanisms, but each wouldn't quite work.
>>>>>>> Without writing the code, if we cared to do this, then we'd have:
>>>>>>>
>>>>>>> 1. Add a field to qemuDomainObjPrivatePtr that indicates what got
>>>>>>> started (none, memfd, file, or ram). Add a typedef enum that has
>>>>>>> unknown, none, memfd, file, and ram. Add the Parse/Format code to handle
>>>>>>> the field.
>>>>>>>
>>>>>>> 2. Modify the qemu_command code to set the field in priv based on what
>>>>>>> got started, if something got started. The value would be > 0...
>>>>>>>
>>>>>>> 3. Mess with the migration cookie logic to add checks for what the
>>>>>>> source started. On the destination side of that cookie if we had the
>>>>>>> "right capabilities", then check the source cookie to see what it has.
>>>>>>> If it didn't have that field, then I think one could assume the source
>>>>>>> with anonymous memory backing would be using -ram. We'd already fail the
>>>>>>> src/dst mem.source check if one used -file. I'm not all the versed in
>>>>>>> the cookies, but I think that'd work "logically thinking" at least. The
>>>>>>> devil would be in the details.
>>>>>>>
>>>>>>> Assuming your 3.1 patches do something to handle the condition, I guess
>>>>>>> it comes does to how much of a problem it's believed this could be in
>>>>>>> 2.12 and 3.0 if someone is running -ram and migrates to a host that
>>>>>>> would default to -memfd.
>>>>>>
>>>>>> I am afraid we will need to do it to handle transparent -memfd usage.
>>>>>> I'll look at it with your help.
>>>>>>
>>>>>
>>>>> Let's see what I can cobble together. I'll repost the series a bit later
>>>>> today hopefully.
>>>>>
>>>>
>>>> After spending a few hours on this, the cookies just don't help enough
>>>> or I don't know/understand enough about their usage.
>>>>
>>>> I keep coming back to the problem of how do we disallow a migration from
>>>> a host that has/knows about and uses anonymous memfd to one that doesn't
>>>> know about it. Similarly, if a domain source w/ "file" or "ram" (whether
>>>> at startup time or via hotplug) is migrated to a target host that would
>>>> generate memfd - we have no mechanism to stop the migration because we
>>>> have no way to tell what it was running, especially since what gets
>>>> started isn't just based off the source type - hugepages have a
>>>> tangential role. Lots of logic stuffed into qemu_command that probably
>>>> should have been in some qemuDomainPrepareMemtune API.
>>>>
>>>> So unfortunately, I think the only safe way is to create a new source
>>>> type ("anonmem", "anonfile", "anonmemfd", ??) and describe it as lightly
>>>> as the other entries are described (ironically the document default of
>>>> "anonymous" could be "file" or it could be "ram" based 3 other factors
>>>> not described in the docs). At least with a new type name/value we can
>>>> guarantee that someone selects it by name rather than the multipurpose
>>>> "anonymous" type. I think it would mean moving the caps checks to a bit
>>>> later in the code, search for "otherwise check the required capability".
>>>>
>>>> Unless someone still brave enough to keep reading this stream has an
>>>> idea to try. I'm tapped out!
>>>
>>> We can have an element/attribute in status XML/migration XML saying
>>> which backend we've used. This is slightly tricky because we have more
>>> places then one where users can tune confuguration such that we use
>>> different backends. My personal favorite is:
>>>
>>> <memoryBacking>
>>> <hugepages>
>>> <page size='2048' unit='KiB' nodeset='1'/>
>>> </hugepages>
>>> </memoryBacking>
>>>
>>> <cpu>
>>> <numa>
>>> <cell id='0' cpus='0' memory='1048576' unit='KiB'/>
>>> <cell id='1' cpus='1' memory='1048576' unit='KiB' memAccess='shared'/>
>>> <cell id='2' cpus='2' memory='1048576' unit='KiB' memAccess='private'/>
>>> <cell id='3' cpus='3' memory='1048576' unit='KiB'/>
>>> </numa>
>>> </cpu>
>>>
>>> <devices>
>>> <memory model='dimm'>
>>> <target>
>>> <size unit='KiB'>524288</size>
>>> <node>1</node>
>>> </target>
>>> <address type='dimm' slot='0' base='0x100000000'/>
>>> </memory>
>>> </devices>
>>>
>>>
>>> So what we can have is:
>>>
>>> <hugepages>
>>> <page size=.... backend='memory-backend-file'/>
>>> </hugepages>
>>>
>>> <cell id='0' cpus='0' memory='1048576' unit='KiB' backend='memory-backend-ram'/>
>>> <cell id='1' cpus='1' memory='1048576' unit='KiB' memAccess='shared' backend='memory-backend-file'/>
>>> <cell id='2' cpus='2' memory='1048576' unit='KiB' memAccess='private' backend='memory-backend-file/>
>>> <cell id='3' cpus='3' memory='1048576' unit='KiB' backend='memory-backend-ram'/>
>>>
>>> <devices>
>>> <memory model='dimm' backend='memory-backend-ram'/>
>>
>> That's a bit overkill to me, since we don't have (yet) the capacity
>> for a user to select the memory backend, and the value is a
>> qemu-specific detail.
>
> So status XML is not something we parse from user. It's produced by
> libvirt and it's a superset of user provided XML and some runtime
> information. For instance, look around the lines where
> VIR_DOMAIN_DEF_PARSE_STATUS flag occurs.
>
>>
>>> ..
>>> </devices>
>>>
>>>
>>> This way we know what backend was used on the source (in saved state)
>>> and the only thing we need to know on dst (on restore) is to check if
>>> given backend is available.
>>>
>>> I don't think putting anything in migration cookies is going to help.
>>> It might help migration if anything but it will definitely keep
>>> save/restore broken as there are no migration cookies.
>>
>> Ah, too bad. I am not familar enough with migration and save/restore
>> in libvirt. But I started to imagine how the migration cookie could
>> have been used.
>
> From qemu POV, there's no difference between migration and save/restore.
> All of them is a migration except save/restore is migration to/from a
> file (FD actually).
>
>>
>> Is there only in the domain XML we can save information?
>
> Yes, status XML. That's where libvirt keeps its runtime information (and
> which backend was used falls exactly into this category) so that it is
> preserved on the daemon restart.
>
>>
>> If yes, then either we go with your proposal (although I wonder if it
>> should be qemu: namespaced) or can we introduce libvirt capabilites?
>> (something as simple as
>> <capabilities><qemu-memorybackend-memfd</capabilities>) ?
>
> No need. Once again, this is not something that users will ever see, nor
> libvirt would parse it when parsing input from user.
>
> The same applies for migration XML. These two are different in some
> aspects, but that is not critical for this feature. It's sufficient to
> say for now that status XML preserves runtime data between daemon
> restarts (we want freshly restarted libvirt to remember what backend was
> used) and migration XML preserves runtime data on migration (we want the
> destination to know what backend is used).
Ok
Wouldn't it be easier to have <source type="memfd"/>
Daniel didn't have a strong objection against it, it was more of a
suggestion for "anonymous" type improvement:
https://www.redhat.com/archives/libvir-list/2018-August/msg01841.html
Eventually, "anonymous" could be smartly changed to "memfd" by libvirt
when possible (from a non-resume start)
thanks
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
© 2016 - 2026 Red Hat, Inc.