When device is behind the IOMMU then driver need to pass the device
address when programing the bus master. The patch uses VirtioRingMap() to
map the VRING system physical address to device address.
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Contributed-under: TianoCore Contribution Agreement 1.1
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
OvmfPkg/VirtioNetDxe/VirtioNet.h | 2 +
OvmfPkg/VirtioNetDxe/Events.c | 7 ++++
OvmfPkg/VirtioNetDxe/SnpInitialize.c | 40 ++++++++++++++++----
OvmfPkg/VirtioNetDxe/SnpShutdown.c | 2 +
4 files changed, 43 insertions(+), 8 deletions(-)
diff --git a/OvmfPkg/VirtioNetDxe/VirtioNet.h b/OvmfPkg/VirtioNetDxe/VirtioNet.h
index 710859bc6115..d80d441b50a4 100644
--- a/OvmfPkg/VirtioNetDxe/VirtioNet.h
+++ b/OvmfPkg/VirtioNetDxe/VirtioNet.h
@@ -82,10 +82,12 @@ typedef struct {
EFI_HANDLE MacHandle; // VirtioNetDriverBindingStart
VRING RxRing; // VirtioNetInitRing
+ VOID *RxRingMap; // VirtioRingMap
UINT8 *RxBuf; // VirtioNetInitRx
UINT16 RxLastUsed; // VirtioNetInitRx
VRING TxRing; // VirtioNetInitRing
+ VOID *TxRingMap; // VirtioRingMap
UINT16 TxMaxPending; // VirtioNetInitTx
UINT16 TxCurPending; // VirtioNetInitTx
UINT16 *TxFreeStack; // VirtioNetInitTx
diff --git a/OvmfPkg/VirtioNetDxe/Events.c b/OvmfPkg/VirtioNetDxe/Events.c
index 5be1af6ffbee..6950c4d56df1 100644
--- a/OvmfPkg/VirtioNetDxe/Events.c
+++ b/OvmfPkg/VirtioNetDxe/Events.c
@@ -88,4 +88,11 @@ VirtioNetExitBoot (
if (Dev->Snm.State == EfiSimpleNetworkInitialized) {
Dev->VirtIo->SetDeviceStatus (Dev->VirtIo, 0);
}
+
+ //
+ // Unmap Tx and Rx rings so that hypervisor will not be able get readable data
+ // after device is reset.
+ //
+ Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Dev->TxRingMap);
+ Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Dev->RxRingMap);
}
diff --git a/OvmfPkg/VirtioNetDxe/SnpInitialize.c b/OvmfPkg/VirtioNetDxe/SnpInitialize.c
index 0ecfe044a977..803a38bd4239 100644
--- a/OvmfPkg/VirtioNetDxe/SnpInitialize.c
+++ b/OvmfPkg/VirtioNetDxe/SnpInitialize.c
@@ -35,11 +35,13 @@
the network device.
@param[out] Ring The virtio-ring inside the VNET_DEV structure,
corresponding to Selector.
+ @param[out] Mapping A token return from the VirtioRingMap().
@retval EFI_UNSUPPORTED The queue size reported by the virtio-net device is
too small.
@return Status codes from VIRTIO_CFG_WRITE(),
- VIRTIO_CFG_READ() and VirtioRingInit().
+ VIRTIO_CFG_READ(), VirtioRingInit() and
+ VirtioRingMap().
@retval EFI_SUCCESS Ring initialized.
*/
@@ -49,11 +51,13 @@ EFIAPI
VirtioNetInitRing (
IN OUT VNET_DEV *Dev,
IN UINT16 Selector,
- OUT VRING *Ring
+ OUT VRING *Ring,
+ OUT VOID **Mapping
)
{
EFI_STATUS Status;
UINT16 QueueSize;
+ UINT64 RingBaseShift;
//
// step 4b -- allocate selected queue
@@ -79,30 +83,38 @@ VirtioNetInitRing (
return Status;
}
+ Status = VirtioRingMap (Dev->VirtIo, Ring, &RingBaseShift, Mapping);
+ if (EFI_ERROR (Status)) {
+ goto ReleaseQueue;
+ }
+
//
// Additional steps for MMIO: align the queue appropriately, and set the
// size. If anything fails from here on, we must release the ring resources.
//
Status = Dev->VirtIo->SetQueueNum (Dev->VirtIo, QueueSize);
if (EFI_ERROR (Status)) {
- goto ReleaseQueue;
+ goto UnmapQueue;
}
Status = Dev->VirtIo->SetQueueAlign (Dev->VirtIo, EFI_PAGE_SIZE);
if (EFI_ERROR (Status)) {
- goto ReleaseQueue;
+ goto UnmapQueue;
}
//
// step 4c -- report GPFN (guest-physical frame number) of queue
//
- Status = Dev->VirtIo->SetQueueAddress (Dev->VirtIo, Ring, 0);
+ Status = Dev->VirtIo->SetQueueAddress (Dev->VirtIo, Ring, RingBaseShift);
if (EFI_ERROR (Status)) {
- goto ReleaseQueue;
+ goto UnmapQueue;
}
return EFI_SUCCESS;
+UnmapQueue:
+ Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Mapping);
+
ReleaseQueue:
VirtioRingUninit (Dev->VirtIo, Ring);
@@ -456,12 +468,22 @@ VirtioNetInitialize (
//
// step 4b, 4c -- allocate and report virtqueues
//
- Status = VirtioNetInitRing (Dev, VIRTIO_NET_Q_RX, &Dev->RxRing);
+ Status = VirtioNetInitRing (
+ Dev,
+ VIRTIO_NET_Q_RX,
+ &Dev->RxRing,
+ &Dev->RxRingMap
+ );
if (EFI_ERROR (Status)) {
goto DeviceFailed;
}
- Status = VirtioNetInitRing (Dev, VIRTIO_NET_Q_TX, &Dev->TxRing);
+ Status = VirtioNetInitRing (
+ Dev,
+ VIRTIO_NET_Q_TX,
+ &Dev->TxRing,
+ &Dev->TxRingMap
+ );
if (EFI_ERROR (Status)) {
goto ReleaseRxRing;
}
@@ -510,9 +532,11 @@ AbortDevice:
Dev->VirtIo->SetDeviceStatus (Dev->VirtIo, 0);
ReleaseTxRing:
+ Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Dev->TxRingMap);
VirtioRingUninit (Dev->VirtIo, &Dev->TxRing);
ReleaseRxRing:
+ Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Dev->RxRingMap);
VirtioRingUninit (Dev->VirtIo, &Dev->RxRing);
DeviceFailed:
diff --git a/OvmfPkg/VirtioNetDxe/SnpShutdown.c b/OvmfPkg/VirtioNetDxe/SnpShutdown.c
index 5e84191fbbdd..36f3253e77ad 100644
--- a/OvmfPkg/VirtioNetDxe/SnpShutdown.c
+++ b/OvmfPkg/VirtioNetDxe/SnpShutdown.c
@@ -67,7 +67,9 @@ VirtioNetShutdown (
Dev->VirtIo->SetDeviceStatus (Dev->VirtIo, 0);
VirtioNetShutdownRx (Dev);
VirtioNetShutdownTx (Dev);
+ Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Dev->TxRingMap);
VirtioRingUninit (Dev->VirtIo, &Dev->TxRing);
+ Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Dev->RxRingMap);
VirtioRingUninit (Dev->VirtIo, &Dev->RxRing);
Dev->Snm.State = EfiSimpleNetworkStarted;
--
2.7.4
_______________________________________________
edk2-devel mailing list
edk2-devel@lists.01.org
https://lists.01.org/mailman/listinfo/edk2-devel
On 09/01/17 13:24, Brijesh Singh wrote: > When device is behind the IOMMU then driver need to pass the device > address when programing the bus master. The patch uses VirtioRingMap() to > map the VRING system physical address to device address. > > Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org> > Cc: Jordan Justen <jordan.l.justen@intel.com> > Cc: Tom Lendacky <thomas.lendacky@amd.com> > Cc: Laszlo Ersek <lersek@redhat.com> > Contributed-under: TianoCore Contribution Agreement 1.1 > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > --- > OvmfPkg/VirtioNetDxe/VirtioNet.h | 2 + > OvmfPkg/VirtioNetDxe/Events.c | 7 ++++ > OvmfPkg/VirtioNetDxe/SnpInitialize.c | 40 ++++++++++++++++---- > OvmfPkg/VirtioNetDxe/SnpShutdown.c | 2 + > 4 files changed, 43 insertions(+), 8 deletions(-) > > diff --git a/OvmfPkg/VirtioNetDxe/VirtioNet.h b/OvmfPkg/VirtioNetDxe/VirtioNet.h > index 710859bc6115..d80d441b50a4 100644 > --- a/OvmfPkg/VirtioNetDxe/VirtioNet.h > +++ b/OvmfPkg/VirtioNetDxe/VirtioNet.h > @@ -82,10 +82,12 @@ typedef struct { > EFI_HANDLE MacHandle; // VirtioNetDriverBindingStart > > VRING RxRing; // VirtioNetInitRing > + VOID *RxRingMap; // VirtioRingMap > UINT8 *RxBuf; // VirtioNetInitRx > UINT16 RxLastUsed; // VirtioNetInitRx > > VRING TxRing; // VirtioNetInitRing > + VOID *TxRingMap; // VirtioRingMap > UINT16 TxMaxPending; // VirtioNetInitTx > UINT16 TxCurPending; // VirtioNetInitTx > UINT16 *TxFreeStack; // VirtioNetInitTx (1) Hmmm, here I could be inconsistent with my earlier requests for the other drivers, but, in order to remain consistent with the comments here, I think the new comments should say "VirtioNetInitRing" too. Namely, the existent RxRing and TxRing fields are set up on the following call path: VirtioNetInitialize() -- this is the exported Initialize() protocol member VirtioNetInitRing() VirtioRingInit() -- set up here VirtioRingMap() -- this is added now So, because we name VirtioNetInitRing() -- and not VirtioRingInit() -- for "RxRing" and "TxRing", the comments for "RxRingMap" and "TxRingMap" should also say VirtioNetInitRing. > diff --git a/OvmfPkg/VirtioNetDxe/Events.c b/OvmfPkg/VirtioNetDxe/Events.c > index 5be1af6ffbee..6950c4d56df1 100644 > --- a/OvmfPkg/VirtioNetDxe/Events.c > +++ b/OvmfPkg/VirtioNetDxe/Events.c > @@ -88,4 +88,11 @@ VirtioNetExitBoot ( > if (Dev->Snm.State == EfiSimpleNetworkInitialized) { > Dev->VirtIo->SetDeviceStatus (Dev->VirtIo, 0); > } > + > + // > + // Unmap Tx and Rx rings so that hypervisor will not be able get readable data > + // after device is reset. > + // > + Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Dev->TxRingMap); > + Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Dev->RxRingMap); > } (2) This is not correct; the mappings will exist if, and only if, VirtioNetInitialize() completed successfully. That is equivalent to (Dev->Snm.State == EfiSimpleNetworkInitialized). Therefore please move both of these calls into the bracket just above, visible in the context, after the virtio reset. > diff --git a/OvmfPkg/VirtioNetDxe/SnpInitialize.c b/OvmfPkg/VirtioNetDxe/SnpInitialize.c > index 0ecfe044a977..803a38bd4239 100644 > --- a/OvmfPkg/VirtioNetDxe/SnpInitialize.c > +++ b/OvmfPkg/VirtioNetDxe/SnpInitialize.c > @@ -35,11 +35,13 @@ > the network device. > @param[out] Ring The virtio-ring inside the VNET_DEV structure, > corresponding to Selector. > + @param[out] Mapping A token return from the VirtioRingMap(). > > @retval EFI_UNSUPPORTED The queue size reported by the virtio-net device is > too small. > @return Status codes from VIRTIO_CFG_WRITE(), > - VIRTIO_CFG_READ() and VirtioRingInit(). > + VIRTIO_CFG_READ(), VirtioRingInit() and > + VirtioRingMap(). > @retval EFI_SUCCESS Ring initialized. > */ > > @@ -49,11 +51,13 @@ EFIAPI > VirtioNetInitRing ( > IN OUT VNET_DEV *Dev, > IN UINT16 Selector, > - OUT VRING *Ring > + OUT VRING *Ring, > + OUT VOID **Mapping > ) > { > EFI_STATUS Status; > UINT16 QueueSize; > + UINT64 RingBaseShift; > > // > // step 4b -- allocate selected queue > @@ -79,30 +83,38 @@ VirtioNetInitRing ( > return Status; > } > (3) Please add a comment here: // // If anything fails from here on, we must release the ring resources. // > + Status = VirtioRingMap (Dev->VirtIo, Ring, &RingBaseShift, Mapping); (4) Please use a local variable here; we shouldn't modify *Mapping until the function succeeds fully. > + if (EFI_ERROR (Status)) { > + goto ReleaseQueue; > + } > + > // > // Additional steps for MMIO: align the queue appropriately, and set the > // size. If anything fails from here on, we must release the ring resources. > // (5) In the above comment, please replace "release" with "unmap". > Status = Dev->VirtIo->SetQueueNum (Dev->VirtIo, QueueSize); > if (EFI_ERROR (Status)) { > - goto ReleaseQueue; > + goto UnmapQueue; > } > > Status = Dev->VirtIo->SetQueueAlign (Dev->VirtIo, EFI_PAGE_SIZE); > if (EFI_ERROR (Status)) { > - goto ReleaseQueue; > + goto UnmapQueue; > } > > // > // step 4c -- report GPFN (guest-physical frame number) of queue > // > - Status = Dev->VirtIo->SetQueueAddress (Dev->VirtIo, Ring, 0); > + Status = Dev->VirtIo->SetQueueAddress (Dev->VirtIo, Ring, RingBaseShift); > if (EFI_ERROR (Status)) { > - goto ReleaseQueue; > + goto UnmapQueue; > } > (6) So we should set *Mapping from the local variable here. > return EFI_SUCCESS; > > +UnmapQueue: > + Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Mapping); > + (7) The last argument should be (*Mapping), not (Mapping). But, given my points (4) and (6), the local variable should be used here. > ReleaseQueue: > VirtioRingUninit (Dev->VirtIo, Ring); > > @@ -456,12 +468,22 @@ VirtioNetInitialize ( > // > // step 4b, 4c -- allocate and report virtqueues > // > - Status = VirtioNetInitRing (Dev, VIRTIO_NET_Q_RX, &Dev->RxRing); > + Status = VirtioNetInitRing ( > + Dev, > + VIRTIO_NET_Q_RX, > + &Dev->RxRing, > + &Dev->RxRingMap > + ); > if (EFI_ERROR (Status)) { > goto DeviceFailed; > } > > - Status = VirtioNetInitRing (Dev, VIRTIO_NET_Q_TX, &Dev->TxRing); > + Status = VirtioNetInitRing ( > + Dev, > + VIRTIO_NET_Q_TX, > + &Dev->TxRing, > + &Dev->TxRingMap > + ); > if (EFI_ERROR (Status)) { > goto ReleaseRxRing; > } > @@ -510,9 +532,11 @@ AbortDevice: > Dev->VirtIo->SetDeviceStatus (Dev->VirtIo, 0); > > ReleaseTxRing: > + Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Dev->TxRingMap); > VirtioRingUninit (Dev->VirtIo, &Dev->TxRing); > > ReleaseRxRing: > + Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Dev->RxRingMap); > VirtioRingUninit (Dev->VirtIo, &Dev->RxRing); > > DeviceFailed: > diff --git a/OvmfPkg/VirtioNetDxe/SnpShutdown.c b/OvmfPkg/VirtioNetDxe/SnpShutdown.c > index 5e84191fbbdd..36f3253e77ad 100644 > --- a/OvmfPkg/VirtioNetDxe/SnpShutdown.c > +++ b/OvmfPkg/VirtioNetDxe/SnpShutdown.c > @@ -67,7 +67,9 @@ VirtioNetShutdown ( > Dev->VirtIo->SetDeviceStatus (Dev->VirtIo, 0); > VirtioNetShutdownRx (Dev); > VirtioNetShutdownTx (Dev); > + Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Dev->TxRingMap); > VirtioRingUninit (Dev->VirtIo, &Dev->TxRing); > + Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, Dev->RxRingMap); > VirtioRingUninit (Dev->VirtIo, &Dev->RxRing); > > Dev->Snm.State = EfiSimpleNetworkStarted; > (8) The logic seems good (all locations are covered correctly), but I feel we're adding too many standalone UnmapSharedBuffer() calls. Therefore, please prepend a patch to the series that does the following: (8a) affected files: "VirtioNet.h", "SnpSharedHelpers.c" (8b) leading comments for the new function are not needed in *either* "VirtioNet.h" *or* "SnpSharedHelpers.c". The global comments currently seen in "SnpSharedHelpers.c" suffice. (8c) the new function should be added right under VirtioNetShutdownTx(). (8d) This is the new function: VOID EFIAPI VirtioNetUninitRing ( IN OUT VNET_DEV *Dev, IN OUT VRING *Ring ) { VirtioRingUninit (Dev->VirtIo, Ring); } (EFIAPI wouldn't be strictly necessary, but I'd like to remain consistent with the rest of the code.) (8e) The VirtioRingUninit() calls in VirtioNetInitialize() and in VirtioNetShutdown() -- four (4) in total -- should be replaced with calls to this new function. (8f) In this extra patch, please modify the file "OvmfPkg/VirtioNetDxe/TechNotes.txt" to the following state: > +-------------------------+ > | EfiSimpleNetworkStarted | > +-------------------------+ > | ^ > [SnpInitialize.c] | | [SnpShutdown.c] > VirtioNetInitialize | | VirtioNetShutdown > VirtioNetInitRing {Rx, Tx} | | VirtioNetShutdownRx [SnpSharedHelpers.c] > VirtioRingInit | | VirtioNetShutdownTx [SnpSharedHelpers.c] > VirtioNetInitTx | | VirtioNetUninitRing [SnpSharedHelpers.c] > VirtioNetInitRx | | {Tx, Rx} > | | VirtioRingUninit > v | > +-----------------------------+ > | EfiSimpleNetworkInitialized | > +-----------------------------+ (9) Then, in v2 of this patch, (9a) modify the VirtioNetUninitRing() function like this: VOID EFIAPI VirtioNetUninitRing ( IN OUT VNET_DEV *Dev, IN OUT VRING *Ring, IN VOID *RingMap ) { Dev->VirtIo->UnmapSharedBuffer (Dev->VirtIo, RingMap); VirtioRingUninit (Dev->VirtIo, Ring); } (9b) The documentation of the "Mapping" parameter, for the VirtioNetInitialize() function, should say, "a resulting token to pass to VirtioNetUninitRing()". (9c) modify the call sites to pass in Dev->TxRingMap / Dev->RxRingMap as appropriate. (9d) modify the documentation again, to the following state: > +-------------------------+ > | EfiSimpleNetworkStarted | > +-------------------------+ > | ^ > [SnpInitialize.c] | | [SnpShutdown.c] > VirtioNetInitialize | | VirtioNetShutdown > VirtioNetInitRing {Rx, Tx} | | VirtioNetShutdownRx [SnpSharedHelpers.c] > VirtioRingInit | | VirtioNetShutdownTx [SnpSharedHelpers.c] > VirtioRingMap | | VirtioNetUninitRing [SnpSharedHelpers.c] > VirtioNetInitTx | | {Tx, Rx} > VirtioNetInitRx | | VirtIo->UnmapSharedBuffer > | | VirtioRingUninit > v | > +-----------------------------+ > | EfiSimpleNetworkInitialized | > +-----------------------------+ (10) Please modify the subject to say "VRINGs" (plural). Please also modify the commit message similarly: "map the VRING system physical address[es] to device address[es]". Would it be OK with you to submit only these two patches next? I wouldn't like to look at the rest of the patches just now. I expect quite a few tweaks -- especially because I would *really* like to keep "TechNotes.txt" up to date! --, and I'd like to keep my focus, and advance in small steps. I must re-read TechNotes.txt myself, in parallel to the progress that we make with this series. Once we consider these patches complete, we can test them and commit them in isolation. Further versions of the series won't have to repeat these patches. I'll strive to be very responsive, so that you don't have to wait long after every small step. Thank you, Laszlo _______________________________________________ edk2-devel mailing list edk2-devel@lists.01.org https://lists.01.org/mailman/listinfo/edk2-devel
Hi Laszlo, Thanks for quick reviews. I will go through each feedback address them in v2. On 09/05/2017 06:47 AM, Laszlo Ersek wrote: [...] > > Please also modify the commit message similarly: "map the VRING system > physical address[es] to device address[es]". > > > Would it be OK with you to submit only these two patches next? I saw that you looked at other patches, just wondering, do you still want me to submit two patches and advance in small steps ? > > I wouldn't like to look at the rest of the patches just now. I expect > quite a few tweaks -- especially because I would *really* like to keep > "TechNotes.txt" up to date! --, and I'd like to keep my focus, and > advance in small steps. I must re-read TechNotes.txt myself, in parallel > to the progress that we make with this series. > > Once we consider these patches complete, we can test them and commit > them in isolation. Further versions of the series won't have to repeat > these patches. > > I'll strive to be very responsive, so that you don't have to wait long > after every small step. > _______________________________________________ edk2-devel mailing list edk2-devel@lists.01.org https://lists.01.org/mailman/listinfo/edk2-devel
On 09/05/17 20:57, Brijesh Singh wrote: > Hi Laszlo, > > Thanks for quick reviews. I will go through each feedback address them > in v2. > > > > On 09/05/2017 06:47 AM, Laszlo Ersek wrote: > > [...] > >> >> Please also modify the commit message similarly: "map the VRING system >> physical address[es] to device address[es]". >> >> >> Would it be OK with you to submit only these two patches next? > > > I saw that you looked at other patches, just wondering, do you still > want me > to submit two patches and advance in small steps ? I'm not so sure anymore... I haven't looked at patch #3 yet. I guess I could review it tomorrow. ... Based on the discussion in the other thread ("[edk2] [PATCH 0/4] MdeModulePkg: some PCI HC drivers: unmap common buffers at ExitBootServices()"), I'm worried that we might need another restructuring for the IOMMU driver, and for the ExitBootServices() handlers for the virtio drivers (removing the Unmap() calls). If that's the case, then it wouldn't be good if you wasted work on VirtioNetExitBoot() in v2 of this series. Also under patch v1 #4, I requested that you not use VirtioOperationBusMasterRead for DMA that remains pending after the protocol member function returns, because we cannot Unmap() BusMasterRead in VirtioNetExitBoot() without freeing memory. Dependent of the outcome of said other thread, this might not be good advice after all. I'd be pretty disappointed if we had to go back to the drawing board at this stage. In my opinion, the UEFI-2.7 spec doesn't offer any facilities to us that would let us reliably and safely Unmap() bus master operations (= re-encrypt memory) at ExitBootServices(), for such PCI device drivers that we cannot modify. Whatever we do at this point looks like a hack: * Option #1: modify Virtio and other PCI drivers to use only CommonBuffer operations for asynch DMA, and manually Unmap() those operations in the ExitBootServices() handlers of the drivers. In addition, guarantee that Unmap() for CommonBuffer will not release memory. This is the approach I've been supporting. We could implement it for OVMF, because the community controls most of the platform (QEMU, KVM, OVMF), OVMF is 100% open source, and we can propose patches for other (e.g. MdeModulePkg) PCI drivers in edk2, if necessary. Problem: PCI drivers are not required by the spec, or the Driver Writer's Guide, to Unmap() on ExitBootServices() (and then to Unmap() only CommonBuffer). Also, PciIo implementations are not required by the spec to behave like this (= not free memory when Unmap()ing CommonBuffer). We can satisfy those assumptions in OvmfPkg, but perhaps not in MdeModulePkg drivers. * Option #2: add an ExitBootServices() handler to the IOMMU driver, and clean up mappings (= re-encrypt memory) after the PCI / Virtio drivers are finalized in their ExitBootServices() handlers. Ignore mappings in the drivers' ExitBootServices() handlers. Problem: the keyword is "after". Under this approach, we *must* clean up the mappings in the IOMMU driver, but we *must not* do that before the device drivers are finished. And the UEFI spec does not allow us to express a dependency order between ExitBootServices() handlers. We could hack that around by deferring the actual cleanup to *another* event handler, signaled from the IOMMU's "normal" ExitBootServices() handler. Problem: the UEFI spec does not promise that signaling events from ExitBootServices() handlers is safe. Problem: if some PCI driver does the same trick for whatever reason in its ExitBootServices() handler, then we're back to square one. * Option #3: ignore pending mappings (= decrypted memory areas) at ExitBootServices(), leave them all to the OS. Problem: how will the OS find them? Should we introduce another UEFI configuration table? Config tables are generally allocated in BootServicesData type memory, so the boot loader has to save them. Who will write the grub2 code? Who will write the Linux kernel code to parse the table, and to re-encrypt the memory (inherited from the firmware) in-place? * Option #4: ignore pending mappings (= decrypted memory areas) at ExitBootServices(). Ignore them in the OS too. Let's hope that "it's just a few pages of stale data, it won't compromise guest confidentiality when the hypervisor is breached". Doesn't sound like a good sales pitch for SEV. Our approach thus far has been option #1 for the OvmfPkg VirtIo drivers, and option #4 for all PCI drivers outside of OvmfPkg. If you are fine with this, I am as well; we can continue this approach. (We should be conscious of it though.) My other series (see above) was inteded to extend option #1 to some MdeModulePkg drivers (the main ones that we pull into OVMF, USB 1-2-3 and AHCI), and to start a discussion. If you believe that extending option #1 to MdeModulePkg would be better for SEV, we should await the outcome of that discussion. (Please do participate.) Thanks, Laszlo >> I wouldn't like to look at the rest of the patches just now. I expect >> quite a few tweaks -- especially because I would *really* like to keep >> "TechNotes.txt" up to date! --, and I'd like to keep my focus, and >> advance in small steps. I must re-read TechNotes.txt myself, in parallel >> to the progress that we make with this series. >> >> Once we consider these patches complete, we can test them and commit >> them in isolation. Further versions of the series won't have to repeat >> these patches. >> >> I'll strive to be very responsive, so that you don't have to wait long >> after every small step. >> > _______________________________________________ edk2-devel mailing list edk2-devel@lists.01.org https://lists.01.org/mailman/listinfo/edk2-devel
On 5 September 2017 at 21:17, Laszlo Ersek <lersek@redhat.com> wrote: [...] > ... Based on the discussion in the other thread ("[edk2] [PATCH 0/4] > MdeModulePkg: some PCI HC drivers: unmap common buffers at > ExitBootServices()"), I'm worried that we might need another > restructuring for the IOMMU driver, and for the ExitBootServices() > handlers for the virtio drivers (removing the Unmap() calls). > > If that's the case, then it wouldn't be good if you wasted work on > VirtioNetExitBoot() in v2 of this series. > > Also under patch v1 #4, I requested that you not use > VirtioOperationBusMasterRead for DMA that remains pending after the > protocol member function returns, because we cannot Unmap() > BusMasterRead in VirtioNetExitBoot() without freeing memory. Dependent > of the outcome of said other thread, this might not be good advice after > all. > > I'd be pretty disappointed if we had to go back to the drawing board at > this stage. In my opinion, the UEFI-2.7 spec doesn't offer any > facilities to us that would let us reliably and safely Unmap() bus > master operations (= re-encrypt memory) at ExitBootServices(), for such > PCI device drivers that we cannot modify. Whatever we do at this point > looks like a hack: > > > * Option #1: modify Virtio and other PCI drivers to use only > CommonBuffer operations for asynch DMA, and manually Unmap() those > operations in the ExitBootServices() handlers of the drivers. In > addition, guarantee that Unmap() for CommonBuffer will not release > memory. > > This is the approach I've been supporting. We could implement it for > OVMF, because the community controls most of the platform (QEMU, > KVM, OVMF), OVMF is 100% open source, and we can propose patches for > other (e.g. MdeModulePkg) PCI drivers in edk2, if necessary. > > Problem: PCI drivers are not required by the spec, or the Driver > Writer's Guide, to Unmap() on ExitBootServices() (and then to Unmap() > only CommonBuffer). Also, PciIo implementations are not required by > the spec to behave like this (= not free memory when Unmap()ing > CommonBuffer). We can satisfy those assumptions in OvmfPkg, but > perhaps not in MdeModulePkg drivers. > I think you will have much bigger problems with out of tree PCI drivers that never use Map/Unmap in the first place, because stuff just works on PCs if you omit them. This is actually the reason I am quite pleased with this SEV support: it means x86 drivers will start breaking in the same way as they would have on ARM systems with non-coherent DMA or non-1:1 mapped PCI, and vendors will have to clean up their code anyway, not just for ARM compatibility. The only requirement imposed on DMA capable devices is that they cease to perform DMA after ExitBootServices. Anything beyond that should not be the responsibility of the device driver, simply because that requirement did not exist before, and so we cannot go back in time and add it. > * Option #2: add an ExitBootServices() handler to the IOMMU driver, and > clean up mappings (= re-encrypt memory) after the PCI / Virtio drivers > are finalized in their ExitBootServices() handlers. Ignore mappings in > the drivers' ExitBootServices() handlers. > > Problem: the keyword is "after". Under this approach, we *must* clean > up the mappings in the IOMMU driver, but we *must not* do that before > the device drivers are finished. And the UEFI spec does not allow us > to express a dependency order between ExitBootServices() handlers. > > We could hack that around by deferring the actual cleanup to *another* > event handler, signaled from the IOMMU's "normal" ExitBootServices() > handler. > > Problem: the UEFI spec does not promise that signaling events from > ExitBootServices() handlers is safe. > > Problem: if some PCI driver does the same trick for whatever reason in > its ExitBootServices() handler, then we're back to square one. > > I think this is the only feasible option. Fortunately, we don't have to solve this at the spec level, but only have to deal with the implementation. What we need is a method in the IOMMU protocol that is invoked when the ExitBootServices implementation is about to return EFI_SUCCESS (which means it could be the second time it was called). This severely limits what the method is actually capable of doing, and I think it should not be allowed to call any boot or DXE services at all, but it should still be sufficient for some linked list traversals and CopyMem() operations, and whatever else is needed to re-encrypt the memory. And actually, this is not only useful for SEV, other IOMMU drivers will have to deal with the same issue: in most cases, you'll want to disable it before handing over to the OS, but this can never be done safely in a EBS event handler for precisely the reasons you pointed out. Most PCI devices probably deal with this gracefully, but tearing down mappings should simply be avoided if a device could still be accessing it. _______________________________________________ edk2-devel mailing list edk2-devel@lists.01.org https://lists.01.org/mailman/listinfo/edk2-devel
On 09/05/17 23:11, Ard Biesheuvel wrote: > On 5 September 2017 at 21:17, Laszlo Ersek <lersek@redhat.com> wrote: > [...] >> ... Based on the discussion in the other thread ("[edk2] [PATCH 0/4] >> MdeModulePkg: some PCI HC drivers: unmap common buffers at >> ExitBootServices()"), I'm worried that we might need another >> restructuring for the IOMMU driver, and for the ExitBootServices() >> handlers for the virtio drivers (removing the Unmap() calls). >> >> If that's the case, then it wouldn't be good if you wasted work on >> VirtioNetExitBoot() in v2 of this series. >> >> Also under patch v1 #4, I requested that you not use >> VirtioOperationBusMasterRead for DMA that remains pending after the >> protocol member function returns, because we cannot Unmap() >> BusMasterRead in VirtioNetExitBoot() without freeing memory. Dependent >> of the outcome of said other thread, this might not be good advice after >> all. >> >> I'd be pretty disappointed if we had to go back to the drawing board at >> this stage. In my opinion, the UEFI-2.7 spec doesn't offer any >> facilities to us that would let us reliably and safely Unmap() bus >> master operations (= re-encrypt memory) at ExitBootServices(), for such >> PCI device drivers that we cannot modify. Whatever we do at this point >> looks like a hack: >> >> >> * Option #1: modify Virtio and other PCI drivers to use only >> CommonBuffer operations for asynch DMA, and manually Unmap() those >> operations in the ExitBootServices() handlers of the drivers. In >> addition, guarantee that Unmap() for CommonBuffer will not release >> memory. >> >> This is the approach I've been supporting. We could implement it for >> OVMF, because the community controls most of the platform (QEMU, >> KVM, OVMF), OVMF is 100% open source, and we can propose patches for >> other (e.g. MdeModulePkg) PCI drivers in edk2, if necessary. >> >> Problem: PCI drivers are not required by the spec, or the Driver >> Writer's Guide, to Unmap() on ExitBootServices() (and then to Unmap() >> only CommonBuffer). Also, PciIo implementations are not required by >> the spec to behave like this (= not free memory when Unmap()ing >> CommonBuffer). We can satisfy those assumptions in OvmfPkg, but >> perhaps not in MdeModulePkg drivers. >> > > I think you will have much bigger problems with out of tree PCI > drivers that never use Map/Unmap in the first place, because stuff > just works on PCs if you omit them. I'm currently not worried about out-of-tree PCI drivers in the least :) I'm worried about finding common grounds with MdeModulePkg drivers, because they do use Map/Unmap, they seek to conform to the UEFI spec (mostly), and they are an example / reference implementation for other dirvers / driver writers. If out-of-tree drivers break, let them break (as first step); exactly for the reason you state. > This is actually the reason I am quite pleased with this SEV support: > it means x86 drivers will start breaking in the same way as they would > have on ARM systems with non-coherent DMA or non-1:1 mapped PCI, and > vendors will have to clean up their code anyway, not just for ARM > compatibility. I certainly agree this is a benefit! From the virtio conversion thus far (even from the mostly-reviewer side), I can say it is a lot of work. > The only requirement imposed on DMA capable devices is that they cease > to perform DMA after ExitBootServices. Anything beyond that should not > be the responsibility of the device driver, simply because that > requirement did not exist before, and so we cannot go back in time and > add it. Fine by me (as a general guideline, not necessarily as a practical one); what can we use instead, for closing down the IOMMU holes late enough, originally opened by the PciIo.Map() calls? > >> * Option #2: add an ExitBootServices() handler to the IOMMU driver, and >> clean up mappings (= re-encrypt memory) after the PCI / Virtio drivers >> are finalized in their ExitBootServices() handlers. Ignore mappings in >> the drivers' ExitBootServices() handlers. >> >> Problem: the keyword is "after". Under this approach, we *must* clean >> up the mappings in the IOMMU driver, but we *must not* do that before >> the device drivers are finished. And the UEFI spec does not allow us >> to express a dependency order between ExitBootServices() handlers. >> >> We could hack that around by deferring the actual cleanup to *another* >> event handler, signaled from the IOMMU's "normal" ExitBootServices() >> handler. >> >> Problem: the UEFI spec does not promise that signaling events from >> ExitBootServices() handlers is safe. >> >> Problem: if some PCI driver does the same trick for whatever reason in >> its ExitBootServices() handler, then we're back to square one. >> >> > > I think this is the only feasible option. Fortunately, we don't have > to solve this at the spec level, but only have to deal with the > implementation. > > What we need is a method in the IOMMU protocol that is invoked when > the ExitBootServices implementation is about to return EFI_SUCCESS Yes, after "everything else" is done. (Of course, this is the age-old problem with UEFI, when components that were originally meant to be independent now try to order themselves in some fashion. For example, "let me just install, or patch, this ACPI table or configuration table quickly at ReadyToBoot, *after* everyone else is done, and I get a good look at system state". Now combine two such DXE drivers into a firmware, and hilarity ensues as they fight for the last word.) No facility exists to my knowledge (on the spec level) that would enable such fine delaying or dependency ordering between EBS handlers. The only ordering is between notification functions enqueued at TPL_NOTIFY vs. TPL_CALLBACK, and there is a guarantee (I think?) that if you get signalled demonstrably later (i.e., not via an event group), then you get queued for later, within the same TPL. The problem (for this discussion) is that any random PCI driver can register its EBS event notification function at either TPL_NOTIFY or at TPL_CALLBACK, plus that all EBS events are signaled together as an event group. Consequently, if the IOMMU driver registers its EBS notifier at TPL_NOTIFY, it is guaranteed to run earlier than all notifiers with TPL_CALLBACK (which is exactly what we *don't* want). If the IOMMU registers its EBS handler at TPL_CALLBACK, then (due to being signaled through a large event group) the notifier will still be invoked somewhere in the middle of a bunch of other TPL_CALLBACK-level notifiers -- that is, not necessarily in the last position. Hence my floating of a hack to re-queue the notification (to another TPL_CALLBACK handler), so that we get to the "end of the low-prio queue". But calling SignalEvent() from an EBS handler is not explicitly permitted in the spec. (Below you even suggest that an EBS handler should not call any boot service.) Of course, if CoreExitBootServices() can be updated specifically for this use case, I won't protest. > > (which means it could be the second time it was called). Side remark: the CoreExitBootServices() implementation does not notice memory map changes incurred by the ExitBootServices() handlers, in my interpretation. CoreExitBootServices() has a MapKey (= "memmap generation") check early on, in CoreTerminateMemoryMap(). This check catches memmap changes between the last GetMemoryMap(), and the call to ExitBootServices(). After this check succeeds, the notify functions are kicked, and on this path, CoreExitBootServices() simply cannot return any other value than EFI_SUCCESS. So whatever mess the individual callbacks make, it doesn't notice or report. Perhaps this is better for your argument, actually. I'm not fully sure. > This severely > limits what the method is actually capable of doing, and I think it > should not be allowed to call any boot or DXE services at all, but it > should still be sufficient for some linked list traversals and > CopyMem() operations, and whatever else is needed to re-encrypt the > memory. Yes, the necessary actions are sufficiently low-level for this. The problem is the ordering. > > And actually, this is not only useful for SEV, other IOMMU drivers > will have to deal with the same issue: in most cases, you'll want to > disable it before handing over to the OS, but this can never be done > safely in a EBS event handler for precisely the reasons you pointed > out. Most PCI devices probably deal with this gracefully, but tearing > down mappings should simply be avoided if a device could still be > accessing it. > Fully agreed. Once Jiewen adds the policy option to lock down VT-d at EBS (I hope I understood that plan right), dependent on OS support for the IOMMU, the IOMMU faults can show up just the same at EBS, until the rest of the PCI driver EBS handlers finish up. Thanks! Laszlo _______________________________________________ edk2-devel mailing list edk2-devel@lists.01.org https://lists.01.org/mailman/listinfo/edk2-devel
On 5 September 2017 at 22:59, Laszlo Ersek <lersek@redhat.com> wrote: > On 09/05/17 23:11, Ard Biesheuvel wrote: >> On 5 September 2017 at 21:17, Laszlo Ersek <lersek@redhat.com> wrote: [...] >>> * Option #2: add an ExitBootServices() handler to the IOMMU driver, and >>> clean up mappings (= re-encrypt memory) after the PCI / Virtio drivers >>> are finalized in their ExitBootServices() handlers. Ignore mappings in >>> the drivers' ExitBootServices() handlers. >>> >>> Problem: the keyword is "after". Under this approach, we *must* clean >>> up the mappings in the IOMMU driver, but we *must not* do that before >>> the device drivers are finished. And the UEFI spec does not allow us >>> to express a dependency order between ExitBootServices() handlers. >>> >>> We could hack that around by deferring the actual cleanup to *another* >>> event handler, signaled from the IOMMU's "normal" ExitBootServices() >>> handler. >>> >>> Problem: the UEFI spec does not promise that signaling events from >>> ExitBootServices() handlers is safe. >>> >>> Problem: if some PCI driver does the same trick for whatever reason in >>> its ExitBootServices() handler, then we're back to square one. >>> >>> >> >> I think this is the only feasible option. Fortunately, we don't have >> to solve this at the spec level, but only have to deal with the >> implementation. >> >> What we need is a method in the IOMMU protocol that is invoked when >> the ExitBootServices implementation is about to return EFI_SUCCESS > > Yes, after "everything else" is done. > > (Of course, this is the age-old problem with UEFI, when components that > were originally meant to be independent now try to order themselves in > some fashion. For example, "let me just install, or patch, this ACPI > table or configuration table quickly at ReadyToBoot, *after* everyone > else is done, and I get a good look at system state". Now combine two > such DXE drivers into a firmware, and hilarity ensues as they fight for > the last word.) > > No facility exists to my knowledge (on the spec level) that would enable > such fine delaying or dependency ordering between EBS handlers. The only > ordering is between notification functions enqueued at TPL_NOTIFY vs. > TPL_CALLBACK, and there is a guarantee (I think?) that if you get > signalled demonstrably later (i.e., not via an event group), then you > get queued for later, within the same TPL. > > The problem (for this discussion) is that any random PCI driver can > register its EBS event notification function at either TPL_NOTIFY or at > TPL_CALLBACK, plus that all EBS events are signaled together as an event > group. Consequently, if the IOMMU driver registers its EBS notifier at > TPL_NOTIFY, it is guaranteed to run earlier than all notifiers with > TPL_CALLBACK (which is exactly what we *don't* want). If the IOMMU > registers its EBS handler at TPL_CALLBACK, then (due to being signaled > through a large event group) the notifier will still be invoked > somewhere in the middle of a bunch of other TPL_CALLBACK-level notifiers > -- that is, not necessarily in the last position. > > Hence my floating of a hack to re-queue the notification (to another > TPL_CALLBACK handler), so that we get to the "end of the low-prio > queue". But calling SignalEvent() from an EBS handler is not explicitly > permitted in the spec. (Below you even suggest that an EBS handler > should not call any boot service.) > > > Of course, if CoreExitBootServices() can be updated specifically for > this use case, I won't protest. > I think this is the only solution, and not unreasonable given that the IOMMU protocol is private to EDK2. >> >> (which means it could be the second time it was called). > > Side remark: the CoreExitBootServices() implementation does not notice > memory map changes incurred by the ExitBootServices() handlers, in my > interpretation. > > CoreExitBootServices() has a MapKey (= "memmap generation") check early > on, in CoreTerminateMemoryMap(). This check catches memmap changes > between the last GetMemoryMap(), and the call to ExitBootServices(). > > After this check succeeds, the notify functions are kicked, and on this > path, CoreExitBootServices() simply cannot return any other value than > EFI_SUCCESS. So whatever mess the individual callbacks make, it doesn't > notice or report. > CoreExitBootServices() disables the timer first, and so it will return with event dispatch disabled if it fails, ensuring that it is no longer possible for an event handler to be invoked between GetMemoryMap and ExitBootServices. > Perhaps this is better for your argument, actually. I'm not fully sure. > Not really :-) >> This severely >> limits what the method is actually capable of doing, and I think it >> should not be allowed to call any boot or DXE services at all, but it >> should still be sufficient for some linked list traversals and >> CopyMem() operations, and whatever else is needed to re-encrypt the >> memory. > > Yes, the necessary actions are sufficiently low-level for this. The > problem is the ordering. > >> >> And actually, this is not only useful for SEV, other IOMMU drivers >> will have to deal with the same issue: in most cases, you'll want to >> disable it before handing over to the OS, but this can never be done >> safely in a EBS event handler for precisely the reasons you pointed >> out. Most PCI devices probably deal with this gracefully, but tearing >> down mappings should simply be avoided if a device could still be >> accessing it. >> > > Fully agreed. Once Jiewen adds the policy option to lock down VT-d at > EBS (I hope I understood that plan right), dependent on OS support for > the IOMMU, the IOMMU faults can show up just the same at EBS, until the > rest of the PCI driver EBS handlers finish up. > Indeed. I think it is justified to treat the IOMMU protocol specially. _______________________________________________ edk2-devel mailing list edk2-devel@lists.01.org https://lists.01.org/mailman/listinfo/edk2-devel
On 09/06/17 00:18, Ard Biesheuvel wrote: > On 5 September 2017 at 22:59, Laszlo Ersek <lersek@redhat.com> wrote: >> On 09/05/17 23:11, Ard Biesheuvel wrote: >>> (which means it could be the second time it was called). >> >> Side remark: the CoreExitBootServices() implementation does not notice >> memory map changes incurred by the ExitBootServices() handlers, in my >> interpretation. >> >> CoreExitBootServices() has a MapKey (= "memmap generation") check early >> on, in CoreTerminateMemoryMap(). This check catches memmap changes >> between the last GetMemoryMap(), and the call to ExitBootServices(). >> >> After this check succeeds, the notify functions are kicked, and on this >> path, CoreExitBootServices() simply cannot return any other value than >> EFI_SUCCESS. So whatever mess the individual callbacks make, it doesn't >> notice or report. >> > > CoreExitBootServices() disables the timer first, and so it will return > with event dispatch disabled if it fails, ensuring that it is no > longer possible for an event handler to be invoked between > GetMemoryMap and ExitBootServices. True, but that's not what I meant -- I meant that, if an EBS handler violates its contract and changes the memory map (= "it makes mess"), on the call stack of CoreNotifySignalList (&gEfiEventExitBootServicesGuid); then CoreExitBootServices() won't notice it, it won't return an error, and the caller of EBS() won't go back to calling GetMemoryMap() again. > Indeed. I think it is justified to treat the IOMMU protocol specially. The MemoryProtectionExitBootServicesCallback() function call looks like prior art for such customizations. This would mean more delay for SEV, but it would be a very desirable cleanup, for the long term! Thanks! Laszlo _______________________________________________ edk2-devel mailing list edk2-devel@lists.01.org https://lists.01.org/mailman/listinfo/edk2-devel
On 5 September 2017 at 23:37, Laszlo Ersek <lersek@redhat.com> wrote: > On 09/06/17 00:18, Ard Biesheuvel wrote: >> On 5 September 2017 at 22:59, Laszlo Ersek <lersek@redhat.com> wrote: >>> On 09/05/17 23:11, Ard Biesheuvel wrote: > >>>> (which means it could be the second time it was called). >>> >>> Side remark: the CoreExitBootServices() implementation does not notice >>> memory map changes incurred by the ExitBootServices() handlers, in my >>> interpretation. >>> >>> CoreExitBootServices() has a MapKey (= "memmap generation") check early >>> on, in CoreTerminateMemoryMap(). This check catches memmap changes >>> between the last GetMemoryMap(), and the call to ExitBootServices(). >>> >>> After this check succeeds, the notify functions are kicked, and on this >>> path, CoreExitBootServices() simply cannot return any other value than >>> EFI_SUCCESS. So whatever mess the individual callbacks make, it doesn't >>> notice or report. >>> >> >> CoreExitBootServices() disables the timer first, and so it will return >> with event dispatch disabled if it fails, ensuring that it is no >> longer possible for an event handler to be invoked between >> GetMemoryMap and ExitBootServices. > > True, but that's not what I meant -- I meant that, if an EBS handler > violates its contract and changes the memory map (= "it makes mess"), on > the call stack of > > CoreNotifySignalList (&gEfiEventExitBootServicesGuid); > > then CoreExitBootServices() won't notice it, it won't return an error, > and the caller of EBS() won't go back to calling GetMemoryMap() again. > Ah right. So the only thing the memory map key protects against is inadvertent interruptions by event handlers that modify the memory map after GetMemoryMap(). It does not protect against programming errors. _______________________________________________ edk2-devel mailing list edk2-devel@lists.01.org https://lists.01.org/mailman/listinfo/edk2-devel
© 2016 - 2024 Red Hat, Inc.