author     Simon Rozman <simon@rozman.si>  2019-07-17 11:53:25 +0200
committer  Simon Rozman <simon@rozman.si>  2019-07-17 12:22:48 +0200
commit     e7fad38a074bc90e4401fa3d2be7ade43f4c2a9d (patch)
tree       70691156d5fa6c80bd36d010eafef51afa4e1436
parent     Spin for a bit before falling back to event object (diff)
Improve lock retention when sending
NDIS may call MINIPORT_SEND_NET_BUFFER_LISTS from parallel threads to queue as many packets as fast as possible. The initial ring-buffer implementation used a spin lock to serialize packet sending completely, making it sub-optimal and burning a large amount of CPU. This commit holds the lock only to allocate space for the packet(s) in the ring, copies the packets unlocked, then locks again to adjust the ring tail.

Signed-off-by: Simon Rozman <simon@rozman.si>
-rw-r--r--  wintun.c | 167
1 file changed, 123 insertions(+), 44 deletions(-)
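The message above describes a three-phase pattern: lock to reserve space in the ring, copy packet data with the lock dropped, then lock again to publish the tail in arrival order. Below is a minimal user-mode sketch of that pattern, not the driver code itself: a pthread mutex stands in for the KSPIN_LOCK, the ring and batch types are simplified, and all names are illustrative.

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

#define RING_CAPACITY 0x10000u /* must be a power of two for RING_WRAP */
#define RING_WRAP(v) ((uint32_t)(v) & (RING_CAPACITY - 1))

struct ring {
    uint32_t head; /* consumer-owned: next offset to read */
    uint32_t tail; /* producer-published: first free offset */
    uint8_t data[RING_CAPACITY];
};

struct batch {
    uint32_t start, end; /* [start, end) region reserved for this batch */
    bool completed;      /* set once the payload has been copied in */
    struct batch *next;  /* FIFO of in-flight batches */
};

static struct ring Ring;
static uint32_t AllocTail; /* private tail: the allocation frontier */
static struct batch *ActiveHead, *ActiveTail;
static pthread_mutex_t Lock = PTHREAD_MUTEX_INITIALIZER;

/* Phase 1 (locked): reserve ring space and append to the in-flight FIFO. */
static bool batch_alloc(struct batch *b, uint32_t size)
{
    pthread_mutex_lock(&Lock);
    uint32_t space = RING_WRAP(Ring.head - AllocTail - sizeof(uint32_t));
    if (space < size) {
        pthread_mutex_unlock(&Lock);
        return false; /* ring full: caller drops the batch */
    }
    b->start = AllocTail;
    AllocTail = b->end = RING_WRAP(AllocTail + size);
    b->completed = false;
    b->next = NULL;
    *(ActiveHead ? &ActiveTail->next : &ActiveHead) = b;
    ActiveTail = b;
    pthread_mutex_unlock(&Lock);
    return true;
}

/* Phase 2 (unlocked): copy into the reserved region; other threads may be
   copying into their own, disjoint regions at the same time. */
static void batch_copy(struct batch *b, const uint8_t *payload, uint32_t size)
{
    for (uint32_t i = 0; i < size; i++)
        Ring.data[RING_WRAP(b->start + i)] = payload[i];
}

/* Phase 3 (locked): mark the batch completed, then publish the tail for
   every leading completed batch so the consumer sees them in FIFO order. */
static void batch_commit(struct batch *b)
{
    pthread_mutex_lock(&Lock);
    b->completed = true;
    while (ActiveHead && ActiveHead->completed) {
        Ring.tail = ActiveHead->end; /* the driver signals TailMoved here */
        ActiveHead = ActiveHead->next;
    }
    pthread_mutex_unlock(&Lock);
}

Phase 2 is where the serialization cost is recovered: several senders copy into disjoint reserved regions concurrently, while the FIFO walk in phase 3 keeps the published tail monotonic even when batches complete out of order.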
diff --git a/wintun.c b/wintun.c
index fc11967..e7729da 100644
--- a/wintun.c
+++ b/wintun.c
@@ -28,7 +28,8 @@
/* Memory alignment of packets and rings */
#define TUN_ALIGNMENT sizeof(ULONG)
-#define TUN_ALIGN(Size) (((ULONG)(Size) + (ULONG)(TUN_ALIGNMENT - 1)) & ~(ULONG)(TUN_ALIGNMENT - 1))
+#define TUN_ALIGN(Size) (((ULONG)(Size) + ((ULONG)TUN_ALIGNMENT - 1)) & ~((ULONG)TUN_ALIGNMENT - 1))
+#define TUN_IS_ALIGNED(Size) (!((ULONG)(Size) & ((ULONG)TUN_ALIGNMENT - 1)))
/* Maximum IP packet size */
#define TUN_MAX_IP_PACKET_SIZE 0xFFFF
/* Maximum packet size */
@@ -140,6 +141,11 @@ typedef struct _TUN_CTX
ULONG Capacity;
KEVENT *TailMoved;
KSPIN_LOCK Lock;
+ ULONG RingTail; /* We need a private tail offset to keep track of ring allocation without disturbing the client. */
+ struct
+ {
+ NET_BUFFER_LIST *Head, *Tail;
+ } ActiveNbls;
} Send;
struct
@@ -225,6 +231,31 @@ TunIndicateStatus(_In_ NDIS_HANDLE MiniportAdapterHandle, _In_ NDIS_MEDIA_CONNEC
NdisMIndicateStatusEx(MiniportAdapterHandle, &Indication);
}
+static void
+TunNblSetTailAndMarkActive(_Inout_ NET_BUFFER_LIST *Nbl, _In_ ULONG Tail)
+{
+ ASSERT(TUN_IS_ALIGNED(Tail)); /* Alignment ensures bit 0 will be 0 (0=active, 1=completed). */
+ NET_BUFFER_LIST_MINIPORT_RESERVED(Nbl)[0] = (VOID *)Tail;
+}
+
+static ULONG
+TunNblGetTail(_In_ NET_BUFFER_LIST *Nbl)
+{
+ return (ULONG)((ULONG_PTR)(NET_BUFFER_LIST_MINIPORT_RESERVED(Nbl)[0]) & ~((ULONG_PTR)TUN_ALIGNMENT - 1));
+}
+
+static void
+TunNblMarkCompleted(_Inout_ NET_BUFFER_LIST *Nbl)
+{
+ *(ULONG_PTR *)&NET_BUFFER_LIST_MINIPORT_RESERVED(Nbl)[0] |= 1;
+}
+
+static BOOLEAN
+TunNblIsCompleted(_In_ NET_BUFFER_LIST *Nbl)
+{
+ return (ULONG_PTR)(NET_BUFFER_LIST_MINIPORT_RESERVED(Nbl)[0]) & 1;
+}
+
static MINIPORT_SEND_NET_BUFFER_LISTS TunSendNetBufferLists;
_Use_decl_annotations_
static void
@@ -235,76 +266,120 @@ TunSendNetBufferLists(
ULONG SendFlags)
{
TUN_CTX *Ctx = (TUN_CTX *)MiniportAdapterContext;
- LONG64 SentPacketsCount = 0, SentPacketsSize = 0, DiscardedPacketsCount = 0;
-
- /* TODO: This handler is called by NDIS in parallel. Consider implementing a lock-less MPSC ring. */
-
+ LONG64 SentPacketsCount = 0, SentPacketsSize = 0, ErrorPacketsCount = 0, DiscardedPacketsCount = 0;
KIRQL Irql = ExAcquireSpinLockShared(&Ctx->TransitionLock);
LONG Flags = InterlockedGet(&Ctx->Flags);
+ TUN_RING *Ring = Ctx->Device.Send.Ring;
+ ULONG RingCapacity = Ctx->Device.Send.Capacity;
- for (NET_BUFFER_LIST *Nbl = NetBufferLists; Nbl; Nbl = NET_BUFFER_LIST_NEXT_NBL(Nbl))
+ for (NET_BUFFER_LIST *Nbl = NetBufferLists, *NblNext; Nbl; Nbl = NblNext)
{
+ NblNext = NET_BUFFER_LIST_NEXT_NBL(Nbl);
+
+ /* Measure NBL. */
+ ULONG PacketsCount = 0, RequiredRingSpace = 0;
for (NET_BUFFER *Nb = NET_BUFFER_LIST_FIRST_NB(Nbl); Nb; Nb = NET_BUFFER_NEXT_NB(Nb))
{
- NDIS_STATUS Status;
- if ((Status = NDIS_STATUS_ADAPTER_REMOVED, !(Flags & TUN_FLAGS_PRESENT)) ||
- (Status = NDIS_STATUS_PAUSED, !(Flags & TUN_FLAGS_RUNNING)) ||
- (Status = NDIS_STATUS_MEDIA_DISCONNECTED, !(Flags & TUN_FLAGS_CONNECTED)))
- goto cleanupDiscardPacket;
-
- TUN_RING *Ring = Ctx->Device.Send.Ring;
- ULONG RingCapacity = Ctx->Device.Send.Capacity;
- ULONG PacketSize = NET_BUFFER_DATA_LENGTH(Nb);
- if (Status = NDIS_STATUS_INVALID_LENGTH, PacketSize > TUN_MAX_IP_PACKET_SIZE)
- goto cleanupDiscardPacket;
- ULONG AlignedPacketSize = TUN_ALIGN(sizeof(TUN_PACKET) + PacketSize);
+ PacketsCount++;
+ UINT PacketSize = NET_BUFFER_DATA_LENGTH(Nb);
+ if (PacketSize <= TUN_MAX_IP_PACKET_SIZE)
+ RequiredRingSpace += TUN_ALIGN(sizeof(TUN_PACKET) + PacketSize);
+ }
- KLOCK_QUEUE_HANDLE LockHandle;
- KeAcquireInStackQueuedSpinLock(&Ctx->Device.Send.Lock, &LockHandle);
+ NDIS_STATUS Status;
+ if ((Status = NDIS_STATUS_ADAPTER_REMOVED, !(Flags & TUN_FLAGS_PRESENT)) ||
+ (Status = NDIS_STATUS_PAUSED, !(Flags & TUN_FLAGS_RUNNING)) ||
+ (Status = NDIS_STATUS_MEDIA_DISCONNECTED, !(Flags & TUN_FLAGS_CONNECTED)))
+ goto skipNbl;
- ULONG RingHead = InterlockedGetU(&Ring->Head);
- if (Status = NDIS_STATUS_ADAPTER_NOT_READY, RingHead >= RingCapacity)
- goto cleanupReleaseSpinLock;
+ /* Allocate space for packet(s) in the ring. */
+ ULONG RingHead = InterlockedGetU(&Ring->Head);
+ if (Status = NDIS_STATUS_ADAPTER_NOT_READY, RingHead >= RingCapacity)
+ goto skipNbl;
+
+ KLOCK_QUEUE_HANDLE LockHandle;
+ KeAcquireInStackQueuedSpinLock(&Ctx->Device.Send.Lock, &LockHandle);
+
+ ULONG RingTail = Ctx->Device.Send.RingTail;
+ ASSERT(RingTail < RingCapacity);
+
+ ULONG RingSpace = TUN_RING_WRAP(RingHead - RingTail - TUN_ALIGNMENT, RingCapacity);
+ if (Status = NDIS_STATUS_BUFFER_OVERFLOW, RingSpace < RequiredRingSpace)
+ goto cleanupKeReleaseInStackQueuedSpinLock;
+
+ TunNblSetTailAndMarkActive(
+ Nbl, Ctx->Device.Send.RingTail = TUN_RING_WRAP(RingTail + RequiredRingSpace, RingCapacity));
+ *(Ctx->Device.Send.ActiveNbls.Head ? &NET_BUFFER_LIST_NEXT_NBL(Ctx->Device.Send.ActiveNbls.Tail)
+ : &Ctx->Device.Send.ActiveNbls.Head) = Nbl;
+ Ctx->Device.Send.ActiveNbls.Tail = Nbl;
- ULONG RingTail = InterlockedGetU(&Ring->Tail);
- if (Status = NDIS_STATUS_ADAPTER_NOT_READY, RingTail >= RingCapacity)
- goto cleanupReleaseSpinLock;
+ KeReleaseInStackQueuedSpinLock(&LockHandle);
- ULONG RingSpace = TUN_RING_WRAP(RingHead - RingTail - TUN_ALIGNMENT, RingCapacity);
- if (Status = NDIS_STATUS_BUFFER_OVERFLOW, AlignedPacketSize > RingSpace)
- goto cleanupReleaseSpinLock;
+ /* Copy packet(s). */
+ for (NET_BUFFER *Nb = NET_BUFFER_LIST_FIRST_NB(Nbl); Nb; Nb = NET_BUFFER_NEXT_NB(Nb))
+ {
+ UINT PacketSize = NET_BUFFER_DATA_LENGTH(Nb);
+ if (Status = NDIS_STATUS_INVALID_LENGTH, PacketSize > TUN_MAX_IP_PACKET_SIZE)
+ goto skipPacket;
TUN_PACKET *Packet = (TUN_PACKET *)(Ring->Data + RingTail);
Packet->Size = PacketSize;
void *NbData = NdisGetDataBuffer(Nb, PacketSize, Packet->Data, 1, 0);
if (!NbData)
- goto cleanupReleaseSpinLock;
- if (NbData != Packet->Data)
- NdisMoveMemory(Packet->Data, NbData, PacketSize);
- InterlockedExchangeU(&Ring->Tail, TUN_RING_WRAP(RingTail + AlignedPacketSize, RingCapacity));
- KeSetEvent(Ctx->Device.Send.TailMoved, IO_NETWORK_INCREMENT, FALSE);
+ {
+ NdisZeroMemory(
+ Packet->Data, PacketSize); /* The space for the packet has already been allocated in the ring. Write
+ null-packet rather than fixing the gap in the ring. */
+ DiscardedPacketsCount++;
+ }
+ else
+ {
+ if (NbData != Packet->Data)
+ NdisMoveMemory(Packet->Data, NbData, PacketSize);
+ SentPacketsCount++;
+ SentPacketsSize += PacketSize;
+ }
- KeReleaseInStackQueuedSpinLock(&LockHandle);
- SentPacketsCount++;
- SentPacketsSize += PacketSize;
+ RingTail = TUN_RING_WRAP(RingTail + TUN_ALIGN(sizeof(TUN_PACKET) + PacketSize), RingCapacity);
continue;
- cleanupReleaseSpinLock:
- KeReleaseInStackQueuedSpinLock(&LockHandle);
- cleanupDiscardPacket:
- DiscardedPacketsCount++;
+ skipPacket:
+ ErrorPacketsCount++;
NET_BUFFER_LIST_STATUS(Nbl) = Status;
}
- }
+ ASSERT(RingTail == TunNblGetTail(Nbl));
- NdisMSendNetBufferListsComplete(
- Ctx->MiniportAdapterHandle, NetBufferLists, NDIS_SEND_COMPLETE_FLAGS_DISPATCH_LEVEL);
+ /* Adjust the ring tail. */
+ TunNblMarkCompleted(Nbl);
+ KeAcquireInStackQueuedSpinLock(&Ctx->Device.Send.Lock, &LockHandle);
+ while (Ctx->Device.Send.ActiveNbls.Head && TunNblIsCompleted(Ctx->Device.Send.ActiveNbls.Head))
+ {
+ NET_BUFFER_LIST *CompletedNbl = Ctx->Device.Send.ActiveNbls.Head;
+ Ctx->Device.Send.ActiveNbls.Head = NET_BUFFER_LIST_NEXT_NBL(CompletedNbl);
+ InterlockedExchangeU(&Ring->Tail, TunNblGetTail(CompletedNbl));
+ KeSetEvent(Ctx->Device.Send.TailMoved, IO_NETWORK_INCREMENT, FALSE);
+ NET_BUFFER_LIST_NEXT_NBL(CompletedNbl) = NULL;
+ NdisMSendNetBufferListsComplete(
+ Ctx->MiniportAdapterHandle, CompletedNbl, NDIS_SEND_COMPLETE_FLAGS_DISPATCH_LEVEL);
+ }
+ KeReleaseInStackQueuedSpinLock(&LockHandle);
+ continue;
+
+ cleanupKeReleaseInStackQueuedSpinLock:
+ KeReleaseInStackQueuedSpinLock(&LockHandle);
+ skipNbl:
+ NET_BUFFER_LIST_STATUS(Nbl) = Status;
+ NET_BUFFER_LIST_NEXT_NBL(Nbl) = NULL;
+ NdisMSendNetBufferListsComplete(Ctx->MiniportAdapterHandle, Nbl, 0);
+ DiscardedPacketsCount += PacketsCount;
+ }
ExReleaseSpinLockShared(&Ctx->TransitionLock, Irql);
InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifHCOutOctets, SentPacketsSize);
InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifHCOutUcastOctets, SentPacketsSize);
InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifHCOutUcastPkts, SentPacketsCount);
+ InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifOutErrors, ErrorPacketsCount);
InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifOutDiscards, DiscardedPacketsCount);
}
@@ -519,6 +594,10 @@ TunDispatchRegisterBuffers(_Inout_ TUN_CTX *Ctx, _Inout_ IRP *Irp)
if (Status = STATUS_INSUFFICIENT_RESOURCES, !Ctx->Device.Send.Ring)
goto cleanupSendUnlockPages;
+ Ctx->Device.Send.RingTail = InterlockedGetU(&Ctx->Device.Send.Ring->Tail);
+ if (Status = STATUS_INVALID_PARAMETER, Ctx->Device.Send.RingTail >= Ctx->Device.Send.Capacity)
+ goto cleanupSendUnlockPages;
+
/* Analyze and lock receive ring. */
Ctx->Device.Receive.Capacity = TUN_RING_CAPACITY(Rrb->Receive.RingSize);
if (Status = STATUS_INVALID_PARAMETER,
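For reference, the free-space check in the send path above folds the head/tail distance and the one-slot slack into a single masked subtraction. A worked example of that arithmetic, assuming (as defined elsewhere in wintun.c, outside these hunks) a power-of-two capacity and TUN_RING_WRAP(Value, Capacity) masking with Capacity - 1:

#include <stdint.h>
#include <stdio.h>

#define TUN_ALIGNMENT sizeof(uint32_t) /* sizeof(ULONG) in the driver */
#define TUN_RING_WRAP(Value, Capacity) ((uint32_t)(Value) & ((Capacity) - 1))

int main(void)
{
    const uint32_t Capacity = 0x800000; /* 8 MiB ring, power of two */

    /* Consumer has read up to 0x100; producers have filled up to 0x500,
       so 0x400 bytes are in flight. */
    uint32_t Head = 0x100, Tail = 0x500;

    /* Head - Tail underflows, and the mask folds it back into the ring:
       free space = Capacity - used - TUN_ALIGNMENT, the slack that keeps
       a completely full ring distinguishable from an empty one. */
    uint32_t Space = TUN_RING_WRAP(Head - Tail - TUN_ALIGNMENT, Capacity);
    printf("0x%x\n", Space); /* prints 0x7ffbfc == 0x800000 - 0x400 - 4 */
    return 0;
}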