From 0bcb822e5b4ee6408c5bcb5ad4d4e61b394a834e Mon Sep 17 00:00:00 2001
From: Josh Bleecher Snyder
Date: Tue, 19 Jan 2021 09:02:16 -0800
Subject: device: overhaul device state management

This commit simplifies device state management.
It creates a single unified state variable and documents its semantics.

It also makes state changes more atomic.

As an example of the sort of bug that occurred due to non-atomic state changes,
the following sequence of events used to occur approximately every 2.5 million test runs:

* RoutineTUNEventReader received an EventDown event.
* It called device.Down, which called device.setUpDown.
* That set device.state.changing, but did not yet attempt to lock device.state.Mutex.
* Test completion called device.Close.
* device.Close locked device.state.Mutex.
* device.Close blocked on a call to device.state.stopping.Wait.
* device.setUpDown then attempted to lock device.state.Mutex and blocked.

Deadlock results. setUpDown cannot progress because device.state.Mutex is locked.
Until setUpDown returns, RoutineTUNEventReader cannot call device.state.stopping.Done.
Until device.state.stopping.Done gets called, device.state.stopping.Wait is blocked.
As long as device.state.stopping.Wait is blocked, device.state.Mutex cannot be unlocked.
This commit fixes that deadlock by holding device.state.mu when checking that the device is not closed.

Signed-off-by: Josh Bleecher Snyder --- device/send.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'device/send.go') diff --git a/device/send.go b/device/send.go index b9bcb33..982fec0 100644 --- a/device/send.go +++ b/device/send.go @@ -225,7 +225,7 @@ func (device *Device) RoutineReadFromTUN() { size, err := device.tun.device.Read(elem.buffer[:], offset) if err != nil { - if !device.isClosed.Get() { + if !device.isClosed() { device.log.Errorf("Failed to read packet from TUN device: %v", err) device.Close() } @@ -291,7 +291,7 @@ func (peer *Peer) StagePacket(elem *QueueOutboundElement) { func (peer *Peer) SendStagedPackets() { top: - if len(peer.queue.staged) == 0 || !peer.device.isUp.Get() { + if len(peer.queue.staged) == 0 || !peer.device.isUp() { return } -- cgit v1.2.3-59-g8ed1b