/* SPDX-License-Identifier: MIT * * Copyright (C) 2017-2021 WireGuard LLC. All Rights Reserved. */ package tun /* Implementation of the TUN device interface for linux */ import ( "bytes" "errors" "fmt" "os" "sync" "syscall" "time" "unsafe" "golang.org/x/net/ipv6" "golang.org/x/sys/unix" "golang.zx2c4.com/wireguard/rwcancel" ) const ( cloneDevicePath = "/dev/net/tun" ifReqSize = unix.IFNAMSIZ + 64 ) type NativeTun struct { tunFile *os.File index int32 // if index errors chan error // async error handling events chan Event // device related events nopi bool // the device was passed IFF_NO_PI netlinkSock int netlinkCancel *rwcancel.RWCancel hackListenerClosed sync.Mutex statusListenersShutdown chan struct{} closeOnce sync.Once nameOnce sync.Once // guards calling initNameCache, which sets following fields nameCache string // name of interface nameErr error } func (tun *NativeTun) File() *os.File { return tun.tunFile } func (tun *NativeTun) routineHackListener() { defer tun.hackListenerClosed.Unlock() /* This is needed for the detection to work across network namespaces * If you are reading this and know a better method, please get in touch. */ last := 0 const ( up = 1 down = 2 ) for { sysconn, err := tun.tunFile.SyscallConn() if err != nil { return } err2 := sysconn.Control(func(fd uintptr) { _, err = unix.Write(int(fd), nil) }) if err2 != nil { return } switch err { case unix.EINVAL: if last != up { // If the tunnel is up, it reports that write() is // allowed but we provided invalid data. tun.events <- EventUp last = up } case unix.EIO: if last != down { // If the tunnel is down, it reports that no I/O // is possible, without checking our provided data. tun.events <- EventDown last = down } default: return } select { case <-time.After(time.Second): // nothing case <-tun.statusListenersShutdown: return } } } func createNetlinkSocket() (int, error) { sock, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW, unix.NETLINK_ROUTE) if err != nil { return -1, err } saddr := &unix.SockaddrNetlink{ Family: unix.AF_NETLINK, Groups: unix.RTMGRP_LINK | unix.RTMGRP_IPV4_IFADDR | unix.RTMGRP_IPV6_IFADDR, } err = unix.Bind(sock, saddr) if err != nil { return -1, err } return sock, nil } func (tun *NativeTun) routineNetlinkListener() { defer func() { unix.Close(tun.netlinkSock) tun.hackListenerClosed.Lock() close(tun.events) tun.netlinkCancel.Close() }() for msg := make([]byte, 1<<16); ; { var err error var msgn int for { msgn, _, _, _, err = unix.Recvmsg(tun.netlinkSock, msg[:], nil, 0) if err == nil || !rwcancel.RetryAfterError(err) { break } if !tun.netlinkCancel.ReadyRead() { tun.errors <- fmt.Errorf("netlink socket closed: %w", err) return } } if err != nil { tun.errors <- fmt.Errorf("failed to receive netlink message: %w", err) return } select { case <-tun.statusListenersShutdown: return default: } wasEverUp := false for remain := msg[:msgn]; len(remain) >= unix.SizeofNlMsghdr; { hdr := *(*unix.NlMsghdr)(unsafe.Pointer(&remain[0])) if int(hdr.Len) > len(remain) { break } switch hdr.Type { case unix.NLMSG_DONE: remain = []byte{} case unix.RTM_NEWLINK: info := *(*unix.IfInfomsg)(unsafe.Pointer(&remain[unix.SizeofNlMsghdr])) remain = remain[hdr.Len:] if info.Index != tun.index { // not our interface continue } if info.Flags&unix.IFF_RUNNING != 0 { tun.events <- EventUp wasEverUp = true } if info.Flags&unix.IFF_RUNNING == 0 { // Don't emit EventDown before we've ever emitted EventUp. // This avoids a startup race with HackListener, which // might detect Up before we have finished reporting Down. if wasEverUp { tun.events <- EventDown } } tun.events <- EventMTUUpdate default: remain = remain[hdr.Len:] } } } } func getIFIndex(name string) (int32, error) { fd, err := unix.Socket( unix.AF_INET, unix.SOCK_DGRAM, 0, ) if err != nil { return 0, err } defer unix.Close(fd) var ifr [ifReqSize]byte copy(ifr[:], name) _, _, errno := unix.Syscall( unix.SYS_IOCTL, uintptr(fd), uintptr(unix.SIOCGIFINDEX), uintptr(unsafe.Pointer(&ifr[0])), ) if errno != 0 { return 0, errno } return *(*int32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])), nil } func (tun *NativeTun) setMTU(n int) error { name, err := tun.Name() if err != nil { return err } // open datagram socket fd, err := unix.Socket( unix.AF_INET, unix.SOCK_DGRAM, 0, ) if err != nil { return err } defer unix.Close(fd) // do ioctl call var ifr [ifReqSize]byte copy(ifr[:], name) *(*uint32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])) = uint32(n) _, _, errno := unix.Syscall( unix.SYS_IOCTL, uintptr(fd), uintptr(unix.SIOCSIFMTU), uintptr(unsafe.Pointer(&ifr[0])), ) if errno != 0 { return fmt.Errorf("failed to set MTU of TUN device: %w", errno) } return nil } func (tun *NativeTun) MTU() (int, error) { name, err := tun.Name() if err != nil { return 0, err } // open datagram socket fd, err := unix.Socket( unix.AF_INET, unix.SOCK_DGRAM, 0, ) if err != nil { return 0, err } defer unix.Close(fd) // do ioctl call var ifr [ifReqSize]byte copy(ifr[:], name) _, _, errno := unix.Syscall( unix.SYS_IOCTL, uintptr(fd), uintptr(unix.SIOCGIFMTU), uintptr(unsafe.Pointer(&ifr[0])), ) if errno != 0 { return 0, fmt.Errorf("failed to get MTU of TUN device: %w", errno) } return int(*(*int32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ]))), nil } func (tun *NativeTun) Name() (string, error) { tun.nameOnce.Do(tun.initNameCache) return tun.nameCache, tun.nameErr } func (tun *NativeTun) initNameCache() { tun.nameCache, tun.nameErr = tun.nameSlow() } func (tun *NativeTun) nameSlow() (string, error) { sysconn, err := tun.tunFile.SyscallConn() if err != nil { return "", err } var ifr [ifReqSize]byte var errno syscall.Errno err = sysconn.Control(func(fd uintptr) { _, _, errno = unix.Syscall( unix.SYS_IOCTL, fd, uintptr(unix.TUNGETIFF), uintptr(unsafe.Pointer(&ifr[0])), ) }) if err != nil { return "", fmt.Errorf("failed to get name of TUN device: %w", err) } if errno != 0 { return "", fmt.Errorf("failed to get name of TUN device: %w", errno) } name := ifr[:] if i := bytes.IndexByte(name, 0); i != -1 { name = name[:i] } return string(name), nil } func (tun *NativeTun) Write(buf []byte, offset int) (int, error) { if tun.nopi { buf = buf[offset:] } else { // reserve space for header buf = buf[offset-4:] // add packet information header buf[0] = 0x00 buf[1] = 0x00 if buf[4]>>4 == ipv6.Version { buf[2] = 0x86 buf[3] = 0xdd } else { buf[2] = 0x08 buf[3] = 0x00 } } n, err := tun.tunFile.Write(buf) if errors.Is(err, syscall.EBADFD) { err = os.ErrClosed } return n, err } func (tun *NativeTun) Flush() error { // TODO: can flushing be implemented by buffering and using sendmmsg? return nil } func (tun *NativeTun) Read(buf []byte, offset int) (n int, err error) { select { case err = <-tun.errors: default: if tun.nopi { n, err = tun.tunFile.Read(buf[offset:]) } else { buff := buf[offset-4:] n, err = tun.tunFile.Read(buff[:]) if errors.Is(err, syscall.EBADFD) { err = os.ErrClosed } if n < 4 { n = 0 } else { n -= 4 } } } return } func (tun *NativeTun) Events() chan Event { return tun.events } func (tun *NativeTun) Close() error { var err1, err2 error tun.closeOnce.Do(func() { if tun.statusListenersShutdown != nil { close(tun.statusListenersShutdown) if tun.netlinkCancel != nil { err1 = tun.netlinkCancel.Cancel() } } else if tun.events != nil { close(tun.events) } err2 = tun.tunFile.Close() }) if err1 != nil { return err1 } return err2 } func CreateTUN(name string, mtu int) (Device, error) { nfd, err := unix.Open(cloneDevicePath, os.O_RDWR, 0) if err != nil { if os.IsNotExist(err) { return nil, fmt.Errorf("CreateTUN(%q) failed; %s does not exist", name, cloneDevicePath) } return nil, err } var ifr [ifReqSize]byte var flags uint16 = unix.IFF_TUN // | unix.IFF_NO_PI (disabled for TUN status hack) nameBytes := []byte(name) if len(nameBytes) >= unix.IFNAMSIZ { unix.Close(nfd) return nil, fmt.Errorf("interface name too long: %w", unix.ENAMETOOLONG) } copy(ifr[:], nameBytes) *(*uint16)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])) = flags _, _, errno := unix.Syscall( unix.SYS_IOCTL, uintptr(nfd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&ifr[0])), ) if errno != 0 { unix.Close(nfd) return nil, errno } err = unix.SetNonblock(nfd, true) if err != nil { unix.Close(nfd) return nil, err } // Note that the above -- open,ioctl,nonblock -- must happen prior to handing it to netpoll as below this line. fd := os.NewFile(uintptr(nfd), cloneDevicePath) return CreateTUNFromFile(fd, mtu) } func CreateTUNFromFile(file *os.File, mtu int) (Device, error) { tun := &NativeTun{ tunFile: file, events: make(chan Event, 5), errors: make(chan error, 5), statusListenersShutdown: make(chan struct{}), nopi: false, } name, err := tun.Name() if err != nil { return nil, err } // start event listener tun.index, err = getIFIndex(name) if err != nil { return nil, err } tun.netlinkSock, err = createNetlinkSocket() if err != nil { return nil, err } tun.netlinkCancel, err = rwcancel.NewRWCancel(tun.netlinkSock) if err != nil { unix.Close(tun.netlinkSock) return nil, err } tun.hackListenerClosed.Lock() go tun.routineNetlinkListener() go tun.routineHackListener() // cross namespace err = tun.setMTU(mtu) if err != nil { unix.Close(tun.netlinkSock) return nil, err } return tun, nil } func CreateUnmonitoredTUNFromFD(fd int) (Device, string, error) { err := unix.SetNonblock(fd, true) if err != nil { return nil, "", err } file := os.NewFile(uintptr(fd), "/dev/tun") tun := &NativeTun{ tunFile: file, events: make(chan Event, 5), errors: make(chan error, 5), nopi: true, } name, err := tun.Name() if err != nil { return nil, "", err } return tun, name, nil }