diff options
80 files changed, 2032 insertions, 1140 deletions
diff --git a/contrib/examples/launchd/README b/contrib/examples/launchd/README new file mode 100644 index 0000000..67f8d3c --- /dev/null +++ b/contrib/examples/launchd/README @@ -0,0 +1,12 @@ +WireGuard for Launchd +===================== + +The example `com.wireguard.wg0.plist` file may be used for running wg-quick(8) +as a launchd service. Note that the `PATH` variable is modified to point to +the PATH used by Homebrew or Macports, so that it uses the non-system bash(1). + +Usage +----- + +$ sudo cp com.wireguard.wg0.plist /Library/LaunchDaemons +$ sudo launchctl load /Library/LaunchDaemons/com.wireguard.wg0.plist diff --git a/contrib/examples/launchd/com.wireguard.wg0.plist b/contrib/examples/launchd/com.wireguard.wg0.plist new file mode 100644 index 0000000..9fc0141 --- /dev/null +++ b/contrib/examples/launchd/com.wireguard.wg0.plist @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>Label</key> + <string>com.wireguard.wg0</string> + <key>ProgramArguments</key> + <array> + <string>/usr/local/bin/wg-quick</string> + <string>up</string> + <string>/usr/local/etc/wireguard/wg0.conf</string> + </array> + <key>OnDemand</key> + <false/> + <key>RunAtLoad</key> + <true/> + <key>TimeOut</key> + <integer>90</integer> + <key>EnvironmentVariables</key> + <dict> + <key>PATH</key> + <string>/usr/local/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin</string> + </dict> +</dict> +</plist> diff --git a/contrib/examples/nat-hole-punching/README b/contrib/examples/nat-hole-punching/README index 46e6201..99c20e5 100644 --- a/contrib/examples/nat-hole-punching/README +++ b/contrib/examples/nat-hole-punching/README @@ -9,8 +9,8 @@ Compile with: Server is 1.2.3.4 and is on the public internet accepting UDP:49918. -Client A is NAT'd and doesnt't know its IP address. -Client B is NAT'd and doesnt't know its IP address. 
+Client A is NAT'd and doesn't know its IP address. +Client B is NAT'd and doesn't know its IP address. Server runs: diff --git a/contrib/examples/reresolve-dns/reresolve-dns.sh b/contrib/examples/reresolve-dns/reresolve-dns.sh index e579f86..8ab3635 100755 --- a/contrib/examples/reresolve-dns/reresolve-dns.sh +++ b/contrib/examples/reresolve-dns/reresolve-dns.sh @@ -15,7 +15,7 @@ INTERFACE="${BASH_REMATCH[1]}" process_peer() { [[ $PEER_SECTION -ne 1 || -z $PUBLIC_KEY || -z $ENDPOINT ]] && return 0 - [[ $(wg show "$INTERFACE" latest-handshakes) =~ ^${PUBLIC_KEY//+/\\+}\ ([0-9]+)$ ]] || return 0 + [[ $(wg show "$INTERFACE" latest-handshakes) =~ ${PUBLIC_KEY//+/\\+}\ ([0-9]+) ]] || return 0 (( ($(date +%s) - ${BASH_REMATCH[1]}) > 135 )) || return 0 wg set "$INTERFACE" peer "$PUBLIC_KEY" endpoint "$ENDPOINT" reset_peer_section @@ -5,6 +5,7 @@ ccflags-y := -O3 -fvisibility=hidden ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG -g ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' +ccflags-y += -Wframe-larger-than=2048 wireguard-y := main.o noise.o device.o peer.o timers.o queueing.o send.o receive.o socket.o peerlookup.o allowedips.o ratelimiter.o cookie.o netlink.o diff --git a/src/Kconfig b/src/Kconfig index 3a68884..156e9db 100644 --- a/src/Kconfig +++ b/src/Kconfig @@ -4,7 +4,8 @@ config WIREGUARD depends on IPV6 || !IPV6 select NET_UDP_TUNNEL select DST_CACHE - select CRYPTO_BLKCIPHER + select CRYPTO + select CRYPTO_ALGAPI select VFP select VFPv3 if CPU_V7 select NEON if CPU_V7 diff --git a/src/Makefile b/src/Makefile index 24b2ba0..8226038 100644 --- a/src/Makefile +++ b/src/Makefile @@ -2,7 +2,8 @@ # # Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-KERNELDIR ?= /lib/modules/$(shell uname -r)/build +KERNELRELEASE ?= $(shell uname -r) +KERNELDIR ?= /lib/modules/$(KERNELRELEASE)/build PREFIX ?= /usr DESTDIR ?= SRCDIR ?= $(PREFIX)/src @@ -45,7 +46,7 @@ clean: module-install: @$(MAKE) -C $(KERNELDIR) M=$(PWD) modules_install - $(DEPMOD) -a + $(DEPMOD) -a $(KERNELRELEASE) install: module-install tools-install diff --git a/src/allowedips.c b/src/allowedips.c index 610aab0..72667d5 100644 --- a/src/allowedips.c +++ b/src/allowedips.c @@ -299,14 +299,18 @@ void wg_allowedips_free(struct allowedips *table, struct mutex *lock) RCU_INIT_POINTER(table->root4, NULL); RCU_INIT_POINTER(table->root6, NULL); if (rcu_access_pointer(old4)) { - root_remove_peer_lists(old4); - call_rcu(&rcu_dereference_protected(old4, - lockdep_is_held(lock))->rcu, root_free_rcu); + struct allowedips_node *node = rcu_dereference_protected(old4, + lockdep_is_held(lock)); + + root_remove_peer_lists(node); + call_rcu(&node->rcu, root_free_rcu); } if (rcu_access_pointer(old6)) { - root_remove_peer_lists(old6); - call_rcu(&rcu_dereference_protected(old6, - lockdep_is_held(lock))->rcu, root_free_rcu); + struct allowedips_node *node = rcu_dereference_protected(old6, + lockdep_is_held(lock)); + + root_remove_peer_lists(node); + call_rcu(&node->rcu, root_free_rcu); } } diff --git a/src/compat/Kbuild.include b/src/compat/Kbuild.include index 403b69a..db4b0a6 100644 --- a/src/compat/Kbuild.include +++ b/src/compat/Kbuild.include @@ -8,37 +8,37 @@ ccflags-y += -include $(kbuild-dir)/compat/compat.h asflags-y += -include $(kbuild-dir)/compat/compat-asm.h ifeq ($(wildcard $(srctree)/include/linux/ptr_ring.h),) -ccflags-y += -I$(src)/compat/ptr_ring/include +ccflags-y += -I$(kbuild-dir)/compat/ptr_ring/include endif ifeq ($(wildcard $(srctree)/include/linux/siphash.h),) -ccflags-y += -I$(src)/compat/siphash/include +ccflags-y += -I$(kbuild-dir)/compat/siphash/include wireguard-y += compat/siphash/siphash.o endif ifeq ($(wildcard 
$(srctree)/include/net/dst_cache.h),) -ccflags-y += -I$(src)/compat/dst_cache/include +ccflags-y += -I$(kbuild-dir)/compat/dst_cache/include wireguard-y += compat/dst_cache/dst_cache.o endif ifeq ($(wildcard $(srctree)/arch/x86/include/asm/intel-family.h)$(CONFIG_X86),y) -ccflags-y += -I$(src)/compat/intel-family-x86/include +ccflags-y += -I$(kbuild-dir)/compat/intel-family-x86/include endif ifeq ($(wildcard $(srctree)/arch/x86/include/asm/fpu/api.h)$(CONFIG_X86),y) -ccflags-y += -I$(src)/compat/fpu-x86/include +ccflags-y += -I$(kbuild-dir)/compat/fpu-x86/include endif ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/simd.h)$(shell grep -s -F "generic-y += simd.h" "$(srctree)/arch/$(SRCARCH)/Kbuild" "$(srctree)/arch/$(SRCARCH)/Makefile"),) -ccflags-y += -I$(src)/compat/simd-asm/include +ccflags-y += -I$(kbuild-dir)/compat/simd-asm/include endif ifeq ($(wildcard $(srctree)/include/linux/simd.h),) -ccflags-y += -I$(src)/compat/simd/include +ccflags-y += -I$(kbuild-dir)/compat/simd/include endif ifeq ($(wildcard $(srctree)/include/net/udp_tunnel.h),) -ccflags-y += -I$(src)/compat/udp_tunnel/include +ccflags-y += -I$(kbuild-dir)/compat/udp_tunnel/include wireguard-y += compat/udp_tunnel/udp_tunnel.o endif @@ -47,11 +47,21 @@ ccflags-y += -include $(kbuild-dir)/compat/memneq/include.h wireguard-y += compat/memneq/memneq.o endif +ifeq ($(shell grep -s -F "addr_gen_mode" "$(srctree)/include/linux/ipv6.h"),) +ccflags-y += -DCOMPAT_CANNOT_USE_DEV_CNF +endif + +ifdef CONFIG_HZ +ifeq ($(wildcard $(srctree)/include/generated/timeconst.h),) +ccflags-y += $(shell echo 'define gcd(a,b){auto t;while(b){t=b;b=a%b;a=t;};return a;};hz=$(CONFIG_HZ);cd=gcd(hz,1000000);print "-DHZ_TO_USEC_NUM=",1000000/cd," -DHZ_TO_USEC_DEN=",hz/cd;halt;' | bc -q) +endif +endif + ifeq ($(wildcard $(srctree)/arch/arm/include/asm/neon.h)$(CONFIG_ARM),y) -ccflags-y += -I$(src)/compat/neon-arm/include +ccflags-y += -I$(kbuild-dir)/compat/neon-arm/include endif ifeq ($(wildcard 
$(srctree)/arch/arm64/include/asm/neon.h)$(CONFIG_ARM64),y) -ccflags-y += -I$(src)/compat/neon-arm/include +ccflags-y += -I$(kbuild-dir)/compat/neon-arm/include endif ifeq ($(CONFIG_X86_64),y) @@ -75,4 +85,14 @@ ifeq ($(CONFIG_X86_64),y) ccflags-y += $(avx512_instr) asflags-y += $(avx512_instr) endif + ifeq ($(bmi2_instr),) + bmi2_instr :=$(call as-instr,mulx %rax$(comma)%rax$(comma)%rax,-DCONFIG_AS_BMI2=1) + ccflags-y += $(bmi2_instr) + asflags-y += $(bmi2_instr) + endif + ifeq ($(adx_instr),) + adx_instr :=$(call as-instr,adcx %rax$(comma)%rax,-DCONFIG_AS_ADX=1) + ccflags-y += $(adx_instr) + asflags-y += $(adx_instr) + endif endif diff --git a/src/compat/compat-asm.h b/src/compat/compat-asm.h index f5c5bc2..bafd70b 100644 --- a/src/compat/compat-asm.h +++ b/src/compat/compat-asm.h @@ -40,4 +40,9 @@ #undef pull #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 0) +#define SYM_FUNC_START ENTRY +#define SYM_FUNC_END ENDPROC +#endif + #endif /* _WG_COMPATASM_H */ diff --git a/src/compat/compat.h b/src/compat/compat.h index 6ee3ac8..824f57c 100644 --- a/src/compat/compat.h +++ b/src/compat/compat.h @@ -14,11 +14,15 @@ #ifdef RHEL_MAJOR #if RHEL_MAJOR == 7 #define ISRHEL7 +#elif RHEL_MAJOR == 8 +#define ISRHEL8 #endif #endif #ifdef UTS_UBUNTU_RELEASE_ABI #if LINUX_VERSION_CODE == KERNEL_VERSION(3, 13, 11) #define ISUBUNTU1404 +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) +#define ISUBUNTU1604 #endif #endif #ifdef CONFIG_SUSE_KERNEL @@ -43,6 +47,7 @@ #endif #include <linux/cache.h> +#include <linux/init.h> #ifndef __ro_after_init #define __ro_after_init __read_mostly #endif @@ -85,7 +90,7 @@ (LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 27) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || \ (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 8) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)) || \ (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 40) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || \ - 
(LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 54))) && !defined(ISUBUNTU1404) + (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 54))) && !defined(ISUBUNTU1404) && (!defined(ISRHEL7) || RHEL_MINOR < 7) /* TODO: remove < 7 workaround once CentOS 7.7 comes out. */ #include <linux/if.h> #include <net/ip_tunnels.h> #define IP6_ECN_set_ce(a, b) IP6_ECN_set_ce(b) @@ -126,7 +131,7 @@ static inline void skb_reset_tc(struct sk_buff *skb) #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) #include <linux/random.h> #include <linux/siphash.h> -static inline u32 __wgcompat_get_random_u32(void) +static inline u32 __compat_get_random_u32(void) { static siphash_key_t key; static u32 counter = 0; @@ -141,7 +146,7 @@ static inline u32 __wgcompat_get_random_u32(void) #endif return siphash_2u32(counter++, get_random_int(), &key); } -#define get_random_u32 __wgcompat_get_random_u32 +#define get_random_u32 __compat_get_random_u32 #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) && !defined(ISRHEL7) @@ -181,7 +186,7 @@ static inline void netif_keep_dst(struct net_device *dev) #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && !defined(ISRHEL7) #include "checksum/checksum_partial_compat.h" -static inline void *our_pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) +static inline void *__compat_pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) { if (tail != skb) { skb->data_len += len; @@ -189,7 +194,7 @@ static inline void *our_pskb_put(struct sk_buff *skb, struct sk_buff *tail, int } return skb_put(tail, len); } -#define pskb_put our_pskb_put +#define pskb_put __compat_pskb_put #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) && !defined(ISRHEL7) @@ -216,10 +221,11 @@ static inline void skb_scrub_packet(struct sk_buff *skb, bool xnet) #if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) || defined(ISUBUNTU1404)) && !defined(ISRHEL7) #include <linux/random.h> -static inline u32 prandom_u32_max(u32 ep_ro) +static inline u32 __compat_prandom_u32_max(u32 ep_ro) { 
- return (u32)(((u64) prandom_u32() * ep_ro) >> 32); + return (u32)(((u64)prandom_u32() * ep_ro) >> 32); } +#define prandom_u32_max __compat_prandom_u32_max #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 75) && !defined(ISRHEL7) @@ -272,8 +278,8 @@ static inline void memzero_explicit(void *s, size_t count) #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && !defined(ISRHEL7) -static const struct in6_addr our_in6addr_any = IN6ADDR_ANY_INIT; -#define in6addr_any our_in6addr_any +static const struct in6_addr __compat_in6addr_any = IN6ADDR_ANY_INIT; +#define in6addr_any __compat_in6addr_any #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) && !defined(ISOPENSUSE15) @@ -320,7 +326,7 @@ static inline int wait_for_random_bytes(void) } #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) && !defined(ISRHEL8) #include <linux/random.h> #include <linux/slab.h> struct rng_is_initialized_callback { @@ -387,30 +393,43 @@ static inline int get_random_bytes_wait(void *buf, int nbytes) #define system_power_efficient_wq system_unbound_wq #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) && !defined(ISRHEL7) +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 0) +#include <linux/ktime.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) #include <linux/hrtimer.h> -static inline u64 ktime_get_boot_ns(void) +#ifndef ktime_get_real_ts64 +#define timespec64 timespec +#define ktime_get_real_ts64 ktime_get_real_ts +#endif +#else +#include <linux/timekeeping.h> +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) +static inline u64 __compat_jiffies64_to_nsecs(u64 j) { - return ktime_to_ns(ktime_get_boottime()); +#if !(NSEC_PER_SEC % HZ) + return (NSEC_PER_SEC / HZ) * j; +#else + return div_u64(j * HZ_TO_USEC_NUM, HZ_TO_USEC_DEN) * 1000; +#endif } +#define 
jiffies64_to_nsecs __compat_jiffies64_to_nsecs #endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) +static inline u64 ktime_get_coarse_boottime_ns(void) +{ #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) -#include <linux/hrtimer.h> + return ktime_to_ns(ktime_get_boottime()); +#elif (LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 12) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 53) + return ktime_to_ns(ktime_mono_to_any(ns_to_ktime(jiffies64_to_nsecs(get_jiffies_64())), TK_OFFS_BOOT)); #else -#include <linux/timekeeping.h> + return ktime_to_ns(ktime_get_coarse_boottime()); #endif -static inline u64 __wgcompat_ktime_get_boot_fast_ns(void) -{ - return ktime_get_boot_ns(); } -#define ktime_get_boot_fast_ns __wgcompat_ktime_get_boot_fast_ns #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) #include <linux/inetdevice.h> -static inline __be32 our_confirm_addr_indev(struct in_device *in_dev, __be32 dst, __be32 local, int scope) +static inline __be32 __compat_confirm_addr_indev(struct in_device *in_dev, __be32 dst, __be32 local, int scope) { int same = 0; __be32 addr = 0; @@ -437,17 +456,17 @@ static inline __be32 our_confirm_addr_indev(struct in_device *in_dev, __be32 dst } endfor_ifa(in_dev); return same ? 
addr : 0; } -static inline __be32 our_inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope) +static inline __be32 __compat_inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope) { __be32 addr = 0; struct net_device *dev; if (in_dev) - return our_confirm_addr_indev(in_dev, dst, local, scope); + return __compat_confirm_addr_indev(in_dev, dst, local, scope); rcu_read_lock(); for_each_netdev_rcu(net, dev) { in_dev = __in_dev_get_rcu(dev); if (in_dev) { - addr = our_confirm_addr_indev(in_dev, dst, local, scope); + addr = __compat_confirm_addr_indev(in_dev, dst, local, scope); if (addr) break; } @@ -455,14 +474,14 @@ static inline __be32 our_inet_confirm_addr(struct net *net, struct in_device *in rcu_read_unlock(); return addr; } -#define inet_confirm_addr our_inet_confirm_addr +#define inet_confirm_addr __compat_inet_confirm_addr #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/slab.h> -static inline void *kvmalloc_ours(size_t size, gfp_t flags) +static inline void *__compat_kvmalloc(size_t size, gfp_t flags) { gfp_t kmalloc_flags = flags; void *ret; @@ -476,25 +495,25 @@ static inline void *kvmalloc_ours(size_t size, gfp_t flags) return ret; return __vmalloc(size, flags, PAGE_KERNEL); } -static inline void *kvzalloc_ours(size_t size, gfp_t flags) +static inline void *__compat_kvzalloc(size_t size, gfp_t flags) { - return kvmalloc_ours(size, flags | __GFP_ZERO); + return __compat_kvmalloc(size, flags | __GFP_ZERO); } -#define kvmalloc kvmalloc_ours -#define kvzalloc kvzalloc_ours +#define kvmalloc __compat_kvmalloc +#define kvzalloc __compat_kvzalloc #endif #if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 41)) && !defined(ISUBUNTU1404) #include <linux/vmalloc.h> #include <linux/mm.h> -static inline void 
kvfree_ours(const void *addr) +static inline void __compat_kvfree(const void *addr) { if (is_vmalloc_addr(addr)) vfree(addr); else kfree(addr); } -#define kvfree kvfree_ours +#define kvfree __compat_kvfree #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 9) @@ -548,16 +567,16 @@ static inline struct nlattr **genl_family_attrbuf(const struct genl_family *fami #endif #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 2) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 16) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 65) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 101) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 84) -#define ___COMPAT_NETLINK_DUMP_BLOCK { \ +#define __COMPAT_NETLINK_DUMP_BLOCK { \ int ret; \ skb->end -= nlmsg_total_size(sizeof(int)); \ ret = wg_get_device_dump_real(skb, cb); \ skb->end += nlmsg_total_size(sizeof(int)); \ return ret; \ } -#define ___COMPAT_NETLINK_DUMP_OVERRIDE +#define __COMPAT_NETLINK_DUMP_OVERRIDE #else -#define ___COMPAT_NETLINK_DUMP_BLOCK return wg_get_device_dump_real(skb, cb); +#define __COMPAT_NETLINK_DUMP_BLOCK return wg_get_device_dump_real(skb, cb); #endif #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 8) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 25) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 87) #define wg_get_device_dump(a, b) wg_get_device_dump_real(a, b); \ @@ -568,14 +587,14 @@ static int wg_get_device_dump(a, b) { \ if (ret) \ return ret; \ } \ - ___COMPAT_NETLINK_DUMP_BLOCK \ + __COMPAT_NETLINK_DUMP_BLOCK \ } \ static int wg_get_device_dump_real(a, b) #define COMPAT_CANNOT_USE_NETLINK_START -#elif defined(___COMPAT_NETLINK_DUMP_OVERRIDE) +#elif defined(__COMPAT_NETLINK_DUMP_OVERRIDE) #define 
wg_get_device_dump(a, b) wg_get_device_dump_real(a, b); \ static int wg_get_device_dump(a, b) { \ - ___COMPAT_NETLINK_DUMP_BLOCK \ + __COMPAT_NETLINK_DUMP_BLOCK \ } \ static int wg_get_device_dump_real(a, b) #endif @@ -584,10 +603,6 @@ static int wg_get_device_dump_real(a, b) #define COMPAT_CANNOT_USE_IN6_DEV_GET #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) -#define COMPAT_CANNOT_USE_DEV_CNF -#endif - #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) #define COMPAT_CANNOT_USE_IFF_NO_QUEUE #endif @@ -599,7 +614,7 @@ static int wg_get_device_dump_real(a, b) #include <asm/xcr.h> static inline int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) { - return xgetbv(XCR_XFEATURE_ENABLED_MASK) & xfeatures_needed; + return boot_cpu_has(X86_FEATURE_XSAVE) && xgetbv(XCR_XFEATURE_ENABLED_MASK) & xfeatures_needed; } #endif #ifndef XFEATURE_MASK_YMM @@ -632,8 +647,8 @@ static inline int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_n #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) -struct _____dummy_container { char dev; }; -#define netdev_notifier_info net_device *)data); __attribute((unused)) char _____dummy = ((struct _____dummy_container +struct __compat_dummy_container { char dev; }; +#define netdev_notifier_info net_device *)data); __attribute((unused)) char __compat_dummy_variable = ((struct __compat_dummy_container #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) @@ -645,11 +660,6 @@ struct _____dummy_container { char dev; }; #define COMPAT_CANNOT_USE_AVX512 #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) -#define timespec64 timespec -#define ktime_get_real_ts64 ktime_get_real_ts -#endif - #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) #include <net/genetlink.h> #define genl_dump_check_consistent(a, b) genl_dump_check_consistent(a, b, &genl_family) @@ -680,7 +690,7 @@ static inline void *skb_put_data(struct sk_buff *skb, const void *data, unsigned #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) 
#include <linux/atomic.h> #ifndef atomic_read_acquire -#define atomic_read_acquire(v) ({ int ___p1 = atomic_read(v); smp_rmb(); ___p1; }) +#define atomic_read_acquire(v) ({ int __compat_p1 = atomic_read(v); smp_rmb(); __compat_p1; }) #endif #ifndef atomic_set_release #define atomic_set_release(v, i) ({ smp_wmb(); atomic_set(v, i); }) @@ -744,7 +754,7 @@ static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2, #define read_cpuid_part() read_cpuid_part_number() #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) && !defined(ISRHEL7) #define hlist_add_behind(a, b) hlist_add_after(b, a) #endif @@ -778,6 +788,9 @@ struct __kernel_timespec { #define skb_probe_transport_header(a) skb_probe_transport_header(a, 0) #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) && !defined(ISRHEL7) +#define ignore_df local_df +#endif #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) /* Note that all intentional uses of the non-_bh variety need to explicitly @@ -811,13 +824,64 @@ static __always_inline void old_rcu_barrier(void) #define COMPAT_CANNOT_DEPRECIATE_BH_RCU #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 10) +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 10) && !defined(ISRHEL8) static inline void skb_mark_not_on_list(struct sk_buff *skb) { skb->next = NULL; } #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 20, 0) && !defined(ISRHEL8) +#define NLA_EXACT_LEN NLA_UNSPEC +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) +#define NLA_MIN_LEN NLA_UNSPEC +#define COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) && defined(__aarch64__) +#define cpu_have_named_feature(name) (elf_hwcap & (HWCAP_ ## name)) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0) +#include <linux/stddef.h> +#ifndef offsetofend +#define offsetofend(TYPE, MEMBER) (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) +#endif +#endif + +#if 
LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 0) +#define genl_dumpit_info(cb) ({ \ + struct { struct nlattr **attrs; } *a = (void *)((u8 *)cb->args + offsetofend(struct dump_ctx, next_allowedip)); \ + BUILD_BUG_ON(sizeof(cb->args) < offsetofend(struct dump_ctx, next_allowedip) + sizeof(*a)); \ + a->attrs = genl_family_attrbuf(&genl_family); \ + if (nlmsg_parse(cb->nlh, GENL_HDRLEN + genl_family.hdrsize, a->attrs, genl_family.maxattr, device_policy, NULL) < 0) \ + memset(a->attrs, 0, (genl_family.maxattr + 1) * sizeof(struct nlattr *)); \ + a; \ +}) +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 5) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 18) +#define ipv6_dst_lookup_flow(a, b, c, d) ipv6_dst_lookup(a, b, &dst, c) + (void *)0 ?: dst +#endif + +#if defined(ISUBUNTU1604) +#include <linux/siphash.h> +#ifndef _WG_LINUX_SIPHASH_H +#define hsiphash_2u32 siphash_2u32 +#define hsiphash_3u32 siphash_3u32 +#define hsiphash_key_t siphash_key_t +#endif +#endif + +#ifdef CONFIG_VE +#include <linux/netdev_features.h> +#ifdef NETIF_F_VIRTUAL +#undef NETIF_F_LLTX +#define NETIF_F_LLTX (__NETIF_F(LLTX) | __NETIF_F(VIRTUAL)) +#endif +#endif + /* https://github.com/ClangBuiltLinux/linux/issues/7 */ #if defined( __clang__) && (!defined(CONFIG_CLANG_VERSION) || CONFIG_CLANG_VERSION < 80000) #include <linux/bug.h> diff --git a/src/compat/udp_tunnel/udp_tunnel.c b/src/compat/udp_tunnel/udp_tunnel.c index dc47d61..ae43566 100644 --- a/src/compat/udp_tunnel/udp_tunnel.c +++ b/src/compat/udp_tunnel/udp_tunnel.c @@ -17,7 +17,7 @@ /* This is global so, uh, only one real call site... This is the kind of horrific hack you'd expect to see in compat code. 
*/ static udp_tunnel_encap_rcv_t encap_rcv = NULL; -static void our_sk_data_ready(struct sock *sk +static void __compat_sk_data_ready(struct sock *sk #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) ,int unused_vulnerable_length_param #endif @@ -86,7 +86,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, rcu_assign_sk_user_data(sock->sk, cfg->sk_user_data); /* We force the cast in this awful way, due to various Android kernels * backporting things stupidly. */ - *(void **)&sock->sk->sk_data_ready = (void *)our_sk_data_ready; + *(void **)&sock->sk->sk_data_ready = (void *)__compat_sk_data_ready; } #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) @@ -131,12 +131,12 @@ static void udp_set_csum(bool nocheck, struct sk_buff *skb, #endif -static void fake_destructor(struct sk_buff *skb) +static void __compat_fake_destructor(struct sk_buff *skb) { } #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -static void our_iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, +static void __compat_iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet) { @@ -174,7 +174,7 @@ static void our_iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, tstats->tx_bytes -= 8; u64_stats_update_end(&tstats->syncp); } -#define iptunnel_xmit our_iptunnel_xmit +#define iptunnel_xmit __compat_iptunnel_xmit #endif void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, @@ -203,7 +203,7 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb if (!skb->sk) skb->sk = sk; if (!skb->destructor) - skb->destructor = fake_destructor; + skb->destructor = __compat_fake_destructor; #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0) ret = #endif @@ -379,7 +379,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, if (!skb->sk) skb->sk = sk; if (!skb->destructor) - skb->destructor = fake_destructor; + skb->destructor = __compat_fake_destructor; 
ip6tunnel_xmit(skb, dev); return 0; diff --git a/src/compat/udp_tunnel/udp_tunnel_partial_compat.h b/src/compat/udp_tunnel/udp_tunnel_partial_compat.h index a4b5a14..0605896 100644 --- a/src/compat/udp_tunnel/udp_tunnel_partial_compat.h +++ b/src/compat/udp_tunnel/udp_tunnel_partial_compat.h @@ -19,7 +19,7 @@ #include <net/ip6_checksum.h> #include <net/ip6_tunnel.h> #endif -static inline void fake_destructor(struct sk_buff *skb) +static inline void __compat_fake_destructor(struct sk_buff *skb) { } typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); @@ -30,7 +30,7 @@ struct udp_tunnel_sock_cfg { }; /* This is global so, uh, only one real call site... This is the kind of horrific hack you'd expect to see in compat code. */ static udp_tunnel_encap_rcv_t encap_rcv = NULL; -static void our_sk_data_ready(struct sock *sk) +static void __compat_sk_data_ready(struct sock *sk) { struct sk_buff *skb; while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { @@ -46,7 +46,7 @@ static inline void setup_udp_tunnel_sock(struct net *net, struct socket *sock, inet_sk(sk)->mc_loop = 0; encap_rcv = cfg->encap_rcv; rcu_assign_sk_user_data(sk, cfg->sk_user_data); - sk->sk_data_ready = our_sk_data_ready; + sk->sk_data_ready = __compat_sk_data_ready; } static inline void udp_tunnel_sock_release(struct socket *sock) { @@ -122,13 +122,13 @@ static inline int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *ds #include <linux/if.h> #include <net/udp_tunnel.h> #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0) -static inline void fake_destructor(struct sk_buff *skb) +static inline void __compat_fake_destructor(struct sk_buff *skb) { } #endif -#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; if (!(c)->destructor) (c)->destructor = fake_destructor; if (!(c)->sk) (c)->sk = (b); ret__ = udp_tunnel_xmit_skb(a, c, d, e, f, g, h, i, j, k, l); if (ret__) iptunnel_xmit_stats(ret__ - 8, 
&dev__->stats, dev__->tstats); } while (0) +#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; if (!(c)->destructor) (c)->destructor = __compat_fake_destructor; if (!(c)->sk) (c)->sk = (b); ret__ = udp_tunnel_xmit_skb(a, c, d, e, f, g, h, i, j, k, l); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0) #if IS_ENABLED(CONFIG_IPV6) -#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { if (!(c)->destructor) (c)->destructor = fake_destructor; if (!(c)->sk) (c)->sk = (b); udp_tunnel6_xmit_skb(a, c, d, e, f, g, h, j, k, l); } while(0) +#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { if (!(c)->destructor) (c)->destructor = __compat_fake_destructor; if (!(c)->sk) (c)->sk = (b); udp_tunnel6_xmit_skb(a, c, d, e, f, g, h, j, k, l); } while(0) #endif #else @@ -156,7 +156,7 @@ static inline void fake_destructor(struct sk_buff *skb) #include <linux/skbuff.h> #include <linux/if.h> #include <net/udp_tunnel.h> -struct udp_port_cfg_new { +struct __compat_udp_port_cfg { u8 family; union { struct in_addr local_ip; @@ -174,7 +174,7 @@ struct udp_port_cfg_new { __be16 peer_udp_port; unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, use_udp6_rx_checksums:1, ipv6_v6only:1; }; -static inline int __maybe_unused udp_sock_create_new(struct net *net, struct udp_port_cfg_new *cfg, struct socket **sockp) +static inline int __maybe_unused __compat_udp_sock_create(struct net *net, struct __compat_udp_port_cfg *cfg, struct socket **sockp) { struct udp_port_cfg old_cfg = { .family = cfg->family, @@ -221,6 +221,6 @@ static inline int __maybe_unused udp_sock_create_new(struct net *net, struct udp #endif return -EPFNOSUPPORT; } -#define udp_port_cfg udp_port_cfg_new -#define udp_sock_create(a, b, c) udp_sock_create_new(a, b, c) +#define udp_port_cfg __compat_udp_port_cfg +#define udp_sock_create(a, b, c) __compat_udp_sock_create(a, b, c) #endif diff --git 
a/src/cookie.c b/src/cookie.c index a2ddbcd..8b7d1fe 100644 --- a/src/cookie.c +++ b/src/cookie.c @@ -20,7 +20,7 @@ void wg_cookie_checker_init(struct cookie_checker *checker, struct wg_device *wg) { init_rwsem(&checker->secret_lock); - checker->secret_birthdate = ktime_get_boot_fast_ns(); + checker->secret_birthdate = ktime_get_coarse_boottime_ns(); get_random_bytes(checker->secret, NOISE_HASH_LEN); checker->device = wg; } @@ -96,7 +96,7 @@ static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb, if (wg_birthdate_has_expired(checker->secret_birthdate, COOKIE_SECRET_MAX_AGE)) { down_write(&checker->secret_lock); - checker->secret_birthdate = ktime_get_boot_fast_ns(); + checker->secret_birthdate = ktime_get_coarse_boottime_ns(); get_random_bytes(checker->secret, NOISE_HASH_LEN); up_write(&checker->secret_lock); } @@ -222,7 +222,7 @@ void wg_cookie_message_consume(struct message_handshake_cookie *src, if (ret) { down_write(&peer->latest_cookie.lock); memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN); - peer->latest_cookie.birthdate = ktime_get_boot_fast_ns(); + peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns(); peer->latest_cookie.is_valid = true; peer->latest_cookie.have_sent_mac1 = false; up_write(&peer->latest_cookie.lock); diff --git a/src/crypto/Kbuild.include b/src/crypto/Kbuild.include index 460684d..f2a312e 100644 --- a/src/crypto/Kbuild.include +++ b/src/crypto/Kbuild.include @@ -48,7 +48,7 @@ targets := $(patsubst $(kbuild-dir)/%.pl,%.S,$(wildcard $(patsubst %.o,$(kbuild- .SECONDARY: wireguard-y += $(addprefix crypto/zinc/,$(zinc-y)) -ccflags-y += -I$(src)/crypto/include +ccflags-y += -I$(kbuild-dir)/crypto/include ccflags-$(CONFIG_ZINC_ARCH_X86_64) += -DCONFIG_ZINC_ARCH_X86_64 ccflags-$(CONFIG_ZINC_ARCH_ARM) += -DCONFIG_ZINC_ARCH_ARM ccflags-$(CONFIG_ZINC_ARCH_ARM64) += -DCONFIG_ZINC_ARCH_ARM64 diff --git a/src/crypto/include/zinc/chacha20poly1305.h b/src/crypto/include/zinc/chacha20poly1305.h index ce72740..e3339f0 100644 --- 
a/src/crypto/include/zinc/chacha20poly1305.h +++ b/src/crypto/include/zinc/chacha20poly1305.h @@ -22,9 +22,9 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEY_SIZE]); -bool __must_check chacha20poly1305_encrypt_sg( - struct scatterlist *dst, struct scatterlist *src, const size_t src_len, - const u8 *ad, const size_t ad_len, const u64 nonce, +bool __must_check chacha20poly1305_encrypt_sg_inplace( + struct scatterlist *src, const size_t src_len, const u8 *ad, + const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context); bool __must_check @@ -32,9 +32,9 @@ chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEY_SIZE]); -bool __must_check chacha20poly1305_decrypt_sg( - struct scatterlist *dst, struct scatterlist *src, const size_t src_len, - const u8 *ad, const size_t ad_len, const u64 nonce, +bool __must_check chacha20poly1305_decrypt_sg_inplace( + struct scatterlist *src, size_t src_len, const u8 *ad, + const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context); void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, diff --git a/src/crypto/zinc/blake2s/blake2s-x86_64-glue.c b/src/crypto/zinc/blake2s/blake2s-x86_64-glue.c index 9a956be..f8cda59 100644 --- a/src/crypto/zinc/blake2s/blake2s-x86_64-glue.c +++ b/src/crypto/zinc/blake2s/blake2s-x86_64-glue.c @@ -8,22 +8,21 @@ #include <asm/processor.h> #include <asm/fpu/api.h> -asmlinkage void blake2s_compress_avx(struct blake2s_state *state, - const u8 *block, const size_t nblocks, - const u32 inc); +asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, const u8 *block, const size_t 
nblocks, const u32 inc); -static bool blake2s_use_avx __ro_after_init; +static bool blake2s_use_ssse3 __ro_after_init; static bool blake2s_use_avx512 __ro_after_init; -static bool *const blake2s_nobs[] __initconst = { &blake2s_use_avx512 }; +static bool *const blake2s_nobs[] __initconst = { &blake2s_use_ssse3, + &blake2s_use_avx512 }; static void __init blake2s_fpu_init(void) { - blake2s_use_avx = - boot_cpu_has(X86_FEATURE_AVX) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); + blake2s_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3); #ifndef COMPAT_CANNOT_USE_AVX512 blake2s_use_avx512 = boot_cpu_has(X86_FEATURE_AVX) && @@ -47,7 +46,7 @@ static inline bool blake2s_compress_arch(struct blake2s_state *state, simd_get(&simd_context); - if (!IS_ENABLED(CONFIG_AS_AVX) || !blake2s_use_avx || + if (!IS_ENABLED(CONFIG_AS_SSSE3) || !blake2s_use_ssse3 || !simd_use(&simd_context)) goto out; used_arch = true; @@ -59,7 +58,7 @@ static inline bool blake2s_compress_arch(struct blake2s_state *state, if (IS_ENABLED(CONFIG_AS_AVX512) && blake2s_use_avx512) blake2s_compress_avx512(state, block, blocks, inc); else - blake2s_compress_avx(state, block, blocks, inc); + blake2s_compress_ssse3(state, block, blocks, inc); nblocks -= blocks; if (!nblocks) diff --git a/src/crypto/zinc/blake2s/blake2s-x86_64.S b/src/crypto/zinc/blake2s/blake2s-x86_64.S index 675288f..24910b7 100644 --- a/src/crypto/zinc/blake2s/blake2s-x86_64.S +++ b/src/crypto/zinc/blake2s/blake2s-x86_64.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /* * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. - * Copyright (C) 2017 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. + * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. 
*/ #include <linux/linkage.h> @@ -16,595 +16,168 @@ ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302 .section .rodata.cst16.ROR328, "aM", @progbits, 16 .align 16 ROR328: .octa 0x0C0F0E0D080B0A090407060500030201 -#ifdef CONFIG_AS_AVX512 -.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 640 +.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160 .align 64 SIGMA: -.long 0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15 -.long 11, 2, 12, 14, 9, 8, 15, 3, 4, 0, 13, 6, 10, 1, 7, 5 -.long 10, 12, 11, 6, 5, 9, 13, 3, 4, 15, 14, 2, 0, 7, 8, 1 -.long 10, 9, 7, 0, 11, 14, 1, 12, 6, 2, 15, 3, 13, 8, 5, 4 -.long 4, 9, 8, 13, 14, 0, 10, 11, 7, 3, 12, 1, 5, 6, 15, 2 -.long 2, 10, 4, 14, 13, 3, 9, 11, 6, 5, 7, 12, 15, 1, 8, 0 -.long 4, 11, 14, 8, 13, 10, 12, 5, 2, 1, 15, 3, 9, 7, 0, 6 -.long 6, 12, 0, 13, 15, 2, 1, 10, 4, 5, 11, 14, 8, 3, 9, 7 -.long 14, 5, 4, 12, 9, 7, 3, 10, 2, 0, 6, 15, 11, 1, 13, 8 -.long 11, 7, 13, 10, 12, 14, 0, 15, 4, 5, 6, 9, 2, 1, 8, 3 +.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 +.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7 +.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1 +.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0 +.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8 +.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14 +.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2 +.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6 +.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4 +.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12 +#ifdef CONFIG_AS_AVX512 +.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640 +.align 64 +SIGMA2: +.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 +.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7 +.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9 +.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5 +.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12 +.long 2, 11, 4, 15, 
14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9 +.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0 +.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10 +.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14 +.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9 #endif /* CONFIG_AS_AVX512 */ .text -#ifdef CONFIG_AS_AVX -ENTRY(blake2s_compress_avx) - movl %ecx, %ecx - testq %rdx, %rdx +#ifdef CONFIG_AS_SSSE3 +SYM_FUNC_START(blake2s_compress_ssse3) + testq %rdx,%rdx je .Lendofloop - .align 32 + movdqu (%rdi),%xmm0 + movdqu 0x10(%rdi),%xmm1 + movdqa ROT16(%rip),%xmm12 + movdqa ROR328(%rip),%xmm13 + movdqu 0x20(%rdi),%xmm14 + movq %rcx,%xmm15 + leaq SIGMA+0xa0(%rip),%r8 + jmp .Lbeginofloop + .align 32 .Lbeginofloop: - addq %rcx, 32(%rdi) - vmovdqu IV+16(%rip), %xmm1 - vmovdqu (%rsi), %xmm4 - vpxor 32(%rdi), %xmm1, %xmm1 - vmovdqu 16(%rsi), %xmm3 - vshufps $136, %xmm3, %xmm4, %xmm6 - vmovdqa ROT16(%rip), %xmm7 - vpaddd (%rdi), %xmm6, %xmm6 - vpaddd 16(%rdi), %xmm6, %xmm6 - vpxor %xmm6, %xmm1, %xmm1 - vmovdqu IV(%rip), %xmm8 - vpshufb %xmm7, %xmm1, %xmm1 - vmovdqu 48(%rsi), %xmm5 - vpaddd %xmm1, %xmm8, %xmm8 - vpxor 16(%rdi), %xmm8, %xmm9 - vmovdqu 32(%rsi), %xmm2 - vpblendw $12, %xmm3, %xmm5, %xmm13 - vshufps $221, %xmm5, %xmm2, %xmm12 - vpunpckhqdq %xmm2, %xmm4, %xmm14 - vpslld $20, %xmm9, %xmm0 - vpsrld $12, %xmm9, %xmm9 - vpxor %xmm0, %xmm9, %xmm0 - vshufps $221, %xmm3, %xmm4, %xmm9 - vpaddd %xmm9, %xmm6, %xmm9 - vpaddd %xmm0, %xmm9, %xmm9 - vpxor %xmm9, %xmm1, %xmm1 - vmovdqa ROR328(%rip), %xmm6 - vpshufb %xmm6, %xmm1, %xmm1 - vpaddd %xmm1, %xmm8, %xmm8 - vpxor %xmm8, %xmm0, %xmm0 - vpshufd $147, %xmm1, %xmm1 - vpshufd $78, %xmm8, %xmm8 - vpslld $25, %xmm0, %xmm10 - vpsrld $7, %xmm0, %xmm0 - vpxor %xmm10, %xmm0, %xmm0 - vshufps $136, %xmm5, %xmm2, %xmm10 - vpshufd $57, %xmm0, %xmm0 - vpaddd %xmm10, %xmm9, %xmm9 - vpaddd %xmm0, %xmm9, %xmm9 - vpxor %xmm9, %xmm1, %xmm1 - vpaddd %xmm12, %xmm9, %xmm9 - vpblendw $12, %xmm2, %xmm3, %xmm12 - vpshufb %xmm7, 
%xmm1, %xmm1 - vpaddd %xmm1, %xmm8, %xmm8 - vpxor %xmm8, %xmm0, %xmm10 - vpslld $20, %xmm10, %xmm0 - vpsrld $12, %xmm10, %xmm10 - vpxor %xmm0, %xmm10, %xmm0 - vpaddd %xmm0, %xmm9, %xmm9 - vpxor %xmm9, %xmm1, %xmm1 - vpshufb %xmm6, %xmm1, %xmm1 - vpaddd %xmm1, %xmm8, %xmm8 - vpxor %xmm8, %xmm0, %xmm0 - vpshufd $57, %xmm1, %xmm1 - vpshufd $78, %xmm8, %xmm8 - vpslld $25, %xmm0, %xmm10 - vpsrld $7, %xmm0, %xmm0 - vpxor %xmm10, %xmm0, %xmm0 - vpslldq $4, %xmm5, %xmm10 - vpblendw $240, %xmm10, %xmm12, %xmm12 - vpshufd $147, %xmm0, %xmm0 - vpshufd $147, %xmm12, %xmm12 - vpaddd %xmm9, %xmm12, %xmm12 - vpaddd %xmm0, %xmm12, %xmm12 - vpxor %xmm12, %xmm1, %xmm1 - vpshufb %xmm7, %xmm1, %xmm1 - vpaddd %xmm1, %xmm8, %xmm8 - vpxor %xmm8, %xmm0, %xmm11 - vpslld $20, %xmm11, %xmm9 - vpsrld $12, %xmm11, %xmm11 - vpxor %xmm9, %xmm11, %xmm0 - vpshufd $8, %xmm2, %xmm9 - vpblendw $192, %xmm5, %xmm3, %xmm11 - vpblendw $240, %xmm11, %xmm9, %xmm9 - vpshufd $177, %xmm9, %xmm9 - vpaddd %xmm12, %xmm9, %xmm9 - vpaddd %xmm0, %xmm9, %xmm11 - vpxor %xmm11, %xmm1, %xmm1 - vpshufb %xmm6, %xmm1, %xmm1 - vpaddd %xmm1, %xmm8, %xmm8 - vpxor %xmm8, %xmm0, %xmm9 - vpshufd $147, %xmm1, %xmm1 - vpshufd $78, %xmm8, %xmm8 - vpslld $25, %xmm9, %xmm0 - vpsrld $7, %xmm9, %xmm9 - vpxor %xmm0, %xmm9, %xmm0 - vpslldq $4, %xmm3, %xmm9 - vpblendw $48, %xmm9, %xmm2, %xmm9 - vpblendw $240, %xmm9, %xmm4, %xmm9 - vpshufd $57, %xmm0, %xmm0 - vpshufd $177, %xmm9, %xmm9 - vpaddd %xmm11, %xmm9, %xmm9 - vpaddd %xmm0, %xmm9, %xmm9 - vpxor %xmm9, %xmm1, %xmm1 - vpshufb %xmm7, %xmm1, %xmm1 - vpaddd %xmm1, %xmm8, %xmm11 - vpxor %xmm11, %xmm0, %xmm0 - vpslld $20, %xmm0, %xmm8 - vpsrld $12, %xmm0, %xmm0 - vpxor %xmm8, %xmm0, %xmm0 - vpunpckhdq %xmm3, %xmm4, %xmm8 - vpblendw $12, %xmm10, %xmm8, %xmm12 - vpshufd $177, %xmm12, %xmm12 - vpaddd %xmm9, %xmm12, %xmm9 - vpaddd %xmm0, %xmm9, %xmm9 - vpxor %xmm9, %xmm1, %xmm1 - vpshufb %xmm6, %xmm1, %xmm1 - vpaddd %xmm1, %xmm11, %xmm11 - vpxor %xmm11, %xmm0, %xmm0 - vpshufd $57, %xmm1, 
%xmm1 - vpshufd $78, %xmm11, %xmm11 - vpslld $25, %xmm0, %xmm12 - vpsrld $7, %xmm0, %xmm0 - vpxor %xmm12, %xmm0, %xmm0 - vpunpckhdq %xmm5, %xmm2, %xmm12 - vpshufd $147, %xmm0, %xmm0 - vpblendw $15, %xmm13, %xmm12, %xmm12 - vpslldq $8, %xmm5, %xmm13 - vpshufd $210, %xmm12, %xmm12 - vpaddd %xmm9, %xmm12, %xmm9 - vpaddd %xmm0, %xmm9, %xmm9 - vpxor %xmm9, %xmm1, %xmm1 - vpshufb %xmm7, %xmm1, %xmm1 - vpaddd %xmm1, %xmm11, %xmm11 - vpxor %xmm11, %xmm0, %xmm0 - vpslld $20, %xmm0, %xmm12 - vpsrld $12, %xmm0, %xmm0 - vpxor %xmm12, %xmm0, %xmm0 - vpunpckldq %xmm4, %xmm2, %xmm12 - vpblendw $240, %xmm4, %xmm12, %xmm12 - vpblendw $192, %xmm13, %xmm12, %xmm12 - vpsrldq $12, %xmm3, %xmm13 - vpaddd %xmm12, %xmm9, %xmm9 - vpaddd %xmm0, %xmm9, %xmm9 - vpxor %xmm9, %xmm1, %xmm1 - vpshufb %xmm6, %xmm1, %xmm1 - vpaddd %xmm1, %xmm11, %xmm11 - vpxor %xmm11, %xmm0, %xmm0 - vpshufd $147, %xmm1, %xmm1 - vpshufd $78, %xmm11, %xmm11 - vpslld $25, %xmm0, %xmm12 - vpsrld $7, %xmm0, %xmm0 - vpxor %xmm12, %xmm0, %xmm0 - vpblendw $60, %xmm2, %xmm4, %xmm12 - vpblendw $3, %xmm13, %xmm12, %xmm12 - vpshufd $57, %xmm0, %xmm0 - vpshufd $78, %xmm12, %xmm12 - vpaddd %xmm9, %xmm12, %xmm9 - vpaddd %xmm0, %xmm9, %xmm9 - vpxor %xmm9, %xmm1, %xmm1 - vpshufb %xmm7, %xmm1, %xmm1 - vpaddd %xmm1, %xmm11, %xmm11 - vpxor %xmm11, %xmm0, %xmm12 - vpslld $20, %xmm12, %xmm13 - vpsrld $12, %xmm12, %xmm0 - vpblendw $51, %xmm3, %xmm4, %xmm12 - vpxor %xmm13, %xmm0, %xmm0 - vpblendw $192, %xmm10, %xmm12, %xmm10 - vpslldq $8, %xmm2, %xmm12 - vpshufd $27, %xmm10, %xmm10 - vpaddd %xmm9, %xmm10, %xmm9 - vpaddd %xmm0, %xmm9, %xmm9 - vpxor %xmm9, %xmm1, %xmm1 - vpshufb %xmm6, %xmm1, %xmm1 - vpaddd %xmm1, %xmm11, %xmm11 - vpxor %xmm11, %xmm0, %xmm0 - vpshufd $57, %xmm1, %xmm1 - vpshufd $78, %xmm11, %xmm11 - vpslld $25, %xmm0, %xmm10 - vpsrld $7, %xmm0, %xmm0 - vpxor %xmm10, %xmm0, %xmm0 - vpunpckhdq %xmm2, %xmm8, %xmm10 - vpshufd $147, %xmm0, %xmm0 - vpblendw $12, %xmm5, %xmm10, %xmm10 - vpshufd $210, %xmm10, %xmm10 - vpaddd %xmm9, 
%xmm10, %xmm9 - vpaddd %xmm0, %xmm9, %xmm9 - vpxor %xmm9, %xmm1, %xmm1 - vpshufb %xmm7, %xmm1, %xmm1 - vpaddd %xmm1, %xmm11, %xmm11 - vpxor %xmm11, %xmm0, %xmm10 - vpslld $20, %xmm10, %xmm0 - vpsrld $12, %xmm10, %xmm10 - vpxor %xmm0, %xmm10, %xmm0 - vpblendw $12, %xmm4, %xmm5, %xmm10 - vpblendw $192, %xmm12, %xmm10, %xmm10 - vpunpckldq %xmm2, %xmm4, %xmm12 - vpshufd $135, %xmm10, %xmm10 - vpaddd %xmm9, %xmm10, %xmm9 - vpaddd %xmm0, %xmm9, %xmm9 - vpxor %xmm9, %xmm1, %xmm1 - vpshufb %xmm6, %xmm1, %xmm1 - vpaddd %xmm1, %xmm11, %xmm13 - vpxor %xmm13, %xmm0, %xmm0 - vpshufd $147, %xmm1, %xmm1 - vpshufd $78, %xmm13, %xmm13 - vpslld $25, %xmm0, %xmm10 - vpsrld $7, %xmm0, %xmm0 - vpxor %xmm10, %xmm0, %xmm0 - vpblendw $15, %xmm3, %xmm4, %xmm10 - vpblendw $192, %xmm5, %xmm10, %xmm10 - vpshufd $57, %xmm0, %xmm0 - vpshufd $198, %xmm10, %xmm10 - vpaddd %xmm9, %xmm10, %xmm10 - vpaddd %xmm0, %xmm10, %xmm10 - vpxor %xmm10, %xmm1, %xmm1 - vpshufb %xmm7, %xmm1, %xmm1 - vpaddd %xmm1, %xmm13, %xmm13 - vpxor %xmm13, %xmm0, %xmm9 - vpslld $20, %xmm9, %xmm0 - vpsrld $12, %xmm9, %xmm9 - vpxor %xmm0, %xmm9, %xmm0 - vpunpckhdq %xmm2, %xmm3, %xmm9 - vpunpcklqdq %xmm12, %xmm9, %xmm15 - vpunpcklqdq %xmm12, %xmm8, %xmm12 - vpblendw $15, %xmm5, %xmm8, %xmm8 - vpaddd %xmm15, %xmm10, %xmm15 - vpaddd %xmm0, %xmm15, %xmm15 - vpxor %xmm15, %xmm1, %xmm1 - vpshufd $141, %xmm8, %xmm8 - vpshufb %xmm6, %xmm1, %xmm1 - vpaddd %xmm1, %xmm13, %xmm13 - vpxor %xmm13, %xmm0, %xmm0 - vpshufd $57, %xmm1, %xmm1 - vpshufd $78, %xmm13, %xmm13 - vpslld $25, %xmm0, %xmm10 - vpsrld $7, %xmm0, %xmm0 - vpxor %xmm10, %xmm0, %xmm0 - vpunpcklqdq %xmm2, %xmm3, %xmm10 - vpshufd $147, %xmm0, %xmm0 - vpblendw $51, %xmm14, %xmm10, %xmm14 - vpshufd $135, %xmm14, %xmm14 - vpaddd %xmm15, %xmm14, %xmm14 - vpaddd %xmm0, %xmm14, %xmm14 - vpxor %xmm14, %xmm1, %xmm1 - vpunpcklqdq %xmm3, %xmm4, %xmm15 - vpshufb %xmm7, %xmm1, %xmm1 - vpaddd %xmm1, %xmm13, %xmm13 - vpxor %xmm13, %xmm0, %xmm0 - vpslld $20, %xmm0, %xmm11 - vpsrld $12, %xmm0, 
%xmm0 - vpxor %xmm11, %xmm0, %xmm0 - vpunpckhqdq %xmm5, %xmm3, %xmm11 - vpblendw $51, %xmm15, %xmm11, %xmm11 - vpunpckhqdq %xmm3, %xmm5, %xmm15 - vpaddd %xmm11, %xmm14, %xmm11 - vpaddd %xmm0, %xmm11, %xmm11 - vpxor %xmm11, %xmm1, %xmm1 - vpshufb %xmm6, %xmm1, %xmm1 - vpaddd %xmm1, %xmm13, %xmm13 - vpxor %xmm13, %xmm0, %xmm0 - vpshufd $147, %xmm1, %xmm1 - vpshufd $78, %xmm13, %xmm13 - vpslld $25, %xmm0, %xmm14 - vpsrld $7, %xmm0, %xmm0 - vpxor %xmm14, %xmm0, %xmm14 - vpunpckhqdq %xmm4, %xmm2, %xmm0 - vpshufd $57, %xmm14, %xmm14 - vpblendw $51, %xmm15, %xmm0, %xmm15 - vpaddd %xmm15, %xmm11, %xmm15 - vpaddd %xmm14, %xmm15, %xmm15 - vpxor %xmm15, %xmm1, %xmm1 - vpshufb %xmm7, %xmm1, %xmm1 - vpaddd %xmm1, %xmm13, %xmm13 - vpxor %xmm13, %xmm14, %xmm14 - vpslld $20, %xmm14, %xmm11 - vpsrld $12, %xmm14, %xmm14 - vpxor %xmm11, %xmm14, %xmm14 - vpblendw $3, %xmm2, %xmm4, %xmm11 - vpslldq $8, %xmm11, %xmm0 - vpblendw $15, %xmm5, %xmm0, %xmm0 - vpshufd $99, %xmm0, %xmm0 - vpaddd %xmm15, %xmm0, %xmm15 - vpaddd %xmm14, %xmm15, %xmm15 - vpxor %xmm15, %xmm1, %xmm0 - vpaddd %xmm12, %xmm15, %xmm15 - vpshufb %xmm6, %xmm0, %xmm0 - vpaddd %xmm0, %xmm13, %xmm13 - vpxor %xmm13, %xmm14, %xmm14 - vpshufd $57, %xmm0, %xmm0 - vpshufd $78, %xmm13, %xmm13 - vpslld $25, %xmm14, %xmm1 - vpsrld $7, %xmm14, %xmm14 - vpxor %xmm1, %xmm14, %xmm14 - vpblendw $3, %xmm5, %xmm4, %xmm1 - vpshufd $147, %xmm14, %xmm14 - vpaddd %xmm14, %xmm15, %xmm15 - vpxor %xmm15, %xmm0, %xmm0 - vpshufb %xmm7, %xmm0, %xmm0 - vpaddd %xmm0, %xmm13, %xmm13 - vpxor %xmm13, %xmm14, %xmm14 - vpslld $20, %xmm14, %xmm12 - vpsrld $12, %xmm14, %xmm14 - vpxor %xmm12, %xmm14, %xmm14 - vpsrldq $4, %xmm2, %xmm12 - vpblendw $60, %xmm12, %xmm1, %xmm1 - vpaddd %xmm1, %xmm15, %xmm15 - vpaddd %xmm14, %xmm15, %xmm15 - vpxor %xmm15, %xmm0, %xmm0 - vpblendw $12, %xmm4, %xmm3, %xmm1 - vpshufb %xmm6, %xmm0, %xmm0 - vpaddd %xmm0, %xmm13, %xmm13 - vpxor %xmm13, %xmm14, %xmm14 - vpshufd $147, %xmm0, %xmm0 - vpshufd $78, %xmm13, %xmm13 - vpslld $25, 
%xmm14, %xmm12 - vpsrld $7, %xmm14, %xmm14 - vpxor %xmm12, %xmm14, %xmm14 - vpsrldq $4, %xmm5, %xmm12 - vpblendw $48, %xmm12, %xmm1, %xmm1 - vpshufd $33, %xmm5, %xmm12 - vpshufd $57, %xmm14, %xmm14 - vpshufd $108, %xmm1, %xmm1 - vpblendw $51, %xmm12, %xmm10, %xmm12 - vpaddd %xmm15, %xmm1, %xmm15 - vpaddd %xmm14, %xmm15, %xmm15 - vpxor %xmm15, %xmm0, %xmm0 - vpaddd %xmm12, %xmm15, %xmm15 - vpshufb %xmm7, %xmm0, %xmm0 - vpaddd %xmm0, %xmm13, %xmm1 - vpxor %xmm1, %xmm14, %xmm14 - vpslld $20, %xmm14, %xmm13 - vpsrld $12, %xmm14, %xmm14 - vpxor %xmm13, %xmm14, %xmm14 - vpslldq $12, %xmm3, %xmm13 - vpaddd %xmm14, %xmm15, %xmm15 - vpxor %xmm15, %xmm0, %xmm0 - vpshufb %xmm6, %xmm0, %xmm0 - vpaddd %xmm0, %xmm1, %xmm1 - vpxor %xmm1, %xmm14, %xmm14 - vpshufd $57, %xmm0, %xmm0 - vpshufd $78, %xmm1, %xmm1 - vpslld $25, %xmm14, %xmm12 - vpsrld $7, %xmm14, %xmm14 - vpxor %xmm12, %xmm14, %xmm14 - vpblendw $51, %xmm5, %xmm4, %xmm12 - vpshufd $147, %xmm14, %xmm14 - vpblendw $192, %xmm13, %xmm12, %xmm12 - vpaddd %xmm12, %xmm15, %xmm15 - vpaddd %xmm14, %xmm15, %xmm15 - vpxor %xmm15, %xmm0, %xmm0 - vpsrldq $4, %xmm3, %xmm12 - vpshufb %xmm7, %xmm0, %xmm0 - vpaddd %xmm0, %xmm1, %xmm1 - vpxor %xmm1, %xmm14, %xmm14 - vpslld $20, %xmm14, %xmm13 - vpsrld $12, %xmm14, %xmm14 - vpxor %xmm13, %xmm14, %xmm14 - vpblendw $48, %xmm2, %xmm5, %xmm13 - vpblendw $3, %xmm12, %xmm13, %xmm13 - vpshufd $156, %xmm13, %xmm13 - vpaddd %xmm15, %xmm13, %xmm15 - vpaddd %xmm14, %xmm15, %xmm15 - vpxor %xmm15, %xmm0, %xmm0 - vpshufb %xmm6, %xmm0, %xmm0 - vpaddd %xmm0, %xmm1, %xmm1 - vpxor %xmm1, %xmm14, %xmm14 - vpshufd $147, %xmm0, %xmm0 - vpshufd $78, %xmm1, %xmm1 - vpslld $25, %xmm14, %xmm13 - vpsrld $7, %xmm14, %xmm14 - vpxor %xmm13, %xmm14, %xmm14 - vpunpcklqdq %xmm2, %xmm4, %xmm13 - vpshufd $57, %xmm14, %xmm14 - vpblendw $12, %xmm12, %xmm13, %xmm12 - vpshufd $180, %xmm12, %xmm12 - vpaddd %xmm15, %xmm12, %xmm15 - vpaddd %xmm14, %xmm15, %xmm15 - vpxor %xmm15, %xmm0, %xmm0 - vpshufb %xmm7, %xmm0, %xmm0 - vpaddd 
%xmm0, %xmm1, %xmm1 - vpxor %xmm1, %xmm14, %xmm14 - vpslld $20, %xmm14, %xmm12 - vpsrld $12, %xmm14, %xmm14 - vpxor %xmm12, %xmm14, %xmm14 - vpunpckhqdq %xmm9, %xmm4, %xmm12 - vpshufd $198, %xmm12, %xmm12 - vpaddd %xmm15, %xmm12, %xmm15 - vpaddd %xmm14, %xmm15, %xmm15 - vpxor %xmm15, %xmm0, %xmm0 - vpaddd %xmm15, %xmm8, %xmm15 - vpshufb %xmm6, %xmm0, %xmm0 - vpaddd %xmm0, %xmm1, %xmm1 - vpxor %xmm1, %xmm14, %xmm14 - vpshufd $57, %xmm0, %xmm0 - vpshufd $78, %xmm1, %xmm1 - vpslld $25, %xmm14, %xmm12 - vpsrld $7, %xmm14, %xmm14 - vpxor %xmm12, %xmm14, %xmm14 - vpsrldq $4, %xmm4, %xmm12 - vpshufd $147, %xmm14, %xmm14 - vpaddd %xmm14, %xmm15, %xmm15 - vpxor %xmm15, %xmm0, %xmm0 - vpshufb %xmm7, %xmm0, %xmm0 - vpaddd %xmm0, %xmm1, %xmm1 - vpxor %xmm1, %xmm14, %xmm14 - vpslld $20, %xmm14, %xmm8 - vpsrld $12, %xmm14, %xmm14 - vpxor %xmm14, %xmm8, %xmm14 - vpblendw $48, %xmm5, %xmm2, %xmm8 - vpblendw $3, %xmm12, %xmm8, %xmm8 - vpunpckhqdq %xmm5, %xmm4, %xmm12 - vpshufd $75, %xmm8, %xmm8 - vpblendw $60, %xmm10, %xmm12, %xmm10 - vpaddd %xmm15, %xmm8, %xmm15 - vpaddd %xmm14, %xmm15, %xmm15 - vpxor %xmm0, %xmm15, %xmm0 - vpshufd $45, %xmm10, %xmm10 - vpshufb %xmm6, %xmm0, %xmm0 - vpaddd %xmm15, %xmm10, %xmm15 - vpaddd %xmm0, %xmm1, %xmm1 - vpxor %xmm1, %xmm14, %xmm14 - vpshufd $147, %xmm0, %xmm0 - vpshufd $78, %xmm1, %xmm1 - vpslld $25, %xmm14, %xmm8 - vpsrld $7, %xmm14, %xmm14 - vpxor %xmm14, %xmm8, %xmm8 - vpshufd $57, %xmm8, %xmm8 - vpaddd %xmm8, %xmm15, %xmm15 - vpxor %xmm0, %xmm15, %xmm0 - vpshufb %xmm7, %xmm0, %xmm0 - vpaddd %xmm0, %xmm1, %xmm1 - vpxor %xmm8, %xmm1, %xmm8 - vpslld $20, %xmm8, %xmm10 - vpsrld $12, %xmm8, %xmm8 - vpxor %xmm8, %xmm10, %xmm10 - vpunpckldq %xmm3, %xmm4, %xmm8 - vpunpcklqdq %xmm9, %xmm8, %xmm9 - vpaddd %xmm9, %xmm15, %xmm9 - vpaddd %xmm10, %xmm9, %xmm9 - vpxor %xmm0, %xmm9, %xmm8 - vpshufb %xmm6, %xmm8, %xmm8 - vpaddd %xmm8, %xmm1, %xmm1 - vpxor %xmm1, %xmm10, %xmm10 - vpshufd $57, %xmm8, %xmm8 - vpshufd $78, %xmm1, %xmm1 - vpslld $25, %xmm10, 
%xmm12 - vpsrld $7, %xmm10, %xmm10 - vpxor %xmm10, %xmm12, %xmm10 - vpblendw $48, %xmm4, %xmm3, %xmm12 - vpshufd $147, %xmm10, %xmm0 - vpunpckhdq %xmm5, %xmm3, %xmm10 - vpshufd $78, %xmm12, %xmm12 - vpunpcklqdq %xmm4, %xmm10, %xmm10 - vpblendw $192, %xmm2, %xmm10, %xmm10 - vpshufhw $78, %xmm10, %xmm10 - vpaddd %xmm10, %xmm9, %xmm10 - vpaddd %xmm0, %xmm10, %xmm10 - vpxor %xmm8, %xmm10, %xmm8 - vpshufb %xmm7, %xmm8, %xmm8 - vpaddd %xmm8, %xmm1, %xmm1 - vpxor %xmm0, %xmm1, %xmm9 - vpslld $20, %xmm9, %xmm0 - vpsrld $12, %xmm9, %xmm9 - vpxor %xmm9, %xmm0, %xmm0 - vpunpckhdq %xmm5, %xmm4, %xmm9 - vpblendw $240, %xmm9, %xmm2, %xmm13 - vpshufd $39, %xmm13, %xmm13 - vpaddd %xmm10, %xmm13, %xmm10 - vpaddd %xmm0, %xmm10, %xmm10 - vpxor %xmm8, %xmm10, %xmm8 - vpblendw $12, %xmm4, %xmm2, %xmm13 - vpshufb %xmm6, %xmm8, %xmm8 - vpslldq $4, %xmm13, %xmm13 - vpblendw $15, %xmm5, %xmm13, %xmm13 - vpaddd %xmm8, %xmm1, %xmm1 - vpxor %xmm1, %xmm0, %xmm0 - vpaddd %xmm13, %xmm10, %xmm13 - vpshufd $147, %xmm8, %xmm8 - vpshufd $78, %xmm1, %xmm1 - vpslld $25, %xmm0, %xmm14 - vpsrld $7, %xmm0, %xmm0 - vpxor %xmm0, %xmm14, %xmm14 - vpshufd $57, %xmm14, %xmm14 - vpaddd %xmm14, %xmm13, %xmm13 - vpxor %xmm8, %xmm13, %xmm8 - vpaddd %xmm13, %xmm12, %xmm12 - vpshufb %xmm7, %xmm8, %xmm8 - vpaddd %xmm8, %xmm1, %xmm1 - vpxor %xmm14, %xmm1, %xmm14 - vpslld $20, %xmm14, %xmm10 - vpsrld $12, %xmm14, %xmm14 - vpxor %xmm14, %xmm10, %xmm10 - vpaddd %xmm10, %xmm12, %xmm12 - vpxor %xmm8, %xmm12, %xmm8 - vpshufb %xmm6, %xmm8, %xmm8 - vpaddd %xmm8, %xmm1, %xmm1 - vpxor %xmm1, %xmm10, %xmm0 - vpshufd $57, %xmm8, %xmm8 - vpshufd $78, %xmm1, %xmm1 - vpslld $25, %xmm0, %xmm10 - vpsrld $7, %xmm0, %xmm0 - vpxor %xmm0, %xmm10, %xmm10 - vpblendw $48, %xmm2, %xmm3, %xmm0 - vpblendw $15, %xmm11, %xmm0, %xmm0 - vpshufd $147, %xmm10, %xmm10 - vpshufd $114, %xmm0, %xmm0 - vpaddd %xmm12, %xmm0, %xmm0 - vpaddd %xmm10, %xmm0, %xmm0 - vpxor %xmm8, %xmm0, %xmm8 - vpshufb %xmm7, %xmm8, %xmm8 - vpaddd %xmm8, %xmm1, %xmm1 - vpxor 
%xmm10, %xmm1, %xmm10 - vpslld $20, %xmm10, %xmm11 - vpsrld $12, %xmm10, %xmm10 - vpxor %xmm10, %xmm11, %xmm10 - vpslldq $4, %xmm4, %xmm11 - vpblendw $192, %xmm11, %xmm3, %xmm3 - vpunpckldq %xmm5, %xmm4, %xmm4 - vpshufd $99, %xmm3, %xmm3 - vpaddd %xmm0, %xmm3, %xmm3 - vpaddd %xmm10, %xmm3, %xmm3 - vpxor %xmm8, %xmm3, %xmm11 - vpunpckldq %xmm5, %xmm2, %xmm0 - vpblendw $192, %xmm2, %xmm5, %xmm2 - vpshufb %xmm6, %xmm11, %xmm11 - vpunpckhqdq %xmm0, %xmm9, %xmm0 - vpblendw $15, %xmm4, %xmm2, %xmm4 - vpaddd %xmm11, %xmm1, %xmm1 - vpxor %xmm1, %xmm10, %xmm10 - vpshufd $147, %xmm11, %xmm11 - vpshufd $201, %xmm0, %xmm0 - vpslld $25, %xmm10, %xmm8 - vpsrld $7, %xmm10, %xmm10 - vpxor %xmm10, %xmm8, %xmm10 - vpshufd $78, %xmm1, %xmm1 - vpaddd %xmm3, %xmm0, %xmm0 - vpshufd $27, %xmm4, %xmm4 - vpshufd $57, %xmm10, %xmm10 - vpaddd %xmm10, %xmm0, %xmm0 - vpxor %xmm11, %xmm0, %xmm11 - vpaddd %xmm0, %xmm4, %xmm0 - vpshufb %xmm7, %xmm11, %xmm7 - vpaddd %xmm7, %xmm1, %xmm1 - vpxor %xmm10, %xmm1, %xmm10 - vpslld $20, %xmm10, %xmm8 - vpsrld $12, %xmm10, %xmm10 - vpxor %xmm10, %xmm8, %xmm8 - vpaddd %xmm8, %xmm0, %xmm0 - vpxor %xmm7, %xmm0, %xmm7 - vpshufb %xmm6, %xmm7, %xmm6 - vpaddd %xmm6, %xmm1, %xmm1 - vpxor %xmm1, %xmm8, %xmm8 - vpshufd $78, %xmm1, %xmm1 - vpshufd $57, %xmm6, %xmm6 - vpslld $25, %xmm8, %xmm2 - vpsrld $7, %xmm8, %xmm8 - vpxor %xmm8, %xmm2, %xmm8 - vpxor (%rdi), %xmm1, %xmm1 - vpshufd $147, %xmm8, %xmm8 - vpxor %xmm0, %xmm1, %xmm0 - vmovups %xmm0, (%rdi) - vpxor 16(%rdi), %xmm8, %xmm0 - vpxor %xmm6, %xmm0, %xmm6 - vmovups %xmm6, 16(%rdi) - addq $64, %rsi + movdqa %xmm0,%xmm10 + movdqa %xmm1,%xmm11 + paddq %xmm15,%xmm14 + movdqa IV(%rip),%xmm2 + movdqa %xmm14,%xmm3 + pxor IV+0x10(%rip),%xmm3 + leaq SIGMA(%rip),%rcx +.Lroundloop: + movzbl (%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0x1(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0x2(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x3(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + punpckldq %xmm5,%xmm4 + punpckldq 
%xmm7,%xmm6 + punpcklqdq %xmm6,%xmm4 + paddd %xmm4,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm12,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0xc,%xmm1 + pslld $0x14,%xmm8 + por %xmm8,%xmm1 + movzbl 0x4(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0x5(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x6(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0x7(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + punpckldq %xmm6,%xmm5 + punpckldq %xmm4,%xmm7 + punpcklqdq %xmm7,%xmm5 + paddd %xmm5,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm13,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0x7,%xmm1 + pslld $0x19,%xmm8 + por %xmm8,%xmm1 + pshufd $0x93,%xmm0,%xmm0 + pshufd $0x4e,%xmm3,%xmm3 + pshufd $0x39,%xmm2,%xmm2 + movzbl 0x8(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x9(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0xa(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0xb(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + punpckldq %xmm7,%xmm6 + punpckldq %xmm5,%xmm4 + punpcklqdq %xmm4,%xmm6 + paddd %xmm6,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm12,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0xc,%xmm1 + pslld $0x14,%xmm8 + por %xmm8,%xmm1 + movzbl 0xc(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0xd(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0xe(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0xf(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + punpckldq %xmm4,%xmm7 + punpckldq %xmm6,%xmm5 + punpcklqdq %xmm5,%xmm7 + paddd %xmm7,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm13,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0x7,%xmm1 + pslld $0x19,%xmm8 + por %xmm8,%xmm1 + pshufd $0x39,%xmm0,%xmm0 + pshufd $0x4e,%xmm3,%xmm3 + pshufd $0x93,%xmm2,%xmm2 + addq $0x10,%rcx + cmpq %r8,%rcx + jnz .Lroundloop + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm1 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm1 + addq $0x40,%rsi decq %rdx - jnz .Lbeginofloop 
+ jnz .Lbeginofloop + movdqu %xmm0,(%rdi) + movdqu %xmm1,0x10(%rdi) + movdqu %xmm14,0x20(%rdi) .Lendofloop: ret -ENDPROC(blake2s_compress_avx) -#endif /* CONFIG_AS_AVX */ +SYM_FUNC_END(blake2s_compress_ssse3) +#endif /* CONFIG_AS_SSSE3 */ #ifdef CONFIG_AS_AVX512 -ENTRY(blake2s_compress_avx512) +SYM_FUNC_START(blake2s_compress_avx512) vmovdqu (%rdi),%xmm0 vmovdqu 0x10(%rdi),%xmm1 vmovdqu 0x20(%rdi),%xmm4 @@ -622,7 +195,7 @@ ENTRY(blake2s_compress_avx512) vmovdqu (%rsi),%ymm6 vmovdqu 0x20(%rsi),%ymm7 addq $0x40,%rsi - leaq SIGMA(%rip),%rax + leaq SIGMA2(%rip),%rax movb $0xa,%cl .Lblake2s_compress_avx512_roundloop: addq $0x40,%rax @@ -647,9 +220,9 @@ ENTRY(blake2s_compress_avx512) vpaddd %xmm3,%xmm2,%xmm2 vpxor %xmm2,%xmm1,%xmm1 vprord $0x7,%xmm1,%xmm1 - vpshufd $0x39,%xmm1,%xmm1 - vpshufd $0x4e,%xmm2,%xmm2 - vpshufd $0x93,%xmm3,%xmm3 + vpshufd $0x93,%xmm0,%xmm0 + vpshufd $0x4e,%xmm3,%xmm3 + vpshufd $0x39,%xmm2,%xmm2 vpaddd %xmm9,%xmm0,%xmm0 vpaddd %xmm1,%xmm0,%xmm0 vpxor %xmm0,%xmm3,%xmm3 @@ -665,9 +238,9 @@ ENTRY(blake2s_compress_avx512) vpaddd %xmm3,%xmm2,%xmm2 vpxor %xmm2,%xmm1,%xmm1 vprord $0x7,%xmm1,%xmm1 - vpshufd $0x93,%xmm1,%xmm1 - vpshufd $0x4e,%xmm2,%xmm2 - vpshufd $0x39,%xmm3,%xmm3 + vpshufd $0x39,%xmm0,%xmm0 + vpshufd $0x4e,%xmm3,%xmm3 + vpshufd $0x93,%xmm2,%xmm2 decb %cl jne .Lblake2s_compress_avx512_roundloop vpxor %xmm10,%xmm0,%xmm0 @@ -681,5 +254,5 @@ ENTRY(blake2s_compress_avx512) vmovdqu %xmm4,0x20(%rdi) vzeroupper retq -ENDPROC(blake2s_compress_avx512) +SYM_FUNC_END(blake2s_compress_avx512) #endif /* CONFIG_AS_AVX512 */ diff --git a/src/crypto/zinc/chacha20/chacha20-arm-glue.c b/src/crypto/zinc/chacha20/chacha20-arm-glue.c index 84c3bb7..41e2e79 100644 --- a/src/crypto/zinc/chacha20/chacha20-arm-glue.c +++ b/src/crypto/zinc/chacha20/chacha20-arm-glue.c @@ -21,7 +21,7 @@ static bool *const chacha20_nobs[] __initconst = { &chacha20_use_neon }; static void __init chacha20_fpu_init(void) { #if defined(CONFIG_ZINC_ARCH_ARM64) - chacha20_use_neon = 
elf_hwcap & HWCAP_ASIMD; + chacha20_use_neon = cpu_have_named_feature(ASIMD); #elif defined(CONFIG_ZINC_ARCH_ARM) switch (read_cpuid_part()) { case ARM_CPU_PART_CORTEX_A7: diff --git a/src/crypto/zinc/chacha20/chacha20-arm.pl b/src/crypto/zinc/chacha20/chacha20-arm.pl index 6a7d62c..6785383 100644 --- a/src/crypto/zinc/chacha20/chacha20-arm.pl +++ b/src/crypto/zinc/chacha20/chacha20-arm.pl @@ -686,9 +686,9 @@ my ($a,$b,$c,$d,$t)=@_; "&vshr_u32 ($b,$t,25)", "&vsli_32 ($b,$t,7)", - "&vext_8 ($c,$c,$c,8)", - "&vext_8 ($b,$b,$b,$odd?12:4)", - "&vext_8 ($d,$d,$d,$odd?4:12)" + "&vext_8 ($a,$a,$a,$odd?4:12)", + "&vext_8 ($d,$d,$d,8)", + "&vext_8 ($c,$c,$c,$odd?12:4)" ); } diff --git a/src/crypto/zinc/chacha20/chacha20-arm64.pl b/src/crypto/zinc/chacha20/chacha20-arm64.pl index fc63cc8..ac14a99 100644 --- a/src/crypto/zinc/chacha20/chacha20-arm64.pl +++ b/src/crypto/zinc/chacha20/chacha20-arm64.pl @@ -378,9 +378,9 @@ my ($a,$b,$c,$d,$t)=@_; "&ushr ('$b','$t',25)", "&sli ('$b','$t',7)", - "&ext ('$c','$c','$c',8)", - "&ext ('$d','$d','$d',$odd?4:12)", - "&ext ('$b','$b','$b',$odd?12:4)" + "&ext ('$a','$a','$a',$odd?4:12)", + "&ext ('$d','$d','$d',8)", + "&ext ('$c','$c','$c',$odd?12:4)" ); } diff --git a/src/crypto/zinc/chacha20/chacha20-unrolled-arm.S b/src/crypto/zinc/chacha20/chacha20-unrolled-arm.S index 2140319..8fb4bc2 100644 --- a/src/crypto/zinc/chacha20/chacha20-unrolled-arm.S +++ b/src/crypto/zinc/chacha20/chacha20-unrolled-arm.S @@ -394,7 +394,7 @@ * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8], * const u32 iv[4]); */ -ENTRY(chacha20_arm) +SYM_FUNC_START(chacha20_arm) cmp r2, #0 // len == 0? 
reteq lr @@ -428,12 +428,12 @@ ENTRY(chacha20_arm) add sp, #76 pop {r4-r11, pc} -ENDPROC(chacha20_arm) +SYM_FUNC_END(chacha20_arm) /* * void hchacha20_arm(const u32 state[16], u32 out[8]); */ -ENTRY(hchacha20_arm) +SYM_FUNC_START(hchacha20_arm) push {r1,r4-r11,lr} mov r14, r0 @@ -458,4 +458,4 @@ ENTRY(hchacha20_arm) stm r4, {X0,X1,X2,X3,X12,X13,X14,X15} pop {r4-r11,pc} -ENDPROC(hchacha20_arm) +SYM_FUNC_END(hchacha20_arm) diff --git a/src/crypto/zinc/chacha20/chacha20-x86_64.pl b/src/crypto/zinc/chacha20/chacha20-x86_64.pl index 38532f8..29906a6 100644 --- a/src/crypto/zinc/chacha20/chacha20-x86_64.pl +++ b/src/crypto/zinc/chacha20/chacha20-x86_64.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause # -# Copyright (C) 2017-2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. +# Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. # Copyright (C) 2017-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. # Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved. 
# @@ -124,7 +124,7 @@ sub declare_function() { my ($name, $align, $nargs) = @_; if($kernel) { $code .= ".align $align\n"; - $code .= "ENTRY($name)\n"; + $code .= "SYM_FUNC_START($name)\n"; $code .= ".L$name:\n"; } else { $code .= ".globl $name\n"; @@ -137,7 +137,7 @@ sub declare_function() { sub end_function() { my ($name) = @_; if($kernel) { - $code .= "ENDPROC($name)\n"; + $code .= "SYM_FUNC_END($name)\n"; } else { $code .= ".size $name,.-$name\n"; } @@ -525,15 +525,15 @@ $code.=<<___; 1: ___ &SSSE3ROUND(); - &pshufd ($c,$c,0b01001110); - &pshufd ($b,$b,0b00111001); - &pshufd ($d,$d,0b10010011); + &pshufd ($a,$a,0b10010011); + &pshufd ($d,$d,0b01001110); + &pshufd ($c,$c,0b00111001); &nop (); &SSSE3ROUND(); - &pshufd ($c,$c,0b01001110); - &pshufd ($b,$b,0b10010011); - &pshufd ($d,$d,0b00111001); + &pshufd ($a,$a,0b00111001); + &pshufd ($d,$d,0b01001110); + &pshufd ($c,$c,0b10010011); &dec ($counter); &jnz ("1b"); @@ -600,15 +600,15 @@ $code.=<<___; .Loop_ssse3: ___ &SSSE3ROUND(); - &pshufd ($c,$c,0b01001110); - &pshufd ($b,$b,0b00111001); - &pshufd ($d,$d,0b10010011); + &pshufd ($a,$a,0b10010011); + &pshufd ($d,$d,0b01001110); + &pshufd ($c,$c,0b00111001); &nop (); &SSSE3ROUND(); - &pshufd ($c,$c,0b01001110); - &pshufd ($b,$b,0b10010011); - &pshufd ($d,$d,0b00111001); + &pshufd ($a,$a,0b00111001); + &pshufd ($d,$d,0b01001110); + &pshufd ($c,$c,0b10010011); &dec ($counter); &jnz (".Loop_ssse3"); @@ -770,20 +770,20 @@ $code.=<<___; .Loop_128: ___ &SSSE3ROUND_2x(); - &pshufd ($c,$c,0b01001110); - &pshufd ($b,$b,0b00111001); - &pshufd ($d,$d,0b10010011); - &pshufd ($c1,$c1,0b01001110); - &pshufd ($b1,$b1,0b00111001); - &pshufd ($d1,$d1,0b10010011); + &pshufd ($a,$a,0b10010011); + &pshufd ($d,$d,0b01001110); + &pshufd ($c,$c,0b00111001); + &pshufd ($a1,$a1,0b10010011); + &pshufd ($d1,$d1,0b01001110); + &pshufd ($c1,$c1,0b00111001); &SSSE3ROUND_2x(); - &pshufd ($c,$c,0b01001110); - &pshufd ($b,$b,0b10010011); - &pshufd ($d,$d,0b00111001); - &pshufd 
($c1,$c1,0b01001110); - &pshufd ($b1,$b1,0b10010011); - &pshufd ($d1,$d1,0b00111001); + &pshufd ($a,$a,0b00111001); + &pshufd ($d,$d,0b01001110); + &pshufd ($c,$c,0b10010011); + &pshufd ($a1,$a1,0b00111001); + &pshufd ($d1,$d1,0b01001110); + &pshufd ($c1,$c1,0b10010011); &dec ($counter); &jnz (".Loop_128"); diff --git a/src/crypto/zinc/chacha20poly1305.c b/src/crypto/zinc/chacha20poly1305.c index 0001c92..571a64e 100644 --- a/src/crypto/zinc/chacha20poly1305.c +++ b/src/crypto/zinc/chacha20poly1305.c @@ -18,16 +18,7 @@ #include <linux/init.h> #include <crypto/scatterwalk.h> // For blkcipher_walk. -static const u8 pad0[16] = { 0 }; - -static struct blkcipher_desc desc = { .tfm = &(struct crypto_blkcipher){ - .base = { .__crt_alg = &(struct crypto_alg){ - .cra_blocksize = 1, -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - .cra_alignmask = sizeof(u32) - 1 -#endif - } } -} }; +static const u8 pad0[CHACHA20_BLOCK_SIZE] = { 0 }; static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, @@ -82,22 +73,25 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, } EXPORT_SYMBOL(chacha20poly1305_encrypt); -bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, - struct scatterlist *src, const size_t src_len, - const u8 *ad, const size_t ad_len, - const u64 nonce, - const u8 key[CHACHA20POLY1305_KEY_SIZE], - simd_context_t *simd_context) +bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, + const size_t src_len, + const u8 *ad, const size_t ad_len, + const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE], + simd_context_t *simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; - int ret = 0; - struct blkcipher_walk walk; + struct sg_mapping_iter miter; + size_t partial = 0; + ssize_t sl; union { + u8 chacha20_stream[CHACHA20_BLOCK_SIZE]; u8 block0[POLY1305_KEY_SIZE]; u8 mac[POLY1305_MAC_SIZE]; __le64 lens[2]; - } b = { { 0 } }; + } b __aligned(16) = { { 0 } }; + 
chacha20_init(&chacha20_state, key, nonce); chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), @@ -108,32 +102,43 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); - if (likely(src_len)) { - blkcipher_walk_init(&walk, dst, src, src_len); - ret = blkcipher_walk_virt_block(&desc, &walk, - CHACHA20_BLOCK_SIZE); - while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { - size_t chunk_len = - rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); - - chacha20(&chacha20_state, walk.dst.virt.addr, - walk.src.virt.addr, chunk_len, simd_context); - poly1305_update(&poly1305_state, walk.dst.virt.addr, - chunk_len, simd_context); - simd_relax(simd_context); - ret = blkcipher_walk_done(&desc, &walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); + sg_miter_start(&miter, src, sg_nents(src), SG_MITER_TO_SG | SG_MITER_ATOMIC); + for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) { + u8 *addr = miter.addr; + size_t length = min_t(size_t, sl, miter.length); + + if (unlikely(partial)) { + size_t l = min(length, CHACHA20_BLOCK_SIZE - partial); + + crypto_xor(addr, b.chacha20_stream + partial, l); + partial = (partial + l) & (CHACHA20_BLOCK_SIZE - 1); + + addr += l; + length -= l; } - if (walk.nbytes) { - chacha20(&chacha20_state, walk.dst.virt.addr, - walk.src.virt.addr, walk.nbytes, simd_context); - poly1305_update(&poly1305_state, walk.dst.virt.addr, - walk.nbytes, simd_context); - ret = blkcipher_walk_done(&desc, &walk, 0); + + if (likely(length >= CHACHA20_BLOCK_SIZE || length == sl)) { + size_t l = length; + + if (unlikely(length < sl)) + l &= ~(CHACHA20_BLOCK_SIZE - 1); + chacha20(&chacha20_state, addr, addr, l, simd_context); + addr += l; + length -= l; + } + + if (unlikely(length > 0)) { + chacha20(&chacha20_state, b.chacha20_stream, pad0, + CHACHA20_BLOCK_SIZE, simd_context); + crypto_xor(addr, b.chacha20_stream, length); + partial = length; } + + poly1305_update(&poly1305_state, 
miter.addr, + min_t(size_t, sl, miter.length), simd_context); + + simd_relax(simd_context); } - if (unlikely(ret)) - goto err; poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context); @@ -143,14 +148,22 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); - poly1305_final(&poly1305_state, b.mac, simd_context); - scatterwalk_map_and_copy(b.mac, dst, src_len, sizeof(b.mac), 1); -err: + if (likely(sl <= -POLY1305_MAC_SIZE)) + poly1305_final(&poly1305_state, miter.addr + miter.length + sl, + simd_context); + + sg_miter_stop(&miter); + + if (unlikely(sl > -POLY1305_MAC_SIZE)) { + poly1305_final(&poly1305_state, b.mac, simd_context); + scatterwalk_map_and_copy(b.mac, src, src_len, sizeof(b.mac), 1); + } + memzero_explicit(&chacha20_state, sizeof(chacha20_state)); memzero_explicit(&b, sizeof(b)); - return !ret; + return true; } -EXPORT_SYMBOL(chacha20poly1305_encrypt_sg); +EXPORT_SYMBOL(chacha20poly1305_encrypt_sg_inplace); static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, @@ -217,29 +230,32 @@ bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, } EXPORT_SYMBOL(chacha20poly1305_decrypt); -bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, - struct scatterlist *src, const size_t src_len, - const u8 *ad, const size_t ad_len, - const u64 nonce, - const u8 key[CHACHA20POLY1305_KEY_SIZE], - simd_context_t *simd_context) +bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, + size_t src_len, + const u8 *ad, const size_t ad_len, + const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE], + simd_context_t *simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; - struct blkcipher_walk walk; - int ret = 0; - size_t dst_len; + struct sg_mapping_iter miter; + size_t partial = 0; + ssize_t sl; union { + u8 chacha20_stream[CHACHA20_BLOCK_SIZE]; u8 
block0[POLY1305_KEY_SIZE]; struct { u8 read_mac[POLY1305_MAC_SIZE]; u8 computed_mac[POLY1305_MAC_SIZE]; }; __le64 lens[2]; - } b = { { 0 } }; + } b __aligned(16) = { { 0 } }; + bool ret = false; if (unlikely(src_len < POLY1305_MAC_SIZE)) - return false; + return ret; + src_len -= POLY1305_MAC_SIZE; chacha20_init(&chacha20_state, key, nonce); chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), @@ -250,52 +266,74 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); - dst_len = src_len - POLY1305_MAC_SIZE; - if (likely(dst_len)) { - blkcipher_walk_init(&walk, dst, src, dst_len); - ret = blkcipher_walk_virt_block(&desc, &walk, - CHACHA20_BLOCK_SIZE); - while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { - size_t chunk_len = - rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); - - poly1305_update(&poly1305_state, walk.src.virt.addr, - chunk_len, simd_context); - chacha20(&chacha20_state, walk.dst.virt.addr, - walk.src.virt.addr, chunk_len, simd_context); - simd_relax(simd_context); - ret = blkcipher_walk_done(&desc, &walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); + sg_miter_start(&miter, src, sg_nents(src), SG_MITER_TO_SG | SG_MITER_ATOMIC); + for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) { + u8 *addr = miter.addr; + size_t length = min_t(size_t, sl, miter.length); + + poly1305_update(&poly1305_state, addr, length, simd_context); + + if (unlikely(partial)) { + size_t l = min(length, CHACHA20_BLOCK_SIZE - partial); + + crypto_xor(addr, b.chacha20_stream + partial, l); + partial = (partial + l) & (CHACHA20_BLOCK_SIZE - 1); + + addr += l; + length -= l; } - if (walk.nbytes) { - poly1305_update(&poly1305_state, walk.src.virt.addr, - walk.nbytes, simd_context); - chacha20(&chacha20_state, walk.dst.virt.addr, - walk.src.virt.addr, walk.nbytes, simd_context); - ret = blkcipher_walk_done(&desc, &walk, 0); + + if (likely(length >= CHACHA20_BLOCK_SIZE || length == sl)) { + 
size_t l = length; + + if (unlikely(length < sl)) + l &= ~(CHACHA20_BLOCK_SIZE - 1); + chacha20(&chacha20_state, addr, addr, l, simd_context); + addr += l; + length -= l; + } + + if (unlikely(length > 0)) { + chacha20(&chacha20_state, b.chacha20_stream, pad0, + CHACHA20_BLOCK_SIZE, simd_context); + crypto_xor(addr, b.chacha20_stream, length); + partial = length; } + + simd_relax(simd_context); } - if (unlikely(ret)) - goto err; - poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context); b.lens[0] = cpu_to_le64(ad_len); - b.lens[1] = cpu_to_le64(dst_len); + b.lens[1] = cpu_to_le64(src_len); poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); - poly1305_final(&poly1305_state, b.computed_mac, simd_context); + if (likely(sl <= -POLY1305_MAC_SIZE)) { + poly1305_final(&poly1305_state, b.computed_mac, simd_context); + ret = !crypto_memneq(b.computed_mac, + miter.addr + miter.length + sl, + POLY1305_MAC_SIZE); + } + + sg_miter_stop(&miter); + + if (unlikely(sl > -POLY1305_MAC_SIZE)) { + poly1305_final(&poly1305_state, b.computed_mac, simd_context); + scatterwalk_map_and_copy(b.read_mac, src, src_len, + sizeof(b.read_mac), 0); + ret = !crypto_memneq(b.read_mac, b.computed_mac, + POLY1305_MAC_SIZE); + + } - scatterwalk_map_and_copy(b.read_mac, src, dst_len, POLY1305_MAC_SIZE, 0); - ret = crypto_memneq(b.read_mac, b.computed_mac, POLY1305_MAC_SIZE); -err: memzero_explicit(&chacha20_state, sizeof(chacha20_state)); memzero_explicit(&b, sizeof(b)); - return !ret; + return ret; } -EXPORT_SYMBOL(chacha20poly1305_decrypt_sg); +EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace); void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, diff --git a/src/crypto/zinc/curve25519/curve25519-arm.S b/src/crypto/zinc/curve25519/curve25519-arm.S index 0ef1431..8eca8a1 100644 --- a/src/crypto/zinc/curve25519/curve25519-arm.S +++ 
b/src/crypto/zinc/curve25519/curve25519-arm.S @@ -15,7 +15,7 @@ .arch armv7-a .align 4 -ENTRY(curve25519_neon) +SYM_FUNC_START(curve25519_neon) push {r4-r11, lr} mov ip, sp sub r3, sp, #704 @@ -2060,5 +2060,5 @@ ENTRY(curve25519_neon) movw r0, #0 mov sp, ip pop {r4-r11, pc} -ENDPROC(curve25519_neon) +SYM_FUNC_END(curve25519_neon) #endif diff --git a/src/crypto/zinc/curve25519/curve25519-x86_64-glue.c b/src/crypto/zinc/curve25519/curve25519-x86_64-glue.c index 19c86c6..d62bd37 100644 --- a/src/crypto/zinc/curve25519/curve25519-x86_64-glue.c +++ b/src/crypto/zinc/curve25519/curve25519-x86_64-glue.c @@ -15,8 +15,10 @@ static bool *const curve25519_nobs[] __initconst = { static void __init curve25519_fpu_init(void) { - curve25519_use_bmi2 = boot_cpu_has(X86_FEATURE_BMI2); - curve25519_use_adx = boot_cpu_has(X86_FEATURE_BMI2) && + curve25519_use_bmi2 = IS_ENABLED(CONFIG_AS_BMI2) && + boot_cpu_has(X86_FEATURE_BMI2); + curve25519_use_adx = IS_ENABLED(CONFIG_AS_ADX) && + boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX); } @@ -24,10 +26,10 @@ static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE], const u8 basepoint[CURVE25519_KEY_SIZE]) { - if (curve25519_use_adx) { + if (IS_ENABLED(CONFIG_AS_ADX) && curve25519_use_adx) { curve25519_adx(mypublic, secret, basepoint); return true; - } else if (curve25519_use_bmi2) { + } else if (IS_ENABLED(CONFIG_AS_BMI2) && curve25519_use_bmi2) { curve25519_bmi2(mypublic, secret, basepoint); return true; } @@ -37,10 +39,10 @@ static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]) { - if (curve25519_use_adx) { + if (IS_ENABLED(CONFIG_AS_ADX) && curve25519_use_adx) { curve25519_adx_base(pub, secret); return true; - } else if (curve25519_use_bmi2) { + } else if (IS_ENABLED(CONFIG_AS_BMI2) && curve25519_use_bmi2) { curve25519_bmi2_base(pub, secret); return 
true; } diff --git a/src/crypto/zinc/curve25519/curve25519-x86_64.c b/src/crypto/zinc/curve25519/curve25519-x86_64.c index 3d1806f..e8af3e2 100644 --- a/src/crypto/zinc/curve25519/curve25519-x86_64.c +++ b/src/crypto/zinc/curve25519/curve25519-x86_64.c @@ -582,6 +582,7 @@ __aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = { 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL }; +#ifdef CONFIG_AS_ADX /* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7] * a is two 256-bit integers: a0[0:3] and a1[4:7] * b is two 256-bit integers: b0[0:3] and b1[4:7] @@ -736,7 +737,9 @@ static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a, : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15"); } +#endif +#ifdef CONFIG_AS_BMI2 static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a, const u64 *const b) { @@ -885,7 +888,9 @@ static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a, : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r15"); } +#endif +#ifdef CONFIG_AS_ADX static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a) { asm volatile( @@ -1010,7 +1015,9 @@ static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a) : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15"); } +#endif +#ifdef CONFIG_AS_BMI2 static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a) { asm volatile( @@ -1136,7 +1143,9 @@ static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a) : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15"); } +#endif +#ifdef CONFIG_AS_ADX static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a) { asm volatile( @@ -1201,7 +1210,9 @@ static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a) : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", 
"%r8", "%r9", "%r10", "%r11"); } +#endif +#ifdef CONFIG_AS_BMI2 static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a) { asm volatile( @@ -1264,7 +1275,9 @@ static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a) : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11"); } +#endif +#ifdef CONFIG_AS_ADX static void mul_256x256_integer_adx(u64 *const c, const u64 *const a, const u64 *const b) { @@ -1354,7 +1367,9 @@ static void mul_256x256_integer_adx(u64 *const c, const u64 *const a, : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15"); } +#endif +#ifdef CONFIG_AS_BMI2 static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a, const u64 *const b) { @@ -1433,7 +1448,9 @@ static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a, : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r15"); } +#endif +#ifdef CONFIG_AS_ADX static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a) { asm volatile( @@ -1499,7 +1516,9 @@ static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a) : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15"); } +#endif +#ifdef CONFIG_AS_BMI2 static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a) { asm volatile( @@ -1566,7 +1585,9 @@ static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a) : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15"); } +#endif +#ifdef CONFIG_AS_ADX static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a) { asm volatile( @@ -1603,7 +1624,9 @@ static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a) : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11"); } +#endif +#ifdef CONFIG_AS_BMI2 static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) { asm volatile( @@ -1639,7 +1662,9 @@ static void 
red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11"); } +#endif +#ifdef CONFIG_AS_ADX static __always_inline void add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b) { @@ -1671,7 +1696,9 @@ add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b) : "r"(c), "r"(a), "r"(b) : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); } +#endif +#ifdef CONFIG_AS_BMI2 static __always_inline void add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b) { @@ -1702,6 +1729,7 @@ add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b) : "r"(c), "r"(a), "r"(b) : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); } +#endif static __always_inline void sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b) @@ -1769,6 +1797,7 @@ mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a) "%r11"); } +#ifdef CONFIG_AS_ADX static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a) { struct { @@ -1815,7 +1844,9 @@ static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a) memzero_explicit(&m, sizeof(m)); } +#endif +#ifdef CONFIG_AS_BMI2 static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) { struct { @@ -1862,6 +1893,7 @@ static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) memzero_explicit(&m, sizeof(m)); } +#endif /* Given c, a 256-bit number, fred_eltfp25519_1w updates c * with a number such that 0 <= C < 2**255-19. 
@@ -1939,6 +1971,7 @@ static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE], const u8 private_key[CURVE25519_KEY_SIZE], const u8 session_key[CURVE25519_KEY_SIZE]) { +#ifdef CONFIG_AS_ADX struct { u64 buffer[4 * NUM_WORDS_ELTFP25519]; u64 coordinates[4 * NUM_WORDS_ELTFP25519]; @@ -2034,11 +2067,13 @@ static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE], fred_eltfp25519_1w((u64 *)shared); memzero_explicit(&m, sizeof(m)); +#endif } static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE], const u8 private_key[CURVE25519_KEY_SIZE]) { +#ifdef CONFIG_AS_ADX struct { u64 buffer[4 * NUM_WORDS_ELTFP25519]; u64 coordinates[4 * NUM_WORDS_ELTFP25519]; @@ -2128,12 +2163,14 @@ static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE], fred_eltfp25519_1w((u64 *)session_key); memzero_explicit(&m, sizeof(m)); +#endif } static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE], const u8 private_key[CURVE25519_KEY_SIZE], const u8 session_key[CURVE25519_KEY_SIZE]) { +#ifdef CONFIG_AS_BMI2 struct { u64 buffer[4 * NUM_WORDS_ELTFP25519]; u64 coordinates[4 * NUM_WORDS_ELTFP25519]; @@ -2229,11 +2266,13 @@ static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE], fred_eltfp25519_1w((u64 *)shared); memzero_explicit(&m, sizeof(m)); +#endif } static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE], const u8 private_key[CURVE25519_KEY_SIZE]) { +#ifdef CONFIG_AS_BMI2 struct { u64 buffer[4 * NUM_WORDS_ELTFP25519]; u64 coordinates[4 * NUM_WORDS_ELTFP25519]; @@ -2323,4 +2362,5 @@ static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE], fred_eltfp25519_1w((u64 *)session_key); memzero_explicit(&m, sizeof(m)); +#endif } diff --git a/src/crypto/zinc/poly1305/poly1305-arm-glue.c b/src/crypto/zinc/poly1305/poly1305-arm-glue.c index a80f046..291fe4b 100644 --- a/src/crypto/zinc/poly1305/poly1305-arm-glue.c +++ b/src/crypto/zinc/poly1305/poly1305-arm-glue.c @@ -20,7 +20,7 @@ static bool *const poly1305_nobs[] __initconst = { &poly1305_use_neon }; 
static void __init poly1305_fpu_init(void) { #if defined(CONFIG_ZINC_ARCH_ARM64) - poly1305_use_neon = elf_hwcap & HWCAP_ASIMD; + poly1305_use_neon = cpu_have_named_feature(ASIMD); #elif defined(CONFIG_ZINC_ARCH_ARM) poly1305_use_neon = elf_hwcap & HWCAP_NEON; #endif diff --git a/src/crypto/zinc/poly1305/poly1305-x86_64.pl b/src/crypto/zinc/poly1305/poly1305-x86_64.pl index 37ed869..f994855 100644 --- a/src/crypto/zinc/poly1305/poly1305-x86_64.pl +++ b/src/crypto/zinc/poly1305/poly1305-x86_64.pl @@ -35,7 +35,7 @@ # Skylake-X system performance. Since we are likely to suppress # AVX512F capability flag [at least on Skylake-X], conversion serves # as kind of "investment protection". Note that next *lake processor, -# Cannolake, has AVX512IFMA code path to execute... +# Cannonlake, has AVX512IFMA code path to execute... # # Numbers are cycles per processed byte with poly1305_blocks alone, # measured with rdtsc at fixed clock frequency. @@ -109,7 +109,7 @@ sub declare_function() { my ($name, $align, $nargs) = @_; if($kernel) { $code .= ".align $align\n"; - $code .= "ENTRY($name)\n"; + $code .= "SYM_FUNC_START($name)\n"; $code .= ".L$name:\n"; } else { $code .= ".globl $name\n"; @@ -122,7 +122,7 @@ sub declare_function() { sub end_function() { my ($name) = @_; if($kernel) { - $code .= "ENDPROC($name)\n"; + $code .= "SYM_FUNC_END($name)\n"; } else { $code .= ".size $name,.-$name\n"; } diff --git a/src/crypto/zinc/selftest/chacha20poly1305.c b/src/crypto/zinc/selftest/chacha20poly1305.c index dba9cd7..c58ac6e 100644 --- a/src/crypto/zinc/selftest/chacha20poly1305.c +++ b/src/crypto/zinc/selftest/chacha20poly1305.c @@ -8879,15 +8879,15 @@ decryption_success(bool func_ret, bool expect_failure, int memcmp_result) static bool __init chacha20poly1305_selftest(void) { enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 }; - size_t i; - u8 *computed_output = NULL, *heap_src = NULL; + size_t i, j, k, total_len; + u8 *computed_output = NULL, *input = NULL; bool success = true, ret; 
simd_context_t simd_context; - struct scatterlist sg_src, sg_dst; + struct scatterlist sg_src[3]; - heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); - if (!heap_src || !computed_output) { + input = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); + if (!computed_output || !input) { pr_err("chacha20poly1305 self-test malloc: FAIL\n"); success = false; goto out; @@ -8916,15 +8916,12 @@ static bool __init chacha20poly1305_selftest(void) for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) { if (chacha20poly1305_enc_vectors[i].nlen != 8) continue; - memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); - memcpy(heap_src, chacha20poly1305_enc_vectors[i].input, + memcpy(computed_output, chacha20poly1305_enc_vectors[i].input, chacha20poly1305_enc_vectors[i].ilen); - sg_init_one(&sg_src, heap_src, - chacha20poly1305_enc_vectors[i].ilen); - sg_init_one(&sg_dst, computed_output, + sg_init_one(sg_src, computed_output, chacha20poly1305_enc_vectors[i].ilen + POLY1305_MAC_SIZE); - ret = chacha20poly1305_encrypt_sg(&sg_dst, &sg_src, + ret = chacha20poly1305_encrypt_sg_inplace(sg_src, chacha20poly1305_enc_vectors[i].ilen, chacha20poly1305_enc_vectors[i].assoc, chacha20poly1305_enc_vectors[i].alen, @@ -8963,15 +8960,11 @@ static bool __init chacha20poly1305_selftest(void) } simd_get(&simd_context); for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { - memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); - memcpy(heap_src, chacha20poly1305_dec_vectors[i].input, + memcpy(computed_output, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen); - sg_init_one(&sg_src, heap_src, + sg_init_one(sg_src, computed_output, chacha20poly1305_dec_vectors[i].ilen); - sg_init_one(&sg_dst, computed_output, - chacha20poly1305_dec_vectors[i].ilen - - POLY1305_MAC_SIZE); - ret = chacha20poly1305_decrypt_sg(&sg_dst, &sg_src, + ret = chacha20poly1305_decrypt_sg_inplace(sg_src, 
chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, @@ -9027,8 +9020,57 @@ static bool __init chacha20poly1305_selftest(void) } } + simd_get(&simd_context); + for (total_len = POLY1305_MAC_SIZE; IS_ENABLED(DEBUG_CHACHA20POLY1305_SLOW_CHUNK_TEST) + && total_len <= 1 << 10; ++total_len) { + for (i = 0; i <= total_len; ++i) { + for (j = i; j <= total_len; ++j) { + sg_init_table(sg_src, 3); + sg_set_buf(&sg_src[0], input, i); + sg_set_buf(&sg_src[1], input + i, j - i); + sg_set_buf(&sg_src[2], input + j, total_len - j); + memset(computed_output, 0, total_len); + memset(input, 0, total_len); + + if (!chacha20poly1305_encrypt_sg_inplace(sg_src, + total_len - POLY1305_MAC_SIZE, NULL, 0, + 0, enc_key001, &simd_context)) + goto chunkfail; + chacha20poly1305_encrypt(computed_output, + computed_output, + total_len - POLY1305_MAC_SIZE, NULL, 0, 0, + enc_key001); + if (memcmp(computed_output, input, total_len)) + goto chunkfail;; + if (!chacha20poly1305_decrypt(computed_output, + input, total_len, NULL, 0, 0, enc_key001)) + goto chunkfail; + for (k = 0; k < total_len - POLY1305_MAC_SIZE; ++k) { + if (computed_output[k]) + goto chunkfail; + } + if (!chacha20poly1305_decrypt_sg_inplace(sg_src, + total_len, NULL, 0, 0, enc_key001, + &simd_context)) + goto chunkfail; + for (k = 0; k < total_len - POLY1305_MAC_SIZE; ++k) { + if (input[k]) + goto chunkfail; + } + continue; + + chunkfail: + pr_err("chacha20poly1305 chunked self-test %zu/%zu/%zu: FAIL\n", + total_len, i, j); + success = false; + } + + } + } + simd_put(&simd_context); + out: - kfree(heap_src); kfree(computed_output); + kfree(input); return success; } diff --git a/src/device.c b/src/device.c index 8735935..e888ac3 100644 --- a/src/device.c +++ b/src/device.c @@ -112,9 +112,7 @@ static int wg_stop(struct net_device *dev) wg_timers_stop(peer); wg_noise_handshake_clear(&peer->handshake); wg_noise_keypairs_clear(&peer->keypairs); - 
atomic64_set(&peer->last_sent_handshake, - ktime_get_boot_fast_ns() - - (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); } mutex_unlock(&wg->device_update_lock); skb_queue_purge(&wg->incoming_handshakes); @@ -173,8 +171,8 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb(skb); skb = segs; } - do { - next = skb->next; + + skb_list_walk_safe(skb, skb, next) { skb_mark_not_on_list(skb); skb = skb_share_check(skb, GFP_ATOMIC); @@ -189,7 +187,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) PACKET_CB(skb)->mtu = mtu; __skb_queue_tail(&packets, skb); - } while ((skb = next) != NULL); + } spin_lock_bh(&peer->staged_packet_queue.lock); /* If the queue is getting too big, we start removing the oldest packets diff --git a/src/device.h b/src/device.h index b15a8be..c91f305 100644 --- a/src/device.h +++ b/src/device.h @@ -62,4 +62,12 @@ struct wg_device { int wg_device_init(void); void wg_device_uninit(void); +/* Later after the dust settles, this can be moved into include/linux/skbuff.h, + * where virtually all code that deals with GSO segs can benefit, around ~30 + * drivers as of writing. + */ +#define skb_list_walk_safe(first, skb, next) \ + for (skb = first, next = skb->next; skb; \ + skb = next, next = skb ? 
skb->next : NULL) + #endif /* _WG_DEVICE_H */ diff --git a/src/dkms.conf b/src/dkms.conf index 97a919a..cdcd2e7 100644 --- a/src/dkms.conf +++ b/src/dkms.conf @@ -1,5 +1,5 @@ PACKAGE_NAME="wireguard" -PACKAGE_VERSION="0.0.20190406" +PACKAGE_VERSION="0.0.20191219" AUTOINSTALL=yes BUILT_MODULE_NAME="wireguard" @@ -12,7 +12,6 @@ #include "uapi/wireguard.h" #include "crypto/zinc.h" -#include <linux/version.h> #include <linux/init.h> #include <linux/module.h> #include <linux/genetlink.h> diff --git a/src/messages.h b/src/messages.h index 3cfd1c5..f415cdd 100644 --- a/src/messages.h +++ b/src/messages.h @@ -38,7 +38,7 @@ enum counter_values { }; enum limits { - REKEY_AFTER_MESSAGES = U64_MAX - 0xffff, + REKEY_AFTER_MESSAGES = 1ULL << 60, REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1, REKEY_TIMEOUT = 5, REKEY_TIMEOUT_JITTER_MAX_JIFFIES = HZ / 3, diff --git a/src/netlink.c b/src/netlink.c index b179b31..9bf2f84 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -13,14 +13,15 @@ #include <linux/if.h> #include <net/genetlink.h> #include <net/sock.h> +#include <crypto/algapi.h> static struct genl_family genl_family; static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = { [WGDEVICE_A_IFINDEX] = { .type = NLA_U32 }, [WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, - [WGDEVICE_A_PRIVATE_KEY] = { .len = NOISE_PUBLIC_KEY_LEN }, - [WGDEVICE_A_PUBLIC_KEY] = { .len = NOISE_PUBLIC_KEY_LEN }, + [WGDEVICE_A_PRIVATE_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, + [WGDEVICE_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, [WGDEVICE_A_FLAGS] = { .type = NLA_U32 }, [WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 }, [WGDEVICE_A_FWMARK] = { .type = NLA_U32 }, @@ -28,12 +29,12 @@ static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = { }; static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = { - [WGPEER_A_PUBLIC_KEY] = { .len = NOISE_PUBLIC_KEY_LEN }, - [WGPEER_A_PRESHARED_KEY] = { .len = 
NOISE_SYMMETRIC_KEY_LEN }, + [WGPEER_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, + [WGPEER_A_PRESHARED_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_SYMMETRIC_KEY_LEN }, [WGPEER_A_FLAGS] = { .type = NLA_U32 }, - [WGPEER_A_ENDPOINT] = { .len = sizeof(struct sockaddr) }, + [WGPEER_A_ENDPOINT] = { .type = NLA_MIN_LEN, .len = sizeof(struct sockaddr) }, [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 }, - [WGPEER_A_LAST_HANDSHAKE_TIME] = { .len = sizeof(struct __kernel_timespec) }, + [WGPEER_A_LAST_HANDSHAKE_TIME] = { .type = NLA_EXACT_LEN, .len = sizeof(struct __kernel_timespec) }, [WGPEER_A_RX_BYTES] = { .type = NLA_U64 }, [WGPEER_A_TX_BYTES] = { .type = NLA_U64 }, [WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED }, @@ -42,7 +43,7 @@ static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = { static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = { [WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 }, - [WGALLOWEDIP_A_IPADDR] = { .len = sizeof(struct in_addr) }, + [WGALLOWEDIP_A_IPADDR] = { .type = NLA_MIN_LEN, .len = sizeof(struct in_addr) }, [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 } }; @@ -90,12 +91,21 @@ static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr, return 0; } +struct dump_ctx { + struct wg_device *wg; + struct wg_peer *next_peer; + u64 allowedips_seq; + struct allowedips_node *next_allowedip; +}; + +#define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args) + static int -get_peer(struct wg_peer *peer, struct allowedips_node **next_allowedips_node, - u64 *allowedips_seq, struct sk_buff *skb) +get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx) { + struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0); - struct allowedips_node *allowedips_node = *next_allowedips_node; + struct allowedips_node *allowedips_node = ctx->next_allowedip; bool fail; if (!peer_nest) @@ -151,9 +161,9 @@ get_peer(struct wg_peer *peer, struct allowedips_node **next_allowedips_node, } 
if (!allowedips_node) goto no_allowedips; - if (!*allowedips_seq) - *allowedips_seq = peer->device->peer_allowedips.seq; - else if (*allowedips_seq != peer->device->peer_allowedips.seq) + if (!ctx->allowedips_seq) + ctx->allowedips_seq = peer->device->peer_allowedips.seq; + else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq) goto no_allowedips; allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS); @@ -169,15 +179,15 @@ get_peer(struct wg_peer *peer, struct allowedips_node **next_allowedips_node, if (get_allowedips(skb, ip, cidr, family)) { nla_nest_end(skb, allowedips_nest); nla_nest_end(skb, peer_nest); - *next_allowedips_node = allowedips_node; + ctx->next_allowedip = allowedips_node; return -EMSGSIZE; } } nla_nest_end(skb, allowedips_nest); no_allowedips: nla_nest_end(skb, peer_nest); - *next_allowedips_node = NULL; - *allowedips_seq = 0; + ctx->next_allowedip = NULL; + ctx->allowedips_seq = 0; return 0; err: nla_nest_cancel(skb, peer_nest); @@ -186,37 +196,29 @@ err: static int wg_get_device_start(struct netlink_callback *cb) { - struct nlattr **attrs = genl_family_attrbuf(&genl_family); struct wg_device *wg; - int ret; - ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + genl_family.hdrsize, attrs, - genl_family.maxattr, device_policy, NULL); - if (ret < 0) - return ret; - wg = lookup_interface(attrs, cb->skb); + wg = lookup_interface(genl_dumpit_info(cb)->attrs, cb->skb); if (IS_ERR(wg)) return PTR_ERR(wg); - cb->args[0] = (long)wg; + DUMP_CTX(cb)->wg = wg; return 0; } static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct wg_peer *peer, *next_peer_cursor, *last_peer_cursor; + struct wg_peer *peer, *next_peer_cursor; + struct dump_ctx *ctx = DUMP_CTX(cb); + struct wg_device *wg = ctx->wg; struct nlattr *peers_nest; - struct wg_device *wg; int ret = -EMSGSIZE; bool done = true; void *hdr; - wg = (struct wg_device *)cb->args[0]; - next_peer_cursor = (struct wg_peer *)cb->args[1]; - last_peer_cursor = (struct 
wg_peer *)cb->args[1]; - rtnl_lock(); mutex_lock(&wg->device_update_lock); cb->seq = wg->device_update_gen; + next_peer_cursor = ctx->next_peer; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE); @@ -224,7 +226,7 @@ static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) goto out; genl_dump_check_consistent(cb, hdr); - if (!last_peer_cursor) { + if (!ctx->next_peer) { if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT, wg->incoming_port) || nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) || @@ -257,15 +259,14 @@ static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) * coherent dump anyway, so they'll try again. */ if (list_empty(&wg->peer_list) || - (last_peer_cursor && list_empty(&last_peer_cursor->peer_list))) { + (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) { nla_nest_cancel(skb, peers_nest); goto out; } lockdep_assert_held(&wg->device_update_lock); - peer = list_prepare_entry(last_peer_cursor, &wg->peer_list, peer_list); + peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list); list_for_each_entry_continue(peer, &wg->peer_list, peer_list) { - if (get_peer(peer, (struct allowedips_node **)&cb->args[2], - (u64 *)&cb->args[4] /* and args[5] */, skb)) { + if (get_peer(peer, skb, ctx)) { done = false; break; } @@ -276,7 +277,7 @@ static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) out: if (!ret && !done && next_peer_cursor) wg_peer_get(next_peer_cursor); - wg_peer_put(last_peer_cursor); + wg_peer_put(ctx->next_peer); mutex_unlock(&wg->device_update_lock); rtnl_unlock(); @@ -286,10 +287,10 @@ out: } genlmsg_end(skb, hdr); if (done) { - cb->args[1] = 0; + ctx->next_peer = NULL; return 0; } - cb->args[1] = (long)next_peer_cursor; + ctx->next_peer = next_peer_cursor; return skb->len; /* At this point, we can't really deal ourselves with safely zeroing out @@ -300,12 +301,11 @@ out: static int 
wg_get_device_done(struct netlink_callback *cb) { - struct wg_device *wg = (struct wg_device *)cb->args[0]; - struct wg_peer *peer = (struct wg_peer *)cb->args[1]; + struct dump_ctx *ctx = DUMP_CTX(cb); - if (wg) - dev_put(wg->dev); - wg_peer_put(peer); + if (ctx->wg) + dev_put(ctx->wg->dev); + wg_peer_put(ctx->next_peer); return 0; } @@ -368,8 +368,12 @@ static int set_peer(struct wg_device *wg, struct nlattr **attrs) if (attrs[WGPEER_A_PRESHARED_KEY] && nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN) preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]); + if (attrs[WGPEER_A_FLAGS]) flags = nla_get_u32(attrs[WGPEER_A_FLAGS]); + ret = -EOPNOTSUPP; + if (flags & ~__WGPEER_F_ALL) + goto out; ret = -EPFNOSUPPORT; if (attrs[WGPEER_A_PROTOCOL_VERSION]) { @@ -379,10 +383,10 @@ static int set_peer(struct wg_device *wg, struct nlattr **attrs) peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, nla_data(attrs[WGPEER_A_PUBLIC_KEY])); + ret = 0; if (!peer) { /* Peer doesn't exist yet. Add a new one. */ - ret = -ENODEV; - if (flags & WGPEER_F_REMOVE_ME) - goto out; /* Tried to remove a non-existing peer. */ + if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY)) + goto out; /* The peer is new, so there aren't allowed IPs to remove. */ flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS; @@ -403,17 +407,22 @@ static int set_peer(struct wg_device *wg, struct nlattr **attrs) } up_read(&wg->static_identity.lock); - ret = -ENOMEM; peer = wg_peer_create(wg, public_key, preshared_key); - if (!peer) + if (IS_ERR(peer)) { + /* Similar to the above, if the key is invalid, we skip + * it without fanfare, so that services don't need to + * worry about doing key validation themselves. + */ + ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer); + peer = NULL; goto out; + } /* Take additional reference, as though we've just been * looked up. 
*/ wg_peer_get(peer); } - ret = 0; if (flags & WGPEER_F_REMOVE_ME) { wg_peer_remove(peer); goto out; @@ -487,6 +496,7 @@ out: static int wg_set_device(struct sk_buff *skb, struct genl_info *info) { struct wg_device *wg = lookup_interface(info->attrs, skb); + u32 flags = 0; int ret; if (IS_ERR(wg)) { @@ -497,6 +507,12 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); mutex_lock(&wg->device_update_lock); + if (info->attrs[WGDEVICE_A_FLAGS]) + flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]); + ret = -EOPNOTSUPP; + if (flags & ~__WGDEVICE_F_ALL) + goto out; + ret = -EPERM; if ((info->attrs[WGDEVICE_A_LISTEN_PORT] || info->attrs[WGDEVICE_A_FWMARK]) && @@ -520,9 +536,7 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) goto out; } - if (info->attrs[WGDEVICE_A_FLAGS] && - nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]) & - WGDEVICE_F_REPLACE_PEERS) + if (flags & WGDEVICE_F_REPLACE_PEERS) wg_peer_remove_all(wg); if (info->attrs[WGDEVICE_A_PRIVATE_KEY] && @@ -532,6 +546,10 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) u8 public_key[NOISE_PUBLIC_KEY_LEN]; struct wg_peer *peer, *temp; + if (!crypto_memneq(wg->static_identity.static_private, + private_key, NOISE_PUBLIC_KEY_LEN)) + goto skip_set_private_key; + /* We remove before setting, to prevent race, which means doing * two 25519-genpub ops. 
*/ @@ -549,12 +567,15 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) private_key); list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { - if (!wg_noise_precompute_static_static(peer)) + if (wg_noise_precompute_static_static(peer)) + wg_noise_expire_current_peer_keypairs(peer); + else wg_peer_remove(peer); } wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); up_write(&wg->static_identity.lock); } +skip_set_private_key: if (info->attrs[WGDEVICE_A_PEERS]) { struct nlattr *attr, *peer[WGPEER_A_MAX + 1]; @@ -596,12 +617,16 @@ struct genl_ops genl_ops[] = { #endif .dumpit = wg_get_device_dump, .done = wg_get_device_done, +#ifdef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY .policy = device_policy, +#endif .flags = GENL_UNS_ADMIN_PERM }, { .cmd = WG_CMD_SET_DEVICE, .doit = wg_set_device, +#ifdef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY .policy = device_policy, +#endif .flags = GENL_UNS_ADMIN_PERM } }; @@ -618,6 +643,9 @@ __ro_after_init = { .version = WG_GENL_VERSION, .maxattr = WGDEVICE_A_MAX, .module = THIS_MODULE, +#ifndef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY + .policy = device_policy, +#endif .netnsok = true }; diff --git a/src/noise.c b/src/noise.c index 5b073bd..269b69f 100644 --- a/src/noise.c +++ b/src/noise.c @@ -183,6 +183,25 @@ void wg_noise_keypairs_clear(struct noise_keypairs *keypairs) spin_unlock_bh(&keypairs->keypair_update_lock); } +void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + + wg_noise_handshake_clear(&peer->handshake); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); + + spin_lock_bh(&peer->keypairs.keypair_update_lock); + keypair = rcu_dereference_protected(peer->keypairs.next_keypair, + lockdep_is_held(&peer->keypairs.keypair_update_lock)); + if (keypair) + keypair->sending.is_valid = false; + keypair = rcu_dereference_protected(peer->keypairs.current_keypair, + lockdep_is_held(&peer->keypairs.keypair_update_lock)); + 
if (keypair) + keypair->sending.is_valid = false; + spin_unlock_bh(&peer->keypairs.keypair_update_lock); +} + static void add_new_keypair(struct noise_keypairs *keypairs, struct noise_keypair *new_keypair) { @@ -352,7 +371,7 @@ static void symmetric_key_init(struct noise_symmetric_key *key) atomic64_set(&key->counter.counter, 0); memset(key->counter.receive.backtrack, 0, sizeof(key->counter.receive.backtrack)); - key->birthdate = ktime_get_boot_fast_ns(); + key->birthdate = ktime_get_coarse_boottime_ns(); key->is_valid = true; } @@ -547,6 +566,7 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, u8 s[NOISE_PUBLIC_KEY_LEN]; u8 e[NOISE_PUBLIC_KEY_LEN]; u8 t[NOISE_TIMESTAMP_LEN]; + u64 initiation_consumption; down_read(&wg->static_identity.lock); if (unlikely(!wg->static_identity.has_identity)) @@ -585,9 +605,9 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, down_read(&handshake->lock); replay_attack = memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) <= 0; - flood_attack = handshake->last_initiation_consumption + + flood_attack = (s64)handshake->last_initiation_consumption + NSEC_PER_SEC / INITIATIONS_PER_SECOND > - ktime_get_boot_fast_ns(); + (s64)ktime_get_coarse_boottime_ns(); up_read(&handshake->lock); if (replay_attack || flood_attack) goto out; @@ -595,11 +615,14 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, /* Success! 
Copy everything to peer */ down_write(&handshake->lock); memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); - memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN); + if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0) + memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN); memcpy(handshake->hash, hash, NOISE_HASH_LEN); memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); handshake->remote_index = src->sender_index; - handshake->last_initiation_consumption = ktime_get_boot_fast_ns(); + if ((s64)(handshake->last_initiation_consumption - + (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0) + handshake->last_initiation_consumption = initiation_consumption; handshake->state = HANDSHAKE_CONSUMED_INITIATION; up_write(&handshake->lock); ret_peer = peer; diff --git a/src/noise.h b/src/noise.h index 9c2cc62..138a07b 100644 --- a/src/noise.h +++ b/src/noise.h @@ -13,7 +13,6 @@ #include <linux/atomic.h> #include <linux/rwsem.h> #include <linux/mutex.h> -#include <linux/ktime.h> #include <linux/kref.h> union noise_counter { @@ -101,11 +100,18 @@ bool wg_noise_handshake_init(struct noise_handshake *handshake, const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], struct wg_peer *peer); void wg_noise_handshake_clear(struct noise_handshake *handshake); +static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns) +{ + atomic64_set(handshake_ns, ktime_get_coarse_boottime_ns() - + (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC); +} + void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now); struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair); void wg_noise_keypairs_clear(struct noise_keypairs *keypairs); bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, struct noise_keypair *received_keypair); +void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer); void wg_noise_set_static_identity_private_key( struct noise_static_identity 
*static_identity, @@ -22,20 +22,23 @@ struct wg_peer *wg_peer_create(struct wg_device *wg, const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]) { struct wg_peer *peer; + int ret = -ENOMEM; lockdep_assert_held(&wg->device_update_lock); if (wg->num_peers >= MAX_PEERS_PER_DEVICE) - return NULL; + return ERR_PTR(ret); peer = kzalloc(sizeof(*peer), GFP_KERNEL); if (unlikely(!peer)) - return NULL; + return ERR_PTR(ret); peer->device = wg; if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity, - public_key, preshared_key, peer)) + public_key, preshared_key, peer)) { + ret = -EKEYREJECTED; goto err_1; + } if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) goto err_1; if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false, @@ -56,9 +59,7 @@ struct wg_peer *wg_peer_create(struct wg_device *wg, rwlock_init(&peer->endpoint_lock); kref_init(&peer->refcount); skb_queue_head_init(&peer->staged_packet_queue); - atomic64_set(&peer->last_sent_handshake, - ktime_get_boot_fast_ns() - - (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state); netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll, NAPI_POLL_WEIGHT); @@ -76,7 +77,7 @@ err_2: dst_cache_destroy(&peer->endpoint_cache); err_1: kfree(peer); - return NULL; + return ERR_PTR(ret); } struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer) @@ -177,8 +178,8 @@ void wg_peer_remove(struct wg_peer *peer) void wg_peer_remove_all(struct wg_device *wg) { - struct list_head dead_peers = LIST_HEAD_INIT(dead_peers); struct wg_peer *peer, *temp; + LIST_HEAD(dead_peers); lockdep_assert_held(&wg->device_update_lock); diff --git a/src/queueing.h b/src/queueing.h index f8de703..e49a464 100644 --- a/src/queueing.h +++ b/src/queueing.h @@ -177,8 +177,7 @@ static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue, wg_peer_put(peer); } -static inline void wg_queue_enqueue_per_peer_napi(struct 
crypt_queue *queue, - struct sk_buff *skb, +static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb, enum packet_state state) { /* We take a reference, because as soon as we call atomic_set, the diff --git a/src/ratelimiter.c b/src/ratelimiter.c index fd09190..e33ec72 100644 --- a/src/ratelimiter.c +++ b/src/ratelimiter.c @@ -66,7 +66,7 @@ static void entry_uninit(struct ratelimiter_entry *entry) /* Calling this function with a NULL work uninits all entries. */ static void wg_ratelimiter_gc_entries(struct work_struct *work) { - const u64 now = ktime_get_boot_fast_ns(); + const u64 now = ktime_get_coarse_boottime_ns(); struct ratelimiter_entry *entry; struct hlist_node *temp; unsigned int i; @@ -130,7 +130,7 @@ bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net) * as part of the rate. */ spin_lock(&entry->lock); - now = ktime_get_boot_fast_ns(); + now = ktime_get_coarse_boottime_ns(); tokens = min_t(u64, TOKEN_MAX, entry->tokens + now - entry->last_time_ns); @@ -155,7 +155,7 @@ bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net) entry->ip = ip; INIT_HLIST_NODE(&entry->hash); spin_lock_init(&entry->lock); - entry->last_time_ns = ktime_get_boot_fast_ns(); + entry->last_time_ns = ktime_get_coarse_boottime_ns(); entry->tokens = TOKEN_MAX - PACKET_COST; spin_lock(&table_lock); hlist_add_head_rcu(&entry->hash, bucket); diff --git a/src/receive.c b/src/receive.c index 51d06d3..e00f0f4 100644 --- a/src/receive.c +++ b/src/receive.c @@ -120,7 +120,7 @@ static void wg_receive_handshake_packet(struct wg_device *wg, under_load = skb_queue_len(&wg->incoming_handshakes) >= MAX_QUEUED_INCOMING_HANDSHAKES / 8; if (under_load) - last_under_load = ktime_get_boot_fast_ns(); + last_under_load = ktime_get_coarse_boottime_ns(); else if (last_under_load) under_load = !wg_birthdate_has_expired(last_under_load, 1); mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb, @@ -281,9 +281,9 @@ static bool decrypt_packet(struct sk_buff *skb, struct 
noise_symmetric_key *key, if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0) return false; - if (!chacha20poly1305_decrypt_sg(sg, sg, skb->len, NULL, 0, - PACKET_CB(skb)->nonce, key->key, - simd_context)) + if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0, + PACKET_CB(skb)->nonce, key->key, + simd_context)) return false; /* Another ugly situation of pushing and pulling the header so as to @@ -382,7 +382,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer, /* We've already verified the Poly1305 auth tag, which means this packet * was not modified in transit. We can therefore tell the networking * stack that all checksums of every layer of encapsulation have already - * been checked "by the hardware" and therefore is unneccessary to check + * been checked "by the hardware" and therefore is unnecessary to check * again in software. */ skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -521,8 +521,7 @@ void wg_packet_decrypt_worker(struct work_struct *work) &PACKET_CB(skb)->keypair->receiving, &simd_context)) ? 
PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; - wg_queue_enqueue_per_peer_napi(&PACKET_PEER(skb)->rx_queue, skb, - state); + wg_queue_enqueue_per_peer_napi(skb, state); simd_relax(&simd_context); } @@ -551,7 +550,7 @@ static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb) wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu); if (unlikely(ret == -EPIPE)) - wg_queue_enqueue_per_peer(&peer->rx_queue, skb, PACKET_STATE_DEAD); + wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD); if (likely(!ret || ret == -EPIPE)) { rcu_read_unlock_bh(); return; diff --git a/src/selftest/allowedips.c b/src/selftest/allowedips.c index 6e244a9..846db14 100644 --- a/src/selftest/allowedips.c +++ b/src/selftest/allowedips.c @@ -307,12 +307,12 @@ static __init bool randomized_test(void) if (wg_allowedips_insert_v4(&t, (struct in_addr *)ip, cidr, peer, &mutex) < 0) { pr_err("allowedips random self-test malloc: FAIL\n"); - goto free; + goto free_locked; } if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip, cidr, peer) < 0) { pr_err("allowedips random self-test malloc: FAIL\n"); - goto free; + goto free_locked; } for (j = 0; j < NUM_MUTATED_ROUTES; ++j) { memcpy(mutated, ip, 4); @@ -334,12 +334,12 @@ static __init bool randomized_test(void) (struct in_addr *)mutated, cidr, peer, &mutex) < 0) { pr_err("allowedips random malloc: FAIL\n"); - goto free; + goto free_locked; } if (horrible_allowedips_insert_v4(&h, (struct in_addr *)mutated, cidr, peer)) { pr_err("allowedips random self-test malloc: FAIL\n"); - goto free; + goto free_locked; } } } @@ -351,12 +351,12 @@ static __init bool randomized_test(void) if (wg_allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr, peer, &mutex) < 0) { pr_err("allowedips random self-test malloc: FAIL\n"); - goto free; + goto free_locked; } if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip, cidr, peer) < 0) { pr_err("allowedips random self-test malloc: FAIL\n"); - goto free; + goto free_locked; } for (j = 0; j < 
NUM_MUTATED_ROUTES; ++j) { memcpy(mutated, ip, 16); @@ -378,13 +378,13 @@ static __init bool randomized_test(void) (struct in6_addr *)mutated, cidr, peer, &mutex) < 0) { pr_err("allowedips random self-test malloc: FAIL\n"); - goto free; + goto free_locked; } if (horrible_allowedips_insert_v6( &h, (struct in6_addr *)mutated, cidr, peer)) { pr_err("allowedips random self-test malloc: FAIL\n"); - goto free; + goto free_locked; } } } @@ -417,6 +417,7 @@ static __init bool randomized_test(void) free: mutex_lock(&mutex); +free_locked: wg_allowedips_free(&t, &mutex); mutex_unlock(&mutex); horrible_allowedips_free(&h); @@ -27,7 +27,7 @@ static void wg_packet_send_handshake_initiation(struct wg_peer *peer) REKEY_TIMEOUT)) return; /* This function is rate limited. */ - atomic64_set(&peer->last_sent_handshake, ktime_get_boot_fast_ns()); + atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr); @@ -37,7 +37,7 @@ static void wg_packet_send_handshake_initiation(struct wg_peer *peer) wg_timers_any_authenticated_packet_traversal(peer); wg_timers_any_authenticated_packet_sent(peer); atomic64_set(&peer->last_sent_handshake, - ktime_get_boot_fast_ns()); + ktime_get_coarse_boottime_ns()); wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet), HANDSHAKE_DSCP); wg_timers_handshake_initiated(peer); @@ -87,7 +87,7 @@ void wg_packet_send_handshake_response(struct wg_peer *peer) { struct message_handshake_response packet; - atomic64_set(&peer->last_sent_handshake, ktime_get_boot_fast_ns()); + atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr); @@ -100,7 +100,7 @@ void wg_packet_send_handshake_response(struct wg_peer *peer) 
wg_timers_any_authenticated_packet_traversal(peer); wg_timers_any_authenticated_packet_sent(peer); atomic64_set(&peer->last_sent_handshake, - ktime_get_boot_fast_ns()); + ktime_get_coarse_boottime_ns()); wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet), HANDSHAKE_DSCP); @@ -207,9 +207,10 @@ static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair, if (skb_to_sgvec(skb, sg, sizeof(struct message_data), noise_encrypted_len(plaintext_len)) <= 0) return false; - return chacha20poly1305_encrypt_sg(sg, sg, plaintext_len, NULL, 0, - PACKET_CB(skb)->nonce, - keypair->sending.key, simd_context); + return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0, + PACKET_CB(skb)->nonce, + keypair->sending.key, + simd_context); } void wg_packet_send_keepalive(struct wg_peer *peer) @@ -233,17 +234,6 @@ void wg_packet_send_keepalive(struct wg_peer *peer) wg_packet_send_staged_packets(peer); } -#define skb_walk_null_queue_safe(first, skb, next) \ - for (skb = first, next = skb->next; skb; \ - skb = next, next = skb ? 
skb->next : NULL) -static void skb_free_null_queue(struct sk_buff *first) -{ - struct sk_buff *skb, *next; - - skb_walk_null_queue_safe(first, skb, next) - dev_kfree_skb(skb); -} - static void wg_packet_create_data_done(struct sk_buff *first, struct wg_peer *peer) { @@ -252,7 +242,7 @@ static void wg_packet_create_data_done(struct sk_buff *first, wg_timers_any_authenticated_packet_traversal(peer); wg_timers_any_authenticated_packet_sent(peer); - skb_walk_null_queue_safe(first, skb, next) { + skb_list_walk_safe(first, skb, next) { is_keepalive = skb->len == message_data_len(0); if (likely(!wg_socket_send_skb_to_peer(peer, skb, PACKET_CB(skb)->ds) && !is_keepalive)) @@ -284,7 +274,7 @@ void wg_packet_tx_worker(struct work_struct *work) if (likely(state == PACKET_STATE_CRYPTED)) wg_packet_create_data_done(first, peer); else - skb_free_null_queue(first); + kfree_skb_list(first); wg_noise_keypair_put(keypair, false); wg_peer_put(peer); @@ -302,7 +292,7 @@ void wg_packet_encrypt_worker(struct work_struct *work) while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) { enum packet_state state = PACKET_STATE_CRYPTED; - skb_walk_null_queue_safe(first, skb, next) { + skb_list_walk_safe(first, skb, next) { if (likely(encrypt_packet(skb, PACKET_CB(first)->keypair, &simd_context))) { @@ -343,7 +333,7 @@ err: return; wg_noise_keypair_put(PACKET_CB(first)->keypair, false); wg_peer_put(peer); - skb_free_null_queue(first); + kfree_skb_list(first); } void wg_packet_purge_staged_packets(struct wg_peer *peer) diff --git a/src/socket.c b/src/socket.c index 5a77b0c..c46256d 100644 --- a/src/socket.c +++ b/src/socket.c @@ -31,7 +31,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb, struct sock *sock; int ret = 0; - skb->next = skb->prev = NULL; + skb_mark_not_on_list(skb); skb->dev = wg->dev; skb->mark = wg->fwmark; @@ -86,6 +86,8 @@ static int send4(struct wg_device *wg, struct sk_buff *skb, if (cache) dst_cache_set_ip4(cache, &rt->dst, fl.saddr); } + + skb->ignore_df 
= 1; udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds, ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, fl.fl4_dport, false, false); @@ -115,7 +117,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb, struct sock *sock; int ret = 0; - skb->next = skb->prev = NULL; + skb_mark_not_on_list(skb); skb->dev = wg->dev; skb->mark = wg->fwmark; @@ -140,9 +142,10 @@ static int send6(struct wg_device *wg, struct sk_buff *skb, if (cache) dst_cache_reset(cache); } - ret = ipv6_stub->ipv6_dst_lookup(sock_net(sock), sock, &dst, - &fl); - if (unlikely(ret)) { + dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, + NULL); + if (unlikely(IS_ERR(dst))) { + ret = PTR_ERR(dst); net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", wg->dev->name, &endpoint->addr, ret); goto err; @@ -157,6 +160,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb, dst_cache_set_ip6(cache, dst, &fl.saddr); } + skb->ignore_df = 1; udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds, ip6_dst_hoplimit(dst), 0, fl.fl6_sport, fl.fl6_dport, false); @@ -407,7 +411,7 @@ retry: } #endif - wg_socket_reinit(wg, new4 ? new4->sk : NULL, new6 ? new6->sk : NULL); + wg_socket_reinit(wg, new4->sk, new6 ? 
new6->sk : NULL); return 0; } diff --git a/src/tests/netns.sh b/src/tests/netns.sh index 7cbbfce..e7310d9 100755 --- a/src/tests/netns.sh +++ b/src/tests/netns.sh @@ -76,8 +76,10 @@ ip0 link add dev wg0 type wireguard ip0 link set wg0 netns $netns2 key1="$(pp wg genkey)" key2="$(pp wg genkey)" +key3="$(pp wg genkey)" pub1="$(pp wg pubkey <<<"$key1")" pub2="$(pp wg pubkey <<<"$key2")" +pub3="$(pp wg pubkey <<<"$key3")" psk="$(pp wg genpsk)" [[ -n $key1 && -n $key2 && -n $psk ]] @@ -221,6 +223,14 @@ kill $ncat_pid n1 wg set wg0 peer "$more_specific_key" remove [[ $(n1 wg show wg0 endpoints) == "$pub2 [::1]:9997" ]] +# Test that we can change private keys keys and immediately handshake +n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips 192.168.241.2/32 endpoint 127.0.0.1:2 +n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 +n1 ping -W 1 -c 1 192.168.241.2 +n1 wg set wg0 private-key <(echo "$key3") +n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove +n1 ping -W 1 -c 1 192.168.241.2 + ip1 link del wg0 ip2 link del wg0 @@ -231,7 +241,7 @@ ip2 link del wg0 # │ ┌─────┐ ┌─────┐ │ │ ┌──────┐ ┌──────┐ │ │ ┌─────┐ ┌─────┐ │ # │ │ wg0 │─────────────│vethc│───────────┼────┼────│vethrc│ │vethrs│──────────────┼─────┼──│veths│────────────│ wg0 │ │ # │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├──────┴─────────┐ ├──────┴────────────┐ │ │ ├─────┴──────────┐ ├─────┴──────────┐ │ -# │ │192.168.241.1/24│ │192.168.1.100/24││ │ │192.168.1.100/24│ │10.0.0.1/24 │ │ │ │10.0.0.100/24 │ │192.168.241.2/24│ │ +# │ │192.168.241.1/24│ │192.168.1.100/24││ │ │192.168.1.1/24 │ │10.0.0.1/24 │ │ │ │10.0.0.100/24 │ │192.168.241.2/24│ │ # │ │fd00::1/24 │ │ ││ │ │ │ │SNAT:192.168.1.0/24│ │ │ │ │ │fd00::2/24 │ │ # │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └───────────────────┘ │ │ └────────────────┘ 
└────────────────┘ │ # └────────────────────────────────────────┘ └────────────────────────────────────────────────┘ └────────────────────────────────────────┘ @@ -270,6 +280,26 @@ n2 ping -W 1 -c 1 192.168.241.1 # Demonstrate n2 can still send packets to n1, since persistent-keepalive will prevent connection tracking entry from expiring (to see entries: `n0 conntrack -L`). pp sleep 3 n2 ping -W 1 -c 1 192.168.241.1 +n1 wg set wg0 peer "$pub2" persistent-keepalive 0 + +# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs. +ip1 -6 addr add fc00::9/96 dev vethc +ip1 -6 route add default via fc00::1 +ip2 -4 addr add 192.168.99.7/32 dev wg0 +ip2 -6 addr add abab::1111/128 dev wg0 +n1 wg set wg0 fwmark 51820 peer "$pub2" allowed-ips 192.168.99.7,abab::1111 +ip1 -6 route add default dev wg0 table 51820 +ip1 -6 rule add not fwmark 51820 table 51820 +ip1 -6 rule add table main suppress_prefixlength 0 +ip1 -4 route add default dev wg0 table 51820 +ip1 -4 rule add not fwmark 51820 table 51820 +ip1 -4 rule add table main suppress_prefixlength 0 +# suppress_prefixlength only got added in 3.12, and we want to support 3.10+. +if [[ $(ip1 -4 rule show all) == *suppress_prefixlength* ]]; then + # Flood the pings instead of sending just one, to trigger routing table reference counting bugs. 
+ n1 ping -W 1 -c 100 -f 192.168.99.7 + n1 ping -W 1 -c 100 -f abab::1111 +fi n0 iptables -t nat -F ip0 link del vethrc diff --git a/src/tests/qemu/Makefile b/src/tests/qemu/Makefile index cd0d46e..55aea6a 100644 --- a/src/tests/qemu/Makefile +++ b/src/tests/qemu/Makefile @@ -6,7 +6,7 @@ PWD := $(shell pwd) CHOST := $(shell gcc -dumpmachine) ifneq (,$(ARCH)) -CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(filter-out android,$(wildcard /usr/bin/$(ARCH)-*-gcc)))))) +CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(filter-out android,$(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))) endif ifeq (,$(CBUILD)) CBUILD := $(CHOST) @@ -14,7 +14,7 @@ endif ARCH := $(firstword $(subst -, ,$(CBUILD))) # Set these from the environment to override -KERNEL_VERSION ?= 5.0.4 +KERNEL_VERSION ?= 5.4 KERNEL_VERSION := $(KERNEL_VERSION)$(if $(DEBUG_KERNEL),$(if $(findstring -debug,$(KERNEL_VERSION)),,-debug),) BUILD_PATH ?= $(PWD)/../../../qemu-build/$(ARCH) DISTFILES_PATH ?= $(PWD)/distfiles @@ -54,7 +54,7 @@ $(eval $(call tar_download,MUSL,musl,1.1.20,.tar.gz,https://www.musl-libc.org/re $(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/)) $(eval $(call tar_download,IPERF,iperf,3.1.7,.tar.gz,http://downloads.es.net/pub/iperf/)) $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/)) -$(eval $(call tar_download,IPROUTE2,iproute2,4.13.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/)) +$(eval $(call tar_download,IPROUTE2,iproute2,5.1.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/)) $(eval $(call tar_download,IPTABLES,iptables,1.6.1,.tar.bz2,https://www.netfilter.org/projects/iptables/files/)) $(eval $(call tar_download,NMAP,nmap,7.60,.tar.bz2,https://nmap.org/dist/)) $(eval $(call tar_download,IPUTILS,iputils,s20161105,.tar.gz,https://github.com/iputils/iputils/archive/s20161105.tar.gz/#)) @@ -360,15 +360,10 @@ 
$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) $(MAKE) -C $(BASH_PATH) $(STRIP) -s $@ -$(eval $(call file_download,271-uapi-libc-compat.h-do-not-rely-on-__GLIBC__.patch,https://raw.githubusercontent.com/lede-project/source/master/package/network/utils/iproute2/patches/)) -$(eval $(call file_download,272-uapi-if_ether.h-prevent-redefinition-of-struct-ethhd.patch,https://raw.githubusercontent.com/lede-project/source/master/package/network/utils/iproute2/patches/)) - -$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) | $(DISTFILES_PATH)/271-uapi-libc-compat.h-do-not-rely-on-__GLIBC__.patch $(DISTFILES_PATH)/272-uapi-if_ether.h-prevent-redefinition-of-struct-ethhd.patch +$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) mkdir -p $(BUILD_PATH) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< - patch -d $(IPROUTE2_PATH) -p 1 < $(DISTFILES_PATH)/271-uapi-libc-compat.h-do-not-rely-on-__GLIBC__.patch - patch -d $(IPROUTE2_PATH) -p 1 < $(DISTFILES_PATH)/272-uapi-if_ether.h-prevent-redefinition-of-struct-ethhd.patch - printf 'CC=$(CC)\nPKG_CONFIG=pkg-config\nTC_CONFIG_XT=n\nTC_CONFIG_ATM=n\nTC_CONFIG_IPSET=n\nIP_CONFIG_SETNS=y\nHAVE_ELF=n\nHAVE_MNL=y\nHAVE_BERKELEY_DB=n\nHAVE_LATEX=n\nHAVE_PDFLATEX=n\n' > $(IPROUTE2_PATH)/Config + printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile touch $@ diff --git a/src/tests/qemu/arch/powerpc64le.config b/src/tests/qemu/arch/powerpc64le.config index b28b547..990c510 100644 --- a/src/tests/qemu/arch/powerpc64le.config +++ b/src/tests/qemu/arch/powerpc64le.config @@ -3,6 +3,7 @@ CONFIG_PPC_PSERIES=y CONFIG_ALTIVEC=y CONFIG_VSX=y 
CONFIG_PPC_OF_BOOT_TRAMPOLINE=y +CONFIG_PPC_RADIX_MMU=y CONFIG_HVC_CONSOLE=y CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_CMDLINE_BOOL=y diff --git a/src/tests/qemu/debug.config b/src/tests/qemu/debug.config index 9f6c236..5e7fa10 100644 --- a/src/tests/qemu/debug.config +++ b/src/tests/qemu/debug.config @@ -25,6 +25,7 @@ CONFIG_KASAN=y CONFIG_KASAN_INLINE=y CONFIG_UBSAN=y CONFIG_UBSAN_SANITIZE_ALL=y +CONFIG_UBSAN_NO_ALIGNMENT=y CONFIG_UBSAN_NULL=y CONFIG_DEBUG_KMEMLEAK=y CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192 diff --git a/src/tests/qemu/init.c b/src/tests/qemu/init.c index 0c3f843..51e5dde 100644 --- a/src/tests/qemu/init.c +++ b/src/tests/qemu/init.c @@ -158,7 +158,7 @@ static void kmod_selftests(void) } fclose(file); if (!success) { - puts("\x1b[31m\x1b[1m[-] Tests failed! :-(\x1b[0m"); + puts("\x1b[31m\x1b[1m[-] Tests failed! \u2639\x1b[0m"); poweroff(); } } @@ -203,8 +203,19 @@ static void launch_tests(void) if (write(fd, "success\n", 8) != 8) panic("write(success_dev)"); close(fd); - } else - puts("\x1b[31m\x1b[1m[-] Tests failed! :-(\x1b[0m"); + } else { + const char *why = "unknown cause"; + int what = -1; + + if (WIFEXITED(status)) { + why = "exit code"; + what = WEXITSTATUS(status); + } else if (WIFSIGNALED(status)) { + why = "signal"; + what = WTERMSIG(status); + } + printf("\x1b[31m\x1b[1m[-] Tests failed with %s %d! 
\u2639\x1b[0m\n", why, what); + } } static void ensure_console(void) diff --git a/src/tests/qemu/kernel.config b/src/tests/qemu/kernel.config index 85e7d20..0458314 100644 --- a/src/tests/qemu/kernel.config +++ b/src/tests/qemu/kernel.config @@ -22,6 +22,9 @@ CONFIG_NF_NAT_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_NAT=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_TTY=y CONFIG_BINFMT_ELF=y CONFIG_BINFMT_SCRIPT=y diff --git a/src/timers.c b/src/timers.c index 7614c85..d54d32a 100644 --- a/src/timers.c +++ b/src/timers.c @@ -17,7 +17,8 @@ * not sent one for `KEEPALIVE_TIMEOUT` ms. * * - Timer for initiating new handshake if we have sent a packet but after have - * not received one (even empty) for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT)` ms. + * not received one (even empty) for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) + + * jitter` ms. * * - Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms * if no new keys have been received. @@ -145,7 +146,8 @@ void wg_timers_data_sent(struct wg_peer *peer) { if (!timer_pending(&peer->timer_new_handshake)) mod_peer_timer(peer, &peer->timer_new_handshake, - jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ); + jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ + + prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); } /* Should be called after an authenticated data packet is received. 
*/ diff --git a/src/timers.h b/src/timers.h index f9d11fe..f0653dc 100644 --- a/src/timers.h +++ b/src/timers.h @@ -25,7 +25,7 @@ static inline bool wg_birthdate_has_expired(u64 birthday_nanoseconds, u64 expiration_seconds) { return (s64)(birthday_nanoseconds + expiration_seconds * NSEC_PER_SEC) - <= (s64)ktime_get_boot_fast_ns(); + <= (s64)ktime_get_coarse_boottime_ns(); } #endif /* _WG_TIMERS_H */ diff --git a/src/tools/Makefile b/src/tools/Makefile index 0eecc57..e342779 100644 --- a/src/tools/Makefile +++ b/src/tools/Makefile @@ -54,6 +54,12 @@ endif ifeq ($(PLATFORM),haiku) LDLIBS += -lnetwork -lbsd endif +ifeq ($(PLATFORM),windows) +CC := x86_64-w64-mingw32-gcc +CFLAGS += -Iwincompat/include -include wincompat/compat.h +LDLIBS += -lws2_32 +wg: wincompat/libc.o wincompat/init.o +endif ifneq ($(V),1) BUILT_IN_LINK.o := $(LINK.o) diff --git a/src/tools/config.c b/src/tools/config.c index d510ea7..db90228 100644 --- a/src/tools/config.c +++ b/src/tools/config.c @@ -174,11 +174,29 @@ static inline bool parse_ip(struct wgallowedip *allowedip, const char *value) return true; } +static inline int parse_dns_retries(void) +{ + unsigned long ret; + char *retries = getenv("WG_ENDPOINT_RESOLUTION_RETRIES"), *end; + + if (!retries) + return 15; + if (!strcmp(retries, "infinity")) + return -1; + + ret = strtoul(retries, &end, 10); + if (*end || ret > INT_MAX) { + fprintf(stderr, "Unable to parse WG_ENDPOINT_RESOLUTION_RETRIES: `%s'\n", retries); + exit(1); + } + return (int)ret; +} + static inline bool parse_endpoint(struct sockaddr *endpoint, const char *value) { char *mutable = strdup(value); char *begin, *end; - int ret; + int ret, retries = parse_dns_retries(); struct addrinfo *resolved; struct addrinfo hints = { .ai_family = AF_UNSPEC, @@ -219,11 +237,11 @@ static inline bool parse_endpoint(struct sockaddr *endpoint, const char *value) *end++ = '\0'; } - for (unsigned int timeout = 1000000;;) { + #define min(a, b) ((a) < (b) ? 
(a) : (b)) + for (unsigned int timeout = 1000000;; timeout = min(20000000, timeout * 6 / 5)) { ret = getaddrinfo(begin, end, &hints, &resolved); if (!ret) break; - timeout = timeout * 3 / 2; /* The set of return codes that are "permanent failures". All other possibilities are potentially transient. * * This is according to https://sourceware.org/glibc/wiki/NameResolver which states: @@ -238,7 +256,7 @@ static inline bool parse_endpoint(struct sockaddr *endpoint, const char *value) #ifdef EAI_NODATA ret == EAI_NODATA || #endif - timeout >= 90000000) { + (retries >= 0 && !retries--)) { free(mutable); fprintf(stderr, "%s: `%s'\n", ret == EAI_SYSTEM ? strerror(errno) : gai_strerror(ret), value); return false; diff --git a/src/tools/genkey.c b/src/tools/genkey.c index 21d2f7a..b9c2a86 100644 --- a/src/tools/genkey.c +++ b/src/tools/genkey.c @@ -28,6 +28,7 @@ #include "encoding.h" #include "subcommands.h" +#ifndef WINCOMPAT static inline bool __attribute__((__warn_unused_result__)) get_random_bytes(uint8_t *out, size_t len) { ssize_t ret = 0; @@ -63,6 +64,9 @@ static inline bool __attribute__((__warn_unused_result__)) get_random_bytes(uint errno = -ret; return i == len; } +#else +#include "wincompat/getrandom.c" +#endif int genkey_main(int argc, char *argv[]) { diff --git a/src/tools/ipc.c b/src/tools/ipc.c index 7ab3a62..7207efc 100644 --- a/src/tools/ipc.c +++ b/src/tools/ipc.c @@ -95,7 +95,8 @@ static int add_next_to_inflatable_buffer(struct inflatable_buffer *buffer) return 0; } -static FILE *userspace_interface_file(const char *interface) +#ifndef WINCOMPAT +static FILE *userspace_interface_file(const char *iface) { struct stat sbuf; struct sockaddr_un addr = { .sun_family = AF_UNIX }; @@ -103,9 +104,9 @@ static FILE *userspace_interface_file(const char *interface) FILE *f = NULL; errno = EINVAL; - if (strchr(interface, '/')) + if (strchr(iface, '/')) goto out; - ret = snprintf(addr.sun_path, sizeof(addr.sun_path), SOCK_PATH "%s" SOCK_SUFFIX, interface); + ret = 
snprintf(addr.sun_path, sizeof(addr.sun_path), SOCK_PATH "%s" SOCK_SUFFIX, iface); if (ret < 0) goto out; ret = stat(addr.sun_path, &sbuf); @@ -139,15 +140,15 @@ out: return f; } -static bool userspace_has_wireguard_interface(const char *interface) +static bool userspace_has_wireguard_interface(const char *iface) { struct stat sbuf; struct sockaddr_un addr = { .sun_family = AF_UNIX }; int fd, ret; - if (strchr(interface, '/')) + if (strchr(iface, '/')) return false; - if (snprintf(addr.sun_path, sizeof(addr.sun_path), SOCK_PATH "%s" SOCK_SUFFIX, interface) < 0) + if (snprintf(addr.sun_path, sizeof(addr.sun_path), SOCK_PATH "%s" SOCK_SUFFIX, iface) < 0) return false; if (stat(addr.sun_path, &sbuf) < 0) return false; @@ -197,6 +198,9 @@ out: closedir(dir); return ret; } +#else +#include "wincompat/ipc.c" +#endif static int userspace_set_device(struct wgdevice *dev) { @@ -284,7 +288,7 @@ static int userspace_set_device(struct wgdevice *dev) num; \ }) -static int userspace_get_device(struct wgdevice **out, const char *interface) +static int userspace_get_device(struct wgdevice **out, const char *iface) { struct wgdevice *dev; struct wgpeer *peer = NULL; @@ -298,23 +302,24 @@ static int userspace_get_device(struct wgdevice **out, const char *interface) if (!dev) return -errno; - f = userspace_interface_file(interface); - if (!f) - return -errno; + f = userspace_interface_file(iface); + if (!f) { + ret = -errno; + free(dev); + *out = NULL; + return ret; + } fprintf(f, "get=1\n\n"); fflush(f); - strncpy(dev->name, interface, IFNAMSIZ - 1); + strncpy(dev->name, iface, IFNAMSIZ - 1); dev->name[IFNAMSIZ - 1] = '\0'; while (getline(&key, &line_buffer_len, f) > 0) { line_len = strlen(key); - if (line_len == 1 && key[0] == '\n') { - free(key); - fclose(f); - return ret; - } + if (line_len == 1 && key[0] == '\n') + goto err; value = strchr(key, '='); if (!value || line_len == 0 || key[line_len - 1] != '\n') break; @@ -378,7 +383,7 @@ static int userspace_get_device(struct 
wgdevice **out, const char *interface) *end++ = '\0'; } if (getaddrinfo(begin, end, &hints, &resolved) != 0) { - errno = ENETUNREACH; + ret = ENETUNREACH; goto err; } if ((resolved->ai_family == AF_INET && resolved->ai_addrlen == sizeof(struct sockaddr_in)) || @@ -433,8 +438,10 @@ static int userspace_get_device(struct wgdevice **out, const char *interface) ret = -EPROTO; err: free(key); - free_wgdevice(dev); - *out = NULL; + if (ret) { + free_wgdevice(dev); + *out = NULL; + } fclose(f); errno = -ret; return ret; @@ -885,7 +892,7 @@ static void coalesce_peers(struct wgdevice *device) } } -static int kernel_get_device(struct wgdevice **device, const char *interface) +static int kernel_get_device(struct wgdevice **device, const char *iface) { int ret = 0; struct nlmsghdr *nlh; @@ -904,7 +911,7 @@ try_again: } nlh = mnlg_msg_prepare(nlg, WG_CMD_GET_DEVICE, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP); - mnl_attr_put_strz(nlh, WGDEVICE_A_IFNAME, interface); + mnl_attr_put_strz(nlh, WGDEVICE_A_IFNAME, iface); if (mnlg_socket_send(nlg, nlh) < 0) { ret = -errno; goto out; @@ -959,14 +966,14 @@ cleanup: return buffer.buffer; } -int ipc_get_device(struct wgdevice **dev, const char *interface) +int ipc_get_device(struct wgdevice **dev, const char *iface) { #ifdef __linux__ - if (userspace_has_wireguard_interface(interface)) - return userspace_get_device(dev, interface); - return kernel_get_device(dev, interface); + if (userspace_has_wireguard_interface(iface)) + return userspace_get_device(dev, iface); + return kernel_get_device(dev, iface); #else - return userspace_get_device(dev, interface); + return userspace_get_device(dev, iface); #endif } diff --git a/src/tools/man/wg.8 b/src/tools/man/wg.8 index 2013825..ac281bc 100644 --- a/src/tools/man/wg.8 +++ b/src/tools/man/wg.8 @@ -94,6 +94,14 @@ Appends the contents of \fI<configuration-filename>\fP, which must be in the format described by \fICONFIGURATION FILE FORMAT\fP below, to the current configuration of \fI<interface>\fP. 
.TP +\fBsyncconf\fP \fI<interface>\fP \fI<configuration-filename>\fP +Like \fBsetconf\fP, but reads back the existing configuration first +and only makes changes that are explicitly different between the configuration +file and the interface. This is much less efficient than \fBsetconf\fP, +but has the benefit of not disrupting current peer sessions. The contents of +\fI<configuration-filename>\fP must be in the format described by +\fICONFIGURATION FILE FORMAT\fP below. +.TP \fBgenkey\fP Generates a random \fIprivate\fP key in base64 and prints it to standard output. @@ -114,7 +122,7 @@ A private key and a corresponding public key may be generated at once by calling $ wg genkey | tee private.key | wg pubkey > public.key .TP \fBhelp\fP -Show usage message. +Shows usage message. .SH CONFIGURATION FILE FORMAT The configuration file format is based on \fIINI\fP. There are two top level sections @@ -211,6 +219,9 @@ If set to \fIalways\fP, always print ANSI colorized output. If set to \fInever\f .TP .I WG_HIDE_KEYS If set to \fInever\fP, then the pretty-printing \fBshow\fP sub-command will show private and preshared keys in the output. If set to \fIalways\fP, something invalid, or unset, then private and preshared keys will be printed as "(hidden)". +.TP +.I WG_ENDPOINT_RESOLUTION_RETRIES +If set to an integer or to \fIinfinity\fP, DNS resolution for each peer's endpoint will be retried that many times for non-permanent errors, with an increasing delay between retries. If unset, the default is 15 retries. 
.SH SEE ALSO .BR ip (8), diff --git a/src/tools/setconf.c b/src/tools/setconf.c index 8211ebd..f778f40 100644 --- a/src/tools/setconf.c +++ b/src/tools/setconf.c @@ -13,6 +13,91 @@ #include "ipc.h" #include "subcommands.h" +struct pubkey_origin { + uint8_t *pubkey; + bool from_file; +}; + +static int pubkey_cmp(const void *first, const void *second) +{ + const struct pubkey_origin *a = first, *b = second; + int ret = memcmp(a->pubkey, b->pubkey, WG_KEY_LEN); + if (ret) + return ret; + return a->from_file - b->from_file; +} + +static bool sync_conf(struct wgdevice *file) +{ + struct wgdevice *runtime; + struct wgpeer *peer; + struct pubkey_origin *pubkeys; + size_t peer_count = 0, i = 0; + + if (!file->first_peer) + return true; + + for_each_wgpeer(file, peer) + ++peer_count; + + if (ipc_get_device(&runtime, file->name) != 0) { + perror("Unable to retrieve current interface configuration"); + return false; + } + + if (!runtime->first_peer) { + free_wgdevice(runtime); + return true; + } + + file->flags &= ~WGDEVICE_REPLACE_PEERS; + + for_each_wgpeer(runtime, peer) + ++peer_count; + + pubkeys = calloc(peer_count, sizeof(*pubkeys)); + if (!pubkeys) { + free_wgdevice(runtime); + perror("Public key allocation"); + return false; + } + + for_each_wgpeer(file, peer) { + pubkeys[i].pubkey = peer->public_key; + pubkeys[i].from_file = true; + ++i; + } + for_each_wgpeer(runtime, peer) { + pubkeys[i].pubkey = peer->public_key; + pubkeys[i].from_file = false; + ++i; + } + qsort(pubkeys, peer_count, sizeof(*pubkeys), pubkey_cmp); + + for (i = 0; i < peer_count; ++i) { + if (pubkeys[i].from_file) + continue; + if (i == peer_count - 1 || !pubkeys[i + 1].from_file || memcmp(pubkeys[i].pubkey, pubkeys[i + 1].pubkey, WG_KEY_LEN)) { + peer = calloc(1, sizeof(struct wgpeer)); + if (!peer) { + free_wgdevice(runtime); + free(pubkeys); + perror("Peer allocation"); + return false; + } + peer->flags = WGPEER_REMOVE_ME; + memcpy(peer->public_key, pubkeys[i].pubkey, WG_KEY_LEN); + 
peer->next_peer = file->first_peer; + file->first_peer = peer; + if (!file->last_peer) + file->last_peer = peer; + } + } + free_wgdevice(runtime); + free(pubkeys); + return true; +} + int setconf_main(int argc, char *argv[]) { struct wgdevice *device = NULL; @@ -50,6 +135,11 @@ int setconf_main(int argc, char *argv[]) strncpy(device->name, argv[1], IFNAMSIZ - 1); device->name[IFNAMSIZ - 1] = '\0'; + if (!strcmp(argv[0], "syncconf")) { + if (!sync_conf(device)) + goto cleanup; + } + if (ipc_set_device(device) != 0) { perror("Unable to modify interface"); goto cleanup; diff --git a/src/tools/systemd/wg-quick@.service b/src/tools/systemd/wg-quick@.service index 9c67af9..af52848 100644 --- a/src/tools/systemd/wg-quick@.service +++ b/src/tools/systemd/wg-quick@.service @@ -14,6 +14,7 @@ Type=oneshot RemainAfterExit=yes ExecStart=/usr/bin/wg-quick up %i ExecStop=/usr/bin/wg-quick down %i +Environment=WG_ENDPOINT_RESOLUTION_RETRIES=infinity [Install] WantedBy=multi-user.target diff --git a/src/tools/wg-quick/android.c b/src/tools/wg-quick/android.c index 5eec36f..ad05895 100644 --- a/src/tools/wg-quick/android.c +++ b/src/tools/wg-quick/android.c @@ -20,6 +20,7 @@ #include <unistd.h> #include <errno.h> #include <regex.h> +#include <dlfcn.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/wait.h> @@ -37,6 +38,7 @@ #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) static bool is_exiting = false; +static bool binder_available = false; static void *xmalloc(size_t size) { @@ -235,6 +237,376 @@ _printf_(1, 2) static void cndc(const char *cmd_fmt, ...) } } +/* Values are from AOSP repository platform/frameworks/native in libs/binder/ndk/include_ndk/android/binder_status.h. 
*/ +enum { + STATUS_OK = 0, + STATUS_UNKNOWN_ERROR = -2147483647 - 1, + STATUS_NO_MEMORY = -ENOMEM, + STATUS_INVALID_OPERATION = -ENOSYS, + STATUS_BAD_VALUE = -EINVAL, + STATUS_BAD_TYPE = STATUS_UNKNOWN_ERROR + 1, + STATUS_NAME_NOT_FOUND = -ENOENT, + STATUS_PERMISSION_DENIED = -EPERM, + STATUS_NO_INIT = -ENODEV, + STATUS_ALREADY_EXISTS = -EEXIST, + STATUS_DEAD_OBJECT = -EPIPE, + STATUS_FAILED_TRANSACTION = STATUS_UNKNOWN_ERROR + 2, + STATUS_BAD_INDEX = -EOVERFLOW, + STATUS_NOT_ENOUGH_DATA = -ENODATA, + STATUS_WOULD_BLOCK = -EWOULDBLOCK, + STATUS_TIMED_OUT = -ETIMEDOUT, + STATUS_UNKNOWN_TRANSACTION = -EBADMSG, + STATUS_FDS_NOT_ALLOWED = STATUS_UNKNOWN_ERROR + 7, + STATUS_UNEXPECTED_NULL = STATUS_UNKNOWN_ERROR + 8 +}; +enum { + EX_NONE = 0, + EX_SECURITY = -1, + EX_BAD_PARCELABLE = -2, + EX_ILLEGAL_ARGUMENT = -3, + EX_NULL_POINTER = -4, + EX_ILLEGAL_STATE = -5, + EX_NETWORK_MAIN_THREAD = -6, + EX_UNSUPPORTED_OPERATION = -7, + EX_SERVICE_SPECIFIC = -8, + EX_PARCELABLE = -9, + EX_TRANSACTION_FAILED = -129 +}; +enum { + FLAG_ONEWAY = 0x01, +}; +enum { + FIRST_CALL_TRANSACTION = 0x00000001, + LAST_CALL_TRANSACTION = 0x00ffffff +}; +struct AIBinder; +struct AParcel; +struct AStatus; +struct AIBinder_Class; +typedef struct AIBinder AIBinder; +typedef struct AParcel AParcel; +typedef struct AStatus AStatus; +typedef struct AIBinder_Class AIBinder_Class; +typedef int32_t binder_status_t; +typedef int32_t binder_exception_t; +typedef uint32_t transaction_code_t; +typedef uint32_t binder_flags_t; +typedef void *(*AIBinder_Class_onCreate)(void *args); +typedef void (*AIBinder_Class_onDestroy)(void *userData); +typedef binder_status_t (*AIBinder_Class_onTransact)(AIBinder *binder, transaction_code_t code, const AParcel *in, AParcel *out); +typedef const char *(*AParcel_stringArrayElementGetter)(const void *arrayData, size_t index, int32_t *outLength); +static AIBinder_Class *(*AIBinder_Class_define)(const char *interfaceDescriptor, AIBinder_Class_onCreate onCreate, 
AIBinder_Class_onDestroy onDestroy, AIBinder_Class_onTransact onTransact) __attribute__((warn_unused_result)); +static bool (*AIBinder_associateClass)(AIBinder *binder, const AIBinder_Class *clazz); +static void (*AIBinder_decStrong)(AIBinder *binder); +static binder_status_t (*AIBinder_prepareTransaction)(AIBinder *binder, AParcel **in); +static binder_status_t (*AIBinder_transact)(AIBinder *binder, transaction_code_t code, AParcel **in, AParcel **out, binder_flags_t flags); +static binder_status_t (*AIBinder_ping)(AIBinder *binder); +static binder_status_t (*AIBinder_dump)(AIBinder *binder, int fd, const char **args, uint32_t numArgs); +static binder_status_t (*AParcel_readStatusHeader)(const AParcel *parcel, AStatus **status); +static binder_status_t (*AParcel_readBool)(const AParcel *parcel, bool *value); +static void (*AParcel_delete)(AParcel *parcel); +static binder_status_t (*AParcel_setDataPosition)(const AParcel *parcel, int32_t position); +static int32_t (*AParcel_getDataPosition)(const AParcel *parcel); +static binder_status_t (*AParcel_writeInt32)(AParcel *parcel, int32_t value); +static binder_status_t (*AParcel_writeStringArray)(AParcel *parcel, const void *arrayData, int32_t length, AParcel_stringArrayElementGetter getter); +static binder_status_t (*AParcel_writeString)(AParcel *parcel, const char *string, int32_t length); +static bool (*AStatus_isOk)(const AStatus *status); +static void (*AStatus_delete)(AStatus *status); +static binder_exception_t (*AStatus_getExceptionCode)(const AStatus *status); +static int32_t (*AStatus_getServiceSpecificError)(const AStatus *status); +static const char* (*AStatus_getMessage)(const AStatus *status); +static binder_status_t (*AStatus_getStatus)(const AStatus *status); +static AIBinder *(*AServiceManager_getService)(const char *instance) __attribute__((__warn_unused_result__)); + +static __attribute__((__constructor__(65535))) void load_symbols(void) +{ + void *handle = dlopen("libbinder_ndk.so", RTLD_LAZY); + 
binder_available = !!handle; + if (!binder_available) + return; + +#define X(symb) do { \ + if (!((symb) = (typeof(symb))dlsym(handle, #symb))) { \ + fprintf(stderr, "Error: unable to import " #symb " from libbinder_ndk.so\n"); \ + exit(ELIBACC); \ + } \ + } while (0) + X(AIBinder_Class_define); + X(AIBinder_associateClass); + X(AIBinder_decStrong); + X(AIBinder_prepareTransaction); + X(AIBinder_transact); + X(AIBinder_ping); + X(AIBinder_dump); + X(AParcel_readStatusHeader); + X(AParcel_readBool); + X(AParcel_delete); + X(AParcel_setDataPosition); + X(AParcel_getDataPosition); + X(AParcel_writeInt32); + X(AParcel_writeStringArray); + X(AParcel_writeString); + X(AStatus_isOk); + X(AStatus_delete); + X(AStatus_getExceptionCode); + X(AStatus_getServiceSpecificError); + X(AStatus_getMessage); + X(AStatus_getStatus); + X(AServiceManager_getService); +#undef X +} + +static void cleanup_binder(AIBinder **binder) +{ + if (*binder) + AIBinder_decStrong(*binder); +} +static void cleanup_status(AStatus **status) +{ + if (*status) + AStatus_delete(*status); +} +static void cleanup_parcel(AParcel **parcel) +{ + if (*parcel) + AParcel_delete(*parcel); +} + +#define _cleanup_status_ __attribute__((__cleanup__(cleanup_status))) +#define _cleanup_parcel_ __attribute__((__cleanup__(cleanup_parcel))) +#define _cleanup_binder_ __attribute__((__cleanup__(cleanup_binder))) + +static int32_t string_size(const char *str) +{ + return str ? strlen(str) : -1; +} + +static int32_t string_array_size(char *const *array) +{ + int32_t size = -1; + if (!array) + return size; + for (size = 0; array[size]; ++size); + return size; +} + +static const char *string_array_getter(const void *array_data, size_t index, int32_t *out_length) +{ + const char **array = (const char **)array_data; + *out_length = array[index] ? 
strlen(array[index]) : -1; + return array[index]; +} + +static binder_status_t meaningful_binder_status(const AStatus *status_out) +{ + binder_status_t status = STATUS_OK; + binder_exception_t exc_code; + int32_t exc_code_service; + const char *message; + + if (!AStatus_isOk(status_out)) { + exc_code = AStatus_getExceptionCode(status_out); + if (exc_code == EX_TRANSACTION_FAILED) { + status = AStatus_getStatus(status_out); + fprintf(stderr, "Error: transaction failed: %d\n", status); + } + else { + message = AStatus_getMessage(status_out); + + if (exc_code == EX_SERVICE_SPECIFIC) { + exc_code_service = AStatus_getServiceSpecificError(status_out); + fprintf(stderr, "Error: service specific exception code: %d%s%s\n", exc_code_service, message ? ": " : "", message ?: ""); + } + else + fprintf(stderr, "Error: exception code: %d%s%s\n", exc_code, message ? ": " : "", message ?: ""); + + status = STATUS_FAILED_TRANSACTION; + } + } + + return status; +} + +/* These values are default values observed in AOSP. 
*/ +enum { + DNSRESOLVER_SAMPLE_VALIDITY = 1800 /* sec */, + DNSRESOLVER_SUCCESS_THRESHOLD = 25, + DNSRESOLVER_MIN_SAMPLES = 8, + DNSRESOLVER_MAX_SAMPLES = 8, + DNSRESOLVER_BASE_TIMEOUT = 5000 /* msec */, + DNSRESOLVER_RETRY_COUNT = 2 +}; + +struct dnsresolver_params { + int32_t netid; + int32_t sample_validity_seconds; + int32_t success_threshold; + int32_t min_samples; + int32_t max_samples; + int32_t base_timeout_msec; + int32_t retry_count; + char **servers; /* NULL terminated array of zero-terminated UTF-8 strings */ + char **domains; /* NULL terminated array of zero-terminated UTF-8 strings */ + char *tls_name; /* zero-terminated UTF-8 string */ + char **tls_servers; /* NULL terminated array of zero-terminated UTF-8 strings */ + char **tls_fingerprints; /* NULL terminated array of zero-terminated UTF-8 strings */ +}; + +static void *on_create() +{ + fprintf(stderr, "Error: on_create called on proxy object\n"); + exit(ENOTSUP); + return NULL; +} + +static void on_destroy() +{ + fprintf(stderr, "Error: on_destroy called on proxy object\n"); + exit(ENOTSUP); +} + +static binder_status_t on_transact() +{ + fprintf(stderr, "Error: on_transact called on a proxy object\n"); + exit(ENOTSUP); + return 0; +} + +static AIBinder *dnsresolver_get_handle(void) +{ + AIBinder *binder; + AIBinder_Class *clazz; + + if (!binder_available) + return NULL; + + binder = AServiceManager_getService("dnsresolver"); + if (!binder) + return NULL; + clazz = AIBinder_Class_define("android.net.IDnsResolver", &on_create, &on_destroy, &on_transact); + if (!clazz) + goto error; + + if (!AIBinder_associateClass(binder, clazz)) + goto error; + + return binder; +error: + AIBinder_decStrong(binder); + return NULL; +} + +static int32_t dnsresolver_create_network_cache(void *handle, int32_t netid) +{ + AIBinder *const binder = handle; + binder_status_t status; + _cleanup_parcel_ AParcel *parcel_in = NULL; + _cleanup_parcel_ AParcel *parcel_out = NULL; + _cleanup_status_ AStatus *status_out = NULL; 
+ + status = AIBinder_prepareTransaction(binder, &parcel_in); + if (status != STATUS_OK) + return status; + + status = AParcel_writeInt32(parcel_in, netid); + if (status != STATUS_OK) + return status; + + status = AIBinder_transact(binder, FIRST_CALL_TRANSACTION + 7 /* createNetworkCache */, &parcel_in, &parcel_out, 0); + if (status != STATUS_OK) + return status; + + status = AParcel_readStatusHeader(parcel_out, &status_out); + if (status != STATUS_OK) + return status; + + if (!AStatus_isOk(status_out)) + return meaningful_binder_status(status_out); + + return STATUS_OK; +} + +static int32_t dnsresolver_set_resolver_configuration(void *handle, const struct dnsresolver_params *params) +{ + AIBinder *const binder = handle; + binder_status_t status; + _cleanup_parcel_ AParcel *parcel_in = NULL; + _cleanup_parcel_ AParcel *parcel_out = NULL; + _cleanup_status_ AStatus *status_out = NULL; + int32_t start_position, end_position; + + status = AIBinder_prepareTransaction(binder, &parcel_in); + if (status != STATUS_OK) + return status; + + status = AParcel_writeInt32(parcel_in, 1); + if (status != STATUS_OK) + return status; + + start_position = AParcel_getDataPosition(parcel_in); + status = AParcel_writeInt32(parcel_in, 0); + if (status != STATUS_OK) + return status; + + status = AParcel_writeInt32(parcel_in, params->netid); + if (status != STATUS_OK) + return status; + status = AParcel_writeInt32(parcel_in, params->sample_validity_seconds); + if (status != STATUS_OK) + return status; + status = AParcel_writeInt32(parcel_in, params->success_threshold); + if (status != STATUS_OK) + return status; + status = AParcel_writeInt32(parcel_in, params->min_samples); + if (status != STATUS_OK) + return status; + status = AParcel_writeInt32(parcel_in, params->max_samples); + if (status != STATUS_OK) + return status; + status = AParcel_writeInt32(parcel_in, params->base_timeout_msec); + if (status != STATUS_OK) + return status; + status = AParcel_writeInt32(parcel_in, 
params->retry_count); + if (status != STATUS_OK) + return status; + status = AParcel_writeStringArray(parcel_in, params->servers, string_array_size(params->servers), &string_array_getter); + if (status != STATUS_OK) + return status; + status = AParcel_writeStringArray(parcel_in, params->domains, string_array_size(params->domains), &string_array_getter); + if (status != STATUS_OK) + return status; + status = AParcel_writeString(parcel_in, params->tls_name, string_size(params->tls_name)); + if (status != STATUS_OK) + return status; + status = AParcel_writeStringArray(parcel_in, params->tls_servers, string_array_size(params->tls_servers), &string_array_getter); + if (status != STATUS_OK) + return status; + status = AParcel_writeStringArray(parcel_in, params->tls_fingerprints, string_array_size(params->tls_fingerprints), &string_array_getter); + if (status != STATUS_OK) + return status; + + end_position = AParcel_getDataPosition(parcel_in); + status = AParcel_setDataPosition(parcel_in, start_position); + if (status != STATUS_OK) + return status; + status = AParcel_writeInt32(parcel_in, end_position - start_position); + if (status != STATUS_OK) + return status; + status = AParcel_setDataPosition(parcel_in, end_position); + if (status != STATUS_OK) + return status; + + status = AIBinder_transact(binder, FIRST_CALL_TRANSACTION + 2 /* setResolverConfiguration */, &parcel_in, &parcel_out, 0); + if (status != STATUS_OK) + return status; + + status = AParcel_readStatusHeader(parcel_out, &status_out); + if (status != STATUS_OK) + return status; + + return meaningful_binder_status(status_out); +} + static void auto_su(int argc, char *argv[]) { char *args[argc + 4]; @@ -263,30 +635,84 @@ static void add_if(const char *iface) static void del_if(const char *iface) { - DEFINE_CMD(c); - _cleanup_regfree_ regex_t reg = { 0 }; + DEFINE_CMD(c_rule); + DEFINE_CMD(c_iptables); + DEFINE_CMD(c_ip6tables); + _cleanup_regfree_ regex_t rule_reg = { 0 }, iptables_reg = { 0 }; regmatch_t 
matches[2]; char *netid = NULL; - _cleanup_free_ char *regex = concat("0xc([0-9a-f]+)/0xcffff lookup ", iface, NULL); + _cleanup_free_ char *rule_regex = concat("0xc([0-9a-f]+)/0xcffff lookup ", iface, NULL); + _cleanup_free_ char *iptables_regex = concat("^-A (.* --comment \"wireguard rule ", iface, "\"[^\n]*)\n*$", NULL); - xregcomp(®, regex, REG_EXTENDED); + xregcomp(&rule_reg, rule_regex, REG_EXTENDED); + xregcomp(&iptables_reg, iptables_regex, REG_EXTENDED); - cmd("iptables -D OUTPUT -m mark --mark 0x20000 -j ACCEPT -m comment --comment \"wireguard rule %s\"", iface); - cmd("ip6tables -D OUTPUT -m mark --mark 0x20000 -j ACCEPT -m comment --comment \"wireguard rule %s\"", iface); cmd("ip link del %s", iface); - for (char *ret = cmd_ret(&c, "ip rule show"); ret; ret = cmd_ret(&c, NULL)) { - if (!regexec(®, ret, ARRAY_SIZE(matches), matches, 0)) { - ret[matches[1].rm_eo] = '\0'; - netid = &ret[matches[1].rm_so]; - break; + + for (char *rule = cmd_ret(&c_iptables, "iptables-save"); rule; rule = cmd_ret(&c_iptables, NULL)) { + if (!regexec(&iptables_reg, rule, ARRAY_SIZE(matches), matches, 0)) { + rule[matches[1].rm_eo] = '\0'; + cmd("iptables -D %s", &rule[matches[1].rm_so]); + } + } + for (char *rule = cmd_ret(&c_ip6tables, "ip6tables-save"); rule; rule = cmd_ret(&c_ip6tables, NULL)) { + if (!regexec(&iptables_reg, rule, ARRAY_SIZE(matches), matches, 0)) { + rule[matches[1].rm_eo] = '\0'; + cmd("ip6tables -D %s", &rule[matches[1].rm_so]); } } + for (char *rule = cmd_ret(&c_rule, "ip rule show"); rule; rule = cmd_ret(&c_rule, NULL)) { + if (!regexec(&rule_reg, rule, ARRAY_SIZE(matches), matches, 0)) { + rule[matches[1].rm_eo] = '\0'; + netid = &rule[matches[1].rm_so]; + break; + } + } if (netid) cndc("network destroy %lu", strtoul(netid, NULL, 16)); } -static void up_if(unsigned int *netid, const char *iface) +static bool should_block_ipv6(const char *iface) +{ + DEFINE_CMD(c); + bool has_ipv6 = false, has_all_none = true; + + for (char *endpoint = cmd_ret(&c, "wg 
show %s endpoints", iface); endpoint; endpoint = cmd_ret(&c, NULL)) { + char *start = strchr(endpoint, '\t'); + + if (!start) + continue; + ++start; + if (start[0] != '(') + has_all_none = false; + if (start[0] == '[') + has_ipv6 = true; + } + return !has_ipv6 && !has_all_none; +} + +static uint16_t determine_listen_port(const char *iface) +{ + DEFINE_CMD(c); + unsigned long listen_port = 0; + char *value; + + cmd("ip link set up dev %s", iface); + value = cmd_ret(&c, "wg show %s listen-port", iface); + if (!value) + goto set_back_down; + listen_port = strtoul(value, NULL, 10); + if (listen_port > UINT16_MAX || !listen_port) { + listen_port = 0; + goto set_back_down; + } +set_back_down: + cmd("ip link set down dev %s", iface); + return listen_port; +} + +static void up_if(unsigned int *netid, const char *iface, uint16_t listen_port) { srandom(time(NULL) ^ getpid()); /* Not real randomness. */ @@ -296,6 +722,10 @@ static void up_if(unsigned int *netid, const char *iface) cmd("wg set %s fwmark 0x20000", iface); cmd("iptables -I OUTPUT 1 -m mark --mark 0x20000 -j ACCEPT -m comment --comment \"wireguard rule %s\"", iface); cmd("ip6tables -I OUTPUT 1 -m mark --mark 0x20000 -j ACCEPT -m comment --comment \"wireguard rule %s\"", iface); + if (listen_port) { + cmd("iptables -I INPUT 1 -p udp --dport %u -j ACCEPT -m comment --comment \"wireguard rule %s\"", listen_port, iface); + cmd("ip6tables -I INPUT 1 -p udp --dport %u -j %s -m comment --comment \"wireguard rule %s\"", listen_port, should_block_ipv6(iface) ? 
"DROP" : "ACCEPT", iface); + } cndc("interface setcfg %s up", iface); cndc("network create %u vpn 1 1", *netid); cndc("network interface add %u %s", *netid, iface); @@ -382,22 +812,75 @@ static void set_dnses(unsigned int netid, const char *dnses) if (len > (1<<16)) return; _cleanup_free_ char *mutable = xstrdup(dnses); - _cleanup_free_ char *arglist = xmalloc(len * 4 + 1); + _cleanup_free_ char *shell_arglist = xmalloc(len * 4 + 1); + _cleanup_free_ char *function_arglist = xmalloc(len * 4 + 1); _cleanup_free_ char *arg = xmalloc(len + 4); + _cleanup_free_ char **dns_list = NULL; + _cleanup_binder_ AIBinder *handle = NULL; + size_t dns_list_size = 0; if (!len) return; - arglist[0] = '\0'; + for (char *dns = strtok(mutable, ", \t\n"); dns; dns = strtok(NULL, ", \t\n")) { + if (strchr(dns, '\'') || strchr(dns, '\\')) + continue; + ++dns_list_size; + } + if (!dns_list_size) + return; + dns_list = xcalloc(dns_list_size + 1, sizeof(*dns_list)); + free(mutable); + mutable = xstrdup(dnses); + shell_arglist[0] = '\0'; + function_arglist[0] = '\0'; + dns_list_size = 0; for (char *dns = strtok(mutable, ", \t\n"); dns; dns = strtok(NULL, ", \t\n")) { if (strchr(dns, '\'') || strchr(dns, '\\')) continue; snprintf(arg, len + 3, "'%s' ", dns); - strncat(arglist, arg, len * 4 - 1); + strncat(shell_arglist, arg, len * 4 - 1); + snprintf(arg, len + 2, function_arglist[0] == '\0' ? 
"%s" : ", %s", dns); + strncat(function_arglist, arg, len * 4 - 1); + dns_list[dns_list_size++] = dns; } - if (!strlen(arglist)) - return; - cndc("resolver setnetdns %u '' %s", netid, arglist); + + if ((handle = dnsresolver_get_handle())) { + binder_status_t status; + + printf("[#] <binder>::dnsResolver->createNetworkCache(%u)\n", netid); + status = dnsresolver_create_network_cache(handle, netid); + if (status != 0) { + fprintf(stderr, "Error: unable to create network cache\n"); + exit(ENONET); + } + + struct dnsresolver_params params = { + .netid = netid, + .sample_validity_seconds = DNSRESOLVER_SAMPLE_VALIDITY, + .success_threshold = DNSRESOLVER_SUCCESS_THRESHOLD, + .min_samples = DNSRESOLVER_MIN_SAMPLES, + .max_samples = DNSRESOLVER_MAX_SAMPLES, + .base_timeout_msec = DNSRESOLVER_BASE_TIMEOUT, + .retry_count = DNSRESOLVER_RETRY_COUNT, + .servers = dns_list, + .domains = (char *[]){NULL}, + .tls_name = "", + .tls_servers = (char *[]){NULL}, + .tls_fingerprints = (char *[]){NULL} + }; + + printf("[#] <binder>::dnsResolver->setResolverConfiguration(%u, [%s], [], %d, %d, %d, %d, %d, %d, [], [])\n", + netid, function_arglist, DNSRESOLVER_SAMPLE_VALIDITY, DNSRESOLVER_SUCCESS_THRESHOLD, + DNSRESOLVER_MIN_SAMPLES, DNSRESOLVER_MAX_SAMPLES, DNSRESOLVER_BASE_TIMEOUT, DNSRESOLVER_RETRY_COUNT); + status = dnsresolver_set_resolver_configuration(handle, &params); + + if (status != 0) { + fprintf(stderr, "Error: unable to set DNS servers through Binder: %d\n", status); + exit(ENONET); + } + } else + cndc("resolver setnetdns %u '' %s", netid, shell_arglist); } static void add_addr(const char *iface, const char *addr) @@ -520,56 +1003,6 @@ static void set_routes(const char *iface, unsigned int netid) } } -static void maybe_block_ipv6(const char *iface) -{ - DEFINE_CMD(c_endpoints); - DEFINE_CMD(c_listenport); - bool has_ipv6 = false, has_all_none = true; - char *value; - unsigned long listenport; - - for (char *endpoint = cmd_ret(&c_endpoints, "wg show %s endpoints", iface); 
endpoint; endpoint = cmd_ret(&c_endpoints, NULL)) { - char *start = strchr(endpoint, '\t'); - - if (!start) - continue; - ++start; - if (start[0] != '(') - has_all_none = false; - if (start[0] == '[') - has_ipv6 = true; - } - if (has_ipv6 || has_all_none) - return; - - cmd("ip link set up dev %s", iface); - value = cmd_ret(&c_listenport, "wg show %s listen-port", iface); - if (!value) - goto set_back_down; - listenport = strtoul(value, NULL, 10); - if (listenport > UINT16_MAX || !listenport) - goto set_back_down; - cmd("ip6tables -I INPUT 1 -p udp --dport %lu -j DROP -m comment --comment \"wireguard rule %s\"", listenport, iface); -set_back_down: - cmd("ip link set down dev %s", iface); -} - -static void maybe_unblock_ipv6(const char *iface) -{ - regmatch_t matches[2]; - _cleanup_regfree_ regex_t reg = { 0 }; - _cleanup_free_ char *regex = concat("^-A (.* --comment \"wireguard rule ", iface, "\"[^\n]*)\n*$", NULL); - DEFINE_CMD(c); - - xregcomp(&reg, regex, REG_EXTENDED); - for (char *rule = cmd_ret(&c, "ip6tables-save"); rule; rule = cmd_ret(&c, NULL)) { - if (!regexec(&reg, rule, ARRAY_SIZE(matches), matches, 0)) { - rule[matches[1].rm_eo] = '\0'; - cmd("ip6tables -D %s", &rule[matches[1].rm_so]); - } - } -} - static void set_config(const char *iface, const char *config) { FILE *config_writer; @@ -641,6 +1074,7 @@ static void cmd_up(const char *iface, const char *config, unsigned int mtu, cons { DEFINE_CMD(c); unsigned int netid = 0; + uint16_t listen_port; if (cmd_ret(&c, "ip link show dev %s 2>/dev/null", iface)) { fprintf(stderr, "Error: %s already exists\n", iface); @@ -652,9 +1086,9 @@ static void cmd_up(const char *iface, const char *config, unsigned int mtu, cons add_if(iface); set_config(iface, config); - maybe_block_ipv6(iface); + listen_port = determine_listen_port(iface); + up_if(&netid, iface, listen_port); set_addr(iface, addrs); - up_if(&netid, iface); set_dnses(netid, dnses); set_routes(iface, netid); set_mtu(iface, mtu); @@ -686,7 +1120,6 @@ static void 
cmd_down(const char *iface) } del_if(iface); - maybe_unblock_ipv6(iface); broadcast_change(); exit(EXIT_SUCCESS); } diff --git a/src/tools/wg-quick/darwin.bash b/src/tools/wg-quick/darwin.bash index aa3edeb..d5dd396 100755 --- a/src/tools/wg-quick/darwin.bash +++ b/src/tools/wg-quick/darwin.bash @@ -47,7 +47,8 @@ parse_options() { CONFIG_FILE="$1" if [[ $CONFIG_FILE =~ ^[a-zA-Z0-9_=+.-]{1,15}$ ]]; then for path in "${CONFIG_SEARCH_PATHS[@]}"; do - [[ -e $path/$CONFIG_FILE.conf ]] && { CONFIG_FILE="$path/$CONFIG_FILE.conf"; break; } + CONFIG_FILE="$path/$1.conf" + [[ -e $CONFIG_FILE ]] && break done fi [[ -e $CONFIG_FILE ]] || die "\`$CONFIG_FILE' does not exist" @@ -80,6 +81,17 @@ parse_options() { shopt -u nocasematch } +detect_launchd() { + unset LAUNCHED_BY_LAUNCHD + local line + while read -r line; do + if [[ $line =~ ^\s*domain\ =\ ]]; then + LAUNCHED_BY_LAUNCHD=1 + break + fi + done < <(launchctl procinfo $$ 2>/dev/null) +} + read_bool() { case "$2" in true) printf -v "$1" 1 ;; @@ -307,7 +319,8 @@ monitor_daemon() { set_dns sleep 2 && kill -ALRM $pid 2>/dev/null & fi - done < <(route -n monitor)) & disown + done < <(route -n monitor)) & + [[ -n $LAUNCHED_BY_LAUNCHD ]] || disown } add_route() { @@ -462,6 +475,7 @@ if [[ $# -eq 1 && ( $1 == --help || $1 == -h || $1 == help ) ]]; then cmd_usage elif [[ $# -eq 2 && $1 == up ]]; then auto_su + detect_launchd parse_options "$2" cmd_up elif [[ $# -eq 2 && $1 == down ]]; then @@ -481,4 +495,6 @@ else exit 1 fi +[[ -n $LAUNCHED_BY_LAUNCHD ]] && wait + exit 0 diff --git a/src/tools/wg-quick/freebsd.bash b/src/tools/wg-quick/freebsd.bash index 72e0bd0..a72353c 100755 --- a/src/tools/wg-quick/freebsd.bash +++ b/src/tools/wg-quick/freebsd.bash @@ -64,7 +64,8 @@ parse_options() { CONFIG_FILE="$1" if [[ $CONFIG_FILE =~ ^[a-zA-Z0-9_=+.-]{1,15}$ ]]; then for path in "${CONFIG_SEARCH_PATHS[@]}"; do - [[ -e $path/$CONFIG_FILE.conf ]] && { CONFIG_FILE="$path/$CONFIG_FILE.conf"; break; } + CONFIG_FILE="$path/$1.conf" + [[ -e 
$CONFIG_FILE ]] && break done fi [[ -e $CONFIG_FILE ]] || die "\`$CONFIG_FILE' does not exist" diff --git a/src/tools/wg-quick/linux.bash b/src/tools/wg-quick/linux.bash index 5eaa504..e9c9052 100755 --- a/src/tools/wg-quick/linux.bash +++ b/src/tools/wg-quick/linux.bash @@ -95,17 +95,18 @@ add_if() { del_if() { local table [[ $HAVE_SET_DNS -eq 0 ]] || unset_dns + [[ $HAVE_SET_FIREWALL -eq 0 ]] || remove_firewall if [[ -z $TABLE || $TABLE == auto ]] && get_fwmark table && [[ $(wg show "$INTERFACE" allowed-ips) =~ /0(\ |$'\n'|$) ]]; then - while [[ $(ip -4 rule show) == *"lookup $table"* ]]; do + while [[ $(ip -4 rule show 2>/dev/null) == *"lookup $table"* ]]; do cmd ip -4 rule delete table $table done - while [[ $(ip -4 rule show) == *"from all lookup main suppress_prefixlength 0"* ]]; do + while [[ $(ip -4 rule show 2>/dev/null) == *"from all lookup main suppress_prefixlength 0"* ]]; do cmd ip -4 rule delete table main suppress_prefixlength 0 done - while [[ $(ip -6 rule show) == *"lookup $table"* ]]; do + while [[ $(ip -6 rule show 2>/dev/null) == *"lookup $table"* ]]; do cmd ip -6 rule delete table $table done - while [[ $(ip -6 rule show) == *"from all lookup main suppress_prefixlength 0"* ]]; do + while [[ $(ip -6 rule show 2>/dev/null) == *"from all lookup main suppress_prefixlength 0"* ]]; do cmd ip -6 rule delete table main suppress_prefixlength 0 done fi @@ -113,7 +114,9 @@ del_if() { } add_addr() { - cmd ip address add "$1" dev "$INTERFACE" + local proto=-4 + [[ $1 == *:* ]] && proto=-6 + cmd ip $proto address add "$1" dev "$INTERFACE" } set_mtu_up() { @@ -153,18 +156,20 @@ set_dns() { unset_dns() { [[ ${#DNS[@]} -gt 0 ]] || return 0 - cmd resolvconf -d "$(resolvconf_iface_prefix)$INTERFACE" + cmd resolvconf -d "$(resolvconf_iface_prefix)$INTERFACE" -f } add_route() { + local proto=-4 + [[ $1 == *:* ]] && proto=-6 [[ $TABLE != off ]] || return 0 if [[ -n $TABLE && $TABLE != auto ]]; then - cmd ip route add "$1" dev "$INTERFACE" table "$TABLE" + cmd ip 
$proto route add "$1" dev "$INTERFACE" table "$TABLE" elif [[ $1 == */0 ]]; then add_default "$1" else - [[ $(ip route get "$1" 2>/dev/null) == *dev\ $INTERFACE\ * ]] || cmd ip route add "$1" dev "$INTERFACE" + [[ -n $(ip $proto route show dev "$INTERFACE" match "$1" 2>/dev/null) ]] || cmd ip $proto route add "$1" dev "$INTERFACE" fi } @@ -176,23 +181,64 @@ get_fwmark() { return 0 } +remove_firewall() { + if type -p nft >/dev/null; then + local table nftcmd + while read -r table; do + [[ $table == *" wg-quick-$INTERFACE" ]] && printf -v nftcmd '%sdelete %s\n' "$nftcmd" "$table" + done < <(nft list tables 2>/dev/null) + [[ -z $nftcmd ]] || cmd nft -f <(echo -n "$nftcmd") + fi + if type -p iptables >/dev/null; then + local line iptables found restore + for iptables in iptables ip6tables; do + restore="" found=0 + while read -r line; do + [[ $line == "*"* || $line == COMMIT || $line == "-A "*"-m comment --comment \"wg-quick(8) rule for $INTERFACE\""* ]] || continue + [[ $line == "-A"* ]] && found=1 + printf -v restore '%s%s\n' "$restore" "${line/#-A/-D}" + done < <($iptables-save 2>/dev/null) + [[ $found -ne 1 ]] || echo -n "$restore" | cmd $iptables-restore -n + done + fi +} + +HAVE_SET_FIREWALL=0 add_default() { - local table proto key value + local table line if ! 
get_fwmark table; then table=51820 - while [[ -n $(ip -4 route show table $table) || -n $(ip -6 route show table $table) ]]; do + while [[ -n $(ip -4 route show table $table 2>/dev/null) || -n $(ip -6 route show table $table 2>/dev/null) ]]; do ((table++)) done cmd wg set "$INTERFACE" fwmark $table fi - proto=-4 - [[ $1 == *:* ]] && proto=-6 + local proto=-4 iptables=iptables pf=ip + [[ $1 == *:* ]] && proto=-6 iptables=ip6tables pf=ip6 cmd ip $proto route add "$1" dev "$INTERFACE" table $table cmd ip $proto rule add not fwmark $table table $table cmd ip $proto rule add table main suppress_prefixlength 0 - while read -r key _ value; do - [[ $value -eq 1 ]] && sysctl -q "$key=2" - done < <(sysctl -a -r '^net\.ipv4.conf\.[^ .=]+\.rp_filter$') + + local marker="-m comment --comment \"wg-quick(8) rule for $INTERFACE\"" restore=$'*raw\n' nftable="wg-quick-$INTERFACE" nftcmd + printf -v nftcmd '%sadd table %s %s\n' "$nftcmd" "$pf" "$nftable" + printf -v nftcmd '%sadd chain %s %s preraw { type filter hook prerouting priority -300; }\n' "$nftcmd" "$pf" "$nftable" + printf -v nftcmd '%sadd chain %s %s premangle { type filter hook prerouting priority -150; }\n' "$nftcmd" "$pf" "$nftable" + printf -v nftcmd '%sadd chain %s %s postmangle { type filter hook postrouting priority -150; }\n' "$nftcmd" "$pf" "$nftable" + while read -r line; do + [[ $line =~ .*inet6?\ ([0-9a-f:.]+)/[0-9]+.* ]] || continue + printf -v restore '%s-I PREROUTING ! -i %s -d %s -m addrtype ! 
--src-type LOCAL -j DROP %s\n' "$restore" "$INTERFACE" "${BASH_REMATCH[1]}" "$marker" + printf -v nftcmd '%sadd rule %s %s preraw iifname != %s %s daddr %s fib saddr type != local drop\n' "$nftcmd" "$pf" "$nftable" "$INTERFACE" "$pf" "${BASH_REMATCH[1]}" + done < <(ip -o $proto addr show dev "$INTERFACE" 2>/dev/null) + printf -v restore '%sCOMMIT\n*mangle\n-I POSTROUTING -m mark --mark %d -p udp -j CONNMARK --save-mark %s\n-I PREROUTING -p udp -j CONNMARK --restore-mark %s\nCOMMIT\n' "$restore" $table "$marker" "$marker" + printf -v nftcmd '%sadd rule %s %s postmangle meta l4proto udp mark %d ct mark set mark \n' "$nftcmd" "$pf" "$nftable" $table + printf -v nftcmd '%sadd rule %s %s premangle meta l4proto udp meta mark set ct mark \n' "$nftcmd" "$pf" "$nftable" + [[ $proto == -4 ]] && cmd sysctl -q net.ipv4.conf.all.src_valid_mark=1 + if type -p nft >/dev/null; then + cmd nft -f <(echo -n "$nftcmd") + else + echo -n "$restore" | cmd $iptables-restore -n + fi + HAVE_SET_FIREWALL=1 return 0 } @@ -296,7 +342,8 @@ cmd_down() { execute_hooks "${PRE_DOWN[@]}" [[ $SAVE_CONFIG -eq 0 ]] || save_config del_if - unset_dns + unset_dns || true + remove_firewall || true execute_hooks "${POST_DOWN[@]}" } diff --git a/src/tools/wg-quick/openbsd.bash b/src/tools/wg-quick/openbsd.bash index b234609..2cadeec 100755 --- a/src/tools/wg-quick/openbsd.bash +++ b/src/tools/wg-quick/openbsd.bash @@ -293,7 +293,7 @@ add_route() { fi if [[ -n $TABLE && $TABLE != auto ]]; then - cmd route -q -n add "-$family" -rdomain "$TABLE" "$1" -iface "$ifaceroute" + cmd route -q -n -T "$TABLE" add "-$family" "$1" -iface "$ifaceroute" elif [[ $1 == */0 ]]; then if [[ $1 == *:* ]]; then AUTO_ROUTE6=1 diff --git a/src/tools/wg.c b/src/tools/wg.c index 550d9b4..7b5d3af 100644 --- a/src/tools/wg.c +++ b/src/tools/wg.c @@ -21,6 +21,7 @@ static const struct { { "set", set_main, "Change the current configuration, add peers, remove peers, or change peers" }, { "setconf", setconf_main, "Applies a configuration 
file to a WireGuard interface" }, { "addconf", setconf_main, "Appends a configuration file to a WireGuard interface" }, + { "syncconf", setconf_main, "Synchronizes a configuration file to a WireGuard interface" }, { "genkey", genkey_main, "Generates a new private key and writes it to stdout" }, { "genpsk", genkey_main, "Generates a new preshared key and writes it to stdout" }, { "pubkey", pubkey_main, "Reads a private key from stdin and writes a public key to stdout" } diff --git a/src/tools/wincompat/compat.h b/src/tools/wincompat/compat.h new file mode 100644 index 0000000..4dada77 --- /dev/null +++ b/src/tools/wincompat/compat.h @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#define __USE_MINGW_ANSI_STDIO 1 +#include <stdio.h> +#include <stdbool.h> +#include <stdint.h> + +#include <winsock2.h> +#include <ws2ipdef.h> +#include <ws2tcpip.h> +#include <in6addr.h> +#include <windows.h> + +#undef interface +#undef min +#undef max + +#define WINCOMPAT + +#define IFNAMSIZ 64 +#define EAI_SYSTEM -99 + +/* libc.c */ +char *strsep(char **str, const char *sep); +ssize_t getdelim(char **buf, size_t *bufsiz, int delimiter, FILE *fp); +ssize_t getline(char **buf, size_t *bufsiz, FILE *fp); +int inet_pton(int af, const char *src, void *dst); +const char *inet_ntop(int af, const void *src, char *dst, socklen_t size); diff --git a/src/tools/wincompat/getrandom.c b/src/tools/wincompat/getrandom.c new file mode 100644 index 0000000..4e2c4bd --- /dev/null +++ b/src/tools/wincompat/getrandom.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
+ */ + +#include <stdbool.h> +#include <ntsecapi.h> + +static inline bool __attribute__((__warn_unused_result__)) get_random_bytes(uint8_t *out, size_t len) +{ + return RtlGenRandom(out, len); +} diff --git a/src/tools/wincompat/include/arpa/inet.h b/src/tools/wincompat/include/arpa/inet.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/tools/wincompat/include/arpa/inet.h diff --git a/src/tools/wincompat/include/net/if.h b/src/tools/wincompat/include/net/if.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/tools/wincompat/include/net/if.h diff --git a/src/tools/wincompat/include/netdb.h b/src/tools/wincompat/include/netdb.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/tools/wincompat/include/netdb.h diff --git a/src/tools/wincompat/include/netinet/in.h b/src/tools/wincompat/include/netinet/in.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/tools/wincompat/include/netinet/in.h diff --git a/src/tools/wincompat/include/sys/ioctl.h b/src/tools/wincompat/include/sys/ioctl.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/tools/wincompat/include/sys/ioctl.h diff --git a/src/tools/wincompat/include/sys/socket.h b/src/tools/wincompat/include/sys/socket.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/tools/wincompat/include/sys/socket.h diff --git a/src/tools/wincompat/include/sys/un.h b/src/tools/wincompat/include/sys/un.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/tools/wincompat/include/sys/un.h diff --git a/src/tools/wincompat/init.c b/src/tools/wincompat/init.c new file mode 100644 index 0000000..8d862ff --- /dev/null +++ b/src/tools/wincompat/init.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
+ */ + +#include <winsock2.h> +#include <windows.h> + +#ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING +#define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x4 +#endif + +__attribute__((constructor)) static void init(void) +{ + char *colormode; + DWORD console_mode; + HANDLE stdout_handle; + WSADATA wsaData; + WSAStartup(MAKEWORD(2, 2), &wsaData); + + stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE); // We don't close this. + if (stdout_handle == INVALID_HANDLE_VALUE) + goto no_color; + if (!GetConsoleMode(stdout_handle, &console_mode)) + goto no_color; + if (!SetConsoleMode(stdout_handle, ENABLE_VIRTUAL_TERMINAL_PROCESSING | console_mode)) + goto no_color; + return; + +no_color: + colormode = getenv("WG_COLOR_MODE"); + if (!colormode) + putenv("WG_COLOR_MODE=never"); +} + +__attribute__((destructor)) static void deinit(void) +{ + WSACleanup(); +} diff --git a/src/tools/wincompat/ipc.c b/src/tools/wincompat/ipc.c new file mode 100644 index 0000000..25471b0 --- /dev/null +++ b/src/tools/wincompat/ipc.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
+ */ + +#include <windows.h> +#include <tlhelp32.h> +#include <accctrl.h> +#include <aclapi.h> +#include <stdio.h> +#include <stdbool.h> +#include <fcntl.h> + +static FILE *userspace_interface_file(const char *iface) +{ + char fname[MAX_PATH], error_message[1024 * 128] = { 0 }; + HANDLE thread_token, process_snapshot, winlogon_process, winlogon_token, duplicated_token, pipe_handle = INVALID_HANDLE_VALUE; + PROCESSENTRY32 entry = { .dwSize = sizeof(PROCESSENTRY32) }; + PSECURITY_DESCRIPTOR pipe_sd; + PSID pipe_sid; + SID expected_sid; + BOOL ret; + int fd; + DWORD last_error = ERROR_SUCCESS, bytes = sizeof(expected_sid); + TOKEN_PRIVILEGES privileges = { + .PrivilegeCount = 1, + .Privileges = {{ .Attributes = SE_PRIVILEGE_ENABLED }} + }; + + if (!LookupPrivilegeValue(NULL, SE_DEBUG_NAME, &privileges.Privileges[0].Luid)) + goto err; + if (!CreateWellKnownSid(WinLocalSystemSid, NULL, &expected_sid, &bytes)) + goto err; + + process_snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0); + if (process_snapshot == INVALID_HANDLE_VALUE) + goto err; + for (ret = Process32First(process_snapshot, &entry); ret; last_error = GetLastError(), ret = Process32Next(process_snapshot, &entry)) { + if (strcasecmp(entry.szExeFile, "winlogon.exe")) + continue; + + RevertToSelf(); + if (!ImpersonateSelf(SecurityImpersonation)) + continue; + if (!OpenThreadToken(GetCurrentThread(), TOKEN_ADJUST_PRIVILEGES, FALSE, &thread_token)) + continue; + if (!AdjustTokenPrivileges(thread_token, FALSE, &privileges, sizeof(privileges), NULL, NULL)) { + last_error = GetLastError(); + CloseHandle(thread_token); + continue; + } + CloseHandle(thread_token); + + winlogon_process = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, entry.th32ProcessID); + if (!winlogon_process) + continue; + if (!OpenProcessToken(winlogon_process, TOKEN_IMPERSONATE | TOKEN_DUPLICATE, &winlogon_token)) + continue; + CloseHandle(winlogon_process); + if (!DuplicateToken(winlogon_token, SecurityImpersonation, &duplicated_token)) 
{ + last_error = GetLastError(); + RevertToSelf(); + continue; + } + CloseHandle(winlogon_token); + if (!SetThreadToken(NULL, duplicated_token)) { + last_error = GetLastError(); + CloseHandle(duplicated_token); + continue; + } + CloseHandle(duplicated_token); + + snprintf(fname, sizeof(fname), "\\\\.\\pipe\\ProtectedPrefix\\Administrators\\WireGuard\\%s", iface); + pipe_handle = CreateFile(fname, GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_EXISTING, 0, NULL); + last_error = GetLastError(); + if (pipe_handle == INVALID_HANDLE_VALUE) + continue; + last_error = GetSecurityInfo(pipe_handle, SE_FILE_OBJECT, OWNER_SECURITY_INFORMATION, &pipe_sid, NULL, NULL, NULL, &pipe_sd); + if (last_error != ERROR_SUCCESS) { + CloseHandle(pipe_handle); + continue; + } + last_error = EqualSid(&expected_sid, pipe_sid) ? ERROR_SUCCESS : ERROR_ACCESS_DENIED; + LocalFree(pipe_sd); + if (last_error != ERROR_SUCCESS) { + CloseHandle(pipe_handle); + continue; + } + last_error = ERROR_SUCCESS; + break; + } + RevertToSelf(); + CloseHandle(process_snapshot); + + if (last_error != ERROR_SUCCESS || pipe_handle == INVALID_HANDLE_VALUE) + goto err; + fd = _open_osfhandle((intptr_t)pipe_handle, _O_RDWR); + if (fd == -1) { + last_error = GetLastError(); + CloseHandle(pipe_handle); + goto err; + } + return _fdopen(fd, "r+"); + +err: + if (last_error == ERROR_SUCCESS) + last_error = GetLastError(); + if (last_error == ERROR_SUCCESS) + last_error = ERROR_ACCESS_DENIED; + FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, last_error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), error_message, sizeof(error_message) - 1, NULL); + fprintf(stderr, "Error: Unable to open IPC handle via SYSTEM impersonation: %ld: %s\n", last_error, error_message); + errno = EACCES; + return NULL; +} + +static int userspace_get_wireguard_interfaces(struct inflatable_buffer *buffer) +{ + WIN32_FIND_DATA find_data; + HANDLE find_handle; + int ret = 0; + + find_handle = FindFirstFile("\\\\.\\pipe\\*", 
&find_data); + if (find_handle == INVALID_HANDLE_VALUE) + return -GetLastError(); + do { + if (strncmp("WireGuard\\", find_data.cFileName, 10)) + continue; + buffer->next = strdup(find_data.cFileName + 10); + buffer->good = true; + ret = add_next_to_inflatable_buffer(buffer); + if (ret < 0) + goto out; + } while (FindNextFile(find_handle, &find_data)); + +out: + FindClose(find_handle); + return ret; +} diff --git a/src/tools/wincompat/libc.c b/src/tools/wincompat/libc.c new file mode 100644 index 0000000..ad30278 --- /dev/null +++ b/src/tools/wincompat/libc.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <stdio.h> +#include <stdbool.h> +#include <stdint.h> +#include <winsock2.h> +#include <ws2tcpip.h> +#include <windows.h> + +char *strsep(char **str, const char *sep) +{ + char *s = *str, *end; + if (!s) + return NULL; + end = s + strcspn(s, sep); + if (*end) + *end++ = 0; + else + end = 0; + *str = end; + return s; +} + +ssize_t getdelim(char **buf, size_t *bufsiz, int delimiter, FILE *fp) +{ + char *ptr, *eptr; + + if (!*buf || !*bufsiz) { + *bufsiz = BUFSIZ; + if (!(*buf = malloc(*bufsiz))) + return -1; + } + + for (ptr = *buf, eptr = *buf + *bufsiz;;) { + int c = fgetc(fp); + if (c == -1) { + if (feof(fp)) { + ssize_t diff = (ssize_t)(ptr - *buf); + if (diff != 0) { + *ptr = '\0'; + return diff; + } + } + return -1; + } + *ptr++ = c; + if (c == delimiter) { + *ptr = '\0'; + return ptr - *buf; + } + if (ptr + 2 >= eptr) { + char *nbuf; + size_t nbufsiz = *bufsiz * 2; + ssize_t d = ptr - *buf; + if ((nbuf = realloc(*buf, nbufsiz)) == NULL) + return -1; + *buf = nbuf; + *bufsiz = nbufsiz; + eptr = nbuf + nbufsiz; + ptr = nbuf + d; + } + } +} + +ssize_t getline(char **buf, size_t *bufsiz, FILE *fp) +{ + return getdelim(buf, bufsiz, '\n', fp); +} + +int inet_pton(int af, const char *src, void *dst) +{ + struct sockaddr_storage ss = { 0 }; + int size = 
sizeof(ss); + char s[INET6_ADDRSTRLEN + 1]; + + strncpy(s, src, INET6_ADDRSTRLEN + 1); + s[INET6_ADDRSTRLEN] = '\0'; + + if (WSAStringToAddress(s, af, NULL, (struct sockaddr *)&ss, &size)) + return 0; + if (af == AF_INET) + *(struct in_addr *)dst = ((struct sockaddr_in *)&ss)->sin_addr; + else if (af == AF_INET6) + *(struct in6_addr *)dst = ((struct sockaddr_in6 *)&ss)->sin6_addr; + else + return 0; + return 1; +} + +const char *inet_ntop(int af, const void *src, char *dst, socklen_t size) +{ + struct sockaddr_storage ss = { .ss_family = af }; + unsigned long s = size; + + if (af == AF_INET) + ((struct sockaddr_in *)&ss)->sin_addr = *(struct in_addr *)src; + else if (af == AF_INET6) + ((struct sockaddr_in6 *)&ss)->sin6_addr = *(struct in6_addr *)src; + else + return NULL; + return WSAAddressToString((struct sockaddr *)&ss, sizeof(ss), NULL, dst, &s) ? NULL : dst; +} diff --git a/src/uapi/wireguard.h b/src/uapi/wireguard.h index 071ce41..ae88be1 100644 --- a/src/uapi/wireguard.h +++ b/src/uapi/wireguard.h @@ -18,30 +18,30 @@ * one but not both of: * * WGDEVICE_A_IFINDEX: NLA_U32 - * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 * * The kernel will then return several messages (NLM_F_MULTI) containing the * following tree of nested items: * * WGDEVICE_A_IFINDEX: NLA_U32 - * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 - * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN - * WGDEVICE_A_PUBLIC_KEY: len WG_KEY_LEN + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN * WGDEVICE_A_LISTEN_PORT: NLA_U16 * WGDEVICE_A_FWMARK: NLA_U32 * WGDEVICE_A_PEERS: NLA_NESTED * 0: NLA_NESTED - * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN - * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN - * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6 + * WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * 
WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6 * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16 - * WGPEER_A_LAST_HANDSHAKE_TIME: struct __kernel_timespec + * WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec * WGPEER_A_RX_BYTES: NLA_U64 * WGPEER_A_TX_BYTES: NLA_U64 * WGPEER_A_ALLOWEDIPS: NLA_NESTED * 0: NLA_NESTED * WGALLOWEDIP_A_FAMILY: NLA_U16 - * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr + * WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 * 0: NLA_NESTED * ... @@ -77,7 +77,7 @@ * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME: * * WGDEVICE_A_IFINDEX: NLA_U32 - * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current * peers should be removed prior to adding the list below. * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove @@ -87,10 +87,12 @@ * 0: NLA_NESTED * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the - * specified peer should be removed rather than - * added/updated and/or WGPEER_F_REPLACE_ALLOWEDIPS - * if all current allowed IPs of this peer should be - * removed prior to adding the list below. + * specified peer should not exist at the end of the + * operation, rather than added/updated and/or + * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed + * IPs of this peer should be removed prior to adding + * the list below and/or WGPEER_F_UPDATE_ONLY if the + * peer should only be set if it already exists. * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6 * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable @@ -119,7 +121,7 @@ * filling in information not contained in the prior. 
Note that if * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably * should not be specified in fragments that come after, so that the list - * of peers is only cleared the first time but appened after. Likewise for + * of peers is only cleared the first time but appended after. Likewise for * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message * of a peer, it likely should not be specified in subsequent fragments. * @@ -142,7 +144,8 @@ enum wg_cmd { #define WG_CMD_MAX (__WG_CMD_MAX - 1) enum wgdevice_flag { - WGDEVICE_F_REPLACE_PEERS = 1U << 0 + WGDEVICE_F_REPLACE_PEERS = 1U << 0, + __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS }; enum wgdevice_attribute { WGDEVICE_A_UNSPEC, @@ -160,7 +163,10 @@ enum wgdevice_attribute { enum wgpeer_flag { WGPEER_F_REMOVE_ME = 1U << 0, - WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1 + WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1, + WGPEER_F_UPDATE_ONLY = 1U << 2, + __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS | + WGPEER_F_UPDATE_ONLY }; enum wgpeer_attribute { WGPEER_A_UNSPEC, diff --git a/src/version.h b/src/version.h index 2726156..ddba103 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define WIREGUARD_VERSION "0.0.20190406" +#define WIREGUARD_VERSION "0.0.20191219" |