aboutsummaryrefslogtreecommitdiffstats
path: root/net/unix/af_unix.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-06-30 15:51:09 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-30 15:51:09 -0700
commitdbe69e43372212527abf48609aba7fc39a6daa27 (patch)
tree96cfafdf70f5325ceeac1054daf7deca339c9730 /net/unix/af_unix.c
parentMerge tag 'sched-urgent-2021-06-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
parentMerge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net (diff)
downloadlinux-dev-dbe69e43372212527abf48609aba7fc39a6daa27.tar.xz
linux-dev-dbe69e43372212527abf48609aba7fc39a6daa27.zip
Merge tag 'net-next-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - BPF: - add syscall program type and libbpf support for generating instructions and bindings for in-kernel BPF loaders (BPF loaders for BPF), this is a stepping stone for signed BPF programs - infrastructure to migrate TCP child sockets from one listener to another in the same reuseport group/map to improve flexibility of service hand-off/restart - add broadcast support to XDP redirect - allow bypass of the lockless qdisc to improving performance (for pktgen: +23% with one thread, +44% with 2 threads) - add a simpler version of "DO_ONCE()" which does not require jump labels, intended for slow-path usage - virtio/vsock: introduce SOCK_SEQPACKET support - add getsocketopt to retrieve netns cookie - ip: treat lowest address of a IPv4 subnet as ordinary unicast address allowing reclaiming of precious IPv4 addresses - ipv6: use prandom_u32() for ID generation - ip: add support for more flexible field selection for hashing across multi-path routes (w/ offload to mlxsw) - icmp: add support for extended RFC 8335 PROBE (ping) - seg6: add support for SRv6 End.DT46 behavior - mptcp: - DSS checksum support (RFC 8684) to detect middlebox meddling - support Connection-time 'C' flag - time stamping support - sctp: packetization Layer Path MTU Discovery (RFC 8899) - xfrm: speed up state addition with seq set - WiFi: - hidden AP discovery on 6 GHz and other HE 6 GHz improvements - aggregation handling improvements for some drivers - minstrel improvements for no-ack frames - deferred rate control for TXQs to improve reaction times - switch from round robin to virtual time-based airtime scheduler - add trace points: - tcp checksum errors - openvswitch - action execution, upcalls - socket errors via sk_error_report Device APIs: - devlink: add rate API for hierarchical control of max egress rate of virtual devices (VFs, SFs etc.) - don't require RCU read lock to be held around BPF hooks in NAPI context - page_pool: generic buffer recycling New hardware/drivers: - mobile: - iosm: PCIe Driver for Intel M.2 Modem - support for Qualcomm MSM8998 (ipa) - WiFi: Qualcomm QCN9074 and WCN6855 PCI devices - sparx5: Microchip SparX-5 family of Enterprise Ethernet switches - Mellanox BlueField Gigabit Ethernet (control NIC of the DPU) - NXP SJA1110 Automotive Ethernet 10-port switch - Qualcomm QCA8327 switch support (qca8k) - Mikrotik 10/25G NIC (atl1c) Driver changes: - ACPI support for some MDIO, MAC and PHY devices from Marvell and NXP (our first foray into MAC/PHY description via ACPI) - HW timestamping (PTP) support: bnxt_en, ice, sja1105, hns3, tja11xx - Mellanox/Nvidia NIC (mlx5) - NIC VF offload of L2 bridging - support IRQ distribution to Sub-functions - Marvell (prestera): - add flower and match all - devlink trap - link aggregation - Netronome (nfp): connection tracking offload - Intel 1GE (igc): add AF_XDP support - Marvell DPU (octeontx2): ingress ratelimit offload - Google vNIC (gve): new ring/descriptor format support - Qualcomm mobile (rmnet & ipa): inline checksum offload support - MediaTek WiFi (mt76) - mt7915 MSI support - mt7915 Tx status reporting - mt7915 thermal sensors support - mt7921 decapsulation offload - mt7921 enable runtime pm and deep sleep - Realtek WiFi (rtw88) - beacon filter support - Tx antenna path diversity support - firmware crash information via devcoredump - Qualcomm WiFi (wcn36xx) - Wake-on-WLAN support with magic packets and GTK rekeying - Micrel PHY (ksz886x/ksz8081): add cable test support" * tag 'net-next-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2168 commits) tcp: change ICSK_CA_PRIV_SIZE definition tcp_yeah: check struct yeah size at compile time gve: DQO: Fix off by one in gve_rx_dqo() stmmac: intel: set PCI_D3hot in suspend stmmac: intel: Enable PHY WOL option in EHL net: stmmac: option to enable PHY WOL with PMT enabled net: say "local" instead of "static" addresses in ndo_dflt_fdb_{add,del} net: use netdev_info in ndo_dflt_fdb_{add,del} ptp: Set lookup cookie when creating a PTP PPS source. net: sock: add trace for socket errors net: sock: introduce sk_error_report net: dsa: replay the local bridge FDB entries pointing to the bridge dev too net: dsa: ensure during dsa_fdb_offload_notify that dev_hold and dev_put are on the same dev net: dsa: include fdb entries pointing to bridge in the host fdb list net: dsa: include bridge addresses which are local in the host fdb list net: dsa: sync static FDB entries on foreign interfaces to hardware net: dsa: install the host MDB and FDB entries in the master's RX filter net: dsa: reference count the FDB addresses at the cross-chip notifier level net: dsa: introduce a separate cross-chip notifier type for host FDBs net: dsa: reference count the MDB entries at the cross-chip notifier level ...
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r--net/unix/af_unix.c192
1 files changed, 98 insertions, 94 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5d1192ceb139..23c92ad15c61 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -262,6 +262,14 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
sk_add_node(sk, list);
}
+static void __unix_set_addr(struct sock *sk, struct unix_address *addr,
+ unsigned hash)
+{
+ __unix_remove_socket(sk);
+ smp_store_release(&unix_sk(sk)->addr, addr);
+ __unix_insert_socket(&unix_socket_table[hash], sk);
+}
+
static inline void unix_remove_socket(struct sock *sk)
{
spin_lock(&unix_table_lock);
@@ -278,11 +286,11 @@ static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
static struct sock *__unix_find_socket_byname(struct net *net,
struct sockaddr_un *sunname,
- int len, int type, unsigned int hash)
+ int len, unsigned int hash)
{
struct sock *s;
- sk_for_each(s, &unix_socket_table[hash ^ type]) {
+ sk_for_each(s, &unix_socket_table[hash]) {
struct unix_sock *u = unix_sk(s);
if (!net_eq(sock_net(s), net))
@@ -297,13 +305,12 @@ static struct sock *__unix_find_socket_byname(struct net *net,
static inline struct sock *unix_find_socket_byname(struct net *net,
struct sockaddr_un *sunname,
- int len, int type,
- unsigned int hash)
+ int len, unsigned int hash)
{
struct sock *s;
spin_lock(&unix_table_lock);
- s = __unix_find_socket_byname(net, sunname, len, type, hash);
+ s = __unix_find_socket_byname(net, sunname, len, hash);
if (s)
sock_hold(s);
spin_unlock(&unix_table_lock);
@@ -484,7 +491,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
*/
if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
other->sk_err = ECONNRESET;
- other->sk_error_report(other);
+ sk_error_report(other);
}
}
}
@@ -891,12 +898,12 @@ static int unix_autobind(struct socket *sock)
retry:
addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
+ addr->hash ^= sk->sk_type;
spin_lock(&unix_table_lock);
ordernum = (ordernum+1)&0xFFFFF;
- if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
- addr->hash)) {
+ if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) {
spin_unlock(&unix_table_lock);
/*
* __unix_find_socket_byname() may take long time if many names
@@ -911,11 +918,8 @@ retry:
}
goto retry;
}
- addr->hash ^= sk->sk_type;
- __unix_remove_socket(sk);
- smp_store_release(&u->addr, addr);
- __unix_insert_socket(&unix_socket_table[addr->hash], sk);
+ __unix_set_addr(sk, addr, addr->hash);
spin_unlock(&unix_table_lock);
err = 0;
@@ -960,7 +964,7 @@ static struct sock *unix_find_other(struct net *net,
}
} else {
err = -ECONNREFUSED;
- u = unix_find_socket_byname(net, sunname, len, type, hash);
+ u = unix_find_socket_byname(net, sunname, len, type ^ hash);
if (u) {
struct dentry *dentry;
dentry = unix_sk(u)->path.dentry;
@@ -978,125 +982,125 @@ fail:
return NULL;
}
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
{
+ struct unix_sock *u = unix_sk(sk);
+ umode_t mode = S_IFSOCK |
+ (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
+ struct user_namespace *ns; // barf...
+ struct path parent;
struct dentry *dentry;
- struct path path;
- int err = 0;
+ unsigned int hash;
+ int err;
+
/*
* Get the parent directory, calculate the hash for last
* component.
*/
- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
- err = PTR_ERR(dentry);
+ dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
if (IS_ERR(dentry))
- return err;
+ return PTR_ERR(dentry);
+ ns = mnt_user_ns(parent.mnt);
/*
* All right, let's create it.
*/
- err = security_path_mknod(&path, dentry, mode, 0);
- if (!err) {
- err = vfs_mknod(mnt_user_ns(path.mnt), d_inode(path.dentry),
- dentry, mode, 0);
- if (!err) {
- res->mnt = mntget(path.mnt);
- res->dentry = dget(dentry);
- }
- }
- done_path_create(&path, dentry);
+ err = security_path_mknod(&parent, dentry, mode, 0);
+ if (!err)
+ err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
+ if (err)
+ goto out;
+ err = mutex_lock_interruptible(&u->bindlock);
+ if (err)
+ goto out_unlink;
+ if (u->addr)
+ goto out_unlock;
+
+ addr->hash = UNIX_HASH_SIZE;
+ hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+ spin_lock(&unix_table_lock);
+ u->path.mnt = mntget(parent.mnt);
+ u->path.dentry = dget(dentry);
+ __unix_set_addr(sk, addr, hash);
+ spin_unlock(&unix_table_lock);
+ mutex_unlock(&u->bindlock);
+ done_path_create(&parent, dentry);
+ return 0;
+
+out_unlock:
+ mutex_unlock(&u->bindlock);
+ err = -EINVAL;
+out_unlink:
+ /* failed after successful mknod? unlink what we'd created... */
+ vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
+out:
+ done_path_create(&parent, dentry);
return err;
}
+static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
+{
+ struct unix_sock *u = unix_sk(sk);
+ int err;
+
+ err = mutex_lock_interruptible(&u->bindlock);
+ if (err)
+ return err;
+
+ if (u->addr) {
+ mutex_unlock(&u->bindlock);
+ return -EINVAL;
+ }
+
+ spin_lock(&unix_table_lock);
+ if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
+ addr->hash)) {
+ spin_unlock(&unix_table_lock);
+ mutex_unlock(&u->bindlock);
+ return -EADDRINUSE;
+ }
+ __unix_set_addr(sk, addr, addr->hash);
+ spin_unlock(&unix_table_lock);
+ mutex_unlock(&u->bindlock);
+ return 0;
+}
+
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
- struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
char *sun_path = sunaddr->sun_path;
int err;
unsigned int hash;
struct unix_address *addr;
- struct hlist_head *list;
- struct path path = { };
- err = -EINVAL;
if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
sunaddr->sun_family != AF_UNIX)
- goto out;
+ return -EINVAL;
- if (addr_len == sizeof(short)) {
- err = unix_autobind(sock);
- goto out;
- }
+ if (addr_len == sizeof(short))
+ return unix_autobind(sock);
err = unix_mkname(sunaddr, addr_len, &hash);
if (err < 0)
- goto out;
+ return err;
addr_len = err;
-
- if (sun_path[0]) {
- umode_t mode = S_IFSOCK |
- (SOCK_INODE(sock)->i_mode & ~current_umask());
- err = unix_mknod(sun_path, mode, &path);
- if (err) {
- if (err == -EEXIST)
- err = -EADDRINUSE;
- goto out;
- }
- }
-
- err = mutex_lock_interruptible(&u->bindlock);
- if (err)
- goto out_put;
-
- err = -EINVAL;
- if (u->addr)
- goto out_up;
-
- err = -ENOMEM;
addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
if (!addr)
- goto out_up;
+ return -ENOMEM;
memcpy(addr->name, sunaddr, addr_len);
addr->len = addr_len;
addr->hash = hash ^ sk->sk_type;
refcount_set(&addr->refcnt, 1);
- if (sun_path[0]) {
- addr->hash = UNIX_HASH_SIZE;
- hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
- spin_lock(&unix_table_lock);
- u->path = path;
- list = &unix_socket_table[hash];
- } else {
- spin_lock(&unix_table_lock);
- err = -EADDRINUSE;
- if (__unix_find_socket_byname(net, sunaddr, addr_len,
- sk->sk_type, hash)) {
- unix_release_addr(addr);
- goto out_unlock;
- }
-
- list = &unix_socket_table[addr->hash];
- }
-
- err = 0;
- __unix_remove_socket(sk);
- smp_store_release(&u->addr, addr);
- __unix_insert_socket(list, sk);
-
-out_unlock:
- spin_unlock(&unix_table_lock);
-out_up:
- mutex_unlock(&u->bindlock);
-out_put:
+ if (sun_path[0])
+ err = unix_bind_bsd(sk, addr);
+ else
+ err = unix_bind_abstract(sk, addr);
if (err)
- path_put(&path);
-out:
- return err;
+ unix_release_addr(addr);
+ return err == -EEXIST ? -EADDRINUSE : err;
}
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
@@ -1393,7 +1397,7 @@ restart:
unix_state_unlock(sk);
- /* take ten and and send info to listening sock */
+ /* take ten and send info to listening sock */
spin_lock(&other->sk_receive_queue.lock);
__skb_queue_tail(&other->sk_receive_queue, skb);
spin_unlock(&other->sk_receive_queue.lock);