about | summary | refs | log | tree | commit | diff | stats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r-- net/8021q/vlan_core.c 4
-rw-r--r-- net/bluetooth/af_bluetooth.c 6
-rw-r--r-- net/bluetooth/smp.c 7
-rw-r--r-- net/bridge/br_stp.c 2
-rw-r--r-- net/bridge/br_stp_if.c 2
-rw-r--r-- net/caif/caif_socket.c 4
-rw-r--r-- net/ceph/auth_x.c 36
-rw-r--r-- net/ceph/ceph_common.c 18
-rw-r--r-- net/ceph/crypto.h 4
-rw-r--r-- net/ceph/messenger.c 88
-rw-r--r-- net/ceph/osd_client.c 34
-rw-r--r-- net/core/datagram.c 2
-rw-r--r-- net/core/dev.c 18
-rw-r--r-- net/core/neighbour.c 6
-rw-r--r-- net/core/netclassid_cgroup.c 26
-rw-r--r-- net/core/rtnetlink.c 274
-rw-r--r-- net/core/scm.c 2
-rw-r--r-- net/core/skbuff.c 3
-rw-r--r-- net/core/sock.c 12
-rw-r--r-- net/core/stream.c 6
-rw-r--r-- net/dccp/ipv6.c 37
-rw-r--r-- net/dccp/proto.c 3
-rw-r--r-- net/decnet/af_decnet.c 8
-rw-r--r-- net/dns_resolver/dns_query.c 2
-rw-r--r-- net/hsr/hsr_device.c 2
-rw-r--r-- net/ipv4/igmp.c 5
-rw-r--r-- net/ipv4/ipmr.c 23
-rw-r--r-- net/ipv4/raw.c 8
-rw-r--r-- net/ipv4/tcp.c 7
-rw-r--r-- net/ipv4/tcp_input.c 23
-rw-r--r-- net/ipv4/tcp_ipv4.c 3
-rw-r--r-- net/ipv4/tcp_timer.c 14
-rw-r--r-- net/ipv4/udp.c 1
-rw-r--r-- net/ipv6/addrconf.c 2
-rw-r--r-- net/ipv6/af_inet6.c 15
-rw-r--r-- net/ipv6/datagram.c 4
-rw-r--r-- net/ipv6/exthdrs.c 3
-rw-r--r-- net/ipv6/icmp.c 14
-rw-r--r-- net/ipv6/inet6_connection_sock.c 21
-rw-r--r-- net/ipv6/ip6_tunnel.c 2
-rw-r--r-- net/ipv6/ip6mr.c 19
-rw-r--r-- net/ipv6/ipv6_sockglue.c 33
-rw-r--r-- net/ipv6/mcast.c 2
-rw-r--r-- net/ipv6/ndisc.c 10
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c 5
-rw-r--r-- net/ipv6/raw.c 8
-rw-r--r-- net/ipv6/reassembly.c 10
-rw-r--r-- net/ipv6/route.c 2
-rw-r--r-- net/ipv6/syncookies.c 2
-rw-r--r-- net/ipv6/tcp_ipv6.c 32
-rw-r--r-- net/ipv6/udp.c 8
-rw-r--r-- net/iucv/af_iucv.c 2
-rw-r--r-- net/l2tp/l2tp_ip6.c 8
-rw-r--r-- net/nfc/llcp_sock.c 2
-rw-r--r-- net/openvswitch/dp_notify.c 2
-rw-r--r-- net/openvswitch/vport-geneve.c 1
-rw-r--r-- net/openvswitch/vport-gre.c 1
-rw-r--r-- net/openvswitch/vport-netdev.c 8
-rw-r--r-- net/openvswitch/vport.c 8
-rw-r--r-- net/openvswitch/vport.h 8
-rw-r--r-- net/packet/af_packet.c 10
-rw-r--r-- net/rds/connection.c 6
-rw-r--r-- net/rds/send.c 4
-rw-r--r-- net/rxrpc/ar-ack.c 4
-rw-r--r-- net/rxrpc/ar-output.c 2
-rw-r--r-- net/sched/sch_api.c 27
-rw-r--r-- net/sched/sch_generic.c 2
-rw-r--r-- net/sched/sch_mq.c 4
-rw-r--r-- net/sched/sch_mqprio.c 4
-rw-r--r-- net/sctp/ipv6.c 13
-rw-r--r-- net/sctp/socket.c 39
-rw-r--r-- net/socket.c 21
-rw-r--r-- net/sunrpc/auth_gss/auth_gss.c 13
-rw-r--r-- net/sunrpc/cache.c 53
-rw-r--r-- net/sunrpc/svcsock.c 40
-rw-r--r-- net/sunrpc/xprtsock.c 14
-rw-r--r-- net/tipc/link.c 2
-rw-r--r-- net/tipc/socket.c 10
-rw-r--r-- net/tipc/udp_media.c 7
-rw-r--r-- net/unix/af_unix.c 274
80 files changed, 932 insertions, 539 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 496b27588493..e2ed69850489 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -30,7 +30,9 @@ bool vlan_do_receive(struct sk_buff **skbp)
skb->pkt_type = PACKET_HOST;
}
- if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+ if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) &&
+ !netif_is_macvlan_port(vlan_dev) &&
+ !netif_is_bridge_port(vlan_dev)) {
unsigned int offset = skb->data - skb_mac_header(skb);
/*
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index a3bffd1ec2b4..70306cc9d814 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -271,11 +271,11 @@ static long bt_sock_data_wait(struct sock *sk, long timeo)
if (signal_pending(current) || !timeo)
break;
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
release_sock(sk);
timeo = schedule_timeout(timeo);
lock_sock(sk);
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
}
__set_current_state(TASK_RUNNING);
@@ -441,7 +441,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock,
if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index c91353841e40..ffed8a1d4f27 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -3027,8 +3027,13 @@ static void smp_ready_cb(struct l2cap_chan *chan)
BT_DBG("chan %p", chan);
+ /* No need to call l2cap_chan_hold() here since we already own
+ * the reference taken in smp_new_conn_cb(). This is just the
+ * first time that we tie it to a specific pointer. The code in
+ * l2cap_core.c ensures that there's no risk this function won't
+ * get called if smp_new_conn_cb was previously called.
+ */
conn->smp = chan;
- l2cap_chan_hold(chan);
if (hcon->type == ACL_LINK && test_bit(HCI_CONN_ENCRYPT, &hcon->flags))
bredr_pairing(chan);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index f7e8dee64fc8..5f3f64553179 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -48,7 +48,7 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
p->state = state;
err = switchdev_port_attr_set(p->dev, &attr);
- if (err)
+ if (err && err != -EOPNOTSUPP)
br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
(unsigned int) p->port_no, p->dev->name);
}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index fa53d7a89f48..5396ff08af32 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -50,7 +50,7 @@ void br_init_port(struct net_bridge_port *p)
p->config_pending = 0;
err = switchdev_port_attr_set(p->dev, &attr);
- if (err)
+ if (err && err != -EOPNOTSUPP)
netdev_err(p->dev, "failed to set HW ageing time\n");
}
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index cc858919108e..aa209b1066c9 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -323,7 +323,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
!timeo)
break;
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
release_sock(sk);
timeo = schedule_timeout(timeo);
lock_sock(sk);
@@ -331,7 +331,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
if (sock_flag(sk, SOCK_DEAD))
break;
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
}
finish_wait(sk_sleep(sk), &wait);
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index ba6eb17226da..10d87753ed87 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -8,6 +8,7 @@
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
+#include <linux/ceph/libceph.h>
#include <linux/ceph/messenger.h>
#include "crypto.h"
@@ -279,6 +280,15 @@ bad:
return -EINVAL;
}
+static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au)
+{
+ ceph_crypto_key_destroy(&au->session_key);
+ if (au->buf) {
+ ceph_buffer_put(au->buf);
+ au->buf = NULL;
+ }
+}
+
static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
struct ceph_x_ticket_handler *th,
struct ceph_x_authorizer *au)
@@ -297,7 +307,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
ceph_crypto_key_destroy(&au->session_key);
ret = ceph_crypto_key_clone(&au->session_key, &th->session_key);
if (ret)
- return ret;
+ goto out_au;
maxlen = sizeof(*msg_a) + sizeof(msg_b) +
ceph_x_encrypt_buflen(ticket_blob_len);
@@ -309,8 +319,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
if (!au->buf) {
au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
if (!au->buf) {
- ceph_crypto_key_destroy(&au->session_key);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_au;
}
}
au->service = th->service;
@@ -340,7 +350,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b),
p, end - p);
if (ret < 0)
- goto out_buf;
+ goto out_au;
p += ret;
au->buf->vec.iov_len = p - au->buf->vec.iov_base;
dout(" built authorizer nonce %llx len %d\n", au->nonce,
@@ -348,9 +358,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
BUG_ON(au->buf->vec.iov_len > maxlen);
return 0;
-out_buf:
- ceph_buffer_put(au->buf);
- au->buf = NULL;
+out_au:
+ ceph_x_authorizer_cleanup(au);
return ret;
}
@@ -624,8 +633,7 @@ static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
{
struct ceph_x_authorizer *au = (void *)a;
- ceph_crypto_key_destroy(&au->session_key);
- ceph_buffer_put(au->buf);
+ ceph_x_authorizer_cleanup(au);
kfree(au);
}
@@ -653,8 +661,7 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
remove_ticket_handler(ac, th);
}
- if (xi->auth_authorizer.buf)
- ceph_buffer_put(xi->auth_authorizer.buf);
+ ceph_x_authorizer_cleanup(&xi->auth_authorizer);
kfree(ac->private);
ac->private = NULL;
@@ -691,8 +698,10 @@ static int ceph_x_sign_message(struct ceph_auth_handshake *auth,
struct ceph_msg *msg)
{
int ret;
- if (!auth->authorizer)
+
+ if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
return 0;
+
ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
msg, &msg->footer.sig);
if (ret < 0)
@@ -707,8 +716,9 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
__le64 sig_check;
int ret;
- if (!auth->authorizer)
+ if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
return 0;
+
ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
msg, &sig_check);
if (ret < 0)
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 78f098a20796..bcbec33c6a14 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -245,6 +245,8 @@ enum {
Opt_nocrc,
Opt_cephx_require_signatures,
Opt_nocephx_require_signatures,
+ Opt_cephx_sign_messages,
+ Opt_nocephx_sign_messages,
Opt_tcp_nodelay,
Opt_notcp_nodelay,
};
@@ -267,6 +269,8 @@ static match_table_t opt_tokens = {
{Opt_nocrc, "nocrc"},
{Opt_cephx_require_signatures, "cephx_require_signatures"},
{Opt_nocephx_require_signatures, "nocephx_require_signatures"},
+ {Opt_cephx_sign_messages, "cephx_sign_messages"},
+ {Opt_nocephx_sign_messages, "nocephx_sign_messages"},
{Opt_tcp_nodelay, "tcp_nodelay"},
{Opt_notcp_nodelay, "notcp_nodelay"},
{-1, NULL}
@@ -491,6 +495,12 @@ ceph_parse_options(char *options, const char *dev_name,
case Opt_nocephx_require_signatures:
opt->flags |= CEPH_OPT_NOMSGAUTH;
break;
+ case Opt_cephx_sign_messages:
+ opt->flags &= ~CEPH_OPT_NOMSGSIGN;
+ break;
+ case Opt_nocephx_sign_messages:
+ opt->flags |= CEPH_OPT_NOMSGSIGN;
+ break;
case Opt_tcp_nodelay:
opt->flags |= CEPH_OPT_TCP_NODELAY;
@@ -534,6 +544,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
seq_puts(m, "nocrc,");
if (opt->flags & CEPH_OPT_NOMSGAUTH)
seq_puts(m, "nocephx_require_signatures,");
+ if (opt->flags & CEPH_OPT_NOMSGSIGN)
+ seq_puts(m, "nocephx_sign_messages,");
if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
seq_puts(m, "notcp_nodelay,");
@@ -596,11 +608,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
if (ceph_test_opt(client, MYIP))
myaddr = &client->options->my_addr;
- ceph_messenger_init(&client->msgr, myaddr,
- client->supported_features,
- client->required_features,
- ceph_test_opt(client, NOCRC),
- ceph_test_opt(client, TCP_NODELAY));
+ ceph_messenger_init(&client->msgr, myaddr);
/* subsystems */
err = ceph_monc_init(&client->monc, client);
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index d1498224c49d..2e9cab09f37b 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -16,8 +16,10 @@ struct ceph_crypto_key {
static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
{
- if (key)
+ if (key) {
kfree(key->key);
+ key->key = NULL;
+ }
}
int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b9b0e3b5da49..9981039ef4ff 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -509,7 +509,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
return ret;
}
- if (con->msgr->tcp_nodelay) {
+ if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) {
int optval = 1;
ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
@@ -637,9 +637,6 @@ static int con_close_socket(struct ceph_connection *con)
static void ceph_msg_remove(struct ceph_msg *msg)
{
list_del_init(&msg->list_head);
- BUG_ON(msg->con == NULL);
- msg->con->ops->put(msg->con);
- msg->con = NULL;
ceph_msg_put(msg);
}
@@ -662,15 +659,14 @@ static void reset_connection(struct ceph_connection *con)
if (con->in_msg) {
BUG_ON(con->in_msg->con != con);
- con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
- con->ops->put(con);
}
con->connect_seq = 0;
con->out_seq = 0;
if (con->out_msg) {
+ BUG_ON(con->out_msg->con != con);
ceph_msg_put(con->out_msg);
con->out_msg = NULL;
}
@@ -1205,7 +1201,7 @@ static void prepare_write_message_footer(struct ceph_connection *con)
con->out_kvec[v].iov_base = &m->footer;
if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
if (con->ops->sign_message)
- con->ops->sign_message(con, m);
+ con->ops->sign_message(m);
else
m->footer.sig = 0;
con->out_kvec[v].iov_len = sizeof(m->footer);
@@ -1432,7 +1428,8 @@ static int prepare_write_connect(struct ceph_connection *con)
dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
con->connect_seq, global_seq, proto);
- con->out_connect.features = cpu_to_le64(con->msgr->supported_features);
+ con->out_connect.features =
+ cpu_to_le64(from_msgr(con->msgr)->supported_features);
con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1527,7 +1524,7 @@ static int write_partial_message_data(struct ceph_connection *con)
{
struct ceph_msg *msg = con->out_msg;
struct ceph_msg_data_cursor *cursor = &msg->cursor;
- bool do_datacrc = !con->msgr->nocrc;
+ bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
u32 crc;
dout("%s %p msg %p\n", __func__, con, msg);
@@ -1552,8 +1549,8 @@ static int write_partial_message_data(struct ceph_connection *con)
bool need_crc;
int ret;
- page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
- &last_piece);
+ page = ceph_msg_data_next(cursor, &page_offset, &length,
+ &last_piece);
ret = ceph_tcp_sendpage(con->sock, page, page_offset,
length, !last_piece);
if (ret <= 0) {
@@ -1564,7 +1561,7 @@ static int write_partial_message_data(struct ceph_connection *con)
}
if (do_datacrc && cursor->need_crc)
crc = ceph_crc32c_page(crc, page, page_offset, length);
- need_crc = ceph_msg_data_advance(&msg->cursor, (size_t)ret);
+ need_crc = ceph_msg_data_advance(cursor, (size_t)ret);
}
dout("%s %p msg %p done\n", __func__, con, msg);
@@ -2005,8 +2002,8 @@ static int process_banner(struct ceph_connection *con)
static int process_connect(struct ceph_connection *con)
{
- u64 sup_feat = con->msgr->supported_features;
- u64 req_feat = con->msgr->required_features;
+ u64 sup_feat = from_msgr(con->msgr)->supported_features;
+ u64 req_feat = from_msgr(con->msgr)->required_features;
u64 server_feat = ceph_sanitize_features(
le64_to_cpu(con->in_reply.features));
int ret;
@@ -2232,7 +2229,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
{
struct ceph_msg *msg = con->in_msg;
struct ceph_msg_data_cursor *cursor = &msg->cursor;
- const bool do_datacrc = !con->msgr->nocrc;
+ bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
struct page *page;
size_t page_offset;
size_t length;
@@ -2246,8 +2243,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
if (do_datacrc)
crc = con->in_data_crc;
while (cursor->resid) {
- page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
- NULL);
+ page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
if (ret <= 0) {
if (do_datacrc)
@@ -2258,7 +2254,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
if (do_datacrc)
crc = ceph_crc32c_page(crc, page, page_offset, ret);
- (void) ceph_msg_data_advance(&msg->cursor, (size_t)ret);
+ (void) ceph_msg_data_advance(cursor, (size_t)ret);
}
if (do_datacrc)
con->in_data_crc = crc;
@@ -2278,7 +2274,7 @@ static int read_partial_message(struct ceph_connection *con)
int end;
int ret;
unsigned int front_len, middle_len, data_len;
- bool do_datacrc = !con->msgr->nocrc;
+ bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH);
u64 seq;
u32 crc;
@@ -2423,7 +2419,7 @@ static int read_partial_message(struct ceph_connection *con)
}
if (need_sign && con->ops->check_message_signature &&
- con->ops->check_message_signature(con, m)) {
+ con->ops->check_message_signature(m)) {
pr_err("read_partial_message %p signature check failed\n", m);
return -EBADMSG;
}
@@ -2438,13 +2434,10 @@ static int read_partial_message(struct ceph_connection *con)
*/
static void process_message(struct ceph_connection *con)
{
- struct ceph_msg *msg;
+ struct ceph_msg *msg = con->in_msg;
BUG_ON(con->in_msg->con != con);
- con->in_msg->con = NULL;
- msg = con->in_msg;
con->in_msg = NULL;
- con->ops->put(con);
/* if first message, set peer_name */
if (con->peer_name.type == 0)
@@ -2677,7 +2670,7 @@ more:
if (ret <= 0) {
switch (ret) {
case -EBADMSG:
- con->error_msg = "bad crc";
+ con->error_msg = "bad crc/signature";
/* fall through */
case -EBADE:
ret = -EIO;
@@ -2918,10 +2911,8 @@ static void con_fault(struct ceph_connection *con)
if (con->in_msg) {
BUG_ON(con->in_msg->con != con);
- con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
- con->ops->put(con);
}
/* Requeue anything that hasn't been acked */
@@ -2952,15 +2943,8 @@ static void con_fault(struct ceph_connection *con)
* initialize a new messenger instance
*/
void ceph_messenger_init(struct ceph_messenger *msgr,
- struct ceph_entity_addr *myaddr,
- u64 supported_features,
- u64 required_features,
- bool nocrc,
- bool tcp_nodelay)
+ struct ceph_entity_addr *myaddr)
{
- msgr->supported_features = supported_features;
- msgr->required_features = required_features;
-
spin_lock_init(&msgr->global_seq_lock);
if (myaddr)
@@ -2970,8 +2954,6 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
msgr->inst.addr.type = 0;
get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
encode_my_addr(msgr);
- msgr->nocrc = nocrc;
- msgr->tcp_nodelay = tcp_nodelay;
atomic_set(&msgr->stopping, 0);
write_pnet(&msgr->net, get_net(current->nsproxy->net_ns));
@@ -2986,6 +2968,15 @@ void ceph_messenger_fini(struct ceph_messenger *msgr)
}
EXPORT_SYMBOL(ceph_messenger_fini);
+static void msg_con_set(struct ceph_msg *msg, struct ceph_connection *con)
+{
+ if (msg->con)
+ msg->con->ops->put(msg->con);
+
+ msg->con = con ? con->ops->get(con) : NULL;
+ BUG_ON(msg->con != con);
+}
+
static void clear_standby(struct ceph_connection *con)
{
/* come back from STANDBY? */
@@ -3017,9 +3008,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
return;
}
- BUG_ON(msg->con != NULL);
- msg->con = con->ops->get(con);
- BUG_ON(msg->con == NULL);
+ msg_con_set(msg, con);
BUG_ON(!list_empty(&msg->list_head));
list_add_tail(&msg->list_head, &con->out_queue);
@@ -3047,16 +3036,15 @@ void ceph_msg_revoke(struct ceph_msg *msg)
{
struct ceph_connection *con = msg->con;
- if (!con)
+ if (!con) {
+ dout("%s msg %p null con\n", __func__, msg);
return; /* Message not in our possession */
+ }
mutex_lock(&con->mutex);
if (!list_empty(&msg->list_head)) {
dout("%s %p msg %p - was on queue\n", __func__, con, msg);
list_del_init(&msg->list_head);
- BUG_ON(msg->con == NULL);
- msg->con->ops->put(msg->con);
- msg->con = NULL;
msg->hdr.seq = 0;
ceph_msg_put(msg);
@@ -3080,16 +3068,13 @@ void ceph_msg_revoke(struct ceph_msg *msg)
*/
void ceph_msg_revoke_incoming(struct ceph_msg *msg)
{
- struct ceph_connection *con;
+ struct ceph_connection *con = msg->con;
- BUG_ON(msg == NULL);
- if (!msg->con) {
+ if (!con) {
dout("%s msg %p null con\n", __func__, msg);
-
return; /* Message not in our possession */
}
- con = msg->con;
mutex_lock(&con->mutex);
if (con->in_msg == msg) {
unsigned int front_len = le32_to_cpu(con->in_hdr.front_len);
@@ -3335,9 +3320,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
}
if (msg) {
BUG_ON(*skip);
+ msg_con_set(msg, con);
con->in_msg = msg;
- con->in_msg->con = con->ops->get(con);
- BUG_ON(con->in_msg->con == NULL);
} else {
/*
* Null message pointer means either we should skip
@@ -3384,6 +3368,8 @@ static void ceph_msg_release(struct kref *kref)
dout("%s %p\n", __func__, m);
WARN_ON(!list_empty(&m->list_head));
+ msg_con_set(m, NULL);
+
/* drop middle, data, if any */
if (m->middle) {
ceph_buffer_put(m->middle);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index f79ccac6699f..f8f235930d88 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -120,11 +120,13 @@ static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
}
#endif /* CONFIG_BLOCK */
-#define osd_req_op_data(oreq, whch, typ, fld) \
- ({ \
- BUG_ON(whch >= (oreq)->r_num_ops); \
- &(oreq)->r_ops[whch].typ.fld; \
- })
+#define osd_req_op_data(oreq, whch, typ, fld) \
+({ \
+ struct ceph_osd_request *__oreq = (oreq); \
+ unsigned int __whch = (whch); \
+ BUG_ON(__whch >= __oreq->r_num_ops); \
+ &__oreq->r_ops[__whch].typ.fld; \
+})
static struct ceph_osd_data *
osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
@@ -1750,8 +1752,7 @@ static void complete_request(struct ceph_osd_request *req)
* handle osd op reply. either call the callback if it is specified,
* or do the completion to wake up the waiting thread.
*/
-static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
- struct ceph_connection *con)
+static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
{
void *p, *end;
struct ceph_osd_request *req;
@@ -2807,7 +2808,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
ceph_osdc_handle_map(osdc, msg);
break;
case CEPH_MSG_OSD_OPREPLY:
- handle_reply(osdc, msg, con);
+ handle_reply(osdc, msg);
break;
case CEPH_MSG_WATCH_NOTIFY:
handle_watch_notify(osdc, msg);
@@ -2849,9 +2850,6 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
goto out;
}
- if (req->r_reply->con)
- dout("%s revoking msg %p from old con %p\n", __func__,
- req->r_reply, req->r_reply->con);
ceph_msg_revoke_incoming(req->r_reply);
if (front_len > req->r_reply->front_alloc_len) {
@@ -2978,17 +2976,19 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&osdc->client->monc);
}
-static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
+static int osd_sign_message(struct ceph_msg *msg)
{
- struct ceph_osd *o = con->private;
+ struct ceph_osd *o = msg->con->private;
struct ceph_auth_handshake *auth = &o->o_auth;
+
return ceph_auth_sign_message(auth, msg);
}
-static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
+static int osd_check_message_signature(struct ceph_msg *msg)
{
- struct ceph_osd *o = con->private;
+ struct ceph_osd *o = msg->con->private;
struct ceph_auth_handshake *auth = &o->o_auth;
+
return ceph_auth_check_message_signature(auth, msg);
}
@@ -3000,7 +3000,7 @@ static const struct ceph_connection_operations osd_con_ops = {
.verify_authorizer_reply = verify_authorizer_reply,
.invalidate_authorizer = invalidate_authorizer,
.alloc_msg = alloc_msg,
- .sign_message = sign_message,
- .check_message_signature = check_message_signature,
+ .sign_message = osd_sign_message,
+ .check_message_signature = osd_check_message_signature,
.fault = osd_reset,
};
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 617088aee21d..d62af69ad844 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -785,7 +785,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
if (sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index ab9b8d0d115e..ae00b894e675 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2403,17 +2403,20 @@ static void skb_warn_bad_offload(const struct sk_buff *skb)
{
static const netdev_features_t null_features = 0;
struct net_device *dev = skb->dev;
- const char *driver = "";
+ const char *name = "";
if (!net_ratelimit())
return;
- if (dev && dev->dev.parent)
- driver = dev_driver_string(dev->dev.parent);
-
+ if (dev) {
+ if (dev->dev.parent)
+ name = dev_driver_string(dev->dev.parent);
+ else
+ name = netdev_name(dev);
+ }
WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
"gso_type=%d ip_summed=%d\n",
- driver, dev ? &dev->features : &null_features,
+ name, dev ? &dev->features : &null_features,
skb->sk ? &skb->sk->sk_route_caps : &null_features,
skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
skb_shinfo(skb)->gso_type, skb->ip_summed);
@@ -6426,11 +6429,16 @@ int __netdev_update_features(struct net_device *dev)
if (dev->netdev_ops->ndo_set_features)
err = dev->netdev_ops->ndo_set_features(dev, features);
+ else
+ err = 0;
if (unlikely(err < 0)) {
netdev_err(dev,
"set_features() failed (%d); wanted %pNF, left %pNF\n",
err, &features, &dev->features);
+ /* return non-0 since some features might have changed and
+ * it's better to fire a spurious notification than miss it
+ */
return -1;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1aa8437ed6c4..f18ae91b652e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -857,7 +857,7 @@ static void neigh_probe(struct neighbour *neigh)
struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
/* keep skb alive even if arp_queue overflows */
if (skb)
- skb = skb_copy(skb, GFP_ATOMIC);
+ skb = skb_clone(skb, GFP_ATOMIC);
write_unlock(&neigh->lock);
neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
@@ -2215,7 +2215,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
ndm->ndm_pad2 = 0;
ndm->ndm_flags = pn->flags | NTF_PROXY;
ndm->ndm_type = RTN_UNICAST;
- ndm->ndm_ifindex = pn->dev->ifindex;
+ ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
ndm->ndm_state = NUD_NONE;
if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
@@ -2333,7 +2333,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (h > s_h)
s_idx = 0;
for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
- if (dev_net(n->dev) != net)
+ if (pneigh_net(n) != net)
continue;
if (idx < s_idx)
goto next;
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 6441f47b1a8f..2e4df84c34a1 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -56,7 +56,7 @@ static void cgrp_css_free(struct cgroup_subsys_state *css)
kfree(css_cls_state(css));
}
-static int update_classid(const void *v, struct file *file, unsigned n)
+static int update_classid_sock(const void *v, struct file *file, unsigned n)
{
int err;
struct socket *sock = sock_from_file(file, &err);
@@ -67,18 +67,25 @@ static int update_classid(const void *v, struct file *file, unsigned n)
return 0;
}
-static void cgrp_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void update_classid(struct cgroup_subsys_state *css, void *v)
{
- struct cgroup_cls_state *cs = css_cls_state(css);
- void *v = (void *)(unsigned long)cs->classid;
+ struct css_task_iter it;
struct task_struct *p;
- cgroup_taskset_for_each(p, tset) {
+ css_task_iter_start(css, &it);
+ while ((p = css_task_iter_next(&it))) {
task_lock(p);
- iterate_fd(p->files, 0, update_classid, v);
+ iterate_fd(p->files, 0, update_classid_sock, v);
task_unlock(p);
}
+ css_task_iter_end(&it);
+}
+
+static void cgrp_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
+{
+ update_classid(css,
+ (void *)(unsigned long)css_cls_state(css)->classid);
}
static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -89,8 +96,11 @@ static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
u64 value)
{
- css_cls_state(css)->classid = (u32) value;
+ struct cgroup_cls_state *cs = css_cls_state(css);
+
+ cs->classid = (u32)value;
+ update_classid(css, (void *)(unsigned long)cs->classid);
return 0;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 504bd17b7456..34ba7a08876d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1045,15 +1045,156 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
return 0;
}
+static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ const struct rtnl_link_stats64 *stats;
+ struct rtnl_link_stats64 temp;
+ struct nlattr *attr;
+
+ stats = dev_get_stats(dev, &temp);
+
+ attr = nla_reserve(skb, IFLA_STATS,
+ sizeof(struct rtnl_link_stats));
+ if (!attr)
+ return -EMSGSIZE;
+
+ copy_rtnl_link_stats(nla_data(attr), stats);
+
+ attr = nla_reserve(skb, IFLA_STATS64,
+ sizeof(struct rtnl_link_stats64));
+ if (!attr)
+ return -EMSGSIZE;
+
+ copy_rtnl_link_stats64(nla_data(attr), stats);
+
+ return 0;
+}
+
+static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
+ struct net_device *dev,
+ int vfs_num,
+ struct nlattr *vfinfo)
+{
+ struct ifla_vf_rss_query_en vf_rss_query_en;
+ struct ifla_vf_link_state vf_linkstate;
+ struct ifla_vf_spoofchk vf_spoofchk;
+ struct ifla_vf_tx_rate vf_tx_rate;
+ struct ifla_vf_stats vf_stats;
+ struct ifla_vf_trust vf_trust;
+ struct ifla_vf_vlan vf_vlan;
+ struct ifla_vf_rate vf_rate;
+ struct nlattr *vf, *vfstats;
+ struct ifla_vf_mac vf_mac;
+ struct ifla_vf_info ivi;
+
+ /* Not all SR-IOV capable drivers support the
+ * spoofcheck and "RSS query enable" query. Preset to
+ * -1 so the user space tool can detect that the driver
+ * didn't report anything.
+ */
+ ivi.spoofchk = -1;
+ ivi.rss_query_en = -1;
+ ivi.trusted = -1;
+ memset(ivi.mac, 0, sizeof(ivi.mac));
+ /* The default value for VF link state is "auto"
+ * (IFLA_VF_LINK_STATE_AUTO), which equals zero
+ */
+ ivi.linkstate = 0;
+ if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
+ return 0;
+
+ vf_mac.vf =
+ vf_vlan.vf =
+ vf_rate.vf =
+ vf_tx_rate.vf =
+ vf_spoofchk.vf =
+ vf_linkstate.vf =
+ vf_rss_query_en.vf =
+ vf_trust.vf = ivi.vf;
+
+ memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+ vf_vlan.vlan = ivi.vlan;
+ vf_vlan.qos = ivi.qos;
+ vf_tx_rate.rate = ivi.max_tx_rate;
+ vf_rate.min_tx_rate = ivi.min_tx_rate;
+ vf_rate.max_tx_rate = ivi.max_tx_rate;
+ vf_spoofchk.setting = ivi.spoofchk;
+ vf_linkstate.link_state = ivi.linkstate;
+ vf_rss_query_en.setting = ivi.rss_query_en;
+ vf_trust.setting = ivi.trusted;
+ vf = nla_nest_start(skb, IFLA_VF_INFO);
+ if (!vf) {
+ nla_nest_cancel(skb, vfinfo);
+ return -EMSGSIZE;
+ }
+ if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
+ nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
+ nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
+ &vf_rate) ||
+ nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
+ &vf_tx_rate) ||
+ nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
+ &vf_spoofchk) ||
+ nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
+ &vf_linkstate) ||
+ nla_put(skb, IFLA_VF_RSS_QUERY_EN,
+ sizeof(vf_rss_query_en),
+ &vf_rss_query_en) ||
+ nla_put(skb, IFLA_VF_TRUST,
+ sizeof(vf_trust), &vf_trust))
+ return -EMSGSIZE;
+ memset(&vf_stats, 0, sizeof(vf_stats));
+ if (dev->netdev_ops->ndo_get_vf_stats)
+ dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
+ &vf_stats);
+ vfstats = nla_nest_start(skb, IFLA_VF_STATS);
+ if (!vfstats) {
+ nla_nest_cancel(skb, vf);
+ nla_nest_cancel(skb, vfinfo);
+ return -EMSGSIZE;
+ }
+ if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
+ vf_stats.rx_packets) ||
+ nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
+ vf_stats.tx_packets) ||
+ nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
+ vf_stats.rx_bytes) ||
+ nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
+ vf_stats.tx_bytes) ||
+ nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
+ vf_stats.broadcast) ||
+ nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
+ vf_stats.multicast))
+ return -EMSGSIZE;
+ nla_nest_end(skb, vfstats);
+ nla_nest_end(skb, vf);
+ return 0;
+}
+
+static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
+{
+ struct rtnl_link_ifmap map = {
+ .mem_start = dev->mem_start,
+ .mem_end = dev->mem_end,
+ .base_addr = dev->base_addr,
+ .irq = dev->irq,
+ .dma = dev->dma,
+ .port = dev->if_port,
+ };
+ if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
- struct rtnl_link_stats64 temp;
- const struct rtnl_link_stats64 *stats;
- struct nlattr *attr, *af_spec;
+ struct nlattr *af_spec;
struct rtnl_af_ops *af_ops;
struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
@@ -1096,18 +1237,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
goto nla_put_failure;
- if (1) {
- struct rtnl_link_ifmap map = {
- .mem_start = dev->mem_start,
- .mem_end = dev->mem_end,
- .base_addr = dev->base_addr,
- .irq = dev->irq,
- .dma = dev->dma,
- .port = dev->if_port,
- };
- if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
- goto nla_put_failure;
- }
+ if (rtnl_fill_link_ifmap(skb, dev))
+ goto nla_put_failure;
if (dev->addr_len) {
if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
@@ -1124,128 +1255,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (rtnl_phys_switch_id_fill(skb, dev))
goto nla_put_failure;
- attr = nla_reserve(skb, IFLA_STATS,
- sizeof(struct rtnl_link_stats));
- if (attr == NULL)
- goto nla_put_failure;
-
- stats = dev_get_stats(dev, &temp);
- copy_rtnl_link_stats(nla_data(attr), stats);
-
- attr = nla_reserve(skb, IFLA_STATS64,
- sizeof(struct rtnl_link_stats64));
- if (attr == NULL)
+ if (rtnl_fill_stats(skb, dev))
goto nla_put_failure;
- copy_rtnl_link_stats64(nla_data(attr), stats);
if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
goto nla_put_failure;
- if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
- && (ext_filter_mask & RTEXT_FILTER_VF)) {
+ if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
+ ext_filter_mask & RTEXT_FILTER_VF) {
int i;
-
- struct nlattr *vfinfo, *vf, *vfstats;
+ struct nlattr *vfinfo;
int num_vfs = dev_num_vf(dev->dev.parent);
vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
if (!vfinfo)
goto nla_put_failure;
for (i = 0; i < num_vfs; i++) {
- struct ifla_vf_info ivi;
- struct ifla_vf_mac vf_mac;
- struct ifla_vf_vlan vf_vlan;
- struct ifla_vf_rate vf_rate;
- struct ifla_vf_tx_rate vf_tx_rate;
- struct ifla_vf_spoofchk vf_spoofchk;
- struct ifla_vf_link_state vf_linkstate;
- struct ifla_vf_rss_query_en vf_rss_query_en;
- struct ifla_vf_stats vf_stats;
- struct ifla_vf_trust vf_trust;
-
- /*
- * Not all SR-IOV capable drivers support the
- * spoofcheck and "RSS query enable" query. Preset to
- * -1 so the user space tool can detect that the driver
- * didn't report anything.
- */
- ivi.spoofchk = -1;
- ivi.rss_query_en = -1;
- ivi.trusted = -1;
- memset(ivi.mac, 0, sizeof(ivi.mac));
- /* The default value for VF link state is "auto"
- * IFLA_VF_LINK_STATE_AUTO which equals zero
- */
- ivi.linkstate = 0;
- if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
- break;
- vf_mac.vf =
- vf_vlan.vf =
- vf_rate.vf =
- vf_tx_rate.vf =
- vf_spoofchk.vf =
- vf_linkstate.vf =
- vf_rss_query_en.vf =
- vf_trust.vf = ivi.vf;
-
- memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
- vf_vlan.vlan = ivi.vlan;
- vf_vlan.qos = ivi.qos;
- vf_tx_rate.rate = ivi.max_tx_rate;
- vf_rate.min_tx_rate = ivi.min_tx_rate;
- vf_rate.max_tx_rate = ivi.max_tx_rate;
- vf_spoofchk.setting = ivi.spoofchk;
- vf_linkstate.link_state = ivi.linkstate;
- vf_rss_query_en.setting = ivi.rss_query_en;
- vf_trust.setting = ivi.trusted;
- vf = nla_nest_start(skb, IFLA_VF_INFO);
- if (!vf) {
- nla_nest_cancel(skb, vfinfo);
- goto nla_put_failure;
- }
- if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
- nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
- nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
- &vf_rate) ||
- nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
- &vf_tx_rate) ||
- nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
- &vf_spoofchk) ||
- nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
- &vf_linkstate) ||
- nla_put(skb, IFLA_VF_RSS_QUERY_EN,
- sizeof(vf_rss_query_en),
- &vf_rss_query_en) ||
- nla_put(skb, IFLA_VF_TRUST,
- sizeof(vf_trust), &vf_trust))
+ if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
goto nla_put_failure;
- memset(&vf_stats, 0, sizeof(vf_stats));
- if (dev->netdev_ops->ndo_get_vf_stats)
- dev->netdev_ops->ndo_get_vf_stats(dev, i,
- &vf_stats);
- vfstats = nla_nest_start(skb, IFLA_VF_STATS);
- if (!vfstats) {
- nla_nest_cancel(skb, vf);
- nla_nest_cancel(skb, vfinfo);
- goto nla_put_failure;
- }
- if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
- vf_stats.rx_packets) ||
- nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
- vf_stats.tx_packets) ||
- nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
- vf_stats.rx_bytes) ||
- nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
- vf_stats.tx_bytes) ||
- nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
- vf_stats.broadcast) ||
- nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
- vf_stats.multicast))
- goto nla_put_failure;
- nla_nest_end(skb, vfstats);
- nla_nest_end(skb, vf);
}
+
nla_nest_end(skb, vfinfo);
}
diff --git a/net/core/scm.c b/net/core/scm.c
index 3b6899b7d810..8a1741b14302 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -305,6 +305,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
err = put_user(cmlen, &cm->cmsg_len);
if (!err) {
cmlen = CMSG_SPACE(i*sizeof(int));
+ if (msg->msg_controllen < cmlen)
+ cmlen = msg->msg_controllen;
msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index aa41e6dd6429..152b9c70e252 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4268,7 +4268,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
return NULL;
}
- memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+ memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len,
+ 2 * ETH_ALEN);
skb->mac_header += VLAN_HLEN;
return skb;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 1e4dd54bfb5a..e31dfcee1729 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1530,7 +1530,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
skb_queue_head_init(&newsk->sk_receive_queue);
skb_queue_head_init(&newsk->sk_write_queue);
- spin_lock_init(&newsk->sk_dst_lock);
rwlock_init(&newsk->sk_callback_lock);
lockdep_set_class_and_name(&newsk->sk_callback_lock,
af_callback_keys + newsk->sk_family,
@@ -1607,7 +1606,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
u32 max_segs = 1;
- __sk_dst_set(sk, dst);
+ sk_dst_set(sk, dst);
sk->sk_route_caps = dst->dev->features;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
@@ -1815,7 +1814,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
DEFINE_WAIT(wait);
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
for (;;) {
if (!timeo)
break;
@@ -1861,7 +1860,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
break;
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
err = -EAGAIN;
if (!timeo)
@@ -2048,9 +2047,9 @@ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
DEFINE_WAIT(wait);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
finish_wait(sk_sleep(sk), &wait);
return rc;
}
@@ -2388,7 +2387,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
} else
sk->sk_wq = NULL;
- spin_lock_init(&sk->sk_dst_lock);
rwlock_init(&sk->sk_callback_lock);
lockdep_set_class_and_name(&sk->sk_callback_lock,
af_callback_keys + sk->sk_family,
diff --git a/net/core/stream.c b/net/core/stream.c
index d70f77a0c889..b96f7a79e544 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -39,7 +39,7 @@ void sk_stream_write_space(struct sock *sk)
wake_up_interruptible_poll(&wq->wait, POLLOUT |
POLLWRNORM | POLLWRBAND);
if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
- sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
+ sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}
}
@@ -126,7 +126,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
while (1) {
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
@@ -139,7 +139,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
}
if (signal_pending(current))
goto do_interrupted;
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
if (sk_stream_memory_free(sk) && !vm_wait)
break;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index db5fc2440a23..9c6d0508e63a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -202,7 +202,9 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -219,7 +221,10 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
&ireq->ir_v6_loc_addr,
&ireq->ir_v6_rmt_addr);
fl6.daddr = ireq->ir_v6_rmt_addr;
- err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ rcu_read_lock();
+ err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
+ rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -387,6 +392,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *newnp;
const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
struct inet_sock *newinet;
struct dccp6_sock *newdp6;
struct sock *newsk;
@@ -453,7 +459,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
* comment in that function for the gory details. -acme
*/
- __ip6_dst_store(newsk, dst, NULL, NULL);
+ ip6_dst_store(newsk, dst, NULL, NULL);
newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
NETIF_F_TSO);
newdp6 = (struct dccp6_sock *)newsk;
@@ -488,13 +494,15 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
* Yes, keeping reference count would be much more clever, but we make
* one more one thing there: reattach optmem to newsk.
*/
- if (np->opt != NULL)
- newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ }
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt != NULL)
- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
- newnp->opt->opt_flen);
+ if (opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+ opt->opt_flen;
dccp_sync_mss(newsk, dst_mtu(dst));
@@ -757,6 +765,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
+ struct ipv6_txoptions *opt;
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
@@ -856,7 +865,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_sport = inet->inet_sport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ final_p = fl6_update_dst(&fl6, opt, &final);
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -873,12 +883,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
np->saddr = *saddr;
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
- __ip6_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
icsk->icsk_ext_hdr_len = 0;
- if (np->opt != NULL)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
+ if (opt)
+ icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
inet->inet_dport = usin->sin6_port;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b5cf13a28009..41e65804ddf5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -339,8 +339,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
if (sk_stream_is_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
- set_bit(SOCK_ASYNC_NOSPACE,
- &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
/* Race breaker. If space is freed after
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 675cf94e04f8..eebf5ac8ce18 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1747,9 +1747,9 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
}
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target));
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
finish_wait(sk_sleep(sk), &wait);
}
@@ -2004,10 +2004,10 @@ static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
}
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
sk_wait_event(sk, &timeo,
!dn_queue_too_long(scp, queue, flags));
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
finish_wait(sk_sleep(sk), &wait);
continue;
}
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 4677b6fa6dda..ecc28cff08ab 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -67,7 +67,7 @@
* Returns the size of the result on success, -ve error code otherwise.
*/
int dns_query(const char *type, const char *name, size_t namelen,
- const char *options, char **_result, time_t *_expiry)
+ const char *options, char **_result, time64_t *_expiry)
{
struct key *rkey;
const struct user_key_payload *upayload;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 35a9788bb3ae..c7d1adca30d8 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -312,7 +312,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, u8 type)
return;
out:
- WARN_ON_ONCE("HSR: Could not send supervision frame\n");
+ WARN_ONCE(1, "HSR: Could not send supervision frame\n");
kfree_skb(skb);
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6baf36e11808..05e4cba14162 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2126,7 +2126,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
ASSERT_RTNL();
in_dev = ip_mc_find_dev(net, imr);
- if (!in_dev) {
+ if (!imr->imr_ifindex && !imr->imr_address.s_addr && !in_dev) {
ret = -ENODEV;
goto out;
}
@@ -2147,7 +2147,8 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
*imlp = iml->next_rcu;
- ip_mc_dec_group(in_dev, group);
+ if (in_dev)
+ ip_mc_dec_group(in_dev, group);
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 92dd4b74d513..c3a38353f5dc 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -134,7 +134,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
-static void mroute_clean_tables(struct mr_table *mrt);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -350,7 +350,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
static void ipmr_free_table(struct mr_table *mrt)
{
del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, true);
kfree(mrt);
}
@@ -441,10 +441,6 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
return dev;
failure:
- /* allow the register to be completed before unregistering. */
- rtnl_unlock();
- rtnl_lock();
-
unregister_netdevice(dev);
return NULL;
}
@@ -540,10 +536,6 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
return dev;
failure:
- /* allow the register to be completed before unregistering. */
- rtnl_unlock();
- rtnl_lock();
-
unregister_netdevice(dev);
return NULL;
}
@@ -1208,7 +1200,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr_table *mrt)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
int i;
LIST_HEAD(list);
@@ -1217,8 +1209,9 @@ static void mroute_clean_tables(struct mr_table *mrt)
/* Shut down all active vif entries */
for (i = 0; i < mrt->maxvif; i++) {
- if (!(mrt->vif_table[i].flags & VIFF_STATIC))
- vif_delete(mrt, i, 0, &list);
+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
+ continue;
+ vif_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
@@ -1226,7 +1219,7 @@ static void mroute_clean_tables(struct mr_table *mrt)
for (i = 0; i < MFC_LINES; i++) {
list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
- if (c->mfc_flags & MFC_STATIC)
+ if (!all && (c->mfc_flags & MFC_STATIC))
continue;
list_del_rcu(&c->list);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
@@ -1261,7 +1254,7 @@ static void mrtsock_destruct(struct sock *sk)
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
RCU_INIT_POINTER(mrt->mroute_sk, NULL);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, false);
}
}
rtnl_unlock();
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8c0d0bdc2a7c..63e5be0abd86 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -406,10 +406,12 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
ip_select_ident(net, skb, NULL);
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+ skb->transport_header += iphlen;
+ if (iph->protocol == IPPROTO_ICMP &&
+ length >= iphlen + sizeof(struct icmphdr))
+ icmp_out_count(net, ((struct icmphdr *)
+ skb_transport_header(skb))->type);
}
- if (iph->protocol == IPPROTO_ICMP)
- icmp_out_count(net, ((struct icmphdr *)
- skb_transport_header(skb))->type);
err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, rt->dst.dev,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c1728771cf89..c82cca18c90f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -517,8 +517,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (sk_stream_is_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
- set_bit(SOCK_ASYNC_NOSPACE,
- &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
/* Race breaker. If space is freed after
@@ -906,7 +905,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
goto out_err;
}
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
mss_now = tcp_send_mss(sk, &size_goal, flags);
copied = 0;
@@ -1134,7 +1133,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
}
/* This should be in poll */
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
mss_now = tcp_send_mss(sk, &size_goal, flags);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fdd88c3803a6..2d656eef7f8e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4481,19 +4481,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
{
struct sk_buff *skb;
+ int err = -ENOMEM;
+ int data_len = 0;
bool fragstolen;
if (size == 0)
return 0;
- skb = alloc_skb(size, sk->sk_allocation);
+ if (size > PAGE_SIZE) {
+ int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
+
+ data_len = npages << PAGE_SHIFT;
+ size = data_len + (size & ~PAGE_MASK);
+ }
+ skb = alloc_skb_with_frags(size - data_len, data_len,
+ PAGE_ALLOC_COSTLY_ORDER,
+ &err, sk->sk_allocation);
if (!skb)
goto err;
+ skb_put(skb, size - data_len);
+ skb->data_len = data_len;
+ skb->len = size;
+
if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
goto err_free;
- if (memcpy_from_msg(skb_put(skb, size), msg, size))
+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+ if (err)
goto err_free;
TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
@@ -4509,7 +4524,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
err_free:
kfree_skb(skb);
err:
- return -ENOMEM;
+ return err;
+
}
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
@@ -5667,6 +5683,7 @@ discard:
}
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+ tp->copied_seq = tp->rcv_nxt;
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
/* RFC1323: The window in SYN & SYN/ACK segments is
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ba09016d1bfd..db003438aaf5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -921,7 +921,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
}
md5sig = rcu_dereference_protected(tp->md5sig_info,
- sock_owned_by_user(sk));
+ sock_owned_by_user(sk) ||
+ lockdep_is_held(&sk->sk_lock.slock));
if (!md5sig) {
md5sig = kmalloc(sizeof(*md5sig), gfp);
if (!md5sig)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c9c716a483e4..193ba1fa8a9a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -168,7 +168,7 @@ static int tcp_write_timeout(struct sock *sk)
dst_negative_advice(sk);
if (tp->syn_fastopen || tp->syn_data)
tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
- if (tp->syn_data)
+ if (tp->syn_data && icsk->icsk_retransmits == 1)
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPFASTOPENACTIVEFAIL);
}
@@ -176,6 +176,18 @@ static int tcp_write_timeout(struct sock *sk)
syn_set = true;
} else {
if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
+ /* Some middle-boxes may black-hole Fast Open _after_
+ * the handshake. Therefore we conservatively disable
+ * Fast Open on this path on recurring timeouts with
+ * few or zero bytes acked after Fast Open.
+ */
+ if (tp->syn_data_acked &&
+ tp->bytes_acked <= tp->rx_opt.mss_clamp) {
+ tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
+ if (icsk->icsk_retransmits == sysctl_tcp_retries1)
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENACTIVEFAIL);
+ }
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 24ec14f9825c..0c7b0e61b917 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -100,7 +100,6 @@
#include <linux/slab.h>
#include <net/tcp_states.h>
#include <linux/skbuff.h>
-#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d84742f003a9..61f26851655c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3642,7 +3642,7 @@ static void addrconf_dad_work(struct work_struct *w)
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
- ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, NULL);
+ ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any);
out:
in6_ifa_put(ifp);
rtnl_unlock();
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 44bb66bde0e2..8ec0df75f1c4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -428,9 +428,11 @@ void inet6_destroy_sock(struct sock *sk)
/* Free tx options */
- opt = xchg(&np->opt, NULL);
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
}
EXPORT_SYMBOL_GPL(inet6_destroy_sock);
@@ -659,7 +661,10 @@ int inet6_sk_rebuild_header(struct sock *sk)
fl6.fl6_sport = inet->inet_sport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
+ &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -668,7 +673,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
return PTR_ERR(dst);
}
- __ip6_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
}
return 0;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index d70b0238f468..517c55b01ba8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -167,8 +167,10 @@ ipv4_connected:
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- opt = flowlabel ? flowlabel->opt : np->opt;
+ rcu_read_lock();
+ opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
final_p = fl6_update_dst(&fl6, opt, &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
err = 0;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index ce203b0402be..ea7c4d64a00a 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
*((char **)&opt2->dst1opt) += dif;
if (opt2->srcrt)
*((char **)&opt2->srcrt) += dif;
+ atomic_set(&opt2->refcnt, 1);
}
return opt2;
}
@@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
return ERR_PTR(-ENOBUFS);
memset(opt2, 0, tot_len);
-
+ atomic_set(&opt2->refcnt, 1);
opt2->tot_len = tot_len;
p = (char *)(opt2 + 1);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 36c5a98b0472..0a37ddc7af51 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -834,11 +834,6 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
}
-/*
- * Special lock-class for __icmpv6_sk:
- */
-static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
-
static int __net_init icmpv6_sk_init(struct net *net)
{
struct sock *sk;
@@ -860,15 +855,6 @@ static int __net_init icmpv6_sk_init(struct net *net)
net->ipv6.icmp_sk[i] = sk;
- /*
- * Split off their lock-class, because sk->sk_dst_lock
- * gets used from softirqs, which is safe for
- * __icmpv6_sk (because those never get directly used
- * via userspace syscalls), but unsafe for normal sockets.
- */
- lockdep_set_class(&sk->sk_dst_lock,
- &icmpv6_socket_sk_dst_lock_key);
-
/* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead.
*/
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 5d1c7cee2cb2..a7ca2cde2ecb 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -78,7 +78,9 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
memset(fl6, 0, sizeof(*fl6));
fl6->flowi6_proto = proto;
fl6->daddr = ireq->ir_v6_rmt_addr;
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
fl6->saddr = ireq->ir_v6_loc_addr;
fl6->flowi6_oif = ireq->ir_iif;
fl6->flowi6_mark = ireq->ir_mark;
@@ -109,14 +111,6 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
static inline
-void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr)
-{
- __ip6_dst_store(sk, dst, daddr, saddr);
-}
-
-static inline
struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
{
return __sk_dst_check(sk, cookie);
@@ -142,14 +136,16 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
fl6->fl6_dport = inet->inet_dport;
security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
dst = __inet6_csk_dst_check(sk, np->dst_cookie);
if (!dst) {
dst = ip6_dst_lookup_flow(sk, fl6, final_p);
if (!IS_ERR(dst))
- __inet6_csk_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
}
return dst;
}
@@ -175,7 +171,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
/* Restore final destination back after routing done */
fl6.daddr = sk->sk_v6_daddr;
- res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
rcu_read_unlock();
return res;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index eabffbb89795..137fca42aaa6 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -177,7 +177,7 @@ void ip6_tnl_dst_reset(struct ip6_tnl *t)
int i;
for_each_possible_cpu(i)
- ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL);
+ ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index ad19136086dd..a10e77103c88 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -118,7 +118,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt);
+static void mroute_clean_tables(struct mr6_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
@@ -334,7 +334,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
static void ip6mr_free_table(struct mr6_table *mrt)
{
del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, true);
kfree(mrt);
}
@@ -765,10 +765,6 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
return dev;
failure:
- /* allow the register to be completed before unregistering. */
- rtnl_unlock();
- rtnl_lock();
-
unregister_netdevice(dev);
return NULL;
}
@@ -1542,7 +1538,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt)
+static void mroute_clean_tables(struct mr6_table *mrt, bool all)
{
int i;
LIST_HEAD(list);
@@ -1552,8 +1548,9 @@ static void mroute_clean_tables(struct mr6_table *mrt)
* Shut down all active vif entries
*/
for (i = 0; i < mrt->maxvif; i++) {
- if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
- mif6_delete(mrt, i, &list);
+ if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ continue;
+ mif6_delete(mrt, i, &list);
}
unregister_netdevice_many(&list);
@@ -1562,7 +1559,7 @@ static void mroute_clean_tables(struct mr6_table *mrt)
*/
for (i = 0; i < MFC6_LINES; i++) {
list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (c->mfc_flags & MFC_STATIC)
+ if (!all && (c->mfc_flags & MFC_STATIC))
continue;
write_lock_bh(&mrt_lock);
list_del(&c->list);
@@ -1625,7 +1622,7 @@ int ip6mr_sk_done(struct sock *sk)
net->ipv6.devconf_all);
write_unlock_bh(&mrt_lock);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, false);
err = 0;
break;
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 63e6956917c9..4449ad1f8114 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -111,7 +111,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
}
- opt = xchg(&inet6_sk(sk)->opt, opt);
+ opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
+ opt);
sk_dst_reset(sk);
return opt;
@@ -231,9 +232,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
sk->sk_socket->ops = &inet_dgram_ops;
sk->sk_family = PF_INET;
}
- opt = xchg(&np->opt, NULL);
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt,
+ NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
pktopt = xchg(&np->pktoptions, NULL);
kfree_skb(pktopt);
@@ -403,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
break;
- opt = ipv6_renew_options(sk, np->opt, optname,
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ opt = ipv6_renew_options(sk, opt, optname,
(struct ipv6_opt_hdr __user *)optval,
optlen);
if (IS_ERR(opt)) {
@@ -432,8 +437,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = 0;
opt = ipv6_update_options(sk, opt);
sticky_done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}
@@ -486,6 +493,7 @@ sticky_done:
break;
memset(opt, 0, sizeof(*opt));
+ atomic_set(&opt->refcnt, 1);
opt->tot_len = sizeof(*opt) + optlen;
retv = -EFAULT;
if (copy_from_user(opt+1, optval, optlen))
@@ -502,8 +510,10 @@ update:
retv = 0;
opt = ipv6_update_options(sk, opt);
done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}
case IPV6_UNICAST_HOPS:
@@ -1110,10 +1120,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
case IPV6_RTHDR:
case IPV6_DSTOPTS:
{
+ struct ipv6_txoptions *opt;
lock_sock(sk);
- len = ipv6_getsockopt_sticky(sk, np->opt,
- optname, optval, len);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
release_sock(sk);
/* check if ipv6_getsockopt_sticky() returns err code */
if (len < 0)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 124338a39e29..5ee56d0a8699 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1651,7 +1651,6 @@ out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
} else {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
}
@@ -2015,7 +2014,6 @@ out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, type);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
} else
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3e0f855e1bea..d6161e1c48c8 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -556,8 +556,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
}
void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
- const struct in6_addr *daddr, const struct in6_addr *saddr,
- struct sk_buff *oskb)
+ const struct in6_addr *daddr, const struct in6_addr *saddr)
{
struct sk_buff *skb;
struct in6_addr addr_buf;
@@ -593,9 +592,6 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
dev->dev_addr);
- if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb)
- skb_dst_copy(skb, oskb);
-
ndisc_send_skb(skb, daddr, saddr);
}
@@ -682,12 +678,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
"%s: trying to ucast probe in NUD_INVALID: %pI6\n",
__func__, target);
}
- ndisc_send_ns(dev, target, target, saddr, skb);
+ ndisc_send_ns(dev, target, target, saddr);
} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
neigh_app_ns(neigh);
} else {
addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(dev, target, &mcaddr, saddr, skb);
+ ndisc_send_ns(dev, target, &mcaddr, saddr);
}
}
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d5efeb87350e..bab4441ed4e4 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -190,7 +190,7 @@ static void nf_ct_frag6_expire(unsigned long data)
/* Creation primitives. */
static inline struct frag_queue *fq_find(struct net *net, __be32 id,
u32 user, struct in6_addr *src,
- struct in6_addr *dst, u8 ecn)
+ struct in6_addr *dst, int iif, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -200,6 +200,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
arg.user = user;
arg.src = src;
arg.dst = dst;
+ arg.iif = iif;
arg.ecn = ecn;
local_bh_disable();
@@ -601,7 +602,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use
fhdr = (struct frag_hdr *)skb_transport_header(clone);
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
- ip6_frag_ecn(hdr));
+ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
goto ret_orig;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index dc65ec198f7c..99140986e887 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -733,6 +733,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ipv6_txoptions opt_space;
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
@@ -839,8 +840,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!(opt->opt_nflen|opt->opt_flen))
opt = NULL;
}
- if (!opt)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -906,6 +909,7 @@ done:
dst_release(dst);
out:
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
return err < 0 ? err : len;
do_confirm:
dst_confirm(dst);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 44e21a03cfc3..45f5ae51de65 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
return fq->id == arg->id &&
fq->user == arg->user &&
ipv6_addr_equal(&fq->saddr, arg->src) &&
- ipv6_addr_equal(&fq->daddr, arg->dst);
+ ipv6_addr_equal(&fq->daddr, arg->dst) &&
+ (arg->iif == fq->iif ||
+ !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)));
}
EXPORT_SYMBOL(ip6_frag_match);
@@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data)
static struct frag_queue *
fq_find(struct net *net, __be32 id, const struct in6_addr *src,
- const struct in6_addr *dst, u8 ecn)
+ const struct in6_addr *dst, int iif, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
arg.user = IP6_DEFRAG_LOCAL_DELIVER;
arg.src = src;
arg.dst = dst;
+ arg.iif = iif;
arg.ecn = ecn;
hash = inet6_hash_frag(id, src, dst);
@@ -551,7 +555,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
}
fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
- ip6_frag_ecn(hdr));
+ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq) {
int ret;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6f01fe122abd..826e6aa44f8d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -523,7 +523,7 @@ static void rt6_probe_deferred(struct work_struct *w)
container_of(w, struct __rt6_probe_work, work);
addrconf_addr_solict_mult(&work->target, &mcaddr);
- ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
+ ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
dev_put(work->dev);
kfree(work);
}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb8f2fa1c7fb..eaf7ac496d50 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -222,7 +222,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
fl6.daddr = ireq->ir_v6_rmt_addr;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
fl6.saddr = ireq->ir_v6_loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = ireq->ir_mark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c5429a636f1a..e7aab561b7b4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -120,6 +120,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
+ struct ipv6_txoptions *opt;
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
@@ -235,7 +236,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ final_p = fl6_update_dst(&fl6, opt, &final);
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
@@ -255,7 +257,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
sk->sk_gso_type = SKB_GSO_TCPV6;
- __ip6_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp &&
@@ -263,9 +265,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0;
- if (np->opt)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
+ if (opt)
+ icsk->icsk_ext_hdr_len = opt->opt_flen +
+ opt->opt_nflen;
tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
@@ -461,7 +463,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
- err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+ err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
+ np->tclass);
err = net_xmit_eval(err);
}
@@ -972,6 +975,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
struct inet_request_sock *ireq;
struct ipv6_pinfo *newnp;
const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
struct tcp6_sock *newtcp6sk;
struct inet_sock *newinet;
struct tcp_sock *newtp;
@@ -1056,7 +1060,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
*/
newsk->sk_gso_type = SKB_GSO_TCPV6;
- __ip6_dst_store(newsk, dst, NULL, NULL);
+ ip6_dst_store(newsk, dst, NULL, NULL);
inet6_sk_rx_dst_set(newsk, skb);
newtcp6sk = (struct tcp6_sock *)newsk;
@@ -1098,13 +1102,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
but we make one more one thing there: reattach optmem
to newsk.
*/
- if (np->opt)
- newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ }
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt)
- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
- newnp->opt->opt_flen);
+ if (opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+ opt->opt_flen;
tcp_ca_openreq_child(newsk, dst);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 01bcb49619ee..9da3287a3923 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1110,6 +1110,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct ipv6_txoptions *opt = NULL;
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct flowi6 fl6;
struct dst_entry *dst;
@@ -1263,8 +1264,10 @@ do_udp_sendmsg:
opt = NULL;
connected = 0;
}
- if (!opt)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -1373,6 +1376,7 @@ release_dst:
out:
dst_release(dst);
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
if (!err)
return len;
/*
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index fcb2752419c6..435608c4306d 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1483,7 +1483,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
if (sock_writeable(sk) && iucv_below_msglim(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index aca38d8aed8e..a2c8747d2936 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -486,6 +486,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct dst_entry *dst = NULL;
@@ -575,8 +576,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
opt = NULL;
}
- if (opt == NULL)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -631,6 +634,7 @@ done:
dst_release(dst);
out:
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
return err < 0 ? err : len;
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index b7de0da46acd..ecf0a0196f18 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -572,7 +572,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED)
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
pr_debug("mask 0x%x\n", mask);
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index a7a80a6b77b0..653d073bae45 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -58,7 +58,7 @@ void ovs_dp_notify_wq(struct work_struct *work)
struct hlist_node *n;
hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
- if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
+ if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL)
continue;
if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH))
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index efb736bb6855..e41cd12d9b2d 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -117,7 +117,6 @@ static struct vport_ops ovs_geneve_vport_ops = {
.destroy = ovs_netdev_tunnel_destroy,
.get_options = geneve_get_options,
.send = dev_queue_xmit,
- .owner = THIS_MODULE,
};
static int __init ovs_geneve_tnl_init(void)
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index c3257d78d3d2..7f8897f33a67 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -89,7 +89,6 @@ static struct vport_ops ovs_gre_vport_ops = {
.create = gre_create,
.send = dev_queue_xmit,
.destroy = ovs_netdev_tunnel_destroy,
- .owner = THIS_MODULE,
};
static int __init ovs_gre_tnl_init(void)
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index b327368a3848..6b0190b987ec 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -180,9 +180,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport)
if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
ovs_netdev_detach_dev(vport);
- /* Early release so we can unregister the device */
+ /* We can be invoked by both explicit vport deletion and
+ * underlying netdev deregistration; delete the link only
+ * if it's not already shutting down.
+ */
+ if (vport->dev->reg_state == NETREG_REGISTERED)
+ rtnl_delete_link(vport->dev);
dev_put(vport->dev);
- rtnl_delete_link(vport->dev);
vport->dev = NULL;
rtnl_unlock();
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 0ac0fd004d7e..31cbc8c5c7db 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -71,7 +71,7 @@ static struct hlist_head *hash_bucket(const struct net *net, const char *name)
return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
}
-int ovs_vport_ops_register(struct vport_ops *ops)
+int __ovs_vport_ops_register(struct vport_ops *ops)
{
int err = -EEXIST;
struct vport_ops *o;
@@ -87,7 +87,7 @@ errout:
ovs_unlock();
return err;
}
-EXPORT_SYMBOL_GPL(ovs_vport_ops_register);
+EXPORT_SYMBOL_GPL(__ovs_vport_ops_register);
void ovs_vport_ops_unregister(struct vport_ops *ops)
{
@@ -256,8 +256,8 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
*
* @vport: vport to delete.
*
- * Detaches @vport from its datapath and destroys it. It is possible to fail
- * for reasons such as lack of memory. ovs_mutex must be held.
+ * Detaches @vport from its datapath and destroys it. ovs_mutex must
+ * be held.
*/
void ovs_vport_del(struct vport *vport)
{
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index bdfd82a7c064..8ea3a96980ac 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -196,7 +196,13 @@ static inline const char *ovs_vport_name(struct vport *vport)
return vport->dev->name;
}
-int ovs_vport_ops_register(struct vport_ops *ops);
+int __ovs_vport_ops_register(struct vport_ops *ops);
+#define ovs_vport_ops_register(ops) \
+ ({ \
+ (ops)->owner = THIS_MODULE; \
+ __ovs_vport_ops_register(ops); \
+ })
+
void ovs_vport_ops_unregister(struct vport_ops *ops);
static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 242bce1cf0f3..992396aa635c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2329,8 +2329,8 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
static bool ll_header_truncated(const struct net_device *dev, int len)
{
/* net device doesn't like empty head */
- if (unlikely(len <= dev->hard_header_len)) {
- net_warn_ratelimited("%s: packet size is too short (%d <= %d)\n",
+ if (unlikely(len < dev->hard_header_len)) {
+ net_warn_ratelimited("%s: packet size is too short (%d < %d)\n",
current->comm, len, dev->hard_header_len);
return true;
}
@@ -4109,7 +4109,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
err = -EINVAL;
if (unlikely((int)req->tp_block_size <= 0))
goto out;
- if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
+ if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
goto out;
if (po->tp_version >= TPACKET_V3 &&
(int)(req->tp_block_size -
@@ -4121,8 +4121,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
goto out;
- rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
- if (unlikely(rb->frames_per_block <= 0))
+ rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
+ if (unlikely(rb->frames_per_block == 0))
goto out;
if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
req->tp_frame_nr))
diff --git a/net/rds/connection.c b/net/rds/connection.c
index d4564036a339..e3b118cae81d 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -186,12 +186,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
}
}
- if (trans == NULL) {
- kmem_cache_free(rds_conn_slab, conn);
- conn = ERR_PTR(-ENODEV);
- goto out;
- }
-
conn->c_trans = trans;
ret = trans->conn_alloc(conn, gfp);
diff --git a/net/rds/send.c b/net/rds/send.c
index 827155c2ead1..c9cdb358ea88 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1013,11 +1013,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
release_sock(sk);
}
- /* racing with another thread binding seems ok here */
+ lock_sock(sk);
if (daddr == 0 || rs->rs_bound_addr == 0) {
+ release_sock(sk);
ret = -ENOTCONN; /* XXX not a great errno */
goto out;
}
+ release_sock(sk);
if (payload_len > rds_sk_sndbuf(rs)) {
ret = -EMSGSIZE;
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index e0547f521f20..adc555e0323d 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -723,8 +723,10 @@ process_further:
if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
- hard > tx)
+ hard > tx) {
+ call->acks_hard = tx;
goto all_acked;
+ }
smp_rmb();
rxrpc_rotate_tx_window(call, hard - 1);
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index a40d3afe93b7..14c4e12c47b0 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -531,7 +531,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
/* this should be in poll */
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
return -EPIPE;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f43c8f33f09e..7ec667dd4ce1 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -253,7 +253,8 @@ int qdisc_set_default(const char *name)
}
/* We know handle. Find qdisc among all qdisc's attached to device
- (root qdisc, all its children, children of children etc.)
+ * (root qdisc, all its children, children of children etc.)
+ * Note: caller either uses rtnl or rcu_read_lock()
*/
static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
@@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
root->handle == handle)
return root;
- list_for_each_entry(q, &root->list, list) {
+ list_for_each_entry_rcu(q, &root->list, list) {
if (q->handle == handle)
return q;
}
@@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q)
struct Qdisc *root = qdisc_dev(q)->qdisc;
WARN_ON_ONCE(root == &noop_qdisc);
- list_add_tail(&q->list, &root->list);
+ ASSERT_RTNL();
+ list_add_tail_rcu(&q->list, &root->list);
}
}
EXPORT_SYMBOL(qdisc_list_add);
void qdisc_list_del(struct Qdisc *q)
{
- if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
- list_del(&q->list);
+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+ ASSERT_RTNL();
+ list_del_rcu(&q->list);
+ }
}
EXPORT_SYMBOL(qdisc_list_del);
@@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
if (n == 0)
return;
drops = max_t(int, n, 0);
+ rcu_read_lock();
while ((parentid = sch->parent)) {
if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
- return;
+ break;
+ if (sch->flags & TCQ_F_NOPARENT)
+ break;
+ /* TODO: perform the search on a per txq basis */
sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
- WARN_ON(parentid != TC_H_ROOT);
- return;
+ WARN_ON_ONCE(parentid != TC_H_ROOT);
+ break;
}
cops = sch->ops->cl_ops;
if (cops->qlen_notify) {
@@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
sch->q.qlen -= n;
__qdisc_qstats_drop(sch, drops);
}
+ rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
@@ -941,7 +950,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
}
lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
if (!netif_is_multiqueue(dev))
- sch->flags |= TCQ_F_ONETXQUEUE;
+ sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
sch->handle = handle;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cb5d4ad32946..e82a1ad80aa5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -737,7 +737,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
return;
}
if (!netif_is_multiqueue(dev))
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
dev_queue->qdisc_sleeping = qdisc;
}
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index f3cbaecd283a..3e82f047caaf 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
if (qdisc == NULL)
goto err;
priv->qdiscs[ntx] = qdisc;
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
sch->flags |= TCQ_F_MQROOT;
@@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
*old = dev_graft_qdisc(dev_queue, new);
if (new)
- new->flags |= TCQ_F_ONETXQUEUE;
+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
if (dev->flags & IFF_UP)
dev_activate(dev);
return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 3811a745452c..ad70ecf57ce7 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
goto err;
}
priv->qdiscs[i] = qdisc;
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
/* If the mqprio options indicate that hardware should own
@@ -209,7 +209,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
*old = dev_graft_qdisc(dev_queue, new);
if (new)
- new->flags |= TCQ_F_ONETXQUEUE;
+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
if (dev->flags & IFF_UP)
dev_activate(dev);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e917d27328ea..acb45b8c2a9d 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -209,6 +209,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
struct flowi6 *fl6 = &transport->fl.u.ip6;
+ int res;
pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb,
skb->len, &fl6->saddr, &fl6->daddr);
@@ -220,7 +221,10 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
- return ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+ rcu_read_lock();
+ res = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass);
+ rcu_read_unlock();
+ return res;
}
/* Returns the dst cache entry for the given source and destination ip
@@ -262,7 +266,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
pr_debug("src=%pI6 - ", &fl6->saddr);
}
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
+
dst = ip6_dst_lookup_flow(sk, fl6, final_p);
if (!asoc || saddr)
goto out;
@@ -321,7 +328,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
if (baddr) {
fl6->saddr = baddr->v6.sin6_addr;
fl6->fl6_sport = baddr->v6.sin6_port;
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
dst = ip6_dst_lookup_flow(sk, fl6, final_p);
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 897c01c029ca..03c8256063ec 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -972,7 +972,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
return -EFAULT;
/* Alloc space for the address array in kernel memory. */
- kaddrs = kmalloc(addrs_size, GFP_KERNEL);
+ kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN);
if (unlikely(!kaddrs))
return -ENOMEM;
@@ -4928,7 +4928,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
to = optval + offsetof(struct sctp_getaddrs, addrs);
space_left = len - offsetof(struct sctp_getaddrs, addrs);
- addrs = kmalloc(space_left, GFP_KERNEL);
+ addrs = kmalloc(space_left, GFP_USER | __GFP_NOWARN);
if (!addrs)
return -ENOMEM;
@@ -6458,7 +6458,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (sctp_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else {
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
/*
* Since the socket is not locked, the buffer
* might be made available after the writeable check and
@@ -6801,26 +6801,30 @@ no_packet:
static void __sctp_write_space(struct sctp_association *asoc)
{
struct sock *sk = asoc->base.sk;
- struct socket *sock = sk->sk_socket;
- if ((sctp_wspace(asoc) > 0) && sock) {
- if (waitqueue_active(&asoc->wait))
- wake_up_interruptible(&asoc->wait);
+ if (sctp_wspace(asoc) <= 0)
+ return;
+
+ if (waitqueue_active(&asoc->wait))
+ wake_up_interruptible(&asoc->wait);
- if (sctp_writeable(sk)) {
- wait_queue_head_t *wq = sk_sleep(sk);
+ if (sctp_writeable(sk)) {
+ struct socket_wq *wq;
- if (wq && waitqueue_active(wq))
- wake_up_interruptible(wq);
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq) {
+ if (waitqueue_active(&wq->wait))
+ wake_up_interruptible(&wq->wait);
/* Note that we try to include the Async I/O support
* here by modeling from the current TCP/UDP code.
* We have not tested with it yet.
*/
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
- sock_wake_async(sock,
- SOCK_WAKE_SPACE, POLL_OUT);
+ sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
}
+ rcu_read_unlock();
}
}
@@ -7375,6 +7379,13 @@ struct proto sctp_prot = {
#if IS_ENABLED(CONFIG_IPV6)
+#include <net/transp_v6.h>
+static void sctp_v6_destroy_sock(struct sock *sk)
+{
+ sctp_destroy_sock(sk);
+ inet6_destroy_sock(sk);
+}
+
struct proto sctpv6_prot = {
.name = "SCTPv6",
.owner = THIS_MODULE,
@@ -7384,7 +7395,7 @@ struct proto sctpv6_prot = {
.accept = sctp_accept,
.ioctl = sctp_ioctl,
.init = sctp_init_sock,
- .destroy = sctp_destroy_sock,
+ .destroy = sctp_v6_destroy_sock,
.shutdown = sctp_shutdown,
.setsockopt = sctp_setsockopt,
.getsockopt = sctp_getsockopt,
diff --git a/net/socket.c b/net/socket.c
index dd2c247c99e3..456fadb3d819 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1056,27 +1056,20 @@ static int sock_fasync(int fd, struct file *filp, int on)
return 0;
}
-/* This function may be called only under socket lock or callback_lock or rcu_lock */
+/* This function may be called only under rcu_lock */
-int sock_wake_async(struct socket *sock, int how, int band)
+int sock_wake_async(struct socket_wq *wq, int how, int band)
{
- struct socket_wq *wq;
-
- if (!sock)
- return -1;
- rcu_read_lock();
- wq = rcu_dereference(sock->wq);
- if (!wq || !wq->fasync_list) {
- rcu_read_unlock();
+ if (!wq || !wq->fasync_list)
return -1;
- }
+
switch (how) {
case SOCK_WAKE_WAITD:
- if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
+ if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
break;
goto call_kill;
case SOCK_WAKE_SPACE:
- if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
+ if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
break;
/* fall through */
case SOCK_WAKE_IO:
@@ -1086,7 +1079,7 @@ call_kill:
case SOCK_WAKE_URG:
kill_fasync(&wq->fasync_list, SIGURG, band);
}
- rcu_read_unlock();
+
return 0;
}
EXPORT_SYMBOL(sock_wake_async);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index dace13d7638e..799e65b944b9 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1411,17 +1411,16 @@ gss_key_timeout(struct rpc_cred *rc)
{
struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
struct gss_cl_ctx *ctx;
- unsigned long now = jiffies;
- unsigned long expire;
+ unsigned long timeout = jiffies + (gss_key_expire_timeo * HZ);
+ int ret = 0;
rcu_read_lock();
ctx = rcu_dereference(gss_cred->gc_ctx);
- if (ctx)
- expire = ctx->gc_expiry - (gss_key_expire_timeo * HZ);
+ if (!ctx || time_after(timeout, ctx->gc_expiry))
+ ret = -EACCES;
rcu_read_unlock();
- if (!ctx || time_after(now, expire))
- return -EACCES;
- return 0;
+
+ return ret;
}
static int
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 4a2340a54401..5e4f815c2b34 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -41,13 +41,16 @@
static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
static void cache_revisit_request(struct cache_head *item);
-static void cache_init(struct cache_head *h)
+static void cache_init(struct cache_head *h, struct cache_detail *detail)
{
time_t now = seconds_since_boot();
INIT_HLIST_NODE(&h->cache_list);
h->flags = 0;
kref_init(&h->ref);
h->expiry_time = now + CACHE_NEW_EXPIRY;
+ if (now <= detail->flush_time)
+ /* ensure it isn't already expired */
+ now = detail->flush_time + 1;
h->last_refresh = now;
}
@@ -81,7 +84,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
* we might get lose if we need to
* cache_put it soon.
*/
- cache_init(new);
+ cache_init(new, detail);
detail->init(new, key);
write_lock(&detail->hash_lock);
@@ -116,10 +119,15 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
-static void cache_fresh_locked(struct cache_head *head, time_t expiry)
+static void cache_fresh_locked(struct cache_head *head, time_t expiry,
+ struct cache_detail *detail)
{
+ time_t now = seconds_since_boot();
+ if (now <= detail->flush_time)
+ /* ensure it isn't immediately treated as expired */
+ now = detail->flush_time + 1;
head->expiry_time = expiry;
- head->last_refresh = seconds_since_boot();
+ head->last_refresh = now;
smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */
set_bit(CACHE_VALID, &head->flags);
}
@@ -149,7 +157,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
set_bit(CACHE_NEGATIVE, &old->flags);
else
detail->update(old, new);
- cache_fresh_locked(old, new->expiry_time);
+ cache_fresh_locked(old, new->expiry_time, detail);
write_unlock(&detail->hash_lock);
cache_fresh_unlocked(old, detail);
return old;
@@ -162,7 +170,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
cache_put(old, detail);
return NULL;
}
- cache_init(tmp);
+ cache_init(tmp, detail);
detail->init(tmp, old);
write_lock(&detail->hash_lock);
@@ -173,8 +181,8 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]);
detail->entries++;
cache_get(tmp);
- cache_fresh_locked(tmp, new->expiry_time);
- cache_fresh_locked(old, 0);
+ cache_fresh_locked(tmp, new->expiry_time, detail);
+ cache_fresh_locked(old, 0, detail);
write_unlock(&detail->hash_lock);
cache_fresh_unlocked(tmp, detail);
cache_fresh_unlocked(old, detail);
@@ -219,7 +227,8 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
rv = cache_is_valid(h);
if (rv == -EAGAIN) {
set_bit(CACHE_NEGATIVE, &h->flags);
- cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
+ cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY,
+ detail);
rv = -ENOENT;
}
write_unlock(&detail->hash_lock);
@@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush);
void cache_purge(struct cache_detail *detail)
{
- detail->flush_time = LONG_MAX;
+ time_t now = seconds_since_boot();
+ if (detail->flush_time >= now)
+ now = detail->flush_time + 1;
+ /* 'now' is the maximum value any 'last_refresh' can have */
+ detail->flush_time = now;
detail->nextcheck = seconds_since_boot();
cache_flush();
- detail->flush_time = 1;
}
EXPORT_SYMBOL_GPL(cache_purge);
@@ -1436,6 +1448,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
{
char tbuf[20];
char *bp, *ep;
+ time_t then, now;
if (*ppos || count > sizeof(tbuf)-1)
return -EINVAL;
@@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
return -EINVAL;
bp = tbuf;
- cd->flush_time = get_expiry(&bp);
- cd->nextcheck = seconds_since_boot();
+ then = get_expiry(&bp);
+ now = seconds_since_boot();
+ cd->nextcheck = now;
+ /* Can only set flush_time to 1 second beyond "now", or
+ * possibly 1 second beyond flush_time. This is because
+ * flush_time never goes backwards so it mustn't get too far
+ * ahead of time.
+ */
+ if (then >= now) {
+ /* Want to flush everything, so behave like cache_purge() */
+ if (cd->flush_time >= now)
+ now = cd->flush_time + 1;
+ then = now;
+ }
+
+ cd->flush_time = then;
cache_flush();
*ppos += count;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0c8120229a03..1413cdcc131c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -181,7 +181,7 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
struct page **ppage = xdr->pages;
size_t base = xdr->page_base;
unsigned int pglen = xdr->page_len;
- unsigned int flags = MSG_MORE;
+ unsigned int flags = MSG_MORE | MSG_SENDPAGE_NOTLAST;
int slen;
int len = 0;
@@ -399,6 +399,31 @@ static int svc_sock_secure_port(struct svc_rqst *rqstp)
return svc_port_is_privileged(svc_addr(rqstp));
}
+static bool sunrpc_waitqueue_active(wait_queue_head_t *wq)
+{
+ if (!wq)
+ return false;
+ /*
+ * There should normally be a memory barrier here--see
+ * wq_has_sleeper().
+ *
+ * It appears that isn't currently necessary, though, basically
+ * because callers all appear to have sufficient memory barriers
+ * between the time the relevant change is made and the
+ * time they call these callbacks.
+ *
+ * The nfsd code itself doesn't actually explicitly wait on
+ * these waitqueues, but it may wait on them for example in
+ * sendpage() or sendmsg() calls. (And those may be the only
+ * places, since it uses nonblocking reads.)
+ *
+ * Maybe we should add the memory barriers anyway, but these are
+ * hot paths so we'd need to be convinced there's no significant
+ * penalty.
+ */
+ return waitqueue_active(wq);
+}
+
/*
* INET callback when data has been received on the socket.
*/
@@ -414,7 +439,7 @@ static void svc_udp_data_ready(struct sock *sk)
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
}
- if (wq && waitqueue_active(wq))
+ if (sunrpc_waitqueue_active(wq))
wake_up_interruptible(wq);
}
@@ -432,7 +457,7 @@ static void svc_write_space(struct sock *sk)
svc_xprt_enqueue(&svsk->sk_xprt);
}
- if (wq && waitqueue_active(wq)) {
+ if (sunrpc_waitqueue_active(wq)) {
dprintk("RPC svc_write_space: someone sleeping on %p\n",
svsk);
wake_up_interruptible(wq);
@@ -787,7 +812,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
}
wq = sk_sleep(sk);
- if (wq && waitqueue_active(wq))
+ if (sunrpc_waitqueue_active(wq))
wake_up_interruptible_all(wq);
}
@@ -808,7 +833,7 @@ static void svc_tcp_state_change(struct sock *sk)
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
}
- if (wq && waitqueue_active(wq))
+ if (sunrpc_waitqueue_active(wq))
wake_up_interruptible_all(wq);
}
@@ -823,7 +848,7 @@ static void svc_tcp_data_ready(struct sock *sk)
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
}
- if (wq && waitqueue_active(wq))
+ if (sunrpc_waitqueue_active(wq))
wake_up_interruptible(wq);
}
@@ -1367,7 +1392,6 @@ EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
/*
* Initialize socket for RPC use and create svc_sock struct
- * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
*/
static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
struct socket *sock,
@@ -1594,7 +1618,7 @@ static void svc_sock_detach(struct svc_xprt *xprt)
sk->sk_write_space = svsk->sk_owspace;
wq = sk_sleep(sk);
- if (wq && waitqueue_active(wq))
+ if (sunrpc_waitqueue_active(wq))
wake_up_interruptible(wq);
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1d1a70498910..2ffaf6a79499 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -398,7 +398,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
if (unlikely(!sock))
return -ENOTSOCK;
- clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+ clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags);
if (base != 0) {
addr = NULL;
addrlen = 0;
@@ -442,7 +442,7 @@ static void xs_nospace_callback(struct rpc_task *task)
struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
transport->inet->sk_write_pending--;
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+ clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
}
/**
@@ -467,7 +467,7 @@ static int xs_nospace(struct rpc_task *task)
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
- if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
+ if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) {
/*
* Notify TCP that we're limited by the application
* window size
@@ -478,7 +478,7 @@ static int xs_nospace(struct rpc_task *task)
xprt_wait_for_buffer_space(task, xs_nospace_callback);
}
} else {
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+ clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
ret = -ENOTCONN;
}
@@ -626,7 +626,7 @@ process_status:
case -EPERM:
/* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED. */
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+ clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
}
return status;
@@ -715,7 +715,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
case -EADDRINUSE:
case -ENOBUFS:
case -EPIPE:
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+ clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
}
return status;
@@ -1618,7 +1618,7 @@ static void xs_write_space(struct sock *sk)
if (unlikely(!(xprt = xprt_from_sock(sk))))
return;
- if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
+ if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0)
return;
xprt_write_space(xprt);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 9efbdbde2b08..91aea071ab27 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -191,6 +191,7 @@ void tipc_link_add_bc_peer(struct tipc_link *snd_l,
snd_l->ackers++;
rcv_l->acked = snd_l->snd_nxt - 1;
+ snd_l->state = LINK_ESTABLISHED;
tipc_link_build_bc_init_msg(uc_l, xmitq);
}
@@ -206,6 +207,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
rcv_l->state = LINK_RESET;
if (!snd_l->ackers) {
tipc_link_reset(snd_l);
+ snd_l->state = LINK_RESET;
__skb_queue_purge(xmitq);
}
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 552dbaba9cf3..b53246fb0412 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -105,6 +105,7 @@ struct tipc_sock {
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
+static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p);
@@ -381,6 +382,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
+ sk->sk_destruct = tipc_sock_destruct;
tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
tsk->sent_unacked = 0;
atomic_set(&tsk->dupl_rcvcnt, 0);
@@ -470,9 +472,6 @@ static int tipc_release(struct socket *sock)
tipc_node_remove_conn(net, dnode, tsk->portid);
}
- /* Discard any remaining (connection-based) messages in receive queue */
- __skb_queue_purge(&sk->sk_receive_queue);
-
/* Reject any messages that accumulated in backlog queue */
sock->state = SS_DISCONNECTING;
release_sock(sk);
@@ -1515,6 +1514,11 @@ static void tipc_data_ready(struct sock *sk)
rcu_read_unlock();
}
+static void tipc_sock_destruct(struct sock *sk)
+{
+ __skb_queue_purge(&sk->sk_receive_queue);
+}
+
/**
* filter_connect - Handle all incoming messages for a connection-based socket
* @tsk: TIPC socket
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ad2719ad4c1b..70c03271b798 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -158,8 +158,11 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
struct rtable *rt;
- if (skb_headroom(skb) < UDP_MIN_HEADROOM)
- pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+ if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
+ err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+ if (err)
+ goto tx_error;
+ }
skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
ub = rcu_dereference_rtnl(b->media_ptr);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 12b886f07982..45aebd966978 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -326,6 +326,118 @@ found:
return s;
}
+/* Support code for asymmetrically connected dgram sockets
+ *
+ * If a datagram socket is connected to a socket not itself connected
+ * to the first socket (eg, /dev/log), clients may only enqueue more
+ * messages if the present receive queue of the server socket is not
+ * "too large". This means there's a second writeability condition
+ * poll and sendmsg need to test. The dgram recv code will do a wake
+ * up on the peer_wait wait queue of a socket upon reception of a
+ * datagram which needs to be propagated to sleeping would-be writers
+ * since these might not have sent anything so far. This can't be
+ * accomplished via poll_wait because the lifetime of the server
+ * socket might be less than that of its clients if these break their
+ * association with it or if the server socket is closed while clients
+ * are still connected to it and there's no way to inform "a polling
+ * implementation" that it should let go of a certain wait queue.
+ *
+ * In order to propagate a wake up, a wait_queue_t of the client
+ * socket is enqueued on the peer_wait queue of the server socket
+ * whose wake function does a wake_up on the ordinary client socket
+ * wait queue. This connection is established whenever a write (or
+ * poll for write) hit the flow control condition and broken when the
+ * association to the server socket is dissolved or after a wake up
+ * was relayed.
+ */
+
+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+ void *key)
+{
+ struct unix_sock *u;
+ wait_queue_head_t *u_sleep;
+
+ u = container_of(q, struct unix_sock, peer_wake);
+
+ __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
+ q);
+ u->peer_wake.private = NULL;
+
+ /* relaying can only happen while the wq still exists */
+ u_sleep = sk_sleep(&u->sk);
+ if (u_sleep)
+ wake_up_interruptible_poll(u_sleep, key);
+
+ return 0;
+}
+
+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
+{
+ struct unix_sock *u, *u_other;
+ int rc;
+
+ u = unix_sk(sk);
+ u_other = unix_sk(other);
+ rc = 0;
+ spin_lock(&u_other->peer_wait.lock);
+
+ if (!u->peer_wake.private) {
+ u->peer_wake.private = other;
+ __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
+
+ rc = 1;
+ }
+
+ spin_unlock(&u_other->peer_wait.lock);
+ return rc;
+}
+
+static void unix_dgram_peer_wake_disconnect(struct sock *sk,
+ struct sock *other)
+{
+ struct unix_sock *u, *u_other;
+
+ u = unix_sk(sk);
+ u_other = unix_sk(other);
+ spin_lock(&u_other->peer_wait.lock);
+
+ if (u->peer_wake.private == other) {
+ __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
+ u->peer_wake.private = NULL;
+ }
+
+ spin_unlock(&u_other->peer_wait.lock);
+}
+
+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
+ struct sock *other)
+{
+ unix_dgram_peer_wake_disconnect(sk, other);
+ wake_up_interruptible_poll(sk_sleep(sk),
+ POLLOUT |
+ POLLWRNORM |
+ POLLWRBAND);
+}
+
+/* preconditions:
+ * - unix_peer(sk) == other
+ * - association is stable
+ */
+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+{
+ int connected;
+
+ connected = unix_dgram_peer_wake_connect(sk, other);
+
+ if (unix_recvq_full(other))
+ return 1;
+
+ if (connected)
+ unix_dgram_peer_wake_disconnect(sk, other);
+
+ return 0;
+}
+
static int unix_writable(const struct sock *sk)
{
return sk->sk_state != TCP_LISTEN &&
@@ -431,6 +543,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
skpair->sk_state_change(skpair);
sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
}
+
+ unix_dgram_peer_wake_disconnect(sk, skpair);
sock_put(skpair); /* It may now die */
unix_peer(sk) = NULL;
}
@@ -666,6 +780,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
INIT_LIST_HEAD(&u->link);
mutex_init(&u->readlock); /* single task reading lock */
init_waitqueue_head(&u->peer_wait);
+ init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
if (sk == NULL)
@@ -1033,6 +1148,8 @@ restart:
if (unix_peer(sk)) {
struct sock *old_peer = unix_peer(sk);
unix_peer(sk) = other;
+ unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
+
unix_state_double_unlock(sk, other);
if (other != old_peer)
@@ -1434,6 +1551,14 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
return err;
}
+static bool unix_passcred_enabled(const struct socket *sock,
+ const struct sock *other)
+{
+ return test_bit(SOCK_PASSCRED, &sock->flags) ||
+ !other->sk_socket ||
+ test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
+}
+
/*
* Some apps rely on write() giving SCM_CREDENTIALS
* We include credentials if source or destination socket
@@ -1444,14 +1569,41 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
{
if (UNIXCB(skb).pid)
return;
- if (test_bit(SOCK_PASSCRED, &sock->flags) ||
- !other->sk_socket ||
- test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
+ if (unix_passcred_enabled(sock, other)) {
UNIXCB(skb).pid = get_pid(task_tgid(current));
current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
}
}
+static int maybe_init_creds(struct scm_cookie *scm,
+ struct socket *socket,
+ const struct sock *other)
+{
+ int err;
+ struct msghdr msg = { .msg_controllen = 0 };
+
+ err = scm_send(socket, &msg, scm, false);
+ if (err)
+ return err;
+
+ if (unix_passcred_enabled(socket, other)) {
+ scm->pid = get_pid(task_tgid(current));
+ current_uid_gid(&scm->creds.uid, &scm->creds.gid);
+ }
+ return err;
+}
+
+static bool unix_skb_scm_eq(struct sk_buff *skb,
+ struct scm_cookie *scm)
+{
+ const struct unix_skb_parms *u = &UNIXCB(skb);
+
+ return u->pid == scm->pid &&
+ uid_eq(u->uid, scm->creds.uid) &&
+ gid_eq(u->gid, scm->creds.gid) &&
+ unix_secdata_eq(scm, skb);
+}
+
/*
* Send AF_UNIX data.
*/
@@ -1472,6 +1624,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
struct scm_cookie scm;
int max_level;
int data_len = 0;
+ int sk_locked;
wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
@@ -1550,12 +1703,14 @@ restart:
goto out_free;
}
+ sk_locked = 0;
unix_state_lock(other);
+restart_locked:
err = -EPERM;
if (!unix_may_send(sk, other))
goto out_unlock;
- if (sock_flag(other, SOCK_DEAD)) {
+ if (unlikely(sock_flag(other, SOCK_DEAD))) {
/*
* Check with 1003.1g - what should
* datagram error
@@ -1563,10 +1718,14 @@ restart:
unix_state_unlock(other);
sock_put(other);
+ if (!sk_locked)
+ unix_state_lock(sk);
+
err = 0;
- unix_state_lock(sk);
if (unix_peer(sk) == other) {
unix_peer(sk) = NULL;
+ unix_dgram_peer_wake_disconnect_wakeup(sk, other);
+
unix_state_unlock(sk);
unix_dgram_disconnected(sk, other);
@@ -1592,21 +1751,38 @@ restart:
goto out_unlock;
}
- if (unix_peer(other) != sk && unix_recvq_full(other)) {
- if (!timeo) {
- err = -EAGAIN;
- goto out_unlock;
+ if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+ if (timeo) {
+ timeo = unix_wait_for_peer(other, timeo);
+
+ err = sock_intr_errno(timeo);
+ if (signal_pending(current))
+ goto out_free;
+
+ goto restart;
}
- timeo = unix_wait_for_peer(other, timeo);
+ if (!sk_locked) {
+ unix_state_unlock(other);
+ unix_state_double_lock(sk, other);
+ }
- err = sock_intr_errno(timeo);
- if (signal_pending(current))
- goto out_free;
+ if (unix_peer(sk) != other ||
+ unix_dgram_peer_wake_me(sk, other)) {
+ err = -EAGAIN;
+ sk_locked = 1;
+ goto out_unlock;
+ }
- goto restart;
+ if (!sk_locked) {
+ sk_locked = 1;
+ goto restart_locked;
+ }
}
+ if (unlikely(sk_locked))
+ unix_state_unlock(sk);
+
if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb);
maybe_add_creds(skb, sock, other);
@@ -1620,6 +1796,8 @@ restart:
return len;
out_unlock:
+ if (sk_locked)
+ unix_state_unlock(sk);
unix_state_unlock(other);
out_free:
kfree_skb(skb);
@@ -1741,8 +1919,10 @@ out_err:
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
int offset, size_t size, int flags)
{
- int err = 0;
- bool send_sigpipe = true;
+ int err;
+ bool send_sigpipe = false;
+ bool init_scm = true;
+ struct scm_cookie scm;
struct sock *other, *sk = socket->sk;
struct sk_buff *skb, *newskb = NULL, *tail = NULL;
@@ -1760,7 +1940,7 @@ alloc_skb:
newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
&err, 0);
if (!newskb)
- return err;
+ goto err;
}
/* we must acquire readlock as we modify already present
@@ -1769,12 +1949,12 @@ alloc_skb:
err = mutex_lock_interruptible(&unix_sk(other)->readlock);
if (err) {
err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
- send_sigpipe = false;
goto err;
}
if (sk->sk_shutdown & SEND_SHUTDOWN) {
err = -EPIPE;
+ send_sigpipe = true;
goto err_unlock;
}
@@ -1783,23 +1963,34 @@ alloc_skb:
if (sock_flag(other, SOCK_DEAD) ||
other->sk_shutdown & RCV_SHUTDOWN) {
err = -EPIPE;
+ send_sigpipe = true;
goto err_state_unlock;
}
+ if (init_scm) {
+ err = maybe_init_creds(&scm, socket, other);
+ if (err)
+ goto err_state_unlock;
+ init_scm = false;
+ }
+
skb = skb_peek_tail(&other->sk_receive_queue);
if (tail && tail == skb) {
skb = newskb;
- } else if (!skb) {
- if (newskb)
+ } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
+ if (newskb) {
skb = newskb;
- else
+ } else {
+ tail = skb;
goto alloc_skb;
+ }
} else if (newskb) {
/* this is fast path, we don't necessarily need to
* call to kfree_skb even though with newskb == NULL
* this - does no harm
*/
consume_skb(newskb);
+ newskb = NULL;
}
if (skb_append_pagefrags(skb, page, offset, size)) {
@@ -1812,14 +2003,20 @@ alloc_skb:
skb->truesize += size;
atomic_add(size, &sk->sk_wmem_alloc);
- if (newskb)
+ if (newskb) {
+ err = unix_scm_to_skb(&scm, skb, false);
+ if (err)
+ goto err_state_unlock;
+ spin_lock(&other->sk_receive_queue.lock);
__skb_queue_tail(&other->sk_receive_queue, newskb);
+ spin_unlock(&other->sk_receive_queue.lock);
+ }
unix_state_unlock(other);
mutex_unlock(&unix_sk(other)->readlock);
other->sk_data_ready(other);
-
+ scm_destroy(&scm);
return size;
err_state_unlock:
@@ -1830,6 +2027,8 @@ err:
kfree_skb(newskb);
if (send_sigpipe && !(flags & MSG_NOSIGNAL))
send_sig(SIGPIPE, current, 0);
+ if (!init_scm)
+ scm_destroy(&scm);
return err;
}
@@ -1992,7 +2191,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
!timeo)
break;
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
unix_state_unlock(sk);
timeo = freezable_schedule_timeout(timeo);
unix_state_lock(sk);
@@ -2000,7 +2199,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
if (sock_flag(sk, SOCK_DEAD))
break;
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
}
finish_wait(sk_sleep(sk), &wait);
@@ -2133,10 +2332,7 @@ unlock:
if (check_creds) {
/* Never glue messages from different writers */
- if ((UNIXCB(skb).pid != scm.pid) ||
- !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
- !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
- !unix_secdata_eq(&scm, skb))
+ if (!unix_skb_scm_eq(skb, &scm))
break;
} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
/* Copy credentials */
@@ -2472,20 +2668,22 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
return mask;
writable = unix_writable(sk);
- other = unix_peer_get(sk);
- if (other) {
- if (unix_peer(other) != sk) {
- sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
- if (unix_recvq_full(other))
- writable = 0;
- }
- sock_put(other);
+ if (writable) {
+ unix_state_lock(sk);
+
+ other = unix_peer(sk);
+ if (other && unix_peer(other) != sk &&
+ unix_recvq_full(other) &&
+ unix_dgram_peer_wake_me(sk, other))
+ writable = 0;
+
+ unix_state_unlock(sk);
}
if (writable)
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}