aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/mrp.c27
-rw-r--r--net/9p/client.c5
-rw-r--r--net/9p/trans_virtio.c5
-rw-r--r--net/Kconfig2
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/bluetooth/hci_core.c26
-rw-r--r--net/bluetooth/hci_event.c6
-rw-r--r--net/bluetooth/l2cap_core.c7
-rw-r--r--net/bluetooth/rfcomm/tty.c35
-rw-r--r--net/bridge/br_netlink.c4
-rw-r--r--net/bridge/br_private.h8
-rw-r--r--net/bridge/br_stp.c23
-rw-r--r--net/bridge/br_stp_if.c12
-rw-r--r--net/ceph/messenger.c2
-rw-r--r--net/ceph/osd_client.c38
-rw-r--r--net/ceph/osdmap.c2
-rw-r--r--net/core/dev.c49
-rw-r--r--net/core/flow_dissector.c6
-rw-r--r--net/core/net-sysfs.c8
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/netpoll.c11
-rw-r--r--net/core/scm.c4
-rw-r--r--net/core/secure_seq.c27
-rw-r--r--net/dccp/ipv6.c1
-rw-r--r--net/ipv4/af_inet.c4
-rw-r--r--net/ipv4/igmp.c8
-rw-r--r--net/ipv4/inetpeer.c4
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/ip_tunnel.c22
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c10
-rw-r--r--net/ipv4/raw.c6
-rw-r--r--net/ipv4/tcp_input.c11
-rw-r--r--net/ipv4/tcp_memcontrol.c10
-rw-r--r--net/ipv4/tcp_metrics.c4
-rw-r--r--net/ipv4/tcp_output.c17
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c2
-rw-r--r--net/ipv6/addrconf.c79
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/exthdrs.c6
-rw-r--r--net/ipv6/fib6_rules.c4
-rw-r--r--net/ipv6/ip6_fib.c2
-rw-r--r--net/ipv6/ip6_gre.c4
-rw-r--r--net/ipv6/ip6_output.c53
-rw-r--r--net/ipv6/ip6_tunnel.c7
-rw-r--r--net/ipv6/mcast.c6
-rw-r--r--net/ipv6/ndisc.c18
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c10
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c4
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/sit.c86
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/lapb/lapb_timer.c1
-rw-r--r--net/netfilter/ipset/ip_set_core.c5
-rw-r--r--net/netfilter/ipset/ip_set_getport.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h28
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c86
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c72
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c62
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c2
-rw-r--r--net/netfilter/nf_synproxy_core.c12
-rw-r--r--net/netfilter/nfnetlink_queue_core.c2
-rw-r--r--net/netlink/af_netlink.c30
-rw-r--r--net/openvswitch/flow.c1
-rw-r--r--net/sched/sch_fq.c102
-rw-r--r--net/sched/sch_htb.c2
-rw-r--r--net/sctp/input.c3
-rw-r--r--net/sctp/ipv6.c44
-rw-r--r--net/sctp/socket.c5
-rw-r--r--net/socket.c65
-rw-r--r--net/sunrpc/auth.c68
-rw-r--r--net/sunrpc/auth_generic.c82
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c453
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c26
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c41
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.h5
-rw-r--r--net/sunrpc/auth_null.c6
-rw-r--r--net/sunrpc/auth_unix.c6
-rw-r--r--net/sunrpc/clnt.c157
-rw-r--r--net/sunrpc/rpc_pipe.c193
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/sunrpc/stats.c2
-rw-r--r--net/sunrpc/xprtsock.c13
-rw-r--r--net/sysctl_net.c4
95 files changed, 1567 insertions, 788 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 1eb05d80b07b..3ed616215870 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -24,6 +24,11 @@
static unsigned int mrp_join_time __read_mostly = 200;
module_param(mrp_join_time, uint, 0644);
MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)");
+
+static unsigned int mrp_periodic_time __read_mostly = 1000;
+module_param(mrp_periodic_time, uint, 0644);
+MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)");
+
MODULE_LICENSE("GPL");
static const u8
@@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data)
mrp_join_timer_arm(app);
}
+static void mrp_periodic_timer_arm(struct mrp_applicant *app)
+{
+ mod_timer(&app->periodic_timer,
+ jiffies + msecs_to_jiffies(mrp_periodic_time));
+}
+
+static void mrp_periodic_timer(unsigned long data)
+{
+ struct mrp_applicant *app = (struct mrp_applicant *)data;
+
+ spin_lock(&app->lock);
+ mrp_mad_event(app, MRP_EVENT_PERIODIC);
+ mrp_pdu_queue(app);
+ spin_unlock(&app->lock);
+
+ mrp_periodic_timer_arm(app);
+}
+
static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
{
__be16 endmark;
@@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
mrp_join_timer_arm(app);
+ setup_timer(&app->periodic_timer, mrp_periodic_timer,
+ (unsigned long)app);
+ mrp_periodic_timer_arm(app);
return 0;
err3:
@@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
* all pending messages before the applicant is gone.
*/
del_timer_sync(&app->join_timer);
+ del_timer_sync(&app->periodic_timer);
spin_lock_bh(&app->lock);
mrp_mad_event(app, MRP_EVENT_TX);
diff --git a/net/9p/client.c b/net/9p/client.c
index ba93bdab2701..ee8fd6bd4035 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -987,6 +987,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
{
int err;
struct p9_client *clnt;
+ char *client_id;
err = 0;
clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL);
@@ -995,6 +996,10 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
clnt->trans_mod = NULL;
clnt->trans = NULL;
+
+ client_id = utsname()->nodename;
+ memcpy(clnt->name, client_id, strlen(client_id) + 1);
+
spin_lock_init(&clnt->lock);
INIT_LIST_HEAD(&clnt->fidlist);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e1c26b101830..990afab2be1b 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -577,6 +577,10 @@ static int p9_virtio_probe(struct virtio_device *vdev)
mutex_lock(&virtio_9p_lock);
list_add_tail(&chan->chan_list, &virtio_chan_list);
mutex_unlock(&virtio_9p_lock);
+
+ /* Let udev rules use the new mount_tag attribute. */
+ kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
+
return 0;
out_free_tag:
@@ -654,6 +658,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
list_del(&chan->chan_list);
mutex_unlock(&virtio_9p_lock);
sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
+ kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
kfree(chan->tag);
kfree(chan->vc_wq);
kfree(chan);
diff --git a/net/Kconfig b/net/Kconfig
index ee0213667272..b50dacc072f0 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -228,7 +228,7 @@ config RPS
config RFS_ACCEL
boolean
- depends on RPS && GENERIC_HARDIRQS
+ depends on RPS
select CPU_RMAP
default y
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 4493913f0d5c..813db4e64602 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -168,6 +168,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
case ETH_P_8021Q:
vhdr = (struct vlan_ethhdr *)skb->data;
vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
+ vid |= BATADV_VLAN_HAS_TAG;
if (vhdr->h_vlan_encapsulated_proto != ethertype)
break;
@@ -331,6 +332,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
case ETH_P_8021Q:
vhdr = (struct vlan_ethhdr *)skb->data;
vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
+ vid |= BATADV_VLAN_HAS_TAG;
if (vhdr->h_vlan_encapsulated_proto != ethertype)
break;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 634debab4d54..fb7356fcfe51 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1146,7 +1146,11 @@ int hci_dev_open(__u16 dev)
goto done;
}
- if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) {
+ /* Check for rfkill but allow the HCI setup stage to proceed
+ * (which in itself doesn't cause any RF activity).
+ */
+ if (test_bit(HCI_RFKILLED, &hdev->dev_flags) &&
+ !test_bit(HCI_SETUP, &hdev->dev_flags)) {
ret = -ERFKILL;
goto done;
}
@@ -1566,10 +1570,13 @@ static int hci_rfkill_set_block(void *data, bool blocked)
BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);
- if (!blocked)
- return 0;
-
- hci_dev_do_close(hdev);
+ if (blocked) {
+ set_bit(HCI_RFKILLED, &hdev->dev_flags);
+ if (!test_bit(HCI_SETUP, &hdev->dev_flags))
+ hci_dev_do_close(hdev);
+ } else {
+ clear_bit(HCI_RFKILLED, &hdev->dev_flags);
+ }
return 0;
}
@@ -1591,9 +1598,13 @@ static void hci_power_on(struct work_struct *work)
return;
}
- if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+ if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) {
+ clear_bit(HCI_AUTO_OFF, &hdev->dev_flags);
+ hci_dev_do_close(hdev);
+ } else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
HCI_AUTO_OFF_TIMEOUT);
+ }
if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags))
mgmt_index_added(hdev);
@@ -2209,6 +2220,9 @@ int hci_register_dev(struct hci_dev *hdev)
}
}
+ if (hdev->rfkill && rfkill_blocked(hdev->rfkill))
+ set_bit(HCI_RFKILLED, &hdev->dev_flags);
+
set_bit(HCI_SETUP, &hdev->dev_flags);
if (hdev->dev_type != HCI_AMP)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 94aab73f89d4..8db3e89fae35 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3557,7 +3557,11 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
cp.handle = cpu_to_le16(conn->handle);
if (ltk->authenticated)
- conn->sec_level = BT_SECURITY_HIGH;
+ conn->pending_sec_level = BT_SECURITY_HIGH;
+ else
+ conn->pending_sec_level = BT_SECURITY_MEDIUM;
+
+ conn->enc_key_size = ltk->enc_size;
hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index b3bb7bca8e60..63fa11109a1c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3755,6 +3755,13 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
sk = chan->sk;
+ /* For certain devices (ex: HID mouse), support for authentication,
+ * pairing and bonding is optional. For such devices, inorder to avoid
+ * the ACL alive for too long after L2CAP disconnection, reset the ACL
+ * disc_timeout back to HCI_DISCONN_TIMEOUT during L2CAP connect.
+ */
+ conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
+
bacpy(&bt_sk(sk)->src, conn->src);
bacpy(&bt_sk(sk)->dst, conn->dst);
chan->psm = psm;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 6d126faf145f..84fcf9fff3ea 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -569,7 +569,6 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
{
struct rfcomm_dev *dev = dlc->owner;
- struct tty_struct *tty;
if (!dev)
return;
@@ -581,38 +580,8 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
DPM_ORDER_DEV_AFTER_PARENT);
wake_up_interruptible(&dev->port.open_wait);
- } else if (dlc->state == BT_CLOSED) {
- tty = tty_port_tty_get(&dev->port);
- if (!tty) {
- if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
- /* Drop DLC lock here to avoid deadlock
- * 1. rfcomm_dev_get will take rfcomm_dev_lock
- * but in rfcomm_dev_add there's lock order:
- * rfcomm_dev_lock -> dlc lock
- * 2. tty_port_put will deadlock if it's
- * the last reference
- *
- * FIXME: when we release the lock anything
- * could happen to dev, even its destruction
- */
- rfcomm_dlc_unlock(dlc);
- if (rfcomm_dev_get(dev->id) == NULL) {
- rfcomm_dlc_lock(dlc);
- return;
- }
-
- if (!test_and_set_bit(RFCOMM_TTY_RELEASED,
- &dev->flags))
- tty_port_put(&dev->port);
-
- tty_port_put(&dev->port);
- rfcomm_dlc_lock(dlc);
- }
- } else {
- tty_hangup(tty);
- tty_kref_put(tty);
- }
- }
+ } else if (dlc->state == BT_CLOSED)
+ tty_port_tty_hangup(&dev->port, false);
}
static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index b9259efa636e..e74ddc1c29a8 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -207,7 +207,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev, u32 filter_mask)
{
int err = 0;
- struct net_bridge_port *port = br_port_get_rcu(dev);
+ struct net_bridge_port *port = br_port_get_rtnl(dev);
/* not a bridge port and */
if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN))
@@ -451,7 +451,7 @@ static size_t br_get_link_af_size(const struct net_device *dev)
struct net_port_vlans *pv;
if (br_port_exists(dev))
- pv = nbp_get_vlan_info(br_port_get_rcu(dev));
+ pv = nbp_get_vlan_info(br_port_get_rtnl(dev));
else if (dev->priv_flags & IFF_EBRIDGE)
pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev));
else
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 598cb0b333c6..efb57d911569 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -202,13 +202,10 @@ struct net_bridge_port
static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
{
- struct net_bridge_port *port =
- rcu_dereference_rtnl(dev->rx_handler_data);
-
- return br_port_exists(dev) ? port : NULL;
+ return rcu_dereference(dev->rx_handler_data);
}
-static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev)
+static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *dev)
{
return br_port_exists(dev) ?
rtnl_dereference(dev->rx_handler_data) : NULL;
@@ -746,6 +743,7 @@ extern struct net_bridge_port *br_get_port(struct net_bridge *br,
extern void br_init_port(struct net_bridge_port *p);
extern void br_become_designated_port(struct net_bridge_port *p);
+extern void __br_set_forward_delay(struct net_bridge *br, unsigned long t);
extern int br_set_forward_delay(struct net_bridge *br, unsigned long x);
extern int br_set_hello_time(struct net_bridge *br, unsigned long x);
extern int br_set_max_age(struct net_bridge *br, unsigned long x);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 1c0a50f13229..3c86f0538cbb 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -209,7 +209,7 @@ static void br_record_config_information(struct net_bridge_port *p,
p->designated_age = jiffies - bpdu->message_age;
mod_timer(&p->message_age_timer, jiffies
- + (p->br->max_age - bpdu->message_age));
+ + (bpdu->max_age - bpdu->message_age));
}
/* called under bridge lock */
@@ -544,18 +544,27 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
}
+void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
+{
+ br->bridge_forward_delay = t;
+ if (br_is_root_bridge(br))
+ br->forward_delay = br->bridge_forward_delay;
+}
+
int br_set_forward_delay(struct net_bridge *br, unsigned long val)
{
unsigned long t = clock_t_to_jiffies(val);
+ int err = -ERANGE;
+ spin_lock_bh(&br->lock);
if (br->stp_enabled != BR_NO_STP &&
(t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY))
- return -ERANGE;
+ goto unlock;
- spin_lock_bh(&br->lock);
- br->bridge_forward_delay = t;
- if (br_is_root_bridge(br))
- br->forward_delay = br->bridge_forward_delay;
+ __br_set_forward_delay(br, t);
+ err = 0;
+
+unlock:
spin_unlock_bh(&br->lock);
- return 0;
+ return err;
}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index d45e760141bb..108084a04671 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -129,6 +129,14 @@ static void br_stp_start(struct net_bridge *br)
char *envp[] = { NULL };
r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+
+ spin_lock_bh(&br->lock);
+
+ if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY)
+ __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY);
+ else if (br->bridge_forward_delay < BR_MAX_FORWARD_DELAY)
+ __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY);
+
if (r == 0) {
br->stp_enabled = BR_USER_STP;
br_debug(br, "userspace STP started\n");
@@ -137,10 +145,10 @@ static void br_stp_start(struct net_bridge *br)
br_debug(br, "using kernel STP\n");
/* To start timers on any ports left in blocking */
- spin_lock_bh(&br->lock);
br_port_state_selection(br);
- spin_unlock_bh(&br->lock);
}
+
+ spin_unlock_bh(&br->lock);
}
static void br_stp_stop(struct net_bridge *br)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 3be308e14302..4a5df7b1cc9f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -290,7 +290,7 @@ int ceph_msgr_init(void)
if (ceph_msgr_slab_init())
return -ENOMEM;
- ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0);
+ ceph_msgr_wq = alloc_workqueue("ceph-msgr", 0, 0);
if (ceph_msgr_wq)
return 0;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index dd47889adc4a..2b4b32aaa893 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -503,7 +503,9 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
size_t payload_len = 0;
- BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
+ BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
+ opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
+ opcode != CEPH_OSD_OP_TRUNCATE);
op->extent.offset = offset;
op->extent.length = length;
@@ -631,6 +633,9 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
break;
case CEPH_OSD_OP_READ:
case CEPH_OSD_OP_WRITE:
+ case CEPH_OSD_OP_ZERO:
+ case CEPH_OSD_OP_DELETE:
+ case CEPH_OSD_OP_TRUNCATE:
if (src->op == CEPH_OSD_OP_WRITE)
request_data_len = src->extent.length;
dst->extent.offset = cpu_to_le64(src->extent.offset);
@@ -715,7 +720,9 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
u64 object_base;
int r;
- BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
+ BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
+ opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
+ opcode != CEPH_OSD_OP_TRUNCATE);
req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
GFP_NOFS);
@@ -1488,14 +1495,14 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
dout("handle_reply %p tid %llu req %p result %d\n", msg, tid,
req, result);
- ceph_decode_need(&p, end, 4, bad);
+ ceph_decode_need(&p, end, 4, bad_put);
numops = ceph_decode_32(&p);
if (numops > CEPH_OSD_MAX_OP)
goto bad_put;
if (numops != req->r_num_ops)
goto bad_put;
payload_len = 0;
- ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad);
+ ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad_put);
for (i = 0; i < numops; i++) {
struct ceph_osd_op *op = p;
int len;
@@ -1513,7 +1520,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
goto bad_put;
}
- ceph_decode_need(&p, end, 4 + numops * 4, bad);
+ ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
retry_attempt = ceph_decode_32(&p);
for (i = 0; i < numops; i++)
req->r_reply_op_result[i] = ceph_decode_32(&p);
@@ -1786,6 +1793,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
nr_maps--;
}
+ if (!osdc->osdmap)
+ goto bad;
done:
downgrade_write(&osdc->map_sem);
ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
@@ -2129,6 +2138,8 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
dout("osdc_start_request failed map, "
" will retry %lld\n", req->r_tid);
rc = 0;
+ } else {
+ __unregister_request(osdc, req);
}
goto out_unlock;
}
@@ -2205,6 +2216,17 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc)
EXPORT_SYMBOL(ceph_osdc_sync);
/*
+ * Call all pending notify callbacks - for use after a watch is
+ * unregistered, to make sure no more callbacks for it will be invoked
+ */
+extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
+{
+ flush_workqueue(osdc->notify_wq);
+}
+EXPORT_SYMBOL(ceph_osdc_flush_notifies);
+
+
+/*
* init, shutdown
*/
int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
@@ -2253,12 +2275,10 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
if (err < 0)
goto out_msgpool;
+ err = -ENOMEM;
osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
- if (IS_ERR(osdc->notify_wq)) {
- err = PTR_ERR(osdc->notify_wq);
- osdc->notify_wq = NULL;
+ if (!osdc->notify_wq)
goto out_msgpool;
- }
return 0;
out_msgpool:
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 603ddd92db19..dbd9a4792427 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1129,7 +1129,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
/* pg_temp? */
pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num,
- pool->pgp_num_mask);
+ pool->pg_num_mask);
pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
if (pg) {
*num = pg->len;
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c713f2239cc..65f829cfd928 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5247,10 +5247,12 @@ static int dev_new_index(struct net *net)
/* Delayed registration/unregisteration */
static LIST_HEAD(net_todo_list);
+static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
{
list_add_tail(&dev->todo_list, &net_todo_list);
+ dev_net(dev)->dev_unreg_count++;
}
static void rollback_registered_many(struct list_head *head)
@@ -5918,6 +5920,12 @@ void netdev_run_todo(void)
if (dev->destructor)
dev->destructor(dev);
+ /* Report a network device has been unregistered */
+ rtnl_lock();
+ dev_net(dev)->dev_unreg_count--;
+ __rtnl_unlock();
+ wake_up(&netdev_unregistering_wq);
+
/* Free network device */
kobject_put(&dev->dev.kobj);
}
@@ -6603,6 +6611,34 @@ static void __net_exit default_device_exit(struct net *net)
rtnl_unlock();
}
+static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
+{
+ /* Return with the rtnl_lock held when there are no network
+ * devices unregistering in any network namespace in net_list.
+ */
+ struct net *net;
+ bool unregistering;
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&netdev_unregistering_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ unregistering = false;
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list) {
+ if (net->dev_unreg_count > 0) {
+ unregistering = true;
+ break;
+ }
+ }
+ if (!unregistering)
+ break;
+ __rtnl_unlock();
+ schedule();
+ }
+ finish_wait(&netdev_unregistering_wq, &wait);
+}
+
static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
/* At exit all network devices most be removed from a network
@@ -6614,7 +6650,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
struct net *net;
LIST_HEAD(dev_kill_list);
- rtnl_lock();
+ /* To prevent network device cleanup code from dereferencing
+ * loopback devices or network devices that have been freed
+ * wait here for all pending unregistrations to complete,
+ * before unregistring the loopback device and allowing the
+ * network namespace be freed.
+ *
+ * The netdev todo list containing all network devices
+ * unregistrations that happen in default_device_exit_batch
+ * will run in the rtnl_unlock() at the end of
+ * default_device_exit_batch.
+ */
+ rtnl_lock_unregistering(net_list);
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
if (dev->rtnl_link_ops)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 0ff42f029ace..8d7d0dd72db2 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -154,8 +154,8 @@ ipv6:
if (poff >= 0) {
__be32 *ports, _ports;
- nhoff += poff;
- ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
+ ports = skb_header_pointer(skb, nhoff + poff,
+ sizeof(_ports), &_ports);
if (ports)
flow->ports = *ports;
}
@@ -352,7 +352,7 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
if (queue_index != new_index && sk &&
rcu_access_pointer(sk->sk_dst_cache))
- sk_tx_queue_set(sk, queue_index);
+ sk_tx_queue_set(sk, new_index);
queue_index = new_index;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 3f40ea9de814..d954b56b4e47 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1196,6 +1196,13 @@ static void remove_queue_kobjects(struct net_device *net)
#endif
}
+static bool net_current_may_mount(void)
+{
+ struct net *net = current->nsproxy->net_ns;
+
+ return ns_capable(net->user_ns, CAP_SYS_ADMIN);
+}
+
static void *net_grab_current_ns(void)
{
struct net *ns = current->nsproxy->net_ns;
@@ -1218,6 +1225,7 @@ static const void *net_netlink_ns(struct sock *sk)
struct kobj_ns_type_operations net_ns_type_operations = {
.type = KOBJ_NS_TYPE_NET,
+ .current_may_mount = net_current_may_mount,
.grab_current_ns = net_grab_current_ns,
.netlink_ns = net_netlink_ns,
.initial_ns = net_initial_ns,
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f97652036754..81d3a9a08453 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -651,7 +651,7 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
struct net *net = ns;
if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
- !nsown_capable(CAP_SYS_ADMIN))
+ !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;
put_net(nsproxy->net_ns);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 2c637e9a0b27..fc75c9e461b8 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -550,7 +550,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
return;
proto = ntohs(eth_hdr(skb)->h_proto);
- if (proto == ETH_P_IP) {
+ if (proto == ETH_P_ARP) {
struct arphdr *arp;
unsigned char *arp_ptr;
/* No arp on this interface */
@@ -1284,15 +1284,14 @@ EXPORT_SYMBOL_GPL(__netpoll_free_async);
void netpoll_cleanup(struct netpoll *np)
{
- if (!np->dev)
- return;
-
rtnl_lock();
+ if (!np->dev)
+ goto out;
__netpoll_cleanup(np);
- rtnl_unlock();
-
dev_put(np->dev);
np->dev = NULL;
+out:
+ rtnl_unlock();
}
EXPORT_SYMBOL(netpoll_cleanup);
diff --git a/net/core/scm.c b/net/core/scm.c
index b4da80b1cc07..b442e7e25e60 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -56,9 +56,9 @@ static __inline__ int scm_check_creds(struct ucred *creds)
if ((creds->pid == task_tgid_vnr(current) ||
ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) &&
((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
- uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
+ uid_eq(uid, cred->suid)) || ns_capable(cred->user_ns, CAP_SETUID)) &&
((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
- gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) {
+ gid_eq(gid, cred->sgid)) || ns_capable(cred->user_ns, CAP_SETGID))) {
return 0;
}
return -EPERM;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6a2f13cee86a..3f1ec1586ae1 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -10,11 +10,24 @@
#include <net/secure_seq.h>
-static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
-void net_secret_init(void)
+static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+
+static void net_secret_init(void)
{
- get_random_bytes(net_secret, sizeof(net_secret));
+ u32 tmp;
+ int i;
+
+ if (likely(net_secret[0]))
+ return;
+
+ for (i = NET_SECRET_SIZE; i > 0;) {
+ do {
+ get_random_bytes(&tmp, sizeof(tmp));
+ } while (!tmp);
+ cmpxchg(&net_secret[--i], 0, tmp);
+ }
}
#ifdef CONFIG_INET
@@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
u32 hash[MD5_DIGEST_WORDS];
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32)daddr[i];
@@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
u32 hash[MD5_DIGEST_WORDS];
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32) daddr[i];
@@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr)
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force __u32) daddr;
hash[1] = net_secret[13];
hash[2] = net_secret[14];
@@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4])
{
__u32 hash[4];
+ net_secret_init();
memcpy(hash, daddr, 16);
md5_transform(hash, net_secret);
@@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = (__force u32)dport ^ net_secret[14];
@@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
u32 hash[MD5_DIGEST_WORDS];
u64 seq;
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
u64 seq;
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + daddr[i];
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9c61f9c02fdb..6cf9f7782ad4 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -135,6 +135,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (dst)
dst->ops->redirect(dst, sk, skb);
+ goto out;
}
if (type == ICMPV6_PKT_TOOBIG) {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7a1874b7b8fd..cfeb85cff4f0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -263,10 +263,8 @@ void build_ehash_secret(void)
get_random_bytes(&rnd, sizeof(rnd));
} while (rnd == 0);
- if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {
+ if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
- net_secret_init();
- }
}
EXPORT_SYMBOL(build_ehash_secret);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d6c0e64ec97f..7defdc9ba167 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
pip->saddr = fl4.saddr;
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
- ip_select_ident(pip, &rt->dst, NULL);
+ ip_select_ident(skb, &rt->dst, NULL);
((u8 *)&pip[1])[0] = IPOPT_RA;
((u8 *)&pip[1])[1] = 4;
((u8 *)&pip[1])[2] = 0;
@@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
iph->daddr = dst;
iph->saddr = fl4.saddr;
iph->protocol = IPPROTO_IGMP;
- ip_select_ident(iph, &rt->dst, NULL);
+ ip_select_ident(skb, &rt->dst, NULL);
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
((u8 *)&iph[1])[2] = 0;
@@ -736,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data)
in_dev->mr_gq_running = 0;
igmpv3_send_report(in_dev, NULL);
- __in_dev_put(in_dev);
+ in_dev_put(in_dev);
}
static void igmp_ifc_timer_expire(unsigned long data)
@@ -749,7 +749,7 @@ static void igmp_ifc_timer_expire(unsigned long data)
igmp_ifc_start_timer(in_dev,
unsolicited_report_interval(in_dev));
}
- __in_dev_put(in_dev);
+ in_dev_put(in_dev);
}
static void igmp_ifc_event(struct in_device *in_dev)
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 000e3d239d64..33d5537881ed 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -32,8 +32,8 @@
* At the moment of writing this notes identifier of IP packets is generated
* to be unpredictable using this code only for packets subjected
* (actually or potentially) to defragmentation. I.e. DF packets less than
- * PMTU in size uses a constant ID and do not use this code (see
- * ip_select_ident() in include/net/ip.h).
+ * PMTU in size when local fragmentation is disabled use a constant ID and do
+ * not use this code (see ip_select_ident() in include/net/ip.h).
*
* Route cache entries hold references to our nodes.
* New cache entries get references via lookup by destination IP address in
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9ee17e3d11c3..a04d872c54f9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- ip_select_ident(iph, &rt->dst, sk);
+ ip_select_ident(skb, &rt->dst, sk);
if (opt && opt->opt.optlen) {
iph->ihl += opt->opt.optlen>>2;
@@ -386,7 +386,7 @@ packet_routed:
ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
}
- ip_select_ident_more(iph, &rt->dst, sk,
+ ip_select_ident_more(skb, &rt->dst, sk,
(skb_shinfo(skb)->gso_segs ?: 1) - 1);
skb->priority = sk->sk_priority;
@@ -1316,7 +1316,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
else
ttl = ip_select_ttl(inet, &rt->dst);
- iph = (struct iphdr *)skb->data;
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = 5;
iph->tos = inet->tos;
@@ -1324,7 +1324,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
- ip_select_ident(iph, &rt->dst, sk);
+ ip_select_ident(skb, &rt->dst, sk);
if (opt) {
iph->ihl += opt->optlen>>2;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index ac9fabe0300f..63a6d6d6b875 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -623,6 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tunnel->err_count = 0;
}
+ tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
ttl = tnl_params->ttl;
if (ttl == 0) {
if (skb->protocol == htons(ETH_P_IP))
@@ -641,18 +642,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len;
- if (max_headroom > dev->needed_headroom) {
+ if (max_headroom > dev->needed_headroom)
dev->needed_headroom = max_headroom;
- if (skb_cow_head(skb, dev->needed_headroom)) {
- dev->stats.tx_dropped++;
- dev_kfree_skb(skb);
- return;
- }
+
+ if (skb_cow_head(skb, dev->needed_headroom)) {
+ dev->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return;
}
err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
- ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df,
- !net_eq(tunnel->net, dev_net(dev)));
+ tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return;
@@ -853,8 +853,10 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- if (!IS_ERR(itn->fb_tunnel_dev))
+ if (!IS_ERR(itn->fb_tunnel_dev)) {
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+ }
rtnl_unlock();
return PTR_RET(itn->fb_tunnel_dev);
@@ -884,8 +886,6 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
if (!net_eq(dev_net(t->dev), net))
unregister_netdevice_queue(t->dev, head);
}
- if (itn->fb_tunnel_dev)
- unregister_netdevice_queue(itn->fb_tunnel_dev, head);
}
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index d6c856b17fd4..c31e3ad98ef2 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -61,7 +61,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
/* Push down and install the IP header. */
- __skb_push(skb, sizeof(struct iphdr));
+ skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
iph = ip_hdr(skb);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9ae54b09254f..62212c772a4b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1658,7 +1658,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
iph->protocol = IPPROTO_IPIP;
iph->ihl = 5;
iph->tot_len = htons(skb->len);
- ip_select_ident(iph, skb_dst(skb), NULL);
+ ip_select_ident(skb, skb_dst(skb), NULL);
ip_send_check(iph);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 67e17dcda65e..b6346bf2fde3 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -267,7 +267,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
if (th == NULL)
return NF_DROP;
- synproxy_parse_options(skb, par->thoff, th, &opts);
+ if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+ return NF_DROP;
if (th->syn && !(th->ack || th->fin || th->rst)) {
/* Initial SYN from client */
@@ -350,7 +351,8 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
/* fall through */
case TCP_CONNTRACK_SYN_SENT:
- synproxy_parse_options(skb, thoff, th, &opts);
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
if (!th->syn && th->ack &&
CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -373,7 +375,9 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
if (!th->syn || !th->ack)
break;
- synproxy_parse_options(skb, thoff, th, &opts);
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy->tsoff = opts.tsval - synproxy->its;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a86c7ae71881..193db03540ad 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -218,8 +218,10 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
ipv4_sk_update_pmtu(skb, sk, info);
- else if (type == ICMP_REDIRECT)
+ else if (type == ICMP_REDIRECT) {
ipv4_sk_redirect(skb, sk);
+ return;
+ }
/* Report error on raw socket, if:
1. User requested ip_recverr.
@@ -387,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
iph->check = 0;
iph->tot_len = htons(length);
if (!iph->id)
- ip_select_ident(iph, &rt->dst, NULL);
+ ip_select_ident(skb, &rt->dst, NULL);
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1969e16d936d..25a89eaa669d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3162,16 +3162,14 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
/* If reordering is high then always grow cwnd whenever data is
* delivered regardless of its ordering. Otherwise stay conservative
- * and only grow cwnd on in-order delivery in Open state, and retain
- * cwnd in Disordered state (RFC5681). A stretched ACK with
+ * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
* new SACK or ECE mark may first advance cwnd here and later reduce
* cwnd in tcp_fastretrans_alert() based on more states.
*/
if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
return flag & FLAG_FORWARD_PROGRESS;
- return inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
- flag & FLAG_DATA_ACKED;
+ return flag & FLAG_DATA_ACKED;
}
/* Check that window update is acceptable.
@@ -4141,6 +4139,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
} else {
+ tcp_grow_window(sk, skb);
kfree_skb_partial(skb, fragstolen);
skb = NULL;
}
@@ -4216,8 +4215,10 @@ add_sack:
if (tcp_is_sack(tp))
tcp_sack_new_ofo_skb(sk, seq, end_seq);
end:
- if (skb)
+ if (skb) {
+ tcp_grow_window(sk, skb);
skb_set_owner_r(skb, sk);
+ }
}
static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 8a57d79b0b16..559d4ae6ebf4 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -87,8 +87,8 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
if (!cg_proto)
return -EINVAL;
- if (val > RESOURCE_MAX)
- val = RESOURCE_MAX;
+ if (val > RES_COUNTER_MAX)
+ val = RES_COUNTER_MAX;
tcp = tcp_from_cgproto(cg_proto);
@@ -101,9 +101,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
net->ipv4.sysctl_tcp_mem[i]);
- if (val == RESOURCE_MAX)
+ if (val == RES_COUNTER_MAX)
clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
- else if (val != RESOURCE_MAX) {
+ else if (val != RES_COUNTER_MAX) {
/*
* The active bit needs to be written after the static_key
* update. This is what guarantees that the socket activation
@@ -187,7 +187,7 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
switch (cft->private) {
case RES_LIMIT:
- val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX);
+ val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
break;
case RES_USAGE:
val = tcp_read_usage(memcg);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 4a22f3e715df..52f3c6b971d2 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -502,7 +502,9 @@ reset:
* ACKs, wait for troubles.
*/
if (crtt > tp->srtt) {
- inet_csk(sk)->icsk_rto = crtt + max(crtt >> 2, tcp_rto_min(sk));
+ /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
+ crtt >>= 3;
+ inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
} else if (tp->srtt == 0) {
/* RFC6298: 5.7 We've failed to get a valid RTT sample from
* 3WHS. This is most likely due to retransmission,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7c83cb8bf137..e6bb8256e59f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -895,8 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_orphan(skb);
skb->sk = sk;
- skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
- tcp_wfree : sock_wfree;
+ skb->destructor = tcp_wfree;
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
/* Build TCP header and checksum it. */
@@ -1840,7 +1839,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
-
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
BUG_ON(!tso_segs);
@@ -1869,13 +1867,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
break;
}
- /* TSQ : sk_wmem_alloc accounts skb truesize,
- * including skb overhead. But thats OK.
+ /* TCP Small Queues :
+ * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+ * This allows for :
+ * - better RTT estimation and ACK scheduling
+ * - faster recovery
+ * - high rates
*/
- if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
+ limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
+
+ if (atomic_read(&sk->sk_wmem_alloc) > limit) {
set_bit(TSQ_THROTTLED, &tp->tsq_flags);
break;
}
+
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 74d2c95db57f..0ca44df51ee9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -658,7 +658,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
break;
case ICMP_REDIRECT:
ipv4_sk_redirect(skb, sk);
- break;
+ goto out;
}
/*
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index eb1dd4d643f2..b5663c37f089 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -117,7 +117,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
- ip_select_ident(top_iph, dst->child, NULL);
+ ip_select_ident(skb, dst->child, NULL);
top_iph->ttl = ip4_dst_hoplimit(dst->child);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d6ff12617f36..cd3fb301da38 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1499,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
return false;
}
+/* Compares an address/prefix_len with addresses on device @dev.
+ * If one is found it returns true.
+ */
+bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
+ const unsigned int prefix_len, struct net_device *dev)
+{
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+ bool ret = false;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
+ if (ret)
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(ipv6_chk_custom_prefix);
+
int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
{
struct inet6_dev *idev;
@@ -2193,43 +2220,21 @@ ok:
else
stored_lft = 0;
if (!update_lft && !create && stored_lft) {
- if (valid_lft > MIN_VALID_LIFETIME ||
- valid_lft > stored_lft)
- update_lft = 1;
- else if (stored_lft <= MIN_VALID_LIFETIME) {
- /* valid_lft <= stored_lft is always true */
- /*
- * RFC 4862 Section 5.5.3e:
- * "Note that the preferred lifetime of
- * the corresponding address is always
- * reset to the Preferred Lifetime in
- * the received Prefix Information
- * option, regardless of whether the
- * valid lifetime is also reset or
- * ignored."
- *
- * So if the preferred lifetime in
- * this advertisement is different
- * than what we have stored, but the
- * valid lifetime is invalid, just
- * reset prefered_lft.
- *
- * We must set the valid lifetime
- * to the stored lifetime since we'll
- * be updating the timestamp below,
- * else we'll set it back to the
- * minimum.
- */
- if (prefered_lft != ifp->prefered_lft) {
- valid_lft = stored_lft;
- update_lft = 1;
- }
- } else {
- valid_lft = MIN_VALID_LIFETIME;
- if (valid_lft < prefered_lft)
- prefered_lft = valid_lft;
- update_lft = 1;
- }
+ const u32 minimum_lft = min(
+ stored_lft, (u32)MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;
}
if (update_lft) {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 136fe55c1a47..7c96100b021e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -915,6 +915,9 @@ static int __init inet6_init(void)
err = ip6_route_init();
if (err)
goto ip6_route_fail;
+ err = ndisc_late_init();
+ if (err)
+ goto ndisc_late_fail;
err = ip6_flowlabel_init();
if (err)
goto ip6_flowlabel_fail;
@@ -981,6 +984,8 @@ ipv6_exthdrs_fail:
addrconf_fail:
ip6_flowlabel_cleanup();
ip6_flowlabel_fail:
+ ndisc_late_cleanup();
+ndisc_late_fail:
ip6_route_cleanup();
ip6_route_fail:
#ifdef CONFIG_PROC_FS
@@ -1043,6 +1048,7 @@ static void __exit inet6_exit(void)
ipv6_exthdrs_exit();
addrconf_cleanup();
ip6_flowlabel_cleanup();
+ ndisc_late_cleanup();
ip6_route_cleanup();
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 07a7d65a7cb6..8d67900aa003 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -162,12 +162,6 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
off += optlen;
len -= optlen;
}
- /* This case will not be caught by above check since its padding
- * length is smaller than 7:
- * 1 byte NH + 1 byte Length + 6 bytes Padding
- */
- if ((padlen == 6) && ((off - skb_network_header_len(skb)) == 8))
- goto bad;
if (len == 0)
return true;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index a6c58ce43d34..e27591635f92 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -138,8 +138,8 @@ static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg
return false;
suppress_route:
- ip6_rt_put(rt);
- return true;
+ ip6_rt_put(rt);
+ return true;
}
static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 73db48eba1c4..5bec666aba61 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -825,9 +825,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
offsetof(struct rt6_info, rt6i_dst), allow_create,
replace_required);
-
if (IS_ERR(fn)) {
err = PTR_ERR(fn);
+ fn = NULL;
goto out;
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 6b26e9feafb9..7bb5446b9d73 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -618,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
struct ip6_tnl *tunnel = netdev_priv(dev);
struct net_device *tdev; /* Device to other host */
struct ipv6hdr *ipv6h; /* Our new IP header */
- unsigned int max_headroom; /* The extra header space needed */
+ unsigned int max_headroom = 0; /* The extra header space needed */
int gre_hlen;
struct ipv6_tel_txoption opt;
int mtu;
@@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
- max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+ max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3a692d529163..a54c45ce4a48 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1015,6 +1015,8 @@ static inline int ip6_ufo_append_data(struct sock *sk,
* udp datagram
*/
if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+ struct frag_hdr fhdr;
+
skb = sock_alloc_send_skb(sk,
hh_len + fragheaderlen + transhdrlen + 20,
(flags & MSG_DONTWAIT), &err);
@@ -1036,12 +1038,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb->protocol = htons(ETH_P_IPV6);
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
- }
-
- err = skb_append_datato_frags(sk,skb, getfrag, from,
- (length - transhdrlen));
- if (!err) {
- struct frag_hdr fhdr;
/* Specify the length of each IPv6 datagram fragment.
* It has to be a multiple of 8.
@@ -1052,15 +1048,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
ipv6_select_ident(&fhdr, rt);
skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
__skb_queue_tail(&sk->sk_write_queue, skb);
-
- return 0;
}
- /* There is not enough support do UPD LSO,
- * so follow normal path
- */
- kfree_skb(skb);
- return err;
+ return skb_append_datato_frags(sk, skb, getfrag, from,
+ (length - transhdrlen));
}
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
@@ -1227,27 +1218,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
* --yoshfuji
*/
- cork->length += length;
- if (length > mtu) {
- int proto = sk->sk_protocol;
- if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
- ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
- return -EMSGSIZE;
- }
-
- if (proto == IPPROTO_UDP &&
- (rt->dst.dev->features & NETIF_F_UFO)) {
+ if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
+ sk->sk_protocol == IPPROTO_RAW)) {
+ ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
+ return -EMSGSIZE;
+ }
- err = ip6_ufo_append_data(sk, getfrag, from, length,
- hh_len, fragheaderlen,
- transhdrlen, mtu, flags, rt);
- if (err)
- goto error;
- return 0;
- }
+ skb = skb_peek_tail(&sk->sk_write_queue);
+ cork->length += length;
+ if (((length > mtu) ||
+ (skb && skb_is_gso(skb))) &&
+ (sk->sk_protocol == IPPROTO_UDP) &&
+ (rt->dst.dev->features & NETIF_F_UFO)) {
+ err = ip6_ufo_append_data(sk, getfrag, from, length,
+ hh_len, fragheaderlen,
+ transhdrlen, mtu, flags, rt);
+ if (err)
+ goto error;
+ return 0;
}
- if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+ if (!skb)
goto alloc_new_skb;
while (length > 0) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 61355f7f4da5..a791552e0422 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1656,9 +1656,9 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
- &parm->raddr) ||
- nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
&parm->laddr) ||
+ nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
+ &parm->raddr) ||
nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
@@ -1731,8 +1731,6 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
}
}
- t = rtnl_dereference(ip6n->tnls_wc[0]);
- unregister_netdevice_queue(t->dev, &list);
unregister_netdevice_many(&list);
}
@@ -1752,6 +1750,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
if (!ip6n->fb_tnl_dev)
goto err_alloc_dev;
dev_net_set(ip6n->fb_tnl_dev, net);
+ ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 096cd67b737c..d18f9f903db6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2034,7 +2034,7 @@ static void mld_dad_timer_expire(unsigned long data)
if (idev->mc_dad_count)
mld_dad_start_timer(idev, idev->mc_maxdelay);
}
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
@@ -2379,7 +2379,7 @@ static void mld_gq_timer_expire(unsigned long data)
idev->mc_gq_running = 0;
mld_send_report(idev, NULL);
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static void mld_ifc_timer_expire(unsigned long data)
@@ -2392,7 +2392,7 @@ static void mld_ifc_timer_expire(unsigned long data)
if (idev->mc_ifc_count)
mld_ifc_start_timer(idev, idev->mc_maxdelay);
}
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static void mld_ifc_event(struct inet6_dev *idev)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 12179457b2cd..f8a55ff1971b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1727,24 +1727,28 @@ int __init ndisc_init(void)
if (err)
goto out_unregister_pernet;
#endif
- err = register_netdevice_notifier(&ndisc_netdev_notifier);
- if (err)
- goto out_unregister_sysctl;
out:
return err;
-out_unregister_sysctl:
#ifdef CONFIG_SYSCTL
- neigh_sysctl_unregister(&nd_tbl.parms);
out_unregister_pernet:
-#endif
unregister_pernet_subsys(&ndisc_net_ops);
goto out;
+#endif
}
-void ndisc_cleanup(void)
+int __init ndisc_late_init(void)
+{
+ return register_netdevice_notifier(&ndisc_netdev_notifier);
+}
+
+void ndisc_late_cleanup(void)
{
unregister_netdevice_notifier(&ndisc_netdev_notifier);
+}
+
+void ndisc_cleanup(void)
+{
#ifdef CONFIG_SYSCTL
neigh_sysctl_unregister(&nd_tbl.parms);
#endif
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 19cfea8dbcaa..2748b042da72 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -282,7 +282,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
if (th == NULL)
return NF_DROP;
- synproxy_parse_options(skb, par->thoff, th, &opts);
+ if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+ return NF_DROP;
if (th->syn && !(th->ack || th->fin || th->rst)) {
/* Initial SYN from client */
@@ -372,7 +373,8 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
/* fall through */
case TCP_CONNTRACK_SYN_SENT:
- synproxy_parse_options(skb, thoff, th, &opts);
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
if (!th->syn && th->ack &&
CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -395,7 +397,9 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
if (!th->syn || !th->ack)
break;
- synproxy_parse_options(skb, thoff, th, &opts);
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy->tsoff = opts.tsval - synproxy->its;
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
index 61aaf70f376e..2205e8eeeacf 100644
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -69,8 +69,8 @@ icmpv6_manip_pkt(struct sk_buff *skb,
hdr = (struct icmp6hdr *)(skb->data + hdroff);
l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
tuple, maniptype);
- if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST ||
- hdr->icmp6_code == ICMPV6_ECHO_REPLY) {
+ if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
+ hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
hdr->icmp6_identifier,
tuple->src.u.icmp.id, 0);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 58916bbb1728..a4ed2416399e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -335,8 +335,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
ip6_sk_update_pmtu(skb, sk, info);
harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
}
- if (type == NDISC_REDIRECT)
+ if (type == NDISC_REDIRECT) {
ip6_sk_redirect(skb, sk);
+ return;
+ }
if (np->recverr) {
u8 *payload = skb->data;
if (!inet->hdrincl)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 7ee5cb96db34..19269453a8ea 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
return false;
}
+/* Checks if an address matches an address on the tunnel interface.
+ * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
+ * Long story:
+ * This function is called after we considered the packet as spoofed
+ * in is_spoofed_6rd.
+ * We may have a router that is doing NAT for proto 41 packets
+ * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
+ * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
+ * function will return true, dropping the packet.
+ * But, we can still check if is spoofed against the IP
+ * addresses associated with the interface.
+ */
+static bool only_dnatted(const struct ip_tunnel *tunnel,
+ const struct in6_addr *v6dst)
+{
+ int prefix_len;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ prefix_len = tunnel->ip6rd.prefixlen + 32
+ - tunnel->ip6rd.relay_prefixlen;
+#else
+ prefix_len = 48;
+#endif
+ return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
+}
+
+/* Returns true if a packet is spoofed */
+static bool packet_is_spoofed(struct sk_buff *skb,
+ const struct iphdr *iph,
+ struct ip_tunnel *tunnel)
+{
+ const struct ipv6hdr *ipv6h;
+
+ if (tunnel->dev->priv_flags & IFF_ISATAP) {
+ if (!isatap_chksrc(skb, iph, tunnel))
+ return true;
+
+ return false;
+ }
+
+ if (tunnel->dev->flags & IFF_POINTOPOINT)
+ return false;
+
+ ipv6h = ipv6_hdr(skb);
+
+ if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
+ net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+ }
+
+ if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
+ return false;
+
+ if (only_dnatted(tunnel, &ipv6h->daddr))
+ return false;
+
+ net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+}
+
static int ipip6_rcv(struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
@@ -586,19 +650,9 @@ static int ipip6_rcv(struct sk_buff *skb)
IPCB(skb)->flags = 0;
skb->protocol = htons(ETH_P_IPV6);
- if (tunnel->dev->priv_flags & IFF_ISATAP) {
- if (!isatap_chksrc(skb, iph, tunnel)) {
- tunnel->dev->stats.rx_errors++;
- goto out;
- }
- } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) {
- if (is_spoofed_6rd(tunnel, iph->saddr,
- &ipv6_hdr(skb)->saddr) ||
- is_spoofed_6rd(tunnel, iph->daddr,
- &ipv6_hdr(skb)->daddr)) {
- tunnel->dev->stats.rx_errors++;
- goto out;
- }
+ if (packet_is_spoofed(skb, iph, tunnel)) {
+ tunnel->dev->stats.rx_errors++;
+ goto out;
}
__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
@@ -748,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- net_dbg_ratelimited("sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
@@ -777,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- net_dbg_ratelimited("sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
@@ -1612,6 +1666,7 @@ static int __net_init sit_init_net(struct net *net)
goto err_alloc_dev;
}
dev_net_set(sitn->fb_tunnel_dev, net);
+ sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
@@ -1646,7 +1701,6 @@ static void __net_exit sit_exit_net(struct net *net)
rtnl_lock();
sit_destroy_tunnels(sitn, &list);
- unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f4058150262b..72b7eaaf3ca0 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -525,8 +525,10 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type == ICMPV6_PKT_TOOBIG)
ip6_sk_update_pmtu(skb, sk, info);
- if (type == NDISC_REDIRECT)
+ if (type == NDISC_REDIRECT) {
ip6_sk_redirect(skb, sk);
+ goto out;
+ }
np = inet6_sk(sk);
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 54563ad8aeb1..355cc3b6fa4d 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -154,6 +154,7 @@ static void lapb_t1timer_expiry(unsigned long param)
} else {
lapb->n2count++;
lapb_requeue_frames(lapb);
+ lapb_kick(lapb);
}
break;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index f77139007983..f2e30fb31e78 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1052,7 +1052,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
* Not an artificial restriction anymore, as we must prevent
* possible loops created by swapping in setlist type of sets. */
if (!(from->type->features == to->type->features &&
- from->type->family == to->type->family))
+ from->family == to->family))
return -IPSET_ERR_TYPE_MISMATCH;
strncpy(from_name, from->name, IPSET_MAXNAMELEN);
@@ -1489,8 +1489,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
if (ret == -EAGAIN)
ret = 1;
- return (ret < 0 && ret != -ENOTEMPTY) ? ret :
- ret > 0 ? 0 : -IPSET_ERR_EXIST;
+ return ret > 0 ? 0 : -IPSET_ERR_EXIST;
}
/* Get headed data of a set */
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 6fdf88ae2353..dac156f819ac 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -116,12 +116,12 @@ ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
{
int protoff;
u8 nexthdr;
- __be16 frag_off;
+ __be16 frag_off = 0;
nexthdr = ipv6_hdr(skb)->nexthdr;
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
&frag_off);
- if (protoff < 0)
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
return false;
return get_port(skb, nexthdr, protoff, src, port, proto);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 57beb1762b2d..707bc520d629 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -325,18 +325,22 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length)
static void
mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length)
{
- u8 i, j;
-
- for (i = 0; i < nets_length - 1 && h->nets[i].cidr != cidr; i++)
- ;
- h->nets[i].nets--;
-
- if (h->nets[i].nets != 0)
- return;
-
- for (j = i; j < nets_length - 1 && h->nets[j].nets; j++) {
- h->nets[j].cidr = h->nets[j + 1].cidr;
- h->nets[j].nets = h->nets[j + 1].nets;
+ u8 i, j, net_end = nets_length - 1;
+
+ for (i = 0; i < nets_length; i++) {
+ if (h->nets[i].cidr != cidr)
+ continue;
+ if (h->nets[i].nets > 1 || i == net_end ||
+ h->nets[i + 1].nets == 0) {
+ h->nets[i].nets--;
+ return;
+ }
+ for (j = i; j < net_end && h->nets[j].nets; j++) {
+ h->nets[j].cidr = h->nets[j + 1].cidr;
+ h->nets[j].nets = h->nets[j + 1].nets;
+ }
+ h->nets[j].nets = 0;
+ return;
}
}
#endif
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index c6a525373be4..f15f3e28b9c3 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -260,7 +260,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip = htonl(ip);
e.ip2 = htonl(ip2_from & ip_set_hostmask(e.cidr + 1));
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
@@ -544,7 +544,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index da740ceb56ae..223e9f546d0f 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -199,7 +199,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
e.ip = htonl(ip & ip_set_hostmask(e.cidr));
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret:
ip_set_eexist(ret, flags) ? 0 : ret;
}
@@ -396,7 +396,7 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 84ae6f6ce624..7d798d5d5cd3 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -368,7 +368,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
e.ip = htonl(ip & ip_set_hostmask(e.cidr));
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
@@ -634,7 +634,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 9a0869853be5..09d6690bee6f 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -244,7 +244,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_TEST || !(with_ports || tb[IPSET_ATTR_IP_TO])) {
e.ip = htonl(ip & ip_set_hostmask(e.cidr + 1));
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
@@ -489,7 +489,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4f69e83ff836..74fd00c27210 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -116,6 +116,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
struct ip_vs_cpu_stats *s;
+ struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
s->ustats.inpkts++;
@@ -123,11 +124,14 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->ustats.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- s = this_cpu_ptr(dest->svc->stats.cpustats);
+ rcu_read_lock();
+ svc = rcu_dereference(dest->svc);
+ s = this_cpu_ptr(svc->stats.cpustats);
s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
+ rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.inpkts++;
@@ -146,6 +150,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
struct ip_vs_cpu_stats *s;
+ struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
s->ustats.outpkts++;
@@ -153,11 +158,14 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->ustats.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- s = this_cpu_ptr(dest->svc->stats.cpustats);
+ rcu_read_lock();
+ svc = rcu_dereference(dest->svc);
+ s = this_cpu_ptr(svc->stats.cpustats);
s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
+ rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.outpkts++;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c8148e487386..a3df9bddc4f7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -460,7 +460,7 @@ static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
atomic_inc(&svc->refcnt);
- dest->svc = svc;
+ rcu_assign_pointer(dest->svc, svc);
}
static void ip_vs_service_free(struct ip_vs_service *svc)
@@ -470,18 +470,25 @@ static void ip_vs_service_free(struct ip_vs_service *svc)
kfree(svc);
}
-static void
-__ip_vs_unbind_svc(struct ip_vs_dest *dest)
+static void ip_vs_service_rcu_free(struct rcu_head *head)
{
- struct ip_vs_service *svc = dest->svc;
+ struct ip_vs_service *svc;
+
+ svc = container_of(head, struct ip_vs_service, rcu_head);
+ ip_vs_service_free(svc);
+}
- dest->svc = NULL;
+static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
+{
if (atomic_dec_and_test(&svc->refcnt)) {
IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
svc->fwmark,
IP_VS_DBG_ADDR(svc->af, &svc->addr),
ntohs(svc->port));
- ip_vs_service_free(svc);
+ if (do_delay)
+ call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
+ else
+ ip_vs_service_free(svc);
}
}
@@ -667,11 +674,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
IP_VS_DBG_ADDR(svc->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
- /* We can not reuse dest while in grace period
- * because conns still can use dest->svc
- */
- if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
- continue;
if (dest->af == svc->af &&
ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
dest->port == dport &&
@@ -697,8 +699,10 @@ out:
static void ip_vs_dest_free(struct ip_vs_dest *dest)
{
+ struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
+
__ip_vs_dst_cache_reset(dest);
- __ip_vs_unbind_svc(dest);
+ __ip_vs_svc_put(svc, false);
free_percpu(dest->stats.cpustats);
kfree(dest);
}
@@ -771,6 +775,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_dest_user_kern *udest, int add)
{
struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct ip_vs_service *old_svc;
struct ip_vs_scheduler *sched;
int conn_flags;
@@ -792,13 +797,14 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
atomic_set(&dest->conn_flags, conn_flags);
/* bind the service */
- if (!dest->svc) {
+ old_svc = rcu_dereference_protected(dest->svc, 1);
+ if (!old_svc) {
__ip_vs_bind_svc(dest, svc);
} else {
- if (dest->svc != svc) {
- __ip_vs_unbind_svc(dest);
+ if (old_svc != svc) {
ip_vs_zero_stats(&dest->stats);
__ip_vs_bind_svc(dest, svc);
+ __ip_vs_svc_put(old_svc, true);
}
}
@@ -998,16 +1004,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return 0;
}
-static void ip_vs_dest_wait_readers(struct rcu_head *head)
-{
- struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest,
- rcu_head);
-
- /* End of grace period after unlinking */
- clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
-}
-
-
/*
* Delete a destination (must be already unlinked from the service)
*/
@@ -1023,20 +1019,16 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
*/
ip_vs_rs_unhash(dest);
- if (!cleanup) {
- set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
- call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);
- }
-
spin_lock_bh(&ipvs->dest_trash_lock);
IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->refcnt));
if (list_empty(&ipvs->dest_trash) && !cleanup)
mod_timer(&ipvs->dest_trash_timer,
- jiffies + IP_VS_DEST_TRASH_PERIOD);
+ jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
/* dest lives in trash without reference */
list_add(&dest->t_list, &ipvs->dest_trash);
+ dest->idle_start = 0;
spin_unlock_bh(&ipvs->dest_trash_lock);
ip_vs_dest_put(dest);
}
@@ -1108,24 +1100,30 @@ static void ip_vs_dest_trash_expire(unsigned long data)
struct net *net = (struct net *) data;
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_dest *dest, *next;
+ unsigned long now = jiffies;
spin_lock(&ipvs->dest_trash_lock);
list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
- /* Skip if dest is in grace period */
- if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
- continue;
if (atomic_read(&dest->refcnt) > 0)
continue;
+ if (dest->idle_start) {
+ if (time_before(now, dest->idle_start +
+ IP_VS_DEST_TRASH_PERIOD))
+ continue;
+ } else {
+ dest->idle_start = max(1UL, now);
+ continue;
+ }
IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
dest->vfwmark,
- IP_VS_DBG_ADDR(dest->svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
list_del(&dest->t_list);
ip_vs_dest_free(dest);
}
if (!list_empty(&ipvs->dest_trash))
mod_timer(&ipvs->dest_trash_timer,
- jiffies + IP_VS_DEST_TRASH_PERIOD);
+ jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
spin_unlock(&ipvs->dest_trash_lock);
}
@@ -1320,14 +1318,6 @@ out:
return ret;
}
-static void ip_vs_service_rcu_free(struct rcu_head *head)
-{
- struct ip_vs_service *svc;
-
- svc = container_of(head, struct ip_vs_service, rcu_head);
- ip_vs_service_free(svc);
-}
-
/*
* Delete a service from the service list
* - The service must be unlinked, unlocked and not referenced!
@@ -1376,13 +1366,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
/*
* Free the service if nobody refers to it
*/
- if (atomic_dec_and_test(&svc->refcnt)) {
- IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
- svc->fwmark,
- IP_VS_DBG_ADDR(svc->af, &svc->addr),
- ntohs(svc->port));
- call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
- }
+ __ip_vs_svc_put(svc, true);
/* decrease the module use count */
ip_vs_use_count_dec();
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 6bee6d0c73a5..1425e9a924c4 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -59,12 +59,13 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
struct ip_vs_cpu_stats __percpu *stats)
{
int i;
+ bool add = false;
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
unsigned int start;
__u64 inbytes, outbytes;
- if (i) {
+ if (add) {
sum->conns += s->ustats.conns;
sum->inpkts += s->ustats.inpkts;
sum->outpkts += s->ustats.outpkts;
@@ -76,6 +77,7 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
sum->inbytes += inbytes;
sum->outbytes += outbytes;
} else {
+ add = true;
sum->conns = s->ustats.conns;
sum->inpkts = s->ustats.inpkts;
sum->outpkts = s->ustats.outpkts;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 1383b0eadc0e..eff13c94498e 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -93,7 +93,7 @@ struct ip_vs_lblc_entry {
struct hlist_node list;
int af; /* address family */
union nf_inet_addr addr; /* destination IP address */
- struct ip_vs_dest __rcu *dest; /* real server (cache) */
+ struct ip_vs_dest *dest; /* real server (cache) */
unsigned long lastuse; /* last used time */
struct rcu_head rcu_head;
};
@@ -130,20 +130,21 @@ static struct ctl_table vs_vars_table[] = {
};
#endif
-static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
+static void ip_vs_lblc_rcu_free(struct rcu_head *head)
{
- struct ip_vs_dest *dest;
+ struct ip_vs_lblc_entry *en = container_of(head,
+ struct ip_vs_lblc_entry,
+ rcu_head);
- hlist_del_rcu(&en->list);
- /*
- * We don't kfree dest because it is referred either by its service
- * or the trash dest list.
- */
- dest = rcu_dereference_protected(en->dest, 1);
- ip_vs_dest_put(dest);
- kfree_rcu(en, rcu_head);
+ ip_vs_dest_put(en->dest);
+ kfree(en);
}
+static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en)
+{
+ hlist_del_rcu(&en->list);
+ call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free);
+}
/*
* Returns hash value for IPVS LBLC entry
@@ -203,30 +204,23 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
struct ip_vs_lblc_entry *en;
en = ip_vs_lblc_get(dest->af, tbl, daddr);
- if (!en) {
- en = kmalloc(sizeof(*en), GFP_ATOMIC);
- if (!en)
- return NULL;
-
- en->af = dest->af;
- ip_vs_addr_copy(dest->af, &en->addr, daddr);
- en->lastuse = jiffies;
+ if (en) {
+ if (en->dest == dest)
+ return en;
+ ip_vs_lblc_del(en);
+ }
+ en = kmalloc(sizeof(*en), GFP_ATOMIC);
+ if (!en)
+ return NULL;
- ip_vs_dest_hold(dest);
- RCU_INIT_POINTER(en->dest, dest);
+ en->af = dest->af;
+ ip_vs_addr_copy(dest->af, &en->addr, daddr);
+ en->lastuse = jiffies;
- ip_vs_lblc_hash(tbl, en);
- } else {
- struct ip_vs_dest *old_dest;
+ ip_vs_dest_hold(dest);
+ en->dest = dest;
- old_dest = rcu_dereference_protected(en->dest, 1);
- if (old_dest != dest) {
- ip_vs_dest_put(old_dest);
- ip_vs_dest_hold(dest);
- /* No ordering constraints for refcnt */
- RCU_INIT_POINTER(en->dest, dest);
- }
- }
+ ip_vs_lblc_hash(tbl, en);
return en;
}
@@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
tbl->dead = 1;
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
- ip_vs_lblc_free(en);
+ ip_vs_lblc_del(en);
atomic_dec(&tbl->entries);
}
}
@@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
sysctl_lblc_expiration(svc)))
continue;
- ip_vs_lblc_free(en);
+ ip_vs_lblc_del(en);
atomic_dec(&tbl->entries);
}
spin_unlock(&svc->sched_lock);
@@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
continue;
- ip_vs_lblc_free(en);
+ ip_vs_lblc_del(en);
atomic_dec(&tbl->entries);
goal--;
}
@@ -443,8 +437,8 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
continue;
doh = ip_vs_dest_conn_overhead(dest);
- if (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight)) {
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
least = dest;
loh = doh;
}
@@ -511,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
* free up entries from the trash at any time.
*/
- dest = rcu_dereference(en->dest);
+ dest = en->dest;
if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
goto out;
@@ -631,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
unregister_pernet_subsys(&ip_vs_lblc_ops);
- synchronize_rcu();
+ rcu_barrier();
}
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 5199448697f6..0b8550089a2e 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -89,7 +89,7 @@
*/
struct ip_vs_dest_set_elem {
struct list_head list; /* list link */
- struct ip_vs_dest __rcu *dest; /* destination server */
+ struct ip_vs_dest *dest; /* destination server */
struct rcu_head rcu_head;
};
@@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
if (check) {
list_for_each_entry(e, &set->list, list) {
- struct ip_vs_dest *d;
-
- d = rcu_dereference_protected(e->dest, 1);
- if (d == dest)
- /* already existed */
+ if (e->dest == dest)
return;
}
}
@@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
return;
ip_vs_dest_hold(dest);
- RCU_INIT_POINTER(e->dest, dest);
+ e->dest = dest;
list_add_rcu(&e->list, &set->list);
atomic_inc(&set->size);
@@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
set->lastmod = jiffies;
}
+static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_dest_set_elem *e;
+
+ e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
+ ip_vs_dest_put(e->dest);
+ kfree(e);
+}
+
static void
ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
struct ip_vs_dest_set_elem *e;
list_for_each_entry(e, &set->list, list) {
- struct ip_vs_dest *d;
-
- d = rcu_dereference_protected(e->dest, 1);
- if (d == dest) {
+ if (e->dest == dest) {
/* HIT */
atomic_dec(&set->size);
set->lastmod = jiffies;
- ip_vs_dest_put(dest);
list_del_rcu(&e->list);
- kfree_rcu(e, rcu_head);
+ call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
break;
}
}
@@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
struct ip_vs_dest_set_elem *e, *ep;
list_for_each_entry_safe(e, ep, &set->list, list) {
- struct ip_vs_dest *d;
-
- d = rcu_dereference_protected(e->dest, 1);
- /*
- * We don't kfree dest because it is referred either
- * by its service or by the trash dest list.
- */
- ip_vs_dest_put(d);
list_del_rcu(&e->list);
- kfree_rcu(e, rcu_head);
+ call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
}
}
@@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
struct ip_vs_dest *dest, *least;
int loh, doh;
- if (set == NULL)
- return NULL;
-
/* select the first destination server, whose weight > 0 */
list_for_each_entry_rcu(e, &set->list, list) {
- least = rcu_dereference(e->dest);
+ least = e->dest;
if (least->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -195,13 +185,13 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
/* find the destination with the weighted least load */
nextstage:
list_for_each_entry_continue_rcu(e, &set->list, list) {
- dest = rcu_dereference(e->dest);
+ dest = e->dest;
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
doh = ip_vs_dest_conn_overhead(dest);
- if ((loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight))
+ if (((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight))
&& (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
least = dest;
loh = doh;
@@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
/* select the first destination server, whose weight > 0 */
list_for_each_entry(e, &set->list, list) {
- most = rcu_dereference_protected(e->dest, 1);
+ most = e->dest;
if (atomic_read(&most->weight) > 0) {
moh = ip_vs_dest_conn_overhead(most);
goto nextstage;
@@ -243,11 +233,11 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
/* find the destination with the weighted most load */
nextstage:
list_for_each_entry_continue(e, &set->list, list) {
- dest = rcu_dereference_protected(e->dest, 1);
+ dest = e->dest;
doh = ip_vs_dest_conn_overhead(dest);
/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
- if ((moh * atomic_read(&dest->weight) <
- doh * atomic_read(&most->weight))
+ if (((__s64)moh * atomic_read(&dest->weight) <
+ (__s64)doh * atomic_read(&most->weight))
&& (atomic_read(&dest->weight) > 0)) {
most = dest;
moh = doh;
@@ -611,8 +601,8 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
continue;
doh = ip_vs_dest_conn_overhead(dest);
- if (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight)) {
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
least = dest;
loh = doh;
}
@@ -819,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
unregister_pernet_subsys(&ip_vs_lblcr_ops);
- synchronize_rcu();
+ rcu_barrier();
}
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index d8d9860934fe..961a6de9bb29 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -40,7 +40,7 @@
#include <net/ip_vs.h>
-static inline unsigned int
+static inline int
ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
{
/*
@@ -59,7 +59,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_iphdr *iph)
{
struct ip_vs_dest *dest, *least = NULL;
- unsigned int loh = 0, doh;
+ int loh = 0, doh;
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
@@ -92,8 +92,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
}
if (!least ||
- (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight))) {
+ ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight))) {
least = dest;
loh = doh;
}
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index a5284cc3d882..e446b9fa7424 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -44,7 +44,7 @@
#include <net/ip_vs.h>
-static inline unsigned int
+static inline int
ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
{
/*
@@ -63,7 +63,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_iphdr *iph)
{
struct ip_vs_dest *dest, *least;
- unsigned int loh, doh;
+ int loh, doh;
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
@@ -99,8 +99,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
doh = ip_vs_sed_dest_overhead(dest);
- if (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight)) {
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
least = dest;
loh = doh;
}
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 6dc1fa128840..b5b4650d50a9 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -35,7 +35,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_iphdr *iph)
{
struct ip_vs_dest *dest, *least;
- unsigned int loh, doh;
+ int loh, doh;
IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
@@ -71,8 +71,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
doh = ip_vs_dest_conn_overhead(dest);
- if (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight)) {
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
least = dest;
loh = doh;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index b75ff6429a04..c47444e4cf8c 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
iph->ttl = old_iph->ttl;
- ip_select_ident(iph, &rt->dst, NULL);
+ ip_select_ident(skb, &rt->dst, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 6fd967c6278c..cdf4567ba9b3 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -24,7 +24,7 @@
int synproxy_net_id;
EXPORT_SYMBOL_GPL(synproxy_net_id);
-void
+bool
synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
const struct tcphdr *th, struct synproxy_options *opts)
{
@@ -32,7 +32,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
u8 buf[40], *ptr;
ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
- BUG_ON(ptr == NULL);
+ if (ptr == NULL)
+ return false;
opts->options = 0;
while (length > 0) {
@@ -41,16 +42,16 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
switch (opcode) {
case TCPOPT_EOL:
- return;
+ return true;
case TCPOPT_NOP:
length--;
continue;
default:
opsize = *ptr++;
if (opsize < 2)
- return;
+ return true;
if (opsize > length)
- return;
+ return true;
switch (opcode) {
case TCPOPT_MSS:
@@ -84,6 +85,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
length -= opsize;
}
}
+ return true;
}
EXPORT_SYMBOL_GPL(synproxy_parse_options);
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 95a98c8c1da6..ae2e5c11d01a 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -1009,7 +1009,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
verdict = NF_DROP;
if (ct)
- nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff);
+ nfqnl_ct_seq_adjust(entry->skb, ct, ctinfo, diff);
}
if (nfqa[NFQA_MARK])
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index a17dda1bbee0..8df7f64c6db3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -168,16 +168,43 @@ int netlink_remove_tap(struct netlink_tap *nt)
}
EXPORT_SYMBOL_GPL(netlink_remove_tap);
+static bool netlink_filter_tap(const struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+ bool pass = false;
+
+ /* We take the more conservative approach and
+ * whitelist socket protocols that may pass.
+ */
+ switch (sk->sk_protocol) {
+ case NETLINK_ROUTE:
+ case NETLINK_USERSOCK:
+ case NETLINK_SOCK_DIAG:
+ case NETLINK_NFLOG:
+ case NETLINK_XFRM:
+ case NETLINK_FIB_LOOKUP:
+ case NETLINK_NETFILTER:
+ case NETLINK_GENERIC:
+ pass = true;
+ break;
+ }
+
+ return pass;
+}
+
static int __netlink_deliver_tap_skb(struct sk_buff *skb,
struct net_device *dev)
{
struct sk_buff *nskb;
+ struct sock *sk = skb->sk;
int ret = -ENOMEM;
dev_hold(dev);
nskb = skb_clone(skb, GFP_ATOMIC);
if (nskb) {
nskb->dev = dev;
+ nskb->protocol = htons((u16) sk->sk_protocol);
+
ret = dev_queue_xmit(nskb);
if (unlikely(ret > 0))
ret = net_xmit_errno(ret);
@@ -192,6 +219,9 @@ static void __netlink_deliver_tap(struct sk_buff *skb)
int ret;
struct netlink_tap *tmp;
+ if (!netlink_filter_tap(skb))
+ return;
+
list_for_each_entry_rcu(tmp, &netlink_tap_all, list) {
ret = __netlink_deliver_tap_skb(skb, tmp->dev);
if (unlikely(ret))
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index fb36f8565161..410db90db73d 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -1178,6 +1178,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
if (type > OVS_KEY_ATTR_MAX) {
OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
type, OVS_KEY_ATTR_MAX);
+ return -EINVAL;
}
if (attrs & (1 << type)) {
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 32ad015ee8ce..a2fef8b10b96 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -285,7 +285,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
/* remove one skb from head of flow queue */
-static struct sk_buff *fq_dequeue_head(struct fq_flow *flow)
+static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
{
struct sk_buff *skb = flow->head;
@@ -293,6 +293,8 @@ static struct sk_buff *fq_dequeue_head(struct fq_flow *flow)
flow->head = skb->next;
skb->next = NULL;
flow->qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ sch->q.qlen--;
}
return skb;
}
@@ -418,8 +420,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
struct fq_flow_head *head;
struct sk_buff *skb;
struct fq_flow *f;
+ u32 rate;
- skb = fq_dequeue_head(&q->internal);
+ skb = fq_dequeue_head(sch, &q->internal);
if (skb)
goto out;
fq_check_throttled(q, now);
@@ -449,7 +452,7 @@ begin:
goto begin;
}
- skb = fq_dequeue_head(f);
+ skb = fq_dequeue_head(sch, f);
if (!skb) {
head->first = f->next;
/* force a pass through old_flows to prevent starvation */
@@ -466,43 +469,74 @@ begin:
f->time_next_packet = now;
f->credit -= qdisc_pkt_len(skb);
- if (f->credit <= 0 &&
- q->rate_enable &&
- skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
- u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
+ if (f->credit > 0 || !q->rate_enable)
+ goto out;
- rate = min(rate, q->flow_max_rate);
- if (rate) {
- u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC;
-
- do_div(len, rate);
- /* Since socket rate can change later,
- * clamp the delay to 125 ms.
- * TODO: maybe segment the too big skb, as in commit
- * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
- */
- if (unlikely(len > 125 * NSEC_PER_MSEC)) {
- len = 125 * NSEC_PER_MSEC;
- q->stat_pkts_too_long++;
- }
+ if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
+ rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
- f->time_next_packet = now + len;
+ rate = min(rate, q->flow_max_rate);
+ } else {
+ rate = q->flow_max_rate;
+ if (rate == ~0U)
+ goto out;
+ }
+ if (rate) {
+ u32 plen = max(qdisc_pkt_len(skb), q->quantum);
+ u64 len = (u64)plen * NSEC_PER_SEC;
+
+ do_div(len, rate);
+ /* Since socket rate can change later,
+ * clamp the delay to 125 ms.
+ * TODO: maybe segment the too big skb, as in commit
+ * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
+ */
+ if (unlikely(len > 125 * NSEC_PER_MSEC)) {
+ len = 125 * NSEC_PER_MSEC;
+ q->stat_pkts_too_long++;
}
+
+ f->time_next_packet = now + len;
}
out:
- sch->qstats.backlog -= qdisc_pkt_len(skb);
qdisc_bstats_update(sch, skb);
- sch->q.qlen--;
qdisc_unthrottled(sch);
return skb;
}
static void fq_reset(struct Qdisc *sch)
{
+ struct fq_sched_data *q = qdisc_priv(sch);
+ struct rb_root *root;
struct sk_buff *skb;
+ struct rb_node *p;
+ struct fq_flow *f;
+ unsigned int idx;
- while ((skb = fq_dequeue(sch)) != NULL)
+ while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL)
kfree_skb(skb);
+
+ if (!q->fq_root)
+ return;
+
+ for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
+ root = &q->fq_root[idx];
+ while ((p = rb_first(root)) != NULL) {
+ f = container_of(p, struct fq_flow, fq_node);
+ rb_erase(p, root);
+
+ while ((skb = fq_dequeue_head(sch, f)) != NULL)
+ kfree_skb(skb);
+
+ kmem_cache_free(fq_flow_cachep, f);
+ }
+ }
+ q->new_flows.first = NULL;
+ q->old_flows.first = NULL;
+ q->delayed = RB_ROOT;
+ q->flows = 0;
+ q->inactive_flows = 0;
+ q->throttled_flows = 0;
}
static void fq_rehash(struct fq_sched_data *q,
@@ -645,6 +679,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = fq_dequeue(sch);
+ if (!skb)
+ break;
kfree_skb(skb);
drop_count++;
}
@@ -657,21 +693,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
static void fq_destroy(struct Qdisc *sch)
{
struct fq_sched_data *q = qdisc_priv(sch);
- struct rb_root *root;
- struct rb_node *p;
- unsigned int idx;
- if (q->fq_root) {
- for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
- root = &q->fq_root[idx];
- while ((p = rb_first(root)) != NULL) {
- rb_erase(p, root);
- kmem_cache_free(fq_flow_cachep,
- container_of(p, struct fq_flow, fq_node));
- }
- }
- kfree(q->fq_root);
- }
+ fq_reset(sch);
+ kfree(q->fq_root);
qdisc_watchdog_cancel(&q->watchdog);
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c2178b15ca6e..863846cc5513 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1495,7 +1495,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
- cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer);
+ cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
sch_tree_unlock(sch);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 5f2068679f83..98b69bbecdd9 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -634,8 +634,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
break;
case ICMP_REDIRECT:
sctp_icmp_redirect(sk, transport, skb);
- err = 0;
- break;
+ /* Fall through to out_unlock. */
default:
goto out_unlock;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index da613ceae28c..e7b2d4fe2b6a 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -183,7 +183,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
break;
case NDISC_REDIRECT:
sctp_icmp_redirect(sk, transport, skb);
- break;
+ goto out_unlock;
default:
break;
}
@@ -204,44 +204,23 @@ out:
in6_dev_put(idev);
}
-/* Based on tcp_v6_xmit() in tcp_ipv6.c. */
static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
{
struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
-
- fl6.flowi6_proto = sk->sk_protocol;
-
- /* Fill in the dest address from the route entry passed with the skb
- * and the source address from the transport.
- */
- fl6.daddr = transport->ipaddr.v6.sin6_addr;
- fl6.saddr = transport->saddr.v6.sin6_addr;
-
- fl6.flowlabel = np->flow_label;
- IP6_ECN_flow_xmit(sk, fl6.flowlabel);
- if (ipv6_addr_type(&fl6.saddr) & IPV6_ADDR_LINKLOCAL)
- fl6.flowi6_oif = transport->saddr.v6.sin6_scope_id;
- else
- fl6.flowi6_oif = sk->sk_bound_dev_if;
-
- if (np->opt && np->opt->srcrt) {
- struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
- fl6.daddr = *rt0->addr;
- }
+ struct flowi6 *fl6 = &transport->fl.u.ip6;
pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb,
- skb->len, &fl6.saddr, &fl6.daddr);
+ skb->len, &fl6->saddr, &fl6->daddr);
- SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
+ IP6_ECN_flow_xmit(sk, fl6->flowlabel);
if (!(transport->param_flags & SPP_PMTUD_ENABLE))
skb->local_df = 1;
- return ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
+
+ return ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
}
/* Returns the dst cache entry for the given source and destination ip
@@ -254,10 +233,12 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
struct dst_entry *dst = NULL;
struct flowi6 *fl6 = &fl->u.ip6;
struct sctp_bind_addr *bp;
+ struct ipv6_pinfo *np = inet6_sk(sk);
struct sctp_sockaddr_entry *laddr;
union sctp_addr *baddr = NULL;
union sctp_addr *daddr = &t->ipaddr;
union sctp_addr dst_saddr;
+ struct in6_addr *final_p, final;
__u8 matchlen = 0;
__u8 bmatchlen;
sctp_scope_t scope;
@@ -281,7 +262,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
pr_debug("src=%pI6 - ", &fl6->saddr);
}
- dst = ip6_dst_lookup_flow(sk, fl6, NULL, false);
+ final_p = fl6_update_dst(fl6, np->opt, &final);
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
if (!asoc || saddr)
goto out;
@@ -333,10 +315,12 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
}
}
rcu_read_unlock();
+
if (baddr) {
fl6->saddr = baddr->v6.sin6_addr;
fl6->fl6_sport = baddr->v6.sin6_port;
- dst = ip6_dst_lookup_flow(sk, fl6, NULL, false);
+ final_p = fl6_update_dst(fl6, np->opt, &final);
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
}
out:
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d5d5882a2891..911b71b26b0e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -806,6 +806,9 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
goto skip_mkasconf;
}
+ if (laddr == NULL)
+ return -EINVAL;
+
/* We do not need RCU protection throughout this loop
* because this is done under a socket lock from the
* setsockopt call.
@@ -6176,7 +6179,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
/* Is there any exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
mask |= POLLERR |
- sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/socket.c b/net/socket.c
index b2d7c629eeb9..ebed4b68f768 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -854,11 +854,6 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
}
EXPORT_SYMBOL(kernel_recvmsg);
-static void sock_aio_dtor(struct kiocb *iocb)
-{
- kfree(iocb->private);
-}
-
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more)
{
@@ -889,12 +884,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
struct sock_iocb *siocb)
{
- if (!is_sync_kiocb(iocb)) {
- siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
- if (!siocb)
- return NULL;
- iocb->ki_dtor = sock_aio_dtor;
- }
+ if (!is_sync_kiocb(iocb))
+ BUG();
siocb->kiocb = iocb;
iocb->private = siocb;
@@ -931,7 +922,7 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
if (pos != 0)
return -ESPIPE;
- if (iocb->ki_left == 0) /* Match SYS5 behaviour */
+ if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
return 0;
@@ -3072,12 +3063,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
uifmap32 = &uifr32->ifr_ifru.ifru_map;
err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
- err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
- err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
- err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
- err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
- err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
- err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
+ err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
+ err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
+ err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
+ err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
+ err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
+ err |= get_user(ifr.ifr_map.port, &uifmap32->port);
if (err)
return -EFAULT;
@@ -3088,12 +3079,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
if (cmd == SIOCGIFMAP && !err) {
err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
- err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
- err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
- err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
- err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
- err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
- err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
+ err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
+ err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
+ err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
+ err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
+ err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
+ err |= put_user(ifr.ifr_map.port, &uifmap32->port);
if (err)
err = -EFAULT;
}
@@ -3167,25 +3158,25 @@ static int routing_ioctl(struct net *net, struct socket *sock,
struct in6_rtmsg32 __user *ur6 = argp;
ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
3 * sizeof(struct in6_addr));
- ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
- ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
- ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
- ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
- ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
- ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
- ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
+ ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
+ ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
+ ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
+ ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
+ ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
+ ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
+ ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
r = (void *) &r6;
} else { /* ipv4 */
struct rtentry32 __user *ur4 = argp;
ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
3 * sizeof(struct sockaddr));
- ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
- ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
- ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
- ret |= __get_user(r4.rt_window, &(ur4->rt_window));
- ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
- ret |= __get_user(rtdev, &(ur4->rt_dev));
+ ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
+ ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
+ ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
+ ret |= get_user(r4.rt_window, &(ur4->rt_window));
+ ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
+ ret |= get_user(rtdev, &(ur4->rt_dev));
if (rtdev) {
ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
r4.rt_dev = (char __user __force *)devname;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index ed2fdd210c0b..5285ead196c0 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -250,11 +250,11 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size)
EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
struct rpc_auth *
-rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
+rpcauth_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
struct rpc_auth *auth;
const struct rpc_authops *ops;
- u32 flavor = pseudoflavor_to_flavor(pseudoflavor);
+ u32 flavor = pseudoflavor_to_flavor(args->pseudoflavor);
auth = ERR_PTR(-EINVAL);
if (flavor >= RPC_AUTH_MAXFLAVOR)
@@ -269,7 +269,7 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
goto out;
}
spin_unlock(&rpc_authflavor_lock);
- auth = ops->create(clnt, pseudoflavor);
+ auth = ops->create(args, clnt);
module_put(ops->owner);
if (IS_ERR(auth))
return auth;
@@ -343,6 +343,27 @@ out_nocache:
EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
/*
+ * Setup a credential key lifetime timeout notification
+ */
+int
+rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred)
+{
+ if (!cred->cr_auth->au_ops->key_timeout)
+ return 0;
+ return cred->cr_auth->au_ops->key_timeout(auth, cred);
+}
+EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify);
+
+bool
+rpcauth_cred_key_to_expire(struct rpc_cred *cred)
+{
+ if (!cred->cr_ops->crkey_to_expire)
+ return false;
+ return cred->cr_ops->crkey_to_expire(cred);
+}
+EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
+
+/*
* Destroy a list of credentials
*/
static inline
@@ -413,12 +434,13 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
/*
* Remove stale credentials. Avoid sleeping inside the loop.
*/
-static int
+static long
rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
{
spinlock_t *cache_lock;
struct rpc_cred *cred, *next;
unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
+ long freed = 0;
list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
@@ -430,10 +452,11 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
*/
if (time_in_range(cred->cr_expire, expired, jiffies) &&
test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
- return 0;
+ break;
list_del_init(&cred->cr_lru);
number_cred_unused--;
+ freed++;
if (atomic_read(&cred->cr_count) != 0)
continue;
@@ -446,29 +469,39 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
}
spin_unlock(cache_lock);
}
- return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
+ return freed;
}
/*
* Run memory cache shrinker.
*/
-static int
-rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long
+rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+
{
LIST_HEAD(free);
- int res;
- int nr_to_scan = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
+ unsigned long freed;
- if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
- return (nr_to_scan == 0) ? 0 : -1;
+ if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
+ return SHRINK_STOP;
+
+ /* nothing left, don't come back */
if (list_empty(&cred_unused))
- return 0;
+ return SHRINK_STOP;
+
spin_lock(&rpc_credcache_lock);
- res = rpcauth_prune_expired(&free, nr_to_scan);
+ freed = rpcauth_prune_expired(&free, sc->nr_to_scan);
spin_unlock(&rpc_credcache_lock);
rpcauth_destroy_credlist(&free);
- return res;
+
+ return freed;
+}
+
+static unsigned long
+rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+
+{
+ return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
}
/*
@@ -784,7 +817,8 @@ rpcauth_uptodatecred(struct rpc_task *task)
}
static struct shrinker rpc_cred_shrinker = {
- .shrink = rpcauth_cache_shrinker,
+ .count_objects = rpcauth_cache_shrink_count,
+ .scan_objects = rpcauth_cache_shrink_scan,
.seeks = DEFAULT_SEEKS,
};
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index b6badafc6494..ed04869b2d4f 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -89,6 +89,7 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
gcred->acred.uid = acred->uid;
gcred->acred.gid = acred->gid;
gcred->acred.group_info = acred->group_info;
+ gcred->acred.ac_flags = 0;
if (gcred->acred.group_info != NULL)
get_group_info(gcred->acred.group_info);
gcred->acred.machine_cred = acred->machine_cred;
@@ -182,11 +183,78 @@ void rpc_destroy_generic_auth(void)
rpcauth_destroy_credcache(&generic_auth);
}
+/*
+ * Test the the current time (now) against the underlying credential key expiry
+ * minus a timeout and setup notification.
+ *
+ * The normal case:
+ * If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set
+ * the RPC_CRED_NOTIFY_TIMEOUT flag to setup the underlying credential
+ * rpc_credops crmatch routine to notify this generic cred when it's key
+ * expiration is within RPC_KEY_EXPIRE_TIMEO, and return 0.
+ *
+ * The error case:
+ * If the underlying cred lookup fails, return -EACCES.
+ *
+ * The 'almost' error case:
+ * If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within
+ * key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_EXPIRE_SOON bit
+ * on the acred ac_flags and return 0.
+ */
+static int
+generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
+{
+ struct auth_cred *acred = &container_of(cred, struct generic_cred,
+ gc_base)->acred;
+ struct rpc_cred *tcred;
+ int ret = 0;
+
+
+ /* Fast track for non crkey_timeout (no key) underlying credentials */
+ if (test_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags))
+ return 0;
+
+ /* Fast track for the normal case */
+ if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags))
+ return 0;
+
+ /* lookup_cred either returns a valid referenced rpc_cred, or PTR_ERR */
+ tcred = auth->au_ops->lookup_cred(auth, acred, 0);
+ if (IS_ERR(tcred))
+ return -EACCES;
+
+ if (!tcred->cr_ops->crkey_timeout) {
+ set_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags);
+ ret = 0;
+ goto out_put;
+ }
+
+ /* Test for the almost error case */
+ ret = tcred->cr_ops->crkey_timeout(tcred);
+ if (ret != 0) {
+ set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
+ ret = 0;
+ } else {
+ /* In case underlying cred key has been reset */
+ if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON,
+ &acred->ac_flags))
+ dprintk("RPC: UID %d Credential key reset\n",
+ from_kuid(&init_user_ns, tcred->cr_uid));
+ /* set up fasttrack for the normal case */
+ set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
+ }
+
+out_put:
+ put_rpccred(tcred);
+ return ret;
+}
+
static const struct rpc_authops generic_auth_ops = {
.owner = THIS_MODULE,
.au_name = "Generic",
.lookup_cred = generic_lookup_cred,
.crcreate = generic_create_cred,
+ .key_timeout = generic_key_timeout,
};
static struct rpc_auth generic_auth = {
@@ -194,9 +262,23 @@ static struct rpc_auth generic_auth = {
.au_count = ATOMIC_INIT(0),
};
+static bool generic_key_to_expire(struct rpc_cred *cred)
+{
+ struct auth_cred *acred = &container_of(cred, struct generic_cred,
+ gc_base)->acred;
+ bool ret;
+
+ get_rpccred(cred);
+ ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
+ put_rpccred(cred);
+
+ return ret;
+}
+
static const struct rpc_credops generic_credops = {
.cr_name = "Generic cred",
.crdestroy = generic_destroy_cred,
.crbind = generic_bind_cred,
.crmatch = generic_match,
+ .crkey_to_expire = generic_key_to_expire,
};
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index fc2f78d6a9b4..084656671d6e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -51,6 +51,7 @@
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/gss_api.h>
#include <asm/uaccess.h>
+#include <linux/hashtable.h>
#include "../netns.h"
@@ -62,6 +63,9 @@ static const struct rpc_credops gss_nullops;
#define GSS_RETRY_EXPIRED 5
static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
+#define GSS_KEY_EXPIRE_TIMEO 240
+static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO;
+
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
@@ -71,19 +75,33 @@ static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
* using integrity (two 4-byte integers): */
#define GSS_VERF_SLACK 100
+static DEFINE_HASHTABLE(gss_auth_hash_table, 4);
+static DEFINE_SPINLOCK(gss_auth_hash_lock);
+
+struct gss_pipe {
+ struct rpc_pipe_dir_object pdo;
+ struct rpc_pipe *pipe;
+ struct rpc_clnt *clnt;
+ const char *name;
+ struct kref kref;
+};
+
struct gss_auth {
struct kref kref;
+ struct hlist_node hash;
struct rpc_auth rpc_auth;
struct gss_api_mech *mech;
enum rpc_gss_svc service;
struct rpc_clnt *client;
+ struct net *net;
/*
* There are two upcall pipes; dentry[1], named "gssd", is used
* for the new text-based upcall; dentry[0] is named after the
* mechanism (for example, "krb5") and exists for
* backwards-compatibility with older gssd's.
*/
- struct rpc_pipe *pipe[2];
+ struct gss_pipe *gss_pipe[2];
+ const char *target_name;
};
/* pipe_version >= 0 if and only if someone has a pipe open. */
@@ -294,7 +312,7 @@ static void put_pipe_version(struct net *net)
static void
gss_release_msg(struct gss_upcall_msg *gss_msg)
{
- struct net *net = rpc_net_ns(gss_msg->auth->client);
+ struct net *net = gss_msg->auth->net;
if (!atomic_dec_and_test(&gss_msg->count))
return;
put_pipe_version(net);
@@ -406,8 +424,8 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
}
static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
- struct rpc_clnt *clnt,
- const char *service_name)
+ const char *service_name,
+ const char *target_name)
{
struct gss_api_mech *mech = gss_msg->auth->mech;
char *p = gss_msg->databuf;
@@ -417,8 +435,8 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
mech->gm_name,
from_kuid(&init_user_ns, gss_msg->uid));
p += gss_msg->msg.len;
- if (clnt->cl_principal) {
- len = sprintf(p, "target=%s ", clnt->cl_principal);
+ if (target_name) {
+ len = sprintf(p, "target=%s ", target_name);
p += len;
gss_msg->msg.len += len;
}
@@ -439,21 +457,8 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
BUG_ON(gss_msg->msg.len > UPCALL_BUF_LEN);
}
-static void gss_encode_msg(struct gss_upcall_msg *gss_msg,
- struct rpc_clnt *clnt,
- const char *service_name)
-{
- struct net *net = rpc_net_ns(clnt);
- struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
-
- if (sn->pipe_version == 0)
- gss_encode_v0_msg(gss_msg);
- else /* pipe_version == 1 */
- gss_encode_v1_msg(gss_msg, clnt, service_name);
-}
-
static struct gss_upcall_msg *
-gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt,
+gss_alloc_msg(struct gss_auth *gss_auth,
kuid_t uid, const char *service_name)
{
struct gss_upcall_msg *gss_msg;
@@ -462,31 +467,36 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt,
gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);
if (gss_msg == NULL)
return ERR_PTR(-ENOMEM);
- vers = get_pipe_version(rpc_net_ns(clnt));
+ vers = get_pipe_version(gss_auth->net);
if (vers < 0) {
kfree(gss_msg);
return ERR_PTR(vers);
}
- gss_msg->pipe = gss_auth->pipe[vers];
+ gss_msg->pipe = gss_auth->gss_pipe[vers]->pipe;
INIT_LIST_HEAD(&gss_msg->list);
rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
init_waitqueue_head(&gss_msg->waitqueue);
atomic_set(&gss_msg->count, 1);
gss_msg->uid = uid;
gss_msg->auth = gss_auth;
- gss_encode_msg(gss_msg, clnt, service_name);
+ switch (vers) {
+ case 0:
+ gss_encode_v0_msg(gss_msg);
+ default:
+ gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
+ };
return gss_msg;
}
static struct gss_upcall_msg *
-gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cred *cred)
+gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred)
{
struct gss_cred *gss_cred = container_of(cred,
struct gss_cred, gc_base);
struct gss_upcall_msg *gss_new, *gss_msg;
kuid_t uid = cred->cr_uid;
- gss_new = gss_alloc_msg(gss_auth, clnt, uid, gss_cred->gc_principal);
+ gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal);
if (IS_ERR(gss_new))
return gss_new;
gss_msg = gss_add_msg(gss_new);
@@ -527,7 +537,7 @@ gss_refresh_upcall(struct rpc_task *task)
dprintk("RPC: %5u %s for uid %u\n",
task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid));
- gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred);
+ gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
/* XXX: warning on the first, under the assumption we
* shouldn't normally hit this case on a refresh. */
@@ -566,7 +576,7 @@ out:
static inline int
gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
{
- struct net *net = rpc_net_ns(gss_auth->client);
+ struct net *net = gss_auth->net;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_pipe *pipe;
struct rpc_cred *cred = &gss_cred->gc_base;
@@ -583,7 +593,7 @@ retry:
timeout = 15 * HZ;
if (!sn->gssd_running)
timeout = HZ >> 2;
- gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred);
+ gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
err = wait_event_interruptible_timeout(pipe_version_waitqueue,
sn->pipe_version >= 0, timeout);
@@ -797,83 +807,153 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
}
}
-static void gss_pipes_dentries_destroy(struct rpc_auth *auth)
+static void gss_pipe_dentry_destroy(struct dentry *dir,
+ struct rpc_pipe_dir_object *pdo)
{
- struct gss_auth *gss_auth;
+ struct gss_pipe *gss_pipe = pdo->pdo_data;
+ struct rpc_pipe *pipe = gss_pipe->pipe;
- gss_auth = container_of(auth, struct gss_auth, rpc_auth);
- if (gss_auth->pipe[0]->dentry)
- rpc_unlink(gss_auth->pipe[0]->dentry);
- if (gss_auth->pipe[1]->dentry)
- rpc_unlink(gss_auth->pipe[1]->dentry);
+ if (pipe->dentry != NULL) {
+ rpc_unlink(pipe->dentry);
+ pipe->dentry = NULL;
+ }
}
-static int gss_pipes_dentries_create(struct rpc_auth *auth)
+static int gss_pipe_dentry_create(struct dentry *dir,
+ struct rpc_pipe_dir_object *pdo)
{
- int err;
- struct gss_auth *gss_auth;
- struct rpc_clnt *clnt;
+ struct gss_pipe *p = pdo->pdo_data;
+ struct dentry *dentry;
- gss_auth = container_of(auth, struct gss_auth, rpc_auth);
- clnt = gss_auth->client;
-
- gss_auth->pipe[1]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry,
- "gssd",
- clnt, gss_auth->pipe[1]);
- if (IS_ERR(gss_auth->pipe[1]->dentry))
- return PTR_ERR(gss_auth->pipe[1]->dentry);
- gss_auth->pipe[0]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry,
- gss_auth->mech->gm_name,
- clnt, gss_auth->pipe[0]);
- if (IS_ERR(gss_auth->pipe[0]->dentry)) {
- err = PTR_ERR(gss_auth->pipe[0]->dentry);
- goto err_unlink_pipe_1;
- }
+ dentry = rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ p->pipe->dentry = dentry;
return 0;
+}
-err_unlink_pipe_1:
- rpc_unlink(gss_auth->pipe[1]->dentry);
- return err;
+static const struct rpc_pipe_dir_object_ops gss_pipe_dir_object_ops = {
+ .create = gss_pipe_dentry_create,
+ .destroy = gss_pipe_dentry_destroy,
+};
+
+static struct gss_pipe *gss_pipe_alloc(struct rpc_clnt *clnt,
+ const char *name,
+ const struct rpc_pipe_ops *upcall_ops)
+{
+ struct gss_pipe *p;
+ int err = -ENOMEM;
+
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (p == NULL)
+ goto err;
+ p->pipe = rpc_mkpipe_data(upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+ if (IS_ERR(p->pipe)) {
+ err = PTR_ERR(p->pipe);
+ goto err_free_gss_pipe;
+ }
+ p->name = name;
+ p->clnt = clnt;
+ kref_init(&p->kref);
+ rpc_init_pipe_dir_object(&p->pdo,
+ &gss_pipe_dir_object_ops,
+ p);
+ return p;
+err_free_gss_pipe:
+ kfree(p);
+err:
+ return ERR_PTR(err);
+}
+
+struct gss_alloc_pdo {
+ struct rpc_clnt *clnt;
+ const char *name;
+ const struct rpc_pipe_ops *upcall_ops;
+};
+
+static int gss_pipe_match_pdo(struct rpc_pipe_dir_object *pdo, void *data)
+{
+ struct gss_pipe *gss_pipe;
+ struct gss_alloc_pdo *args = data;
+
+ if (pdo->pdo_ops != &gss_pipe_dir_object_ops)
+ return 0;
+ gss_pipe = container_of(pdo, struct gss_pipe, pdo);
+ if (strcmp(gss_pipe->name, args->name) != 0)
+ return 0;
+ if (!kref_get_unless_zero(&gss_pipe->kref))
+ return 0;
+ return 1;
+}
+
+static struct rpc_pipe_dir_object *gss_pipe_alloc_pdo(void *data)
+{
+ struct gss_pipe *gss_pipe;
+ struct gss_alloc_pdo *args = data;
+
+ gss_pipe = gss_pipe_alloc(args->clnt, args->name, args->upcall_ops);
+ if (!IS_ERR(gss_pipe))
+ return &gss_pipe->pdo;
+ return NULL;
}
-static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt,
- struct rpc_auth *auth)
+static struct gss_pipe *gss_pipe_get(struct rpc_clnt *clnt,
+ const char *name,
+ const struct rpc_pipe_ops *upcall_ops)
{
struct net *net = rpc_net_ns(clnt);
- struct super_block *sb;
+ struct rpc_pipe_dir_object *pdo;
+ struct gss_alloc_pdo args = {
+ .clnt = clnt,
+ .name = name,
+ .upcall_ops = upcall_ops,
+ };
- sb = rpc_get_sb_net(net);
- if (sb) {
- if (clnt->cl_dentry)
- gss_pipes_dentries_destroy(auth);
- rpc_put_sb_net(net);
- }
+ pdo = rpc_find_or_alloc_pipe_dir_object(net,
+ &clnt->cl_pipedir_objects,
+ gss_pipe_match_pdo,
+ gss_pipe_alloc_pdo,
+ &args);
+ if (pdo != NULL)
+ return container_of(pdo, struct gss_pipe, pdo);
+ return ERR_PTR(-ENOMEM);
}
-static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt,
- struct rpc_auth *auth)
+static void __gss_pipe_free(struct gss_pipe *p)
{
+ struct rpc_clnt *clnt = p->clnt;
struct net *net = rpc_net_ns(clnt);
- struct super_block *sb;
- int err = 0;
- sb = rpc_get_sb_net(net);
- if (sb) {
- if (clnt->cl_dentry)
- err = gss_pipes_dentries_create(auth);
- rpc_put_sb_net(net);
- }
- return err;
+ rpc_remove_pipe_dir_object(net,
+ &clnt->cl_pipedir_objects,
+ &p->pdo);
+ rpc_destroy_pipe_data(p->pipe);
+ kfree(p);
+}
+
+static void __gss_pipe_release(struct kref *kref)
+{
+ struct gss_pipe *p = container_of(kref, struct gss_pipe, kref);
+
+ __gss_pipe_free(p);
+}
+
+static void gss_pipe_free(struct gss_pipe *p)
+{
+ if (p != NULL)
+ kref_put(&p->kref, __gss_pipe_release);
}
/*
* NOTE: we have the opportunity to use different
* parameters based on the input flavor (which must be a pseudoflavor)
*/
-static struct rpc_auth *
-gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+static struct gss_auth *
+gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
+ rpc_authflavor_t flavor = args->pseudoflavor;
struct gss_auth *gss_auth;
+ struct gss_pipe *gss_pipe;
struct rpc_auth * auth;
int err = -ENOMEM; /* XXX? */
@@ -883,12 +963,20 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
return ERR_PTR(err);
if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
goto out_dec;
+ INIT_HLIST_NODE(&gss_auth->hash);
+ gss_auth->target_name = NULL;
+ if (args->target_name) {
+ gss_auth->target_name = kstrdup(args->target_name, GFP_KERNEL);
+ if (gss_auth->target_name == NULL)
+ goto err_free;
+ }
gss_auth->client = clnt;
+ gss_auth->net = get_net(rpc_net_ns(clnt));
err = -EINVAL;
gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
if (!gss_auth->mech) {
dprintk("RPC: Pseudoflavor %d not found!\n", flavor);
- goto err_free;
+ goto err_put_net;
}
gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
if (gss_auth->service == 0)
@@ -901,42 +989,41 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
atomic_set(&auth->au_count, 1);
kref_init(&gss_auth->kref);
+ err = rpcauth_init_credcache(auth);
+ if (err)
+ goto err_put_mech;
/*
* Note: if we created the old pipe first, then someone who
* examined the directory at the right moment might conclude
* that we supported only the old pipe. So we instead create
* the new pipe first.
*/
- gss_auth->pipe[1] = rpc_mkpipe_data(&gss_upcall_ops_v1,
- RPC_PIPE_WAIT_FOR_OPEN);
- if (IS_ERR(gss_auth->pipe[1])) {
- err = PTR_ERR(gss_auth->pipe[1]);
- goto err_put_mech;
+ gss_pipe = gss_pipe_get(clnt, "gssd", &gss_upcall_ops_v1);
+ if (IS_ERR(gss_pipe)) {
+ err = PTR_ERR(gss_pipe);
+ goto err_destroy_credcache;
}
+ gss_auth->gss_pipe[1] = gss_pipe;
- gss_auth->pipe[0] = rpc_mkpipe_data(&gss_upcall_ops_v0,
- RPC_PIPE_WAIT_FOR_OPEN);
- if (IS_ERR(gss_auth->pipe[0])) {
- err = PTR_ERR(gss_auth->pipe[0]);
+ gss_pipe = gss_pipe_get(clnt, gss_auth->mech->gm_name,
+ &gss_upcall_ops_v0);
+ if (IS_ERR(gss_pipe)) {
+ err = PTR_ERR(gss_pipe);
goto err_destroy_pipe_1;
}
- err = gss_pipes_dentries_create_net(clnt, auth);
- if (err)
- goto err_destroy_pipe_0;
- err = rpcauth_init_credcache(auth);
- if (err)
- goto err_unlink_pipes;
+ gss_auth->gss_pipe[0] = gss_pipe;
- return auth;
-err_unlink_pipes:
- gss_pipes_dentries_destroy_net(clnt, auth);
-err_destroy_pipe_0:
- rpc_destroy_pipe_data(gss_auth->pipe[0]);
+ return gss_auth;
err_destroy_pipe_1:
- rpc_destroy_pipe_data(gss_auth->pipe[1]);
+ gss_pipe_free(gss_auth->gss_pipe[1]);
+err_destroy_credcache:
+ rpcauth_destroy_credcache(auth);
err_put_mech:
gss_mech_put(gss_auth->mech);
+err_put_net:
+ put_net(gss_auth->net);
err_free:
+ kfree(gss_auth->target_name);
kfree(gss_auth);
out_dec:
module_put(THIS_MODULE);
@@ -946,10 +1033,11 @@ out_dec:
static void
gss_free(struct gss_auth *gss_auth)
{
- gss_pipes_dentries_destroy_net(gss_auth->client, &gss_auth->rpc_auth);
- rpc_destroy_pipe_data(gss_auth->pipe[0]);
- rpc_destroy_pipe_data(gss_auth->pipe[1]);
+ gss_pipe_free(gss_auth->gss_pipe[0]);
+ gss_pipe_free(gss_auth->gss_pipe[1]);
gss_mech_put(gss_auth->mech);
+ put_net(gss_auth->net);
+ kfree(gss_auth->target_name);
kfree(gss_auth);
module_put(THIS_MODULE);
@@ -966,18 +1054,113 @@ gss_free_callback(struct kref *kref)
static void
gss_destroy(struct rpc_auth *auth)
{
- struct gss_auth *gss_auth;
+ struct gss_auth *gss_auth = container_of(auth,
+ struct gss_auth, rpc_auth);
dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
auth, auth->au_flavor);
+ if (hash_hashed(&gss_auth->hash)) {
+ spin_lock(&gss_auth_hash_lock);
+ hash_del(&gss_auth->hash);
+ spin_unlock(&gss_auth_hash_lock);
+ }
+
+ gss_pipe_free(gss_auth->gss_pipe[0]);
+ gss_auth->gss_pipe[0] = NULL;
+ gss_pipe_free(gss_auth->gss_pipe[1]);
+ gss_auth->gss_pipe[1] = NULL;
rpcauth_destroy_credcache(auth);
- gss_auth = container_of(auth, struct gss_auth, rpc_auth);
kref_put(&gss_auth->kref, gss_free_callback);
}
/*
+ * Auths may be shared between rpc clients that were cloned from a
+ * common client with the same xprt, if they also share the flavor and
+ * target_name.
+ *
+ * The auth is looked up from the oldest parent sharing the same
+ * cl_xprt, and the auth itself references only that common parent
+ * (which is guaranteed to last as long as any of its descendants).
+ */
+static struct gss_auth *
+gss_auth_find_or_add_hashed(struct rpc_auth_create_args *args,
+ struct rpc_clnt *clnt,
+ struct gss_auth *new)
+{
+ struct gss_auth *gss_auth;
+ unsigned long hashval = (unsigned long)clnt;
+
+ spin_lock(&gss_auth_hash_lock);
+ hash_for_each_possible(gss_auth_hash_table,
+ gss_auth,
+ hash,
+ hashval) {
+ if (gss_auth->client != clnt)
+ continue;
+ if (gss_auth->rpc_auth.au_flavor != args->pseudoflavor)
+ continue;
+ if (gss_auth->target_name != args->target_name) {
+ if (gss_auth->target_name == NULL)
+ continue;
+ if (args->target_name == NULL)
+ continue;
+ if (strcmp(gss_auth->target_name, args->target_name))
+ continue;
+ }
+ if (!atomic_inc_not_zero(&gss_auth->rpc_auth.au_count))
+ continue;
+ goto out;
+ }
+ if (new)
+ hash_add(gss_auth_hash_table, &new->hash, hashval);
+ gss_auth = new;
+out:
+ spin_unlock(&gss_auth_hash_lock);
+ return gss_auth;
+}
+
+static struct gss_auth *
+gss_create_hashed(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+{
+ struct gss_auth *gss_auth;
+ struct gss_auth *new;
+
+ gss_auth = gss_auth_find_or_add_hashed(args, clnt, NULL);
+ if (gss_auth != NULL)
+ goto out;
+ new = gss_create_new(args, clnt);
+ if (IS_ERR(new))
+ return new;
+ gss_auth = gss_auth_find_or_add_hashed(args, clnt, new);
+ if (gss_auth != new)
+ gss_destroy(&new->rpc_auth);
+out:
+ return gss_auth;
+}
+
+static struct rpc_auth *
+gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+{
+ struct gss_auth *gss_auth;
+ struct rpc_xprt *xprt = rcu_access_pointer(clnt->cl_xprt);
+
+ while (clnt != clnt->cl_parent) {
+ struct rpc_clnt *parent = clnt->cl_parent;
+ /* Find the original parent for this transport */
+ if (rcu_access_pointer(parent->cl_xprt) != xprt)
+ break;
+ clnt = parent;
+ }
+
+ gss_auth = gss_create_hashed(args, clnt);
+ if (IS_ERR(gss_auth))
+ return ERR_CAST(gss_auth);
+ return &gss_auth->rpc_auth;
+}
+
+/*
* gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call
* to the server with the GSS control procedure field set to
* RPC_GSS_PROC_DESTROY. This should normally cause the server to release
@@ -1126,10 +1309,32 @@ gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred)
return err;
}
+/*
+ * Returns -EACCES if GSS context is NULL or will expire within the
+ * timeout (miliseconds)
+ */
+static int
+gss_key_timeout(struct rpc_cred *rc)
+{
+ struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
+ unsigned long now = jiffies;
+ unsigned long expire;
+
+ if (gss_cred->gc_ctx == NULL)
+ return -EACCES;
+
+ expire = gss_cred->gc_ctx->gc_expiry - (gss_key_expire_timeo * HZ);
+
+ if (time_after(now, expire))
+ return -EACCES;
+ return 0;
+}
+
static int
gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
{
struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
+ int ret;
if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
goto out;
@@ -1142,11 +1347,26 @@ out:
if (acred->principal != NULL) {
if (gss_cred->gc_principal == NULL)
return 0;
- return strcmp(acred->principal, gss_cred->gc_principal) == 0;
+ ret = strcmp(acred->principal, gss_cred->gc_principal) == 0;
+ goto check_expire;
}
if (gss_cred->gc_principal != NULL)
return 0;
- return uid_eq(rc->cr_uid, acred->uid);
+ ret = uid_eq(rc->cr_uid, acred->uid);
+
+check_expire:
+ if (ret == 0)
+ return ret;
+
+ /* Notify acred users of GSS context expiration timeout */
+ if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags) &&
+ (gss_key_timeout(rc) != 0)) {
+ /* test will now be done from generic cred */
+ test_and_clear_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
+ /* tell NFS layer that key will expire soon */
+ set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
+ }
+ return ret;
}
/*
@@ -1292,6 +1512,7 @@ gss_validate(struct rpc_task *task, __be32 *p)
struct xdr_netobj mic;
u32 flav,len;
u32 maj_stat;
+ __be32 *ret = ERR_PTR(-EIO);
dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
@@ -1307,6 +1528,7 @@ gss_validate(struct rpc_task *task, __be32 *p)
mic.data = (u8 *)p;
mic.len = len;
+ ret = ERR_PTR(-EACCES);
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
@@ -1324,8 +1546,9 @@ gss_validate(struct rpc_task *task, __be32 *p)
return p + XDR_QUADLEN(len);
out_bad:
gss_put_ctx(ctx);
- dprintk("RPC: %5u %s failed.\n", task->tk_pid, __func__);
- return NULL;
+ dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
+ PTR_ERR(ret));
+ return ret;
}
static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
@@ -1657,8 +1880,6 @@ static const struct rpc_authops authgss_ops = {
.destroy = gss_destroy,
.lookup_cred = gss_lookup_cred,
.crcreate = gss_create_cred,
- .pipes_create = gss_pipes_dentries_create,
- .pipes_destroy = gss_pipes_dentries_destroy,
.list_pseudoflavors = gss_mech_list_pseudoflavors,
.info2flavor = gss_mech_info2flavor,
.flavor2info = gss_mech_flavor2info,
@@ -1675,6 +1896,7 @@ static const struct rpc_credops gss_credops = {
.crvalidate = gss_validate,
.crwrap_req = gss_wrap_req,
.crunwrap_resp = gss_unwrap_resp,
+ .crkey_timeout = gss_key_timeout,
};
static const struct rpc_credops gss_nullops = {
@@ -1762,5 +1984,12 @@ module_param_named(expired_cred_retry_delay,
MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until "
"the RPC engine retries an expired credential");
+module_param_named(key_expire_timeo,
+ gss_key_expire_timeo,
+ uint, 0644);
+MODULE_PARM_DESC(key_expire_timeo, "Time (in seconds) at the end of a "
+ "credential keys lifetime where the NFS layer cleans up "
+ "prior to key expiration");
+
module_init(init_rpcsec_gss)
module_exit(exit_rpcsec_gss)
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index af7ffd447fee..f1eb0d16666c 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -213,6 +213,26 @@ static int gssp_call(struct net *net, struct rpc_message *msg)
return status;
}
+static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg)
+{
+ int i;
+
+ for (i = 0; i < arg->npages && arg->pages[i]; i++)
+ __free_page(arg->pages[i]);
+}
+
+static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
+{
+ arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE);
+ arg->pages = kzalloc(arg->npages * sizeof(struct page *), GFP_KERNEL);
+ /*
+ * XXX: actual pages are allocated by xdr layer in
+ * xdr_partial_copy_from_skb.
+ */
+ if (!arg->pages)
+ return -ENOMEM;
+ return 0;
+}
/*
* Public functions
@@ -261,10 +281,16 @@ int gssp_accept_sec_context_upcall(struct net *net,
arg.context_handle = &ctxh;
res.output_token->len = GSSX_max_output_token_sz;
+ ret = gssp_alloc_receive_pages(&arg);
+ if (ret)
+ return ret;
+
/* use nfs/ for targ_name ? */
ret = gssp_call(net, &msg);
+ gssp_free_receive_pages(&arg);
+
/* we need to fetch all data even in case of error so
* that we can free special strctures is they have been allocated */
data->major_status = res.status.major_status;
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 3c85d1c8a028..f0f78c5f1c7d 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -166,14 +166,15 @@ static int dummy_dec_opt_array(struct xdr_stream *xdr,
return 0;
}
-static int get_s32(void **p, void *max, s32 *res)
+static int get_host_u32(struct xdr_stream *xdr, u32 *res)
{
- void *base = *p;
- void *next = (void *)((char *)base + sizeof(s32));
- if (unlikely(next > max || next < base))
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (!p)
return -EINVAL;
- memcpy(res, base, sizeof(s32));
- *p = next;
+ /* Contents of linux creds are all host-endian: */
+ memcpy(res, p, sizeof(u32));
return 0;
}
@@ -182,9 +183,9 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
{
u32 length;
__be32 *p;
- void *q, *end;
- s32 tmp;
- int N, i, err;
+ u32 tmp;
+ u32 N;
+ int i, err;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
@@ -192,33 +193,28 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
length = be32_to_cpup(p);
- /* FIXME: we do not want to use the scratch buffer for this one
- * may need to use functions that allows us to access an io vector
- * directly */
- p = xdr_inline_decode(xdr, length);
- if (unlikely(p == NULL))
+ if (length > (3 + NGROUPS_MAX) * sizeof(u32))
return -ENOSPC;
- q = p;
- end = q + length;
-
/* uid */
- err = get_s32(&q, end, &tmp);
+ err = get_host_u32(xdr, &tmp);
if (err)
return err;
creds->cr_uid = make_kuid(&init_user_ns, tmp);
/* gid */
- err = get_s32(&q, end, &tmp);
+ err = get_host_u32(xdr, &tmp);
if (err)
return err;
creds->cr_gid = make_kgid(&init_user_ns, tmp);
/* number of additional gid's */
- err = get_s32(&q, end, &tmp);
+ err = get_host_u32(xdr, &tmp);
if (err)
return err;
N = tmp;
+ if ((3 + N) * sizeof(u32) != length)
+ return -EINVAL;
creds->cr_group_info = groups_alloc(N);
if (creds->cr_group_info == NULL)
return -ENOMEM;
@@ -226,7 +222,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
/* gid's */
for (i = 0; i < N; i++) {
kgid_t kgid;
- err = get_s32(&q, end, &tmp);
+ err = get_host_u32(xdr, &tmp);
if (err)
goto out_free_groups;
err = -EINVAL;
@@ -784,6 +780,9 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
/* arg->options */
err = dummy_enc_opt_array(xdr, &arg->options);
+ xdr_inline_pages(&req->rq_rcv_buf,
+ PAGE_SIZE/2 /* pretty arbitrary */,
+ arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE);
done:
if (err)
dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
index 1c98b27d870c..685a688f3d8a 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.h
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -147,6 +147,8 @@ struct gssx_arg_accept_sec_context {
struct gssx_cb *input_cb;
u32 ret_deleg_cred;
struct gssx_option_array options;
+ struct page **pages;
+ unsigned int npages;
};
struct gssx_res_accept_sec_context {
@@ -240,7 +242,8 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
2 * GSSX_max_princ_sz + \
8 + 8 + 4 + 4 + 4)
#define GSSX_max_output_token_sz 1024
-#define GSSX_max_creds_sz (4 + 4 + 4 + NGROUPS_MAX * 4)
+/* grouplist not included; we allocate separate pages for that: */
+#define GSSX_max_creds_sz (4 + 4 + 4 /* + NGROUPS_MAX*4 */)
#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \
GSSX_default_ctx_sz + \
GSSX_max_output_token_sz + \
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index a5c36c01707b..f0ebe07978a2 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -18,7 +18,7 @@ static struct rpc_auth null_auth;
static struct rpc_cred null_cred;
static struct rpc_auth *
-nul_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+nul_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
atomic_inc(&null_auth.au_count);
return &null_auth;
@@ -88,13 +88,13 @@ nul_validate(struct rpc_task *task, __be32 *p)
flavor = ntohl(*p++);
if (flavor != RPC_AUTH_NULL) {
printk("RPC: bad verf flavor: %u\n", flavor);
- return NULL;
+ return ERR_PTR(-EIO);
}
size = ntohl(*p++);
if (size != 0) {
printk("RPC: bad verf size: %u\n", size);
- return NULL;
+ return ERR_PTR(-EIO);
}
return p;
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index dc37021fc3e5..d5d692366294 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -33,7 +33,7 @@ static struct rpc_auth unix_auth;
static const struct rpc_credops unix_credops;
static struct rpc_auth *
-unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+unx_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
dprintk("RPC: creating UNIX authenticator for client %p\n",
clnt);
@@ -192,13 +192,13 @@ unx_validate(struct rpc_task *task, __be32 *p)
flavor != RPC_AUTH_UNIX &&
flavor != RPC_AUTH_SHORT) {
printk("RPC: bad verf flavor: %u\n", flavor);
- return NULL;
+ return ERR_PTR(-EIO);
}
size = ntohl(*p++);
if (size > RPC_MAX_AUTH_SIZE) {
printk("RPC: giant verf size: %u\n", size);
- return NULL;
+ return ERR_PTR(-EIO);
}
task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
p += (size >> 2);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ecbc4e3d83ad..77479606a971 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -102,12 +102,7 @@ static void rpc_unregister_client(struct rpc_clnt *clnt)
static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
{
- if (clnt->cl_dentry) {
- if (clnt->cl_auth && clnt->cl_auth->au_ops->pipes_destroy)
- clnt->cl_auth->au_ops->pipes_destroy(clnt->cl_auth);
- rpc_remove_client_dir(clnt->cl_dentry);
- }
- clnt->cl_dentry = NULL;
+ rpc_remove_client_dir(clnt);
}
static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
@@ -123,10 +118,10 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
}
static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
- struct rpc_clnt *clnt,
- const char *dir_name)
+ struct rpc_clnt *clnt)
{
static uint32_t clntid;
+ const char *dir_name = clnt->cl_program->pipe_dir_name;
char name[15];
struct dentry *dir, *dentry;
@@ -153,28 +148,35 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
}
static int
-rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name,
- struct super_block *pipefs_sb)
+rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt)
{
struct dentry *dentry;
- clnt->cl_dentry = NULL;
- if (dir_name == NULL)
- return 0;
- dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- clnt->cl_dentry = dentry;
+ if (clnt->cl_program->pipe_dir_name != NULL) {
+ dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ }
return 0;
}
-static inline int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
+static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
{
- if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) ||
- ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry))
- return 1;
- if ((event == RPC_PIPEFS_MOUNT) && atomic_read(&clnt->cl_count) == 0)
+ if (clnt->cl_program->pipe_dir_name == NULL)
return 1;
+
+ switch (event) {
+ case RPC_PIPEFS_MOUNT:
+ if (clnt->cl_pipedir_objects.pdh_dentry != NULL)
+ return 1;
+ if (atomic_read(&clnt->cl_count) == 0)
+ return 1;
+ break;
+ case RPC_PIPEFS_UMOUNT:
+ if (clnt->cl_pipedir_objects.pdh_dentry == NULL)
+ return 1;
+ break;
+ }
return 0;
}
@@ -186,18 +188,11 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
switch (event) {
case RPC_PIPEFS_MOUNT:
- dentry = rpc_setup_pipedir_sb(sb, clnt,
- clnt->cl_program->pipe_dir_name);
+ dentry = rpc_setup_pipedir_sb(sb, clnt);
if (!dentry)
return -ENOENT;
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- clnt->cl_dentry = dentry;
- if (clnt->cl_auth->au_ops->pipes_create) {
- err = clnt->cl_auth->au_ops->pipes_create(clnt->cl_auth);
- if (err)
- __rpc_clnt_remove_pipedir(clnt);
- }
break;
case RPC_PIPEFS_UMOUNT:
__rpc_clnt_remove_pipedir(clnt);
@@ -230,8 +225,6 @@ static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event)
spin_lock(&sn->rpc_client_lock);
list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
- if (clnt->cl_program->pipe_dir_name == NULL)
- continue;
if (rpc_clnt_skip_event(clnt, event))
continue;
spin_unlock(&sn->rpc_client_lock);
@@ -282,7 +275,10 @@ static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename)
static int rpc_client_register(const struct rpc_create_args *args,
struct rpc_clnt *clnt)
{
- const struct rpc_program *program = args->program;
+ struct rpc_auth_create_args auth_args = {
+ .pseudoflavor = args->authflavor,
+ .target_name = args->client_name,
+ };
struct rpc_auth *auth;
struct net *net = rpc_net_ns(clnt);
struct super_block *pipefs_sb;
@@ -290,7 +286,7 @@ static int rpc_client_register(const struct rpc_create_args *args,
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
- err = rpc_setup_pipedir(clnt, program->pipe_dir_name, pipefs_sb);
+ err = rpc_setup_pipedir(pipefs_sb, clnt);
if (err)
goto out;
}
@@ -299,7 +295,7 @@ static int rpc_client_register(const struct rpc_create_args *args,
if (pipefs_sb)
rpc_put_sb_net(net);
- auth = rpcauth_create(args->authflavor, clnt);
+ auth = rpcauth_create(&auth_args, clnt);
if (IS_ERR(auth)) {
dprintk("RPC: Couldn't create auth handle (flavor %u)\n",
args->authflavor);
@@ -317,7 +313,27 @@ out:
return err;
}
-static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt)
+static DEFINE_IDA(rpc_clids);
+
+static int rpc_alloc_clid(struct rpc_clnt *clnt)
+{
+ int clid;
+
+ clid = ida_simple_get(&rpc_clids, 0, 0, GFP_KERNEL);
+ if (clid < 0)
+ return clid;
+ clnt->cl_clid = clid;
+ return 0;
+}
+
+static void rpc_free_clid(struct rpc_clnt *clnt)
+{
+ ida_simple_remove(&rpc_clids, clnt->cl_clid);
+}
+
+static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
+ struct rpc_xprt *xprt,
+ struct rpc_clnt *parent)
{
const struct rpc_program *program = args->program;
const struct rpc_version *version;
@@ -343,16 +359,20 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
if (!clnt)
goto out_err;
- clnt->cl_parent = clnt;
+ clnt->cl_parent = parent ? : clnt;
+
+ err = rpc_alloc_clid(clnt);
+ if (err)
+ goto out_no_clid;
rcu_assign_pointer(clnt->cl_xprt, xprt);
clnt->cl_procinfo = version->procs;
clnt->cl_maxproc = version->nrprocs;
- clnt->cl_protname = program->name;
clnt->cl_prog = args->prognumber ? : program->number;
clnt->cl_vers = version->number;
clnt->cl_stats = program->stats;
clnt->cl_metrics = rpc_alloc_iostats(clnt);
+ rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects);
err = -ENOMEM;
if (clnt->cl_metrics == NULL)
goto out_no_stats;
@@ -372,12 +392,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
clnt->cl_rtt = &clnt->cl_rtt_default;
rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
- clnt->cl_principal = NULL;
- if (args->client_name) {
- clnt->cl_principal = kstrdup(args->client_name, GFP_KERNEL);
- if (!clnt->cl_principal)
- goto out_no_principal;
- }
atomic_set(&clnt->cl_count, 1);
@@ -387,13 +401,15 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
err = rpc_client_register(args, clnt);
if (err)
goto out_no_path;
+ if (parent)
+ atomic_inc(&parent->cl_count);
return clnt;
out_no_path:
- kfree(clnt->cl_principal);
-out_no_principal:
rpc_free_iostats(clnt->cl_metrics);
out_no_stats:
+ rpc_free_clid(clnt);
+out_no_clid:
kfree(clnt);
out_err:
rpciod_down();
@@ -479,7 +495,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
xprt->resvport = 0;
- clnt = rpc_new_client(args, xprt);
+ clnt = rpc_new_client(args, xprt, NULL);
if (IS_ERR(clnt))
return clnt;
@@ -526,15 +542,12 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
goto out_err;
args->servername = xprt->servername;
- new = rpc_new_client(args, xprt);
+ new = rpc_new_client(args, xprt, clnt);
if (IS_ERR(new)) {
err = PTR_ERR(new);
goto out_err;
}
- atomic_inc(&clnt->cl_count);
- new->cl_parent = clnt;
-
/* Turn off autobind on clones */
new->cl_autobind = 0;
new->cl_softrtry = clnt->cl_softrtry;
@@ -561,7 +574,6 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
.prognumber = clnt->cl_prog,
.version = clnt->cl_vers,
.authflavor = clnt->cl_auth->au_flavor,
- .client_name = clnt->cl_principal,
};
return __rpc_clone_client(&args, clnt);
}
@@ -583,7 +595,6 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
.prognumber = clnt->cl_prog,
.version = clnt->cl_vers,
.authflavor = flavor,
- .client_name = clnt->cl_principal,
};
return __rpc_clone_client(&args, clnt);
}
@@ -629,7 +640,7 @@ void rpc_shutdown_client(struct rpc_clnt *clnt)
might_sleep();
dprintk_rcu("RPC: shutting down %s client for %s\n",
- clnt->cl_protname,
+ clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
while (!list_empty(&clnt->cl_tasks)) {
@@ -649,17 +660,17 @@ static void
rpc_free_client(struct rpc_clnt *clnt)
{
dprintk_rcu("RPC: destroying %s client for %s\n",
- clnt->cl_protname,
+ clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
if (clnt->cl_parent != clnt)
rpc_release_client(clnt->cl_parent);
rpc_clnt_remove_pipedir(clnt);
rpc_unregister_client(clnt);
rpc_free_iostats(clnt->cl_metrics);
- kfree(clnt->cl_principal);
clnt->cl_metrics = NULL;
xprt_put(rcu_dereference_raw(clnt->cl_xprt));
rpciod_down();
+ rpc_free_clid(clnt);
kfree(clnt);
}
@@ -720,7 +731,6 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
.prognumber = program->number,
.version = vers,
.authflavor = old->cl_auth->au_flavor,
- .client_name = old->cl_principal,
};
struct rpc_clnt *clnt;
int err;
@@ -1299,7 +1309,7 @@ call_start(struct rpc_task *task)
struct rpc_clnt *clnt = task->tk_client;
dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
- clnt->cl_protname, clnt->cl_vers,
+ clnt->cl_program->name, clnt->cl_vers,
rpc_proc_name(task),
(RPC_IS_ASYNC(task) ? "async" : "sync"));
@@ -1423,9 +1433,9 @@ call_refreshresult(struct rpc_task *task)
return;
case -ETIMEDOUT:
rpc_delay(task, 3*HZ);
- case -EKEYEXPIRED:
case -EAGAIN:
status = -EACCES;
+ case -EKEYEXPIRED:
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
@@ -1912,7 +1922,7 @@ call_status(struct rpc_task *task)
default:
if (clnt->cl_chatty)
printk("%s: RPC call returned error %d\n",
- clnt->cl_protname, -status);
+ clnt->cl_program->name, -status);
rpc_exit(task, status);
}
}
@@ -1943,7 +1953,7 @@ call_timeout(struct rpc_task *task)
if (clnt->cl_chatty) {
rcu_read_lock();
printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
- clnt->cl_protname,
+ clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
rcu_read_unlock();
}
@@ -1959,7 +1969,7 @@ call_timeout(struct rpc_task *task)
if (clnt->cl_chatty) {
rcu_read_lock();
printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
- clnt->cl_protname,
+ clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
rcu_read_unlock();
}
@@ -1994,7 +2004,7 @@ call_decode(struct rpc_task *task)
if (clnt->cl_chatty) {
rcu_read_lock();
printk(KERN_NOTICE "%s: server %s OK\n",
- clnt->cl_protname,
+ clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
rcu_read_unlock();
}
@@ -2019,7 +2029,7 @@ call_decode(struct rpc_task *task)
goto out_retry;
}
dprintk("RPC: %s: too small RPC reply size (%d bytes)\n",
- clnt->cl_protname, task->tk_status);
+ clnt->cl_program->name, task->tk_status);
task->tk_action = call_timeout;
goto out_retry;
}
@@ -2091,7 +2101,8 @@ rpc_verify_header(struct rpc_task *task)
dprintk("RPC: %5u %s: XDR representation not a multiple of"
" 4 bytes: 0x%x\n", task->tk_pid, __func__,
task->tk_rqstp->rq_rcv_buf.len);
- goto out_eio;
+ error = -EIO;
+ goto out_err;
}
if ((len -= 3) < 0)
goto out_overflow;
@@ -2100,6 +2111,7 @@ rpc_verify_header(struct rpc_task *task)
if ((n = ntohl(*p++)) != RPC_REPLY) {
dprintk("RPC: %5u %s: not an RPC reply: %x\n",
task->tk_pid, __func__, n);
+ error = -EIO;
goto out_garbage;
}
@@ -2118,7 +2130,8 @@ rpc_verify_header(struct rpc_task *task)
dprintk("RPC: %5u %s: RPC call rejected, "
"unknown error: %x\n",
task->tk_pid, __func__, n);
- goto out_eio;
+ error = -EIO;
+ goto out_err;
}
if (--len < 0)
goto out_overflow;
@@ -2163,9 +2176,11 @@ rpc_verify_header(struct rpc_task *task)
task->tk_pid, __func__, n);
goto out_err;
}
- if (!(p = rpcauth_checkverf(task, p))) {
- dprintk("RPC: %5u %s: auth check failed\n",
- task->tk_pid, __func__);
+ p = rpcauth_checkverf(task, p);
+ if (IS_ERR(p)) {
+ error = PTR_ERR(p);
+ dprintk("RPC: %5u %s: auth check failed with %d\n",
+ task->tk_pid, __func__, error);
goto out_garbage; /* bad verifier, retry */
}
len = p - (__be32 *)iov->iov_base - 1;
@@ -2218,8 +2233,6 @@ out_garbage:
out_retry:
return ERR_PTR(-EAGAIN);
}
-out_eio:
- error = -EIO;
out_err:
rpc_exit(task, error);
dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid,
@@ -2291,7 +2304,7 @@ static void rpc_show_task(const struct rpc_clnt *clnt,
printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
task->tk_pid, task->tk_flags, task->tk_status,
clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
- clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task),
+ clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
task->tk_action, rpc_waitq);
}
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 406859cc68aa..f94567b45bb3 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -409,7 +409,7 @@ rpc_show_info(struct seq_file *m, void *v)
rcu_read_lock();
seq_printf(m, "RPC server: %s\n",
rcu_dereference(clnt->cl_xprt)->servername);
- seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname,
+ seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_program->name,
clnt->cl_prog, clnt->cl_vers);
seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
@@ -480,23 +480,6 @@ static const struct dentry_operations rpc_dentry_operations = {
.d_delete = rpc_delete_dentry,
};
-/*
- * Lookup the data. This is trivial - if the dentry didn't already
- * exist, we know it is negative.
- */
-static struct dentry *
-rpc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
-{
- if (dentry->d_name.len > NAME_MAX)
- return ERR_PTR(-ENAMETOOLONG);
- d_add(dentry, NULL);
- return NULL;
-}
-
-static const struct inode_operations rpc_dir_inode_operations = {
- .lookup = rpc_lookup,
-};
-
static struct inode *
rpc_get_inode(struct super_block *sb, umode_t mode)
{
@@ -509,7 +492,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
switch (mode & S_IFMT) {
case S_IFDIR:
inode->i_fop = &simple_dir_operations;
- inode->i_op = &rpc_dir_inode_operations;
+ inode->i_op = &simple_dir_inode_operations;
inc_nlink(inode);
default:
break;
@@ -901,6 +884,159 @@ rpc_unlink(struct dentry *dentry)
}
EXPORT_SYMBOL_GPL(rpc_unlink);
+/**
+ * rpc_init_pipe_dir_head - initialise a struct rpc_pipe_dir_head
+ * @pdh: pointer to struct rpc_pipe_dir_head
+ */
+void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh)
+{
+ INIT_LIST_HEAD(&pdh->pdh_entries);
+ pdh->pdh_dentry = NULL;
+}
+EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_head);
+
+/**
+ * rpc_init_pipe_dir_object - initialise a struct rpc_pipe_dir_object
+ * @pdo: pointer to struct rpc_pipe_dir_object
+ * @pdo_ops: pointer to const struct rpc_pipe_dir_object_ops
+ * @pdo_data: pointer to caller-defined data
+ */
+void rpc_init_pipe_dir_object(struct rpc_pipe_dir_object *pdo,
+ const struct rpc_pipe_dir_object_ops *pdo_ops,
+ void *pdo_data)
+{
+ INIT_LIST_HEAD(&pdo->pdo_head);
+ pdo->pdo_ops = pdo_ops;
+ pdo->pdo_data = pdo_data;
+}
+EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_object);
+
+static int
+rpc_add_pipe_dir_object_locked(struct net *net,
+ struct rpc_pipe_dir_head *pdh,
+ struct rpc_pipe_dir_object *pdo)
+{
+ int ret = 0;
+
+ if (pdh->pdh_dentry)
+ ret = pdo->pdo_ops->create(pdh->pdh_dentry, pdo);
+ if (ret == 0)
+ list_add_tail(&pdo->pdo_head, &pdh->pdh_entries);
+ return ret;
+}
+
+static void
+rpc_remove_pipe_dir_object_locked(struct net *net,
+ struct rpc_pipe_dir_head *pdh,
+ struct rpc_pipe_dir_object *pdo)
+{
+ if (pdh->pdh_dentry)
+ pdo->pdo_ops->destroy(pdh->pdh_dentry, pdo);
+ list_del_init(&pdo->pdo_head);
+}
+
+/**
+ * rpc_add_pipe_dir_object - associate a rpc_pipe_dir_object to a directory
+ * @net: pointer to struct net
+ * @pdh: pointer to struct rpc_pipe_dir_head
+ * @pdo: pointer to struct rpc_pipe_dir_object
+ *
+ */
+int
+rpc_add_pipe_dir_object(struct net *net,
+ struct rpc_pipe_dir_head *pdh,
+ struct rpc_pipe_dir_object *pdo)
+{
+ int ret = 0;
+
+ if (list_empty(&pdo->pdo_head)) {
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+ mutex_lock(&sn->pipefs_sb_lock);
+ ret = rpc_add_pipe_dir_object_locked(net, pdh, pdo);
+ mutex_unlock(&sn->pipefs_sb_lock);
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_add_pipe_dir_object);
+
+/**
+ * rpc_remove_pipe_dir_object - remove a rpc_pipe_dir_object from a directory
+ * @net: pointer to struct net
+ * @pdh: pointer to struct rpc_pipe_dir_head
+ * @pdo: pointer to struct rpc_pipe_dir_object
+ *
+ */
+void
+rpc_remove_pipe_dir_object(struct net *net,
+ struct rpc_pipe_dir_head *pdh,
+ struct rpc_pipe_dir_object *pdo)
+{
+ if (!list_empty(&pdo->pdo_head)) {
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+ mutex_lock(&sn->pipefs_sb_lock);
+ rpc_remove_pipe_dir_object_locked(net, pdh, pdo);
+ mutex_unlock(&sn->pipefs_sb_lock);
+ }
+}
+EXPORT_SYMBOL_GPL(rpc_remove_pipe_dir_object);
+
+/**
+ * rpc_find_or_alloc_pipe_dir_object
+ * @net: pointer to struct net
+ * @pdh: pointer to struct rpc_pipe_dir_head
+ * @match: match struct rpc_pipe_dir_object to data
+ * @alloc: allocate a new struct rpc_pipe_dir_object
+ * @data: user defined data for match() and alloc()
+ *
+ */
+struct rpc_pipe_dir_object *
+rpc_find_or_alloc_pipe_dir_object(struct net *net,
+ struct rpc_pipe_dir_head *pdh,
+ int (*match)(struct rpc_pipe_dir_object *, void *),
+ struct rpc_pipe_dir_object *(*alloc)(void *),
+ void *data)
+{
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+ struct rpc_pipe_dir_object *pdo;
+
+ mutex_lock(&sn->pipefs_sb_lock);
+ list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) {
+ if (!match(pdo, data))
+ continue;
+ goto out;
+ }
+ pdo = alloc(data);
+ if (!pdo)
+ goto out;
+ rpc_add_pipe_dir_object_locked(net, pdh, pdo);
+out:
+ mutex_unlock(&sn->pipefs_sb_lock);
+ return pdo;
+}
+EXPORT_SYMBOL_GPL(rpc_find_or_alloc_pipe_dir_object);
+
+static void
+rpc_create_pipe_dir_objects(struct rpc_pipe_dir_head *pdh)
+{
+ struct rpc_pipe_dir_object *pdo;
+ struct dentry *dir = pdh->pdh_dentry;
+
+ list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head)
+ pdo->pdo_ops->create(dir, pdo);
+}
+
+static void
+rpc_destroy_pipe_dir_objects(struct rpc_pipe_dir_head *pdh)
+{
+ struct rpc_pipe_dir_object *pdo;
+ struct dentry *dir = pdh->pdh_dentry;
+
+ list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head)
+ pdo->pdo_ops->destroy(dir, pdo);
+}
+
enum {
RPCAUTH_info,
RPCAUTH_EOF
@@ -941,16 +1077,29 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry,
const char *name,
struct rpc_clnt *rpc_client)
{
- return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL,
+ struct dentry *ret;
+
+ ret = rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL,
rpc_clntdir_populate, rpc_client);
+ if (!IS_ERR(ret)) {
+ rpc_client->cl_pipedir_objects.pdh_dentry = ret;
+ rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
+ }
+ return ret;
}
/**
* rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir()
- * @dentry: dentry for the pipe
+ * @rpc_client: rpc_client for the pipe
*/
-int rpc_remove_client_dir(struct dentry *dentry)
+int rpc_remove_client_dir(struct rpc_clnt *rpc_client)
{
+ struct dentry *dentry = rpc_client->cl_pipedir_objects.pdh_dentry;
+
+ if (dentry == NULL)
+ return 0;
+ rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
+ rpc_client->cl_pipedir_objects.pdh_dentry = NULL;
return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate);
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 93a7a4e94d80..ff3cc4bf4b24 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -258,7 +258,7 @@ static int rpc_wait_bit_killable(void *word)
return 0;
}
-#ifdef RPC_DEBUG
+#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS)
static void rpc_task_set_debuginfo(struct rpc_task *task)
{
static atomic_t rpc_pid;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 21b75cb08c03..54530490944e 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -188,7 +188,7 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS);
seq_printf(seq, "p/v: %u/%u (%s)\n",
- clnt->cl_prog, clnt->cl_vers, clnt->cl_protname);
+ clnt->cl_prog, clnt->cl_vers, clnt->cl_program->name);
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d6656d7768f4..ee03d35677d9 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,6 +47,8 @@
#include <net/udp.h>
#include <net/tcp.h>
+#include <trace/events/sunrpc.h>
+
#include "sunrpc.h"
static void xs_close(struct rpc_xprt *xprt);
@@ -665,8 +667,10 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct socket *sock = transport->sock;
- if (sock != NULL)
+ if (sock != NULL) {
kernel_sock_shutdown(sock, SHUT_WR);
+ trace_rpc_socket_shutdown(xprt, sock);
+ }
}
/**
@@ -811,6 +815,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
sk->sk_no_check = 0;
+ trace_rpc_socket_close(&transport->xprt, sock);
sock_release(sock);
}
@@ -1492,6 +1497,7 @@ static void xs_tcp_state_change(struct sock *sk)
sock_flag(sk, SOCK_ZAPPED),
sk->sk_shutdown);
+ trace_rpc_socket_state_change(xprt, sk->sk_socket);
switch (sk->sk_state) {
case TCP_ESTABLISHED:
spin_lock(&xprt->transport_lock);
@@ -1896,6 +1902,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
status = xs_local_finish_connecting(xprt, sock);
+ trace_rpc_socket_connect(xprt, sock, status);
switch (status) {
case 0:
dprintk("RPC: xprt %p connected to %s\n",
@@ -2039,6 +2046,7 @@ static void xs_udp_setup_socket(struct work_struct *work)
xprt->address_strings[RPC_DISPLAY_PORT]);
xs_udp_finish_connecting(xprt, sock);
+ trace_rpc_socket_connect(xprt, sock, 0);
status = 0;
out:
xprt_clear_connecting(xprt);
@@ -2064,6 +2072,8 @@ static void xs_abort_connection(struct sock_xprt *transport)
memset(&any, 0, sizeof(any));
any.sa_family = AF_UNSPEC;
result = kernel_connect(transport->sock, &any, sizeof(any), 0);
+ trace_rpc_socket_reset_connection(&transport->xprt,
+ transport->sock, result);
if (!result)
xs_sock_reset_connection_flags(&transport->xprt);
dprintk("RPC: AF_UNSPEC connect return code %d\n", result);
@@ -2194,6 +2204,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
xprt->address_strings[RPC_DISPLAY_PORT]);
status = xs_tcp_finish_connecting(xprt, sock);
+ trace_rpc_socket_connect(xprt, sock, status);
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
xprt, -status, xprt_connected(xprt),
sock->sk->sk_state);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 9bc6db04be3e..e7000be321b0 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head,
/* Allow network administrator to have same access as root. */
if (ns_capable(net->user_ns, CAP_NET_ADMIN) ||
- uid_eq(root_uid, current_uid())) {
+ uid_eq(root_uid, current_euid())) {
int mode = (table->mode >> 6) & 7;
return (mode << 6) | (mode << 3) | mode;
}
/* Allow netns root group to have the same access as the root group */
- if (gid_eq(root_gid, current_gid())) {
+ if (in_egroup_p(root_gid)) {
int mode = (table->mode >> 3) & 7;
return (mode << 3) | mode;
}