aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bluetooth/rfcomm/tty.c6
-rw-r--r--net/core/dev.c5
-rw-r--r--net/core/ethtool.c3
-rw-r--r--net/core/sock.c8
-rw-r--r--net/ipv4/netfilter/arp_tables.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c4
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_output.c73
-rw-r--r--net/ipv6/netfilter/ip6_tables.c4
-rw-r--r--net/irda/ircomm/ircomm_tty.c256
-rw-r--r--net/rds/ib.c5
-rw-r--r--net/rds/ib.h28
-rw-r--r--net/rds/ib_cm.c43
-rw-r--r--net/rds/ib_rdma.c43
-rw-r--r--net/rds/ib_recv.c37
-rw-r--r--net/rds/iw.c5
-rw-r--r--net/rds/iw.h28
-rw-r--r--net/rds/iw_cm.c44
-rw-r--r--net/rds/iw_rdma.c44
-rw-r--r--net/rds/iw_recv.c37
-rw-r--r--net/rds/rds.h4
-rw-r--r--net/rds/send.c6
-rw-r--r--net/sunrpc/Kconfig22
-rw-r--r--net/sunrpc/clnt.c48
-rw-r--r--net/sunrpc/rpcb_clnt.c103
-rw-r--r--net/sunrpc/svc.c158
-rw-r--r--net/sunrpc/svc_xprt.c31
-rw-r--r--net/sunrpc/svcsock.c40
-rw-r--r--net/sunrpc/xprt.c89
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c26
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c8
-rw-r--r--net/sunrpc/xprtsock.c363
32 files changed, 912 insertions, 668 deletions
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index abdc703a11d2..cab71ea2796d 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -1093,11 +1093,6 @@ static void rfcomm_tty_hangup(struct tty_struct *tty)
}
}
-static int rfcomm_tty_read_proc(char *buf, char **start, off_t offset, int len, int *eof, void *unused)
-{
- return 0;
-}
-
static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp)
{
struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
@@ -1156,7 +1151,6 @@ static const struct tty_operations rfcomm_ops = {
.send_xchar = rfcomm_tty_send_xchar,
.hangup = rfcomm_tty_hangup,
.wait_until_sent = rfcomm_tty_wait_until_sent,
- .read_proc = rfcomm_tty_read_proc,
.tiocmget = rfcomm_tty_tiocmget,
.tiocmset = rfcomm_tty_tiocmset,
};
diff --git a/net/core/dev.c b/net/core/dev.c
index 52fea5b28ca6..91d792d17e09 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2472,8 +2472,9 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
return GRO_NORMAL;
for (p = napi->gro_list; p; p = p->next) {
- NAPI_GRO_CB(p)->same_flow = !compare_ether_header(
- skb_mac_header(p), skb_gro_mac_header(skb));
+ NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
+ && !compare_ether_header(skb_mac_header(p),
+ skb_gro_mac_header(skb));
NAPI_GRO_CB(p)->flush = 0;
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 244ca56dffac..d9d5160610d5 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -261,8 +261,7 @@ static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
ret = 0;
err_out:
- if (rule_buf)
- kfree(rule_buf);
+ kfree(rule_buf);
return ret;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 0620046e4eba..7dbf3ffb35cc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1677,7 +1677,7 @@ static void sock_def_error_report(struct sock *sk)
{
read_lock(&sk->sk_callback_lock);
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible(sk->sk_sleep);
+ wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
read_unlock(&sk->sk_callback_lock);
}
@@ -1686,7 +1686,8 @@ static void sock_def_readable(struct sock *sk, int len)
{
read_lock(&sk->sk_callback_lock);
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible_sync(sk->sk_sleep);
+ wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
+ POLLRDNORM | POLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
read_unlock(&sk->sk_callback_lock);
}
@@ -1700,7 +1701,8 @@ static void sock_def_write_space(struct sock *sk)
*/
if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible_sync(sk->sk_sleep);
+ wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+ POLLWRNORM | POLLWRBAND);
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 35c5f6a5cb7c..5ba533d234db 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -253,7 +253,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
- rcu_read_lock();
+ rcu_read_lock_bh();
private = rcu_dereference(table->private);
table_base = rcu_dereference(private->entries[smp_processor_id()]);
@@ -329,7 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
}
} while (!hotdrop);
- rcu_read_unlock();
+ rcu_read_unlock_bh();
if (hotdrop)
return NF_DROP;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 82ee7c9049ff..810c0b62c7d4 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -339,7 +339,7 @@ ipt_do_table(struct sk_buff *skb,
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
- rcu_read_lock();
+ rcu_read_lock_bh();
private = rcu_dereference(table->private);
table_base = rcu_dereference(private->entries[smp_processor_id()]);
@@ -437,7 +437,7 @@ ipt_do_table(struct sk_buff *skb,
}
} while (!hotdrop);
- rcu_read_unlock();
+ rcu_read_unlock_bh();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2451aeb5ac23..fafbec8b073e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1081,8 +1081,7 @@ out_err:
* this, no blocking and very strange errors 8)
*/
-static int tcp_recv_urg(struct sock *sk, long timeo,
- struct msghdr *msg, int len, int flags)
+static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -1697,7 +1696,7 @@ out:
return err;
recv_urg:
- err = tcp_recv_urg(sk, timeo, msg, len, flags);
+ err = tcp_recv_urg(sk, msg, len, flags);
goto out;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c1f259d2d33b..53300fa2359f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -754,6 +754,36 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
tp->fackets_out -= decr;
}
+/* Pcount in the middle of the write queue got changed, we need to do various
+ * tweaks to fix counters
+ */
+static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ tp->packets_out -= decr;
+
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+ tp->sacked_out -= decr;
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
+ tp->retrans_out -= decr;
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
+ tp->lost_out -= decr;
+
+ /* Reno case is special. Sigh... */
+ if (tcp_is_reno(tp) && decr > 0)
+ tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
+
+ tcp_adjust_fackets_out(sk, skb, decr);
+
+ if (tp->lost_skb_hint &&
+ before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
+ (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
+ tp->lost_cnt_hint -= decr;
+
+ tcp_verify_left_out(tp);
+}
+
/* Function to create two new TCP segments. Shrinks the given segment
* to the specified size and appends a new segment with the rest of the
* packet to the list. This won't be called frequently, I hope.
@@ -836,28 +866,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
int diff = old_factor - tcp_skb_pcount(skb) -
tcp_skb_pcount(buff);
- tp->packets_out -= diff;
-
- if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
- tp->sacked_out -= diff;
- if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
- tp->retrans_out -= diff;
-
- if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
- tp->lost_out -= diff;
-
- /* Adjust Reno SACK estimate. */
- if (tcp_is_reno(tp) && diff > 0) {
- tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
- tcp_verify_left_out(tp);
- }
- tcp_adjust_fackets_out(sk, skb, diff);
-
- if (tp->lost_skb_hint &&
- before(TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
- (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
- tp->lost_cnt_hint -= diff;
+ if (diff)
+ tcp_adjust_pcount(sk, skb, diff);
}
/* Link BUFF into the send queue. */
@@ -1768,22 +1778,14 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
* packet counting does not break.
*/
TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
- if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_RETRANS)
- tp->retrans_out -= tcp_skb_pcount(next_skb);
- if (TCP_SKB_CB(next_skb)->sacked & TCPCB_LOST)
- tp->lost_out -= tcp_skb_pcount(next_skb);
- /* Reno case is special. Sigh... */
- if (tcp_is_reno(tp) && tp->sacked_out)
- tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
-
- tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb));
- tp->packets_out -= tcp_skb_pcount(next_skb);
/* changed transmit queue under us so clear hints */
tcp_clear_retrans_hints_partial(tp);
if (next_skb == tp->retransmit_skb_hint)
tp->retransmit_skb_hint = skb;
+ tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
+
sk_wmem_free_skb(sk, next_skb);
}
@@ -1891,7 +1893,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (tcp_fragment(sk, skb, cur_mss, cur_mss))
return -ENOMEM; /* We'll try again later. */
} else {
- tcp_init_tso_segs(sk, skb, cur_mss);
+ int oldpcount = tcp_skb_pcount(skb);
+
+ if (unlikely(oldpcount > 1)) {
+ tcp_init_tso_segs(sk, skb, cur_mss);
+ tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
+ }
}
tcp_retrans_try_collapse(sk, skb, cur_mss);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e89cfa3a8f25..dfed176aed37 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -365,7 +365,7 @@ ip6t_do_table(struct sk_buff *skb,
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
- rcu_read_lock();
+ rcu_read_lock_bh();
private = rcu_dereference(table->private);
table_base = rcu_dereference(private->entries[smp_processor_id()]);
@@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb,
#ifdef CONFIG_NETFILTER_DEBUG
((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
#endif
- rcu_read_unlock();
+ rcu_read_unlock_bh();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 086d5ef098fd..811984d9324b 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -34,6 +34,7 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/sched.h>
+#include <linux/seq_file.h>
#include <linux/termios.h>
#include <linux/tty.h>
#include <linux/interrupt.h>
@@ -72,8 +73,7 @@ static int ircomm_tty_control_indication(void *instance, void *sap,
static void ircomm_tty_flow_indication(void *instance, void *sap,
LOCAL_FLOW cmd);
#ifdef CONFIG_PROC_FS
-static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len,
- int *eof, void *unused);
+static const struct file_operations ircomm_tty_proc_fops;
#endif /* CONFIG_PROC_FS */
static struct tty_driver *driver;
@@ -98,7 +98,7 @@ static const struct tty_operations ops = {
.hangup = ircomm_tty_hangup,
.wait_until_sent = ircomm_tty_wait_until_sent,
#ifdef CONFIG_PROC_FS
- .read_proc = ircomm_tty_read_proc,
+ .proc_fops = &ircomm_tty_proc_fops,
#endif /* CONFIG_PROC_FS */
};
@@ -1245,150 +1245,170 @@ static void ircomm_tty_flow_indication(void *instance, void *sap,
}
#ifdef CONFIG_PROC_FS
-static int ircomm_tty_line_info(struct ircomm_tty_cb *self, char *buf)
+static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m)
{
- int ret=0;
+ char sep;
- ret += sprintf(buf+ret, "State: %s\n", ircomm_tty_state[self->state]);
+ seq_printf(m, "State: %s\n", ircomm_tty_state[self->state]);
- ret += sprintf(buf+ret, "Service type: ");
+ seq_puts(m, "Service type: ");
if (self->service_type & IRCOMM_9_WIRE)
- ret += sprintf(buf+ret, "9_WIRE");
+ seq_puts(m, "9_WIRE");
else if (self->service_type & IRCOMM_3_WIRE)
- ret += sprintf(buf+ret, "3_WIRE");
+ seq_puts(m, "3_WIRE");
else if (self->service_type & IRCOMM_3_WIRE_RAW)
- ret += sprintf(buf+ret, "3_WIRE_RAW");
+ seq_puts(m, "3_WIRE_RAW");
else
- ret += sprintf(buf+ret, "No common service type!\n");
- ret += sprintf(buf+ret, "\n");
-
- ret += sprintf(buf+ret, "Port name: %s\n", self->settings.port_name);
-
- ret += sprintf(buf+ret, "DTE status: ");
- if (self->settings.dte & IRCOMM_RTS)
- ret += sprintf(buf+ret, "RTS|");
- if (self->settings.dte & IRCOMM_DTR)
- ret += sprintf(buf+ret, "DTR|");
- if (self->settings.dte)
- ret--; /* remove the last | */
- ret += sprintf(buf+ret, "\n");
-
- ret += sprintf(buf+ret, "DCE status: ");
- if (self->settings.dce & IRCOMM_CTS)
- ret += sprintf(buf+ret, "CTS|");
- if (self->settings.dce & IRCOMM_DSR)
- ret += sprintf(buf+ret, "DSR|");
- if (self->settings.dce & IRCOMM_CD)
- ret += sprintf(buf+ret, "CD|");
- if (self->settings.dce & IRCOMM_RI)
- ret += sprintf(buf+ret, "RI|");
- if (self->settings.dce)
- ret--; /* remove the last | */
- ret += sprintf(buf+ret, "\n");
-
- ret += sprintf(buf+ret, "Configuration: ");
+ seq_puts(m, "No common service type!\n");
+ seq_putc(m, '\n');
+
+ seq_printf(m, "Port name: %s\n", self->settings.port_name);
+
+ seq_printf(m, "DTE status:");
+ sep = ' ';
+ if (self->settings.dte & IRCOMM_RTS) {
+ seq_printf(m, "%cRTS", sep);
+ sep = '|';
+ }
+ if (self->settings.dte & IRCOMM_DTR) {
+ seq_printf(m, "%cDTR", sep);
+ sep = '|';
+ }
+ seq_putc(m, '\n');
+
+ seq_puts(m, "DCE status:");
+ sep = ' ';
+ if (self->settings.dce & IRCOMM_CTS) {
+ seq_printf(m, "%cCTS", sep);
+ sep = '|';
+ }
+ if (self->settings.dce & IRCOMM_DSR) {
+ seq_printf(m, "%cDSR", sep);
+ sep = '|';
+ }
+ if (self->settings.dce & IRCOMM_CD) {
+ seq_printf(m, "%cCD", sep);
+ sep = '|';
+ }
+ if (self->settings.dce & IRCOMM_RI) {
+ seq_printf(m, "%cRI", sep);
+ sep = '|';
+ }
+ seq_putc(m, '\n');
+
+ seq_puts(m, "Configuration: ");
if (!self->settings.null_modem)
- ret += sprintf(buf+ret, "DTE <-> DCE\n");
+ seq_puts(m, "DTE <-> DCE\n");
else
- ret += sprintf(buf+ret,
- "DTE <-> DTE (null modem emulation)\n");
-
- ret += sprintf(buf+ret, "Data rate: %d\n", self->settings.data_rate);
-
- ret += sprintf(buf+ret, "Flow control: ");
- if (self->settings.flow_control & IRCOMM_XON_XOFF_IN)
- ret += sprintf(buf+ret, "XON_XOFF_IN|");
- if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT)
- ret += sprintf(buf+ret, "XON_XOFF_OUT|");
- if (self->settings.flow_control & IRCOMM_RTS_CTS_IN)
- ret += sprintf(buf+ret, "RTS_CTS_IN|");
- if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT)
- ret += sprintf(buf+ret, "RTS_CTS_OUT|");
- if (self->settings.flow_control & IRCOMM_DSR_DTR_IN)
- ret += sprintf(buf+ret, "DSR_DTR_IN|");
- if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT)
- ret += sprintf(buf+ret, "DSR_DTR_OUT|");
- if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN)
- ret += sprintf(buf+ret, "ENQ_ACK_IN|");
- if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT)
- ret += sprintf(buf+ret, "ENQ_ACK_OUT|");
- if (self->settings.flow_control)
- ret--; /* remove the last | */
- ret += sprintf(buf+ret, "\n");
-
- ret += sprintf(buf+ret, "Flags: ");
- if (self->flags & ASYNC_CTS_FLOW)
- ret += sprintf(buf+ret, "ASYNC_CTS_FLOW|");
- if (self->flags & ASYNC_CHECK_CD)
- ret += sprintf(buf+ret, "ASYNC_CHECK_CD|");
- if (self->flags & ASYNC_INITIALIZED)
- ret += sprintf(buf+ret, "ASYNC_INITIALIZED|");
- if (self->flags & ASYNC_LOW_LATENCY)
- ret += sprintf(buf+ret, "ASYNC_LOW_LATENCY|");
- if (self->flags & ASYNC_CLOSING)
- ret += sprintf(buf+ret, "ASYNC_CLOSING|");
- if (self->flags & ASYNC_NORMAL_ACTIVE)
- ret += sprintf(buf+ret, "ASYNC_NORMAL_ACTIVE|");
- if (self->flags)
- ret--; /* remove the last | */
- ret += sprintf(buf+ret, "\n");
-
- ret += sprintf(buf+ret, "Role: %s\n", self->client ?
- "client" : "server");
- ret += sprintf(buf+ret, "Open count: %d\n", self->open_count);
- ret += sprintf(buf+ret, "Max data size: %d\n", self->max_data_size);
- ret += sprintf(buf+ret, "Max header size: %d\n", self->max_header_size);
+ seq_puts(m, "DTE <-> DTE (null modem emulation)\n");
+
+ seq_printf(m, "Data rate: %d\n", self->settings.data_rate);
+
+ seq_puts(m, "Flow control:");
+ sep = ' ';
+ if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) {
+ seq_printf(m, "%cXON_XOFF_IN", sep);
+ sep = '|';
+ }
+ if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) {
+ seq_printf(m, "%cXON_XOFF_OUT", sep);
+ sep = '|';
+ }
+ if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) {
+ seq_printf(m, "%cRTS_CTS_IN", sep);
+ sep = '|';
+ }
+ if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) {
+ seq_printf(m, "%cRTS_CTS_OUT", sep);
+ sep = '|';
+ }
+ if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) {
+ seq_printf(m, "%cDSR_DTR_IN", sep);
+ sep = '|';
+ }
+ if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) {
+ seq_printf(m, "%cDSR_DTR_OUT", sep);
+ sep = '|';
+ }
+ if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) {
+ seq_printf(m, "%cENQ_ACK_IN", sep);
+ sep = '|';
+ }
+ if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) {
+ seq_printf(m, "%cENQ_ACK_OUT", sep);
+ sep = '|';
+ }
+ seq_putc(m, '\n');
+
+ seq_puts(m, "Flags:");
+ sep = ' ';
+ if (self->flags & ASYNC_CTS_FLOW) {
+ seq_printf(m, "%cASYNC_CTS_FLOW", sep);
+ sep = '|';
+ }
+ if (self->flags & ASYNC_CHECK_CD) {
+ seq_printf(m, "%cASYNC_CHECK_CD", sep);
+ sep = '|';
+ }
+ if (self->flags & ASYNC_INITIALIZED) {
+ seq_printf(m, "%cASYNC_INITIALIZED", sep);
+ sep = '|';
+ }
+ if (self->flags & ASYNC_LOW_LATENCY) {
+ seq_printf(m, "%cASYNC_LOW_LATENCY", sep);
+ sep = '|';
+ }
+ if (self->flags & ASYNC_CLOSING) {
+ seq_printf(m, "%cASYNC_CLOSING", sep);
+ sep = '|';
+ }
+ if (self->flags & ASYNC_NORMAL_ACTIVE) {
+ seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep);
+ sep = '|';
+ }
+ seq_putc(m, '\n');
+
+ seq_printf(m, "Role: %s\n", self->client ? "client" : "server");
+ seq_printf(m, "Open count: %d\n", self->open_count);
+ seq_printf(m, "Max data size: %d\n", self->max_data_size);
+ seq_printf(m, "Max header size: %d\n", self->max_header_size);
if (self->tty)
- ret += sprintf(buf+ret, "Hardware: %s\n",
+ seq_printf(m, "Hardware: %s\n",
self->tty->hw_stopped ? "Stopped" : "Running");
-
- ret += sprintf(buf+ret, "\n");
- return ret;
}
-
-/*
- * Function ircomm_tty_read_proc (buf, start, offset, len, eof, unused)
- *
- *
- *
- */
-static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len,
- int *eof, void *unused)
+static int ircomm_tty_proc_show(struct seq_file *m, void *v)
{
struct ircomm_tty_cb *self;
- int count = 0, l;
- off_t begin = 0;
unsigned long flags;
spin_lock_irqsave(&ircomm_tty->hb_spinlock, flags);
self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty);
- while ((self != NULL) && (count < 4000)) {
+ while (self != NULL) {
if (self->magic != IRCOMM_TTY_MAGIC)
break;
- l = ircomm_tty_line_info(self, buf + count);
- count += l;
- if (count+begin > offset+len)
- goto done;
- if (count+begin < offset) {
- begin += count;
- count = 0;
- }
-
+ ircomm_tty_line_info(self, m);
self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty);
}
- *eof = 1;
-done:
spin_unlock_irqrestore(&ircomm_tty->hb_spinlock, flags);
+ return 0;
+}
- if (offset >= count+begin)
- return 0;
- *start = buf + (offset-begin);
- return ((len < begin+count-offset) ? len : begin+count-offset);
+static int ircomm_tty_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ircomm_tty_proc_show, NULL);
}
+
+static const struct file_operations ircomm_tty_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = ircomm_tty_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
#endif /* CONFIG_PROC_FS */
MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>");
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 06a7b798d9a7..4933b380985e 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
struct list_head rds_ib_devices;
+/* NOTE: if also grabbing ibdev lock, grab this first */
DEFINE_SPINLOCK(ib_nodev_conns_lock);
LIST_HEAD(ib_nodev_conns);
@@ -137,7 +138,7 @@ void rds_ib_remove_one(struct ib_device *device)
kfree(i_ipaddr);
}
- rds_ib_remove_conns(rds_ibdev);
+ rds_ib_destroy_conns(rds_ibdev);
if (rds_ibdev->mr_pool)
rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
@@ -249,7 +250,7 @@ static int rds_ib_laddr_check(__be32 addr)
void rds_ib_exit(void)
{
rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
- rds_ib_remove_nodev_conns();
+ rds_ib_destroy_nodev_conns();
ib_unregister_client(&rds_ib_client);
rds_ib_sysctl_exit();
rds_ib_recv_exit();
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 8be563a1363a..069206cae733 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -108,7 +108,12 @@ struct rds_ib_connection {
/* sending acks */
unsigned long i_ack_flags;
+#ifdef KERNEL_HAS_ATOMIC64
+ atomic64_t i_ack_next; /* next ACK to send */
+#else
+ spinlock_t i_ack_lock; /* protect i_ack_next */
u64 i_ack_next; /* next ACK to send */
+#endif
struct rds_header *i_ack;
struct ib_send_wr i_ack_wr;
struct ib_sge i_ack_sge;
@@ -267,9 +272,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
/* ib_rdma.c */
int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
-int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
-void rds_ib_remove_nodev_conns(void);
-void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev);
+void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
+void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
+void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock);
+static inline void rds_ib_destroy_nodev_conns(void)
+{
+ __rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock);
+}
+static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev)
+{
+ __rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock);
+}
struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
@@ -355,13 +368,4 @@ rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
return &sge[1];
}
-static inline void rds_ib_set_64bit(u64 *ptr, u64 val)
-{
-#if BITS_PER_LONG == 64
- *ptr = val;
-#else
- set_64bit(ptr, val);
-#endif
-}
-
#endif
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 0532237bd128..f8e40e1a6038 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -126,9 +126,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
if (err)
printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err);
- err = rds_ib_add_conn(rds_ibdev, conn);
- if (err)
- printk(KERN_ERR "rds_ib_add_conn failed (%d)\n", err);
+ rds_ib_add_conn(rds_ibdev, conn);
/* If the peer gave us the last packet it saw, process this as if
* we had received a regular ACK. */
@@ -616,18 +614,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
/*
* Move connection back to the nodev list.
*/
- if (ic->rds_ibdev) {
-
- spin_lock_irq(&ic->rds_ibdev->spinlock);
- BUG_ON(list_empty(&ic->ib_node));
- list_del(&ic->ib_node);
- spin_unlock_irq(&ic->rds_ibdev->spinlock);
-
- spin_lock_irq(&ib_nodev_conns_lock);
- list_add_tail(&ic->ib_node, &ib_nodev_conns);
- spin_unlock_irq(&ib_nodev_conns_lock);
- ic->rds_ibdev = NULL;
- }
+ if (ic->rds_ibdev)
+ rds_ib_remove_conn(ic->rds_ibdev, conn);
ic->i_cm_id = NULL;
ic->i_pd = NULL;
@@ -648,7 +636,11 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
/* Clear the ACK state */
clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
- rds_ib_set_64bit(&ic->i_ack_next, 0);
+#ifdef KERNEL_HAS_ATOMIC64
+ atomic64_set(&ic->i_ack_next, 0);
+#else
+ ic->i_ack_next = 0;
+#endif
ic->i_ack_recv = 0;
/* Clear flow control state */
@@ -681,6 +673,9 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
INIT_LIST_HEAD(&ic->ib_node);
mutex_init(&ic->i_recv_mutex);
+#ifndef KERNEL_HAS_ATOMIC64
+ spin_lock_init(&ic->i_ack_lock);
+#endif
/*
* rds_ib_conn_shutdown() waits for these to be emptied so they
@@ -701,11 +696,27 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
return 0;
}
+/*
+ * Free a connection. Connection must be shut down and not set for reconnect.
+ */
void rds_ib_conn_free(void *arg)
{
struct rds_ib_connection *ic = arg;
+ spinlock_t *lock_ptr;
+
rdsdebug("ic %p\n", ic);
+
+ /*
+ * Conn is either on a dev's list or on the nodev list.
+ * A race with shutdown() or connect() would cause problems
+ * (since rds_ibdev would change) but that should never happen.
+ */
+ lock_ptr = ic->rds_ibdev ? &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock;
+
+ spin_lock_irq(lock_ptr);
list_del(&ic->ib_node);
+ spin_unlock_irq(lock_ptr);
+
kfree(ic);
}
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 69a6289ed672..81033af93020 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -139,7 +139,7 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
}
-int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
+void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
{
struct rds_ib_connection *ic = conn->c_transport_data;
@@ -148,45 +148,44 @@ int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn
BUG_ON(list_empty(&ib_nodev_conns));
BUG_ON(list_empty(&ic->ib_node));
list_del(&ic->ib_node);
- spin_unlock_irq(&ib_nodev_conns_lock);
spin_lock_irq(&rds_ibdev->spinlock);
list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
spin_unlock_irq(&rds_ibdev->spinlock);
+ spin_unlock_irq(&ib_nodev_conns_lock);
ic->rds_ibdev = rds_ibdev;
-
- return 0;
}
-void rds_ib_remove_nodev_conns(void)
+void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
{
- struct rds_ib_connection *ic, *_ic;
- LIST_HEAD(tmp_list);
+ struct rds_ib_connection *ic = conn->c_transport_data;
- /* avoid calling conn_destroy with irqs off */
- spin_lock_irq(&ib_nodev_conns_lock);
- list_splice(&ib_nodev_conns, &tmp_list);
- INIT_LIST_HEAD(&ib_nodev_conns);
- spin_unlock_irq(&ib_nodev_conns_lock);
+ /* place conn on nodev_conns_list */
+ spin_lock(&ib_nodev_conns_lock);
- list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) {
- if (ic->conn->c_passive)
- rds_conn_destroy(ic->conn->c_passive);
- rds_conn_destroy(ic->conn);
- }
+ spin_lock_irq(&rds_ibdev->spinlock);
+ BUG_ON(list_empty(&ic->ib_node));
+ list_del(&ic->ib_node);
+ spin_unlock_irq(&rds_ibdev->spinlock);
+
+ list_add_tail(&ic->ib_node, &ib_nodev_conns);
+
+ spin_unlock(&ib_nodev_conns_lock);
+
+ ic->rds_ibdev = NULL;
}
-void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev)
+void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock)
{
struct rds_ib_connection *ic, *_ic;
LIST_HEAD(tmp_list);
/* avoid calling conn_destroy with irqs off */
- spin_lock_irq(&rds_ibdev->spinlock);
- list_splice(&rds_ibdev->conn_list, &tmp_list);
- INIT_LIST_HEAD(&rds_ibdev->conn_list);
- spin_unlock_irq(&rds_ibdev->spinlock);
+ spin_lock_irq(list_lock);
+ list_splice(list, &tmp_list);
+ INIT_LIST_HEAD(list);
+ spin_unlock_irq(list_lock);
list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) {
if (ic->conn->c_passive)
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 5061b5502162..36d931573ff4 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -395,10 +395,37 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
* room for it beyond the ring size. Send completion notices its special
* wr_id and avoids working with the ring in that case.
*/
+#ifndef KERNEL_HAS_ATOMIC64
static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
int ack_required)
{
- rds_ib_set_64bit(&ic->i_ack_next, seq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ic->i_ack_lock, flags);
+ ic->i_ack_next = seq;
+ if (ack_required)
+ set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
+ spin_unlock_irqrestore(&ic->i_ack_lock, flags);
+}
+
+static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
+{
+ unsigned long flags;
+ u64 seq;
+
+ clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
+
+ spin_lock_irqsave(&ic->i_ack_lock, flags);
+ seq = ic->i_ack_next;
+ spin_unlock_irqrestore(&ic->i_ack_lock, flags);
+
+ return seq;
+}
+#else
+static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
+ int ack_required)
+{
+ atomic64_set(&ic->i_ack_next, seq);
if (ack_required) {
smp_mb__before_clear_bit();
set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
@@ -410,8 +437,10 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
smp_mb__after_clear_bit();
- return ic->i_ack_next;
+ return atomic64_read(&ic->i_ack_next);
}
+#endif
+
static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
{
@@ -464,6 +493,10 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi
* - i_ack_next, which is the last sequence number we received
*
* Potentially, send queue and receive queue handlers can run concurrently.
+ * It would be nice to not have to use a spinlock to synchronize things,
+ * but the one problem that rules this out is that 64bit updates are
+ * not atomic on all platforms. Things would be a lot simpler if
+ * we had atomic64 or maybe cmpxchg64 everywhere.
*
* Reconnecting complicates this picture just slightly. When we
* reconnect, we may be seeing duplicate packets. The peer
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 1b56905c4c08..b732efb5b634 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MR
struct list_head rds_iw_devices;
+/* NOTE: if also grabbing iwdev lock, grab this first */
DEFINE_SPINLOCK(iw_nodev_conns_lock);
LIST_HEAD(iw_nodev_conns);
@@ -145,7 +146,7 @@ void rds_iw_remove_one(struct ib_device *device)
}
spin_unlock_irq(&rds_iwdev->spinlock);
- rds_iw_remove_conns(rds_iwdev);
+ rds_iw_destroy_conns(rds_iwdev);
if (rds_iwdev->mr_pool)
rds_iw_destroy_mr_pool(rds_iwdev->mr_pool);
@@ -258,7 +259,7 @@ static int rds_iw_laddr_check(__be32 addr)
void rds_iw_exit(void)
{
rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
- rds_iw_remove_nodev_conns();
+ rds_iw_destroy_nodev_conns();
ib_unregister_client(&rds_iw_client);
rds_iw_sysctl_exit();
rds_iw_recv_exit();
diff --git a/net/rds/iw.h b/net/rds/iw.h
index 0ddda34f2a1c..b4fb27252895 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -131,7 +131,12 @@ struct rds_iw_connection {
/* sending acks */
unsigned long i_ack_flags;
+#ifdef KERNEL_HAS_ATOMIC64
+ atomic64_t i_ack_next; /* next ACK to send */
+#else
+ spinlock_t i_ack_lock; /* protect i_ack_next */
u64 i_ack_next; /* next ACK to send */
+#endif
struct rds_header *i_ack;
struct ib_send_wr i_ack_wr;
struct ib_sge i_ack_sge;
@@ -294,9 +299,17 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn,
/* ib_rdma.c */
int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
-int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
-void rds_iw_remove_nodev_conns(void);
-void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev);
+void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
+void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
+void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock);
+static inline void rds_iw_destroy_nodev_conns(void)
+{
+ __rds_iw_destroy_conns(&iw_nodev_conns, &iw_nodev_conns_lock);
+}
+static inline void rds_iw_destroy_conns(struct rds_iw_device *rds_iwdev)
+{
+ __rds_iw_destroy_conns(&rds_iwdev->conn_list, &rds_iwdev->spinlock);
+}
struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *);
void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo);
void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *);
@@ -383,13 +396,4 @@ rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge)
return &sge[1];
}
-static inline void rds_iw_set_64bit(u64 *ptr, u64 val)
-{
-#if BITS_PER_LONG == 64
- *ptr = val;
-#else
- set_64bit(ptr, val);
-#endif
-}
-
#endif
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 57ecb3d4b8a5..a416b0d492b1 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -86,9 +86,7 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id);
if (err)
printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err);
- err = rds_iw_add_conn(rds_iwdev, conn);
- if (err)
- printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err);
+ rds_iw_add_conn(rds_iwdev, conn);
/* If the peer gave us the last packet it saw, process this as if
* we had received a regular ACK. */
@@ -637,19 +635,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
* Move connection back to the nodev list.
* Remove cm_id from the device cm_id list.
*/
- if (ic->rds_iwdev) {
-
- spin_lock_irq(&ic->rds_iwdev->spinlock);
- BUG_ON(list_empty(&ic->iw_node));
- list_del(&ic->iw_node);
- spin_unlock_irq(&ic->rds_iwdev->spinlock);
-
- spin_lock_irq(&iw_nodev_conns_lock);
- list_add_tail(&ic->iw_node, &iw_nodev_conns);
- spin_unlock_irq(&iw_nodev_conns_lock);
- rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
- ic->rds_iwdev = NULL;
- }
+ if (ic->rds_iwdev)
+ rds_iw_remove_conn(ic->rds_iwdev, conn);
rdma_destroy_id(ic->i_cm_id);
@@ -672,7 +659,11 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
/* Clear the ACK state */
clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
- rds_iw_set_64bit(&ic->i_ack_next, 0);
+#ifdef KERNEL_HAS_ATOMIC64
+ atomic64_set(&ic->i_ack_next, 0);
+#else
+ ic->i_ack_next = 0;
+#endif
ic->i_ack_recv = 0;
/* Clear flow control state */
@@ -706,6 +697,9 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
INIT_LIST_HEAD(&ic->iw_node);
mutex_init(&ic->i_recv_mutex);
+#ifndef KERNEL_HAS_ATOMIC64
+ spin_lock_init(&ic->i_ack_lock);
+#endif
/*
* rds_iw_conn_shutdown() waits for these to be emptied so they
@@ -726,11 +720,27 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
return 0;
}
+/*
+ * Free a connection. Connection must be shut down and not set for reconnect.
+ */
void rds_iw_conn_free(void *arg)
{
struct rds_iw_connection *ic = arg;
+ spinlock_t *lock_ptr;
+
rdsdebug("ic %p\n", ic);
+
+ /*
+ * Conn is either on a dev's list or on the nodev list.
+ * A race with shutdown() or connect() would cause problems
+ * (since rds_iwdev would change) but that should never happen.
+ */
+ lock_ptr = ic->rds_iwdev ? &ic->rds_iwdev->spinlock : &iw_nodev_conns_lock;
+
+ spin_lock_irq(lock_ptr);
list_del(&ic->iw_node);
+ spin_unlock_irq(lock_ptr);
+
kfree(ic);
}
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 1c02a8f952d0..dcdb37da80f2 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -196,7 +196,7 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i
return rds_iw_add_cm_id(rds_iwdev, cm_id);
}
-int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
+void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
{
struct rds_iw_connection *ic = conn->c_transport_data;
@@ -205,45 +205,45 @@ int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn
BUG_ON(list_empty(&iw_nodev_conns));
BUG_ON(list_empty(&ic->iw_node));
list_del(&ic->iw_node);
- spin_unlock_irq(&iw_nodev_conns_lock);
spin_lock_irq(&rds_iwdev->spinlock);
list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
spin_unlock_irq(&rds_iwdev->spinlock);
+ spin_unlock_irq(&iw_nodev_conns_lock);
ic->rds_iwdev = rds_iwdev;
-
- return 0;
}
-void rds_iw_remove_nodev_conns(void)
+void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
{
- struct rds_iw_connection *ic, *_ic;
- LIST_HEAD(tmp_list);
+ struct rds_iw_connection *ic = conn->c_transport_data;
- /* avoid calling conn_destroy with irqs off */
- spin_lock_irq(&iw_nodev_conns_lock);
- list_splice(&iw_nodev_conns, &tmp_list);
- INIT_LIST_HEAD(&iw_nodev_conns);
- spin_unlock_irq(&iw_nodev_conns_lock);
+ /* place conn on nodev_conns_list */
+ spin_lock(&iw_nodev_conns_lock);
- list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) {
- if (ic->conn->c_passive)
- rds_conn_destroy(ic->conn->c_passive);
- rds_conn_destroy(ic->conn);
- }
+ spin_lock_irq(&rds_iwdev->spinlock);
+ BUG_ON(list_empty(&ic->iw_node));
+ list_del(&ic->iw_node);
+ spin_unlock_irq(&rds_iwdev->spinlock);
+
+ list_add_tail(&ic->iw_node, &iw_nodev_conns);
+
+ spin_unlock(&iw_nodev_conns_lock);
+
+ rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
+ ic->rds_iwdev = NULL;
}
-void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev)
+void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock)
{
struct rds_iw_connection *ic, *_ic;
LIST_HEAD(tmp_list);
/* avoid calling conn_destroy with irqs off */
- spin_lock_irq(&rds_iwdev->spinlock);
- list_splice(&rds_iwdev->conn_list, &tmp_list);
- INIT_LIST_HEAD(&rds_iwdev->conn_list);
- spin_unlock_irq(&rds_iwdev->spinlock);
+ spin_lock_irq(list_lock);
+ list_splice(list, &tmp_list);
+ INIT_LIST_HEAD(list);
+ spin_unlock_irq(list_lock);
list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) {
if (ic->conn->c_passive)
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index a1931f0027a2..fde470fa50d5 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -395,10 +395,37 @@ void rds_iw_recv_init_ack(struct rds_iw_connection *ic)
* room for it beyond the ring size. Send completion notices its special
* wr_id and avoids working with the ring in that case.
*/
+#ifndef KERNEL_HAS_ATOMIC64
static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
int ack_required)
{
- rds_iw_set_64bit(&ic->i_ack_next, seq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ic->i_ack_lock, flags);
+ ic->i_ack_next = seq;
+ if (ack_required)
+ set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
+ spin_unlock_irqrestore(&ic->i_ack_lock, flags);
+}
+
+static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
+{
+ unsigned long flags;
+ u64 seq;
+
+ clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
+
+ spin_lock_irqsave(&ic->i_ack_lock, flags);
+ seq = ic->i_ack_next;
+ spin_unlock_irqrestore(&ic->i_ack_lock, flags);
+
+ return seq;
+}
+#else
+static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
+ int ack_required)
+{
+ atomic64_set(&ic->i_ack_next, seq);
if (ack_required) {
smp_mb__before_clear_bit();
set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
@@ -410,8 +437,10 @@ static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
smp_mb__after_clear_bit();
- return ic->i_ack_next;
+ return atomic64_read(&ic->i_ack_next);
}
+#endif
+
static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits)
{
@@ -464,6 +493,10 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi
* - i_ack_next, which is the last sequence number we received
*
* Potentially, send queue and receive queue handlers can run concurrently.
+ * It would be nice to not have to use a spinlock to synchronize things,
+ * but the one problem that rules this out is that 64bit updates are
+ * not atomic on all platforms. Things would be a lot simpler if
+ * we had atomic64 or maybe cmpxchg64 everywhere.
*
* Reconnecting complicates this picture just slightly. When we
* reconnect, we may be seeing duplicate packets. The peer
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 060400704979..619f0a30a4e5 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -28,6 +28,10 @@
*/
#define RDS_PORT 18634
+#ifdef ATOMIC64_INIT
+#define KERNEL_HAS_ATOMIC64
+#endif
+
#ifdef DEBUG
#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
#else
diff --git a/net/rds/send.c b/net/rds/send.c
index 1b37364656f0..104fe033203d 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -615,7 +615,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
{
struct rds_message *rm, *tmp;
struct rds_connection *conn;
- unsigned long flags;
+ unsigned long flags, flags2;
LIST_HEAD(list);
int wake = 0;
@@ -651,9 +651,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
list_for_each_entry(rm, &list, m_sock_item) {
/* We do this here rather than in the loop above, so that
* we don't have to nest m_rs_lock under rs->rs_lock */
- spin_lock(&rm->m_rs_lock);
+ spin_lock_irqsave(&rm->m_rs_lock, flags2);
rm->m_rs = NULL;
- spin_unlock(&rm->m_rs_lock);
+ spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
/*
* If we see this flag cleared then we're *sure* that someone
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 5592883e1e4a..afd91c78ce8e 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -17,28 +17,6 @@ config SUNRPC_XPRT_RDMA
If unsure, say N.
-config SUNRPC_REGISTER_V4
- bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)"
- depends on SUNRPC && EXPERIMENTAL
- default n
- help
- Sun added support for registering RPC services at an IPv6
- address by creating two new versions of the rpcbind protocol
- (RFC 1833).
-
- This option enables support in the kernel RPC server for
- registering kernel RPC services via version 4 of the rpcbind
- protocol. If you enable this option, you must run a portmapper
- daemon that supports rpcbind protocol version 4.
-
- Serving NFS over IPv6 from knfsd (the kernel's NFS server)
- requires that you enable this option and use a portmapper that
- supports rpcbind version 4.
-
- If unsure, say N to get traditional behavior (register kernel
- RPC services using only rpcbind version 2). Distributions
- using the legacy Linux portmapper daemon must say N here.
-
config RPCSEC_GSS_KRB5
tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
depends on SUNRPC && EXPERIMENTAL
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 836f15c0c4a3..5abab094441f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1032,27 +1032,20 @@ call_connect_status(struct rpc_task *task)
dprint_status(task);
task->tk_status = 0;
- if (status >= 0) {
+ if (status >= 0 || status == -EAGAIN) {
clnt->cl_stats->netreconn++;
task->tk_action = call_transmit;
return;
}
- /* Something failed: remote service port may have changed */
- rpc_force_rebind(clnt);
-
switch (status) {
- case -ENOTCONN:
- case -EAGAIN:
- task->tk_action = call_bind;
- if (!RPC_IS_SOFT(task))
- return;
/* if soft mounted, test if we've timed out */
case -ETIMEDOUT:
task->tk_action = call_timeout;
- return;
+ break;
+ default:
+ rpc_exit(task, -EIO);
}
- rpc_exit(task, -EIO);
}
/*
@@ -1105,14 +1098,26 @@ static void
call_transmit_status(struct rpc_task *task)
{
task->tk_action = call_status;
- /*
- * Special case: if we've been waiting on the socket's write_space()
- * callback, then don't call xprt_end_transmit().
- */
- if (task->tk_status == -EAGAIN)
- return;
- xprt_end_transmit(task);
- rpc_task_force_reencode(task);
+ switch (task->tk_status) {
+ case -EAGAIN:
+ break;
+ default:
+ xprt_end_transmit(task);
+ /*
+ * Special cases: if we've been waiting on the
+ * socket's write_space() callback, or if the
+ * socket just returned a connection error,
+ * then hold onto the transport lock.
+ */
+ case -ECONNREFUSED:
+ case -ECONNRESET:
+ case -ENOTCONN:
+ case -EHOSTDOWN:
+ case -EHOSTUNREACH:
+ case -ENETUNREACH:
+ case -EPIPE:
+ rpc_task_force_reencode(task);
+ }
}
/*
@@ -1152,9 +1157,12 @@ call_status(struct rpc_task *task)
xprt_conditional_disconnect(task->tk_xprt,
req->rq_connect_cookie);
break;
+ case -ECONNRESET:
case -ECONNREFUSED:
- case -ENOTCONN:
rpc_force_rebind(clnt);
+ rpc_delay(task, 3*HZ);
+ case -EPIPE:
+ case -ENOTCONN:
task->tk_action = call_bind;
break;
case -EAGAIN:
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 03ae007641e4..beee6da33035 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -63,9 +63,16 @@ enum {
* r_owner
*
* The "owner" is allowed to unset a service in the rpcbind database.
- * We always use the following (arbitrary) fixed string.
+ *
+ * For AF_LOCAL SET/UNSET requests, rpcbind treats this string as a
+ * UID which it maps to a local user name via a password lookup.
+ * In all other cases it is ignored.
+ *
+ * For SET/UNSET requests, user space provides a value, even for
+ * network requests, and GETADDR uses an empty string. We follow
+ * those precedents here.
*/
-#define RPCB_OWNER_STRING "rpcb"
+#define RPCB_OWNER_STRING "0"
#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING)
static void rpcb_getport_done(struct rpc_task *, void *);
@@ -124,12 +131,6 @@ static const struct sockaddr_in rpcb_inaddr_loopback = {
.sin_port = htons(RPCBIND_PORT),
};
-static const struct sockaddr_in6 rpcb_in6addr_loopback = {
- .sin6_family = AF_INET6,
- .sin6_addr = IN6ADDR_LOOPBACK_INIT,
- .sin6_port = htons(RPCBIND_PORT),
-};
-
static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr,
size_t addrlen, u32 version)
{
@@ -176,9 +177,10 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
return rpc_create(&args);
}
-static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
- u32 version, struct rpc_message *msg)
+static int rpcb_register_call(const u32 version, struct rpc_message *msg)
{
+ struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback;
+ size_t addrlen = sizeof(rpcb_inaddr_loopback);
struct rpc_clnt *rpcb_clnt;
int result, error = 0;
@@ -192,7 +194,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
error = PTR_ERR(rpcb_clnt);
if (error < 0) {
- printk(KERN_WARNING "RPC: failed to contact local rpcbind "
+ dprintk("RPC: failed to contact local rpcbind "
"server (errno %d).\n", -error);
return error;
}
@@ -254,25 +256,23 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
if (port)
msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
- return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
- sizeof(rpcb_inaddr_loopback),
- RPCBVERS_2, &msg);
+ return rpcb_register_call(RPCBVERS_2, &msg);
}
/*
* Fill in AF_INET family-specific arguments to register
*/
-static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
- struct rpc_message *msg)
+static int rpcb_register_inet4(const struct sockaddr *sap,
+ struct rpc_message *msg)
{
+ const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
struct rpcbind_args *map = msg->rpc_argp;
- unsigned short port = ntohs(address_to_register->sin_port);
+ unsigned short port = ntohs(sin->sin_port);
char buf[32];
/* Construct AF_INET universal address */
snprintf(buf, sizeof(buf), "%pI4.%u.%u",
- &address_to_register->sin_addr.s_addr,
- port >> 8, port & 0xff);
+ &sin->sin_addr.s_addr, port >> 8, port & 0xff);
map->r_addr = buf;
dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -284,29 +284,27 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
if (port)
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
- return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
- sizeof(rpcb_inaddr_loopback),
- RPCBVERS_4, msg);
+ return rpcb_register_call(RPCBVERS_4, msg);
}
/*
* Fill in AF_INET6 family-specific arguments to register
*/
-static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
- struct rpc_message *msg)
+static int rpcb_register_inet6(const struct sockaddr *sap,
+ struct rpc_message *msg)
{
+ const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
struct rpcbind_args *map = msg->rpc_argp;
- unsigned short port = ntohs(address_to_register->sin6_port);
+ unsigned short port = ntohs(sin6->sin6_port);
char buf[64];
/* Construct AF_INET6 universal address */
- if (ipv6_addr_any(&address_to_register->sin6_addr))
+ if (ipv6_addr_any(&sin6->sin6_addr))
snprintf(buf, sizeof(buf), "::.%u.%u",
port >> 8, port & 0xff);
else
snprintf(buf, sizeof(buf), "%pI6.%u.%u",
- &address_to_register->sin6_addr,
- port >> 8, port & 0xff);
+ &sin6->sin6_addr, port >> 8, port & 0xff);
map->r_addr = buf;
dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -318,9 +316,21 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
if (port)
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
- return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback,
- sizeof(rpcb_in6addr_loopback),
- RPCBVERS_4, msg);
+ return rpcb_register_call(RPCBVERS_4, msg);
+}
+
+static int rpcb_unregister_all_protofamilies(struct rpc_message *msg)
+{
+ struct rpcbind_args *map = msg->rpc_argp;
+
+ dprintk("RPC: unregistering [%u, %u, '%s'] with "
+ "local rpcbind\n",
+ map->r_prog, map->r_vers, map->r_netid);
+
+ map->r_addr = "";
+ msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
+
+ return rpcb_register_call(RPCBVERS_4, msg);
}
/**
@@ -340,10 +350,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
* invoke this function once for each [program, version, address,
* netid] tuple they wish to advertise.
*
- * Callers may also unregister RPC services that are no longer
- * available by setting the port number in the passed-in address
- * to zero. Callers pass a netid of "" to unregister all
- * transport netids associated with [program, version, address].
+ * Callers may also unregister RPC services that are registered at a
+ * specific address by setting the port number in @address to zero.
+ * They may unregister all registered protocol families at once for
+ * a service by passing a NULL @address argument. If @netid is ""
+ * then all netids for [program, version, address] are unregistered.
*
* This function uses rpcbind protocol version 4 to contact the
* local rpcbind daemon. The local rpcbind daemon must support
@@ -378,13 +389,14 @@ int rpcb_v4_register(const u32 program, const u32 version,
.rpc_argp = &map,
};
+ if (address == NULL)
+ return rpcb_unregister_all_protofamilies(&msg);
+
switch (address->sa_family) {
case AF_INET:
- return rpcb_register_netid4((struct sockaddr_in *)address,
- &msg);
+ return rpcb_register_inet4(address, &msg);
case AF_INET6:
- return rpcb_register_netid6((struct sockaddr_in6 *)address,
- &msg);
+ return rpcb_register_inet6(address, &msg);
}
return -EAFNOSUPPORT;
@@ -579,7 +591,7 @@ void rpcb_getport_async(struct rpc_task *task)
map->r_xprt = xprt_get(xprt);
map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
- map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */
+ map->r_owner = "";
map->r_status = -EIO;
child = rpcb_call_async(rpcb_clnt, map, proc);
@@ -703,11 +715,16 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
*portp = 0;
addr_len = ntohl(*p++);
+ if (addr_len == 0) {
+ dprintk("RPC: rpcb_decode_getaddr: "
+ "service is not registered\n");
+ return 0;
+ }
+
/*
- * Simple sanity check. The smallest possible universal
- * address is an IPv4 address string containing 11 bytes.
+ * Simple sanity check.
*/
- if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN)
+ if (addr_len > RPCBIND_MAXUADDRLEN)
goto out_err;
/*
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bb507e2bb94d..9f2f2412a2f3 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -359,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
*/
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- sa_family_t family, void (*shutdown)(struct svc_serv *serv))
+ void (*shutdown)(struct svc_serv *serv))
{
struct svc_serv *serv;
unsigned int vers;
@@ -368,7 +368,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
return NULL;
- serv->sv_family = family;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
serv->sv_nrthreads = 1;
@@ -427,21 +426,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
struct svc_serv *
svc_create(struct svc_program *prog, unsigned int bufsize,
- sa_family_t family, void (*shutdown)(struct svc_serv *serv))
+ void (*shutdown)(struct svc_serv *serv))
{
- return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
+ return __svc_create(prog, bufsize, /*npools*/1, shutdown);
}
EXPORT_SYMBOL_GPL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
- sa_family_t family, void (*shutdown)(struct svc_serv *serv),
+ void (*shutdown)(struct svc_serv *serv),
svc_thread_fn func, struct module *mod)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
- serv = __svc_create(prog, bufsize, npools, family, shutdown);
+ serv = __svc_create(prog, bufsize, npools, shutdown);
if (serv != NULL) {
serv->sv_function = func;
@@ -719,8 +718,6 @@ svc_exit_thread(struct svc_rqst *rqstp)
}
EXPORT_SYMBOL_GPL(svc_exit_thread);
-#ifdef CONFIG_SUNRPC_REGISTER_V4
-
/*
* Register an "inet" protocol family netid with the local
* rpcbind daemon via an rpcbind v4 SET request.
@@ -735,12 +732,13 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
const unsigned short protocol,
const unsigned short port)
{
- struct sockaddr_in sin = {
+ const struct sockaddr_in sin = {
.sin_family = AF_INET,
.sin_addr.s_addr = htonl(INADDR_ANY),
.sin_port = htons(port),
};
- char *netid;
+ const char *netid;
+ int error;
switch (protocol) {
case IPPROTO_UDP:
@@ -750,13 +748,23 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
netid = RPCBIND_NETID_TCP;
break;
default:
- return -EPROTONOSUPPORT;
+ return -ENOPROTOOPT;
}
- return rpcb_v4_register(program, version,
- (struct sockaddr *)&sin, netid);
+ error = rpcb_v4_register(program, version,
+ (const struct sockaddr *)&sin, netid);
+
+ /*
+ * User space didn't support rpcbind v4, so retry this
+ * registration request with the legacy rpcbind v2 protocol.
+ */
+ if (error == -EPROTONOSUPPORT)
+ error = rpcb_register(program, version, protocol, port);
+
+ return error;
}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/*
* Register an "inet6" protocol family netid with the local
* rpcbind daemon via an rpcbind v4 SET request.
@@ -771,12 +779,13 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
const unsigned short protocol,
const unsigned short port)
{
- struct sockaddr_in6 sin6 = {
+ const struct sockaddr_in6 sin6 = {
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_ANY_INIT,
.sin6_port = htons(port),
};
- char *netid;
+ const char *netid;
+ int error;
switch (protocol) {
case IPPROTO_UDP:
@@ -786,12 +795,22 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
netid = RPCBIND_NETID_TCP6;
break;
default:
- return -EPROTONOSUPPORT;
+ return -ENOPROTOOPT;
}
- return rpcb_v4_register(program, version,
- (struct sockaddr *)&sin6, netid);
+ error = rpcb_v4_register(program, version,
+ (const struct sockaddr *)&sin6, netid);
+
+ /*
+ * User space didn't support rpcbind version 4, so we won't
+ * use a PF_INET6 listener.
+ */
+ if (error == -EPROTONOSUPPORT)
+ error = -EAFNOSUPPORT;
+
+ return error;
}
+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
/*
* Register a kernel RPC service via rpcbind version 4.
@@ -799,69 +818,43 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
* Returns zero on success; a negative errno value is returned
* if any error occurs.
*/
-static int __svc_register(const u32 program, const u32 version,
- const sa_family_t family,
+static int __svc_register(const char *progname,
+ const u32 program, const u32 version,
+ const int family,
const unsigned short protocol,
const unsigned short port)
{
- int error;
+ int error = -EAFNOSUPPORT;
switch (family) {
- case AF_INET:
- return __svc_rpcb_register4(program, version,
+ case PF_INET:
+ error = __svc_rpcb_register4(program, version,
protocol, port);
- case AF_INET6:
+ break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case PF_INET6:
error = __svc_rpcb_register6(program, version,
protocol, port);
- if (error < 0)
- return error;
-
- /*
- * Work around bug in some versions of Linux rpcbind
- * which don't allow registration of both inet and
- * inet6 netids.
- *
- * Error return ignored for now.
- */
- __svc_rpcb_register4(program, version,
- protocol, port);
- return 0;
+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
}
- return -EAFNOSUPPORT;
-}
-
-#else /* CONFIG_SUNRPC_REGISTER_V4 */
-
-/*
- * Register a kernel RPC service via rpcbind version 2.
- *
- * Returns zero on success; a negative errno value is returned
- * if any error occurs.
- */
-static int __svc_register(const u32 program, const u32 version,
- sa_family_t family,
- const unsigned short protocol,
- const unsigned short port)
-{
- if (family != AF_INET)
- return -EAFNOSUPPORT;
-
- return rpcb_register(program, version, protocol, port);
+ if (error < 0)
+ printk(KERN_WARNING "svc: failed to register %sv%u RPC "
+ "service (errno %d).\n", progname, version, -error);
+ return error;
}
-#endif /* CONFIG_SUNRPC_REGISTER_V4 */
-
/**
* svc_register - register an RPC service with the local portmapper
* @serv: svc_serv struct for the service to register
+ * @family: protocol family of service's listener socket
* @proto: transport protocol number to advertise
* @port: port to advertise
*
- * Service is registered for any address in serv's address family
+ * Service is registered for any address in the passed-in protocol family
*/
-int svc_register(const struct svc_serv *serv, const unsigned short proto,
- const unsigned short port)
+int svc_register(const struct svc_serv *serv, const int family,
+ const unsigned short proto, const unsigned short port)
{
struct svc_program *progp;
unsigned int i;
@@ -879,15 +872,15 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
i,
proto == IPPROTO_UDP? "udp" : "tcp",
port,
- serv->sv_family,
+ family,
progp->pg_vers[i]->vs_hidden?
" (but not telling portmap)" : "");
if (progp->pg_vers[i]->vs_hidden)
continue;
- error = __svc_register(progp->pg_prog, i,
- serv->sv_family, proto, port);
+ error = __svc_register(progp->pg_name, progp->pg_prog,
+ i, family, proto, port);
if (error < 0)
break;
}
@@ -896,38 +889,31 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
return error;
}
-#ifdef CONFIG_SUNRPC_REGISTER_V4
-
+/*
+ * If user space is running rpcbind, it should take the v4 UNSET
+ * and clear everything for this [program, version]. If user space
+ * is running portmap, it will reject the v4 UNSET, but won't have
+ * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient
+ * in this case to clear all existing entries for [program, version].
+ */
static void __svc_unregister(const u32 program, const u32 version,
const char *progname)
{
- struct sockaddr_in6 sin6 = {
- .sin6_family = AF_INET6,
- .sin6_addr = IN6ADDR_ANY_INIT,
- .sin6_port = 0,
- };
int error;
- error = rpcb_v4_register(program, version,
- (struct sockaddr *)&sin6, "");
- dprintk("svc: %s(%sv%u), error %d\n",
- __func__, progname, version, error);
-}
-
-#else /* CONFIG_SUNRPC_REGISTER_V4 */
+ error = rpcb_v4_register(program, version, NULL, "");
-static void __svc_unregister(const u32 program, const u32 version,
- const char *progname)
-{
- int error;
+ /*
+ * User space didn't support rpcbind v4, so retry this
+ * request with the legacy rpcbind v2 protocol.
+ */
+ if (error == -EPROTONOSUPPORT)
+ error = rpcb_register(program, version, 0, 0);
- error = rpcb_register(program, version, 0, 0);
dprintk("svc: %s(%sv%u), error %d\n",
__func__, progname, version, error);
}
-#endif /* CONFIG_SUNRPC_REGISTER_V4 */
-
/*
* All netids, bind addresses and ports registered for [program, version]
* are removed from the local rpcbind database (if the service is not
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e588df5d6b34..2819ee093f36 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -161,7 +161,9 @@ EXPORT_SYMBOL_GPL(svc_xprt_init);
static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
struct svc_serv *serv,
- unsigned short port, int flags)
+ const int family,
+ const unsigned short port,
+ int flags)
{
struct sockaddr_in sin = {
.sin_family = AF_INET,
@@ -176,12 +178,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
struct sockaddr *sap;
size_t len;
- switch (serv->sv_family) {
- case AF_INET:
+ switch (family) {
+ case PF_INET:
sap = (struct sockaddr *)&sin;
len = sizeof(sin);
break;
- case AF_INET6:
+ case PF_INET6:
sap = (struct sockaddr *)&sin6;
len = sizeof(sin6);
break;
@@ -192,7 +194,8 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
}
-int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
+int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+ const int family, const unsigned short port,
int flags)
{
struct svc_xprt_class *xcl;
@@ -209,7 +212,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
goto err;
spin_unlock(&svc_xprt_class_lock);
- newxprt = __svc_xpo_create(xcl, serv, port, flags);
+ newxprt = __svc_xpo_create(xcl, serv, family, port, flags);
if (IS_ERR(newxprt)) {
module_put(xcl->xcl_owner);
return PTR_ERR(newxprt);
@@ -1033,7 +1036,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
return dr;
}
-/*
+/**
+ * svc_find_xprt - find an RPC transport instance
+ * @serv: pointer to svc_serv to search
+ * @xcl_name: C string containing transport's class name
+ * @af: Address family of transport's local address
+ * @port: transport's IP port number
+ *
* Return the transport instance pointer for the endpoint accepting
* connections/peer traffic from the specified transport class,
* address family and port.
@@ -1042,14 +1051,14 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
* wild-card, and will result in matching the first transport in the
* service's list that has a matching class name.
*/
-struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
- int af, int port)
+struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
+ const sa_family_t af, const unsigned short port)
{
struct svc_xprt *xprt;
struct svc_xprt *found = NULL;
/* Sanity check the args */
- if (!serv || !xcl_name)
+ if (serv == NULL || xcl_name == NULL)
return found;
spin_lock_bh(&serv->sv_lock);
@@ -1058,7 +1067,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
continue;
if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
continue;
- if (port && port != svc_xprt_local_port(xprt))
+ if (port != 0 && port != svc_xprt_local_port(xprt))
continue;
found = xprt;
svc_xprt_get(xprt);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5763e6460fea..9d504234af4a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1110,7 +1110,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
struct svc_sock *svsk;
struct sock *inet;
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
- int val;
dprintk("svc: svc_setup_socket %p\n", sock);
if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1122,7 +1121,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
/* Register socket with portmapper */
if (*errp >= 0 && pmap_register)
- *errp = svc_register(serv, inet->sk_protocol,
+ *errp = svc_register(serv, inet->sk_family, inet->sk_protocol,
ntohs(inet_sk(inet)->sport));
if (*errp < 0) {
@@ -1143,18 +1142,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
else
svc_tcp_init(svsk, serv);
- /*
- * We start one listener per sv_serv. We want AF_INET
- * requests to be automatically shunted to our AF_INET6
- * listener using a mapped IPv4 address. Make sure
- * no-one starts an equivalent IPv4 listener, which
- * would steal our incoming connections.
- */
- val = 0;
- if (serv->sv_family == AF_INET6)
- kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
- (char *)&val, sizeof(val));
-
dprintk("svc: svc_setup_socket created %p (inet %p)\n",
svsk, svsk->sk_sk);
@@ -1222,6 +1209,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
struct sockaddr_storage addr;
struct sockaddr *newsin = (struct sockaddr *)&addr;
int newlen;
+ int family;
+ int val;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("svc: svc_create_socket(%s, %d, %s)\n",
@@ -1233,14 +1222,35 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
"sockets supported\n");
return ERR_PTR(-EINVAL);
}
+
type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
+ switch (sin->sa_family) {
+ case AF_INET6:
+ family = PF_INET6;
+ break;
+ case AF_INET:
+ family = PF_INET;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
- error = sock_create_kern(sin->sa_family, type, protocol, &sock);
+ error = sock_create_kern(family, type, protocol, &sock);
if (error < 0)
return ERR_PTR(error);
svc_reclassify_socket(sock);
+ /*
+ * If this is an PF_INET6 listener, we want to avoid
+ * getting requests from IPv4 remotes. Those should
+ * be shunted to a PF_INET listener via rpcbind.
+ */
+ val = 1;
+ if (family == PF_INET6)
+ kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
+ (char *)&val, sizeof(val));
+
if (type == SOCK_STREAM)
sock->sk->sk_reuse = 1; /* allow address reuse */
error = kernel_bind(sock, sin, len);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 62098d101a1f..a0bfe53f1621 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -152,6 +152,37 @@ out:
EXPORT_SYMBOL_GPL(xprt_unregister_transport);
/**
+ * xprt_load_transport - load a transport implementation
+ * @transport_name: transport to load
+ *
+ * Returns:
+ * 0: transport successfully loaded
+ * -ENOENT: transport module not available
+ */
+int xprt_load_transport(const char *transport_name)
+{
+ struct xprt_class *t;
+ char module_name[sizeof t->name + 5];
+ int result;
+
+ result = 0;
+ spin_lock(&xprt_list_lock);
+ list_for_each_entry(t, &xprt_list, list) {
+ if (strcmp(t->name, transport_name) == 0) {
+ spin_unlock(&xprt_list_lock);
+ goto out;
+ }
+ }
+ spin_unlock(&xprt_list_lock);
+ strcpy(module_name, "xprt");
+ strncat(module_name, transport_name, sizeof t->name);
+ result = request_module(module_name);
+out:
+ return result;
+}
+EXPORT_SYMBOL_GPL(xprt_load_transport);
+
+/**
* xprt_reserve_xprt - serialize write access to transports
* @task: task that is requesting access to the transport
*
@@ -580,7 +611,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
dprintk("RPC: disconnected transport %p\n", xprt);
spin_lock_bh(&xprt->transport_lock);
xprt_clear_connected(xprt);
- xprt_wake_pending_tasks(xprt, -ENOTCONN);
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
spin_unlock_bh(&xprt->transport_lock);
}
EXPORT_SYMBOL_GPL(xprt_disconnect_done);
@@ -598,7 +629,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
/* Try to schedule an autoclose RPC call */
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
queue_work(rpciod_workqueue, &xprt->task_cleanup);
- xprt_wake_pending_tasks(xprt, -ENOTCONN);
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
spin_unlock_bh(&xprt->transport_lock);
}
@@ -625,7 +656,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
/* Try to schedule an autoclose RPC call */
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
queue_work(rpciod_workqueue, &xprt->task_cleanup);
- xprt_wake_pending_tasks(xprt, -ENOTCONN);
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
out:
spin_unlock_bh(&xprt->transport_lock);
}
@@ -695,9 +726,8 @@ static void xprt_connect_status(struct rpc_task *task)
}
switch (task->tk_status) {
- case -ENOTCONN:
- dprintk("RPC: %5u xprt_connect_status: connection broken\n",
- task->tk_pid);
+ case -EAGAIN:
+ dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
break;
case -ETIMEDOUT:
dprintk("RPC: %5u xprt_connect_status: connect attempt timed "
@@ -818,15 +848,8 @@ int xprt_prepare_transmit(struct rpc_task *task)
err = req->rq_received;
goto out_unlock;
}
- if (!xprt->ops->reserve_xprt(task)) {
+ if (!xprt->ops->reserve_xprt(task))
err = -EAGAIN;
- goto out_unlock;
- }
-
- if (!xprt_connected(xprt)) {
- err = -ENOTCONN;
- goto out_unlock;
- }
out_unlock:
spin_unlock_bh(&xprt->transport_lock);
return err;
@@ -870,32 +893,26 @@ void xprt_transmit(struct rpc_task *task)
req->rq_connect_cookie = xprt->connect_cookie;
req->rq_xtime = jiffies;
status = xprt->ops->send_request(task);
- if (status == 0) {
- dprintk("RPC: %5u xmit complete\n", task->tk_pid);
- spin_lock_bh(&xprt->transport_lock);
+ if (status != 0) {
+ task->tk_status = status;
+ return;
+ }
- xprt->ops->set_retrans_timeout(task);
+ dprintk("RPC: %5u xmit complete\n", task->tk_pid);
+ spin_lock_bh(&xprt->transport_lock);
- xprt->stat.sends++;
- xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
- xprt->stat.bklog_u += xprt->backlog.qlen;
+ xprt->ops->set_retrans_timeout(task);
- /* Don't race with disconnect */
- if (!xprt_connected(xprt))
- task->tk_status = -ENOTCONN;
- else if (!req->rq_received)
- rpc_sleep_on(&xprt->pending, task, xprt_timer);
- spin_unlock_bh(&xprt->transport_lock);
- return;
- }
+ xprt->stat.sends++;
+ xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
+ xprt->stat.bklog_u += xprt->backlog.qlen;
- /* Note: at this point, task->tk_sleeping has not yet been set,
- * hence there is no danger of the waking up task being put on
- * schedq, and being picked up by a parallel run of rpciod().
- */
- task->tk_status = status;
- if (status == -ECONNREFUSED)
- rpc_sleep_on(&xprt->sending, task, NULL);
+ /* Don't race with disconnect */
+ if (!xprt_connected(xprt))
+ task->tk_status = -ENOTCONN;
+ else if (!req->rq_received)
+ rpc_sleep_on(&xprt->pending, task, xprt_timer);
+ spin_unlock_bh(&xprt->transport_lock);
}
static inline void do_xprt_reserve(struct rpc_task *task)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 14106d26bb95..e5e28d1946a4 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -310,6 +310,19 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
__func__, pad, destp, rqst->rq_slen, curlen);
copy_len = rqst->rq_snd_buf.page_len;
+
+ if (rqst->rq_snd_buf.tail[0].iov_len) {
+ curlen = rqst->rq_snd_buf.tail[0].iov_len;
+ if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) {
+ memmove(destp + copy_len,
+ rqst->rq_snd_buf.tail[0].iov_base, curlen);
+ r_xprt->rx_stats.pullup_copy_count += curlen;
+ }
+ dprintk("RPC: %s: tail destp 0x%p len %d\n",
+ __func__, destp + copy_len, curlen);
+ rqst->rq_svec[0].iov_len += curlen;
+ }
+
r_xprt->rx_stats.pullup_copy_count += copy_len;
npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
for (i = 0; copy_len && i < npages; i++) {
@@ -332,17 +345,6 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
destp += curlen;
copy_len -= curlen;
}
- if (rqst->rq_snd_buf.tail[0].iov_len) {
- curlen = rqst->rq_snd_buf.tail[0].iov_len;
- if (destp != rqst->rq_snd_buf.tail[0].iov_base) {
- memcpy(destp,
- rqst->rq_snd_buf.tail[0].iov_base, curlen);
- r_xprt->rx_stats.pullup_copy_count += curlen;
- }
- dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n",
- __func__, destp, copy_len, curlen);
- rqst->rq_svec[0].iov_len += curlen;
- }
/* header now contains entire send message */
return pad;
}
@@ -656,7 +658,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
curlen = rqst->rq_rcv_buf.tail[0].iov_len;
if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
- memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
+ memmove(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n",
__func__, srcp, copy_len, curlen);
rqst->rq_rcv_buf.tail[0].iov_len = curlen;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index a3334e3b73cc..6c26a675435a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -191,7 +191,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct svc_rdma_req_map *vec)
{
- int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
int sge_no;
u32 sge_bytes;
u32 page_bytes;
@@ -235,7 +234,11 @@ static int map_xdr(struct svcxprt_rdma *xprt,
sge_no++;
}
- BUG_ON(sge_no > sge_max);
+ dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
+ "page_base %u page_len %u head_len %zu tail_len %zu\n",
+ sge_no, page_no, xdr->page_base, xdr->page_len,
+ xdr->head[0].iov_len, xdr->tail[0].iov_len);
+
vec->count = sge_no;
return 0;
}
@@ -579,7 +582,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[page_no+1].length = 0;
}
BUG_ON(sge_no > rdma->sc_max_sge);
- BUG_ON(sge_no > ctxt->count);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
send_wr.wr_id = (unsigned long)ctxt;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 568330eebbfe..d40ff50887aa 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -49,6 +49,9 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
+#define XS_TCP_LINGER_TO (15U * HZ)
+static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
+
/*
* We can register our own files under /proc/sys/sunrpc by
* calling register_sysctl_table() again. The files in that
@@ -117,6 +120,14 @@ static ctl_table xs_tunables_table[] = {
.extra2 = &xprt_max_resvport_limit
},
{
+ .procname = "tcp_fin_timeout",
+ .data = &xs_tcp_fin_timeout,
+ .maxlen = sizeof(xs_tcp_fin_timeout),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ .strategy = sysctl_jiffies
+ },
+ {
.ctl_name = 0,
},
};
@@ -521,11 +532,12 @@ static void xs_nospace_callback(struct rpc_task *task)
* @task: task to put to sleep
*
*/
-static void xs_nospace(struct rpc_task *task)
+static int xs_nospace(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ int ret = 0;
dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
task->tk_pid, req->rq_slen - req->rq_bytes_sent,
@@ -537,6 +549,7 @@ static void xs_nospace(struct rpc_task *task)
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
+ ret = -EAGAIN;
/*
* Notify TCP that we're limited by the application
* window size
@@ -548,10 +561,11 @@ static void xs_nospace(struct rpc_task *task)
}
} else {
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- task->tk_status = -ENOTCONN;
+ ret = -ENOTCONN;
}
spin_unlock_bh(&xprt->transport_lock);
+ return ret;
}
/**
@@ -594,6 +608,8 @@ static int xs_udp_send_request(struct rpc_task *task)
/* Still some bytes left; set up for a retry later. */
status = -EAGAIN;
}
+ if (!transport->sock)
+ goto out;
switch (status) {
case -ENOTSOCK:
@@ -601,21 +617,19 @@ static int xs_udp_send_request(struct rpc_task *task)
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- xs_nospace(task);
+ status = xs_nospace(task);
break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
case -ENETUNREACH:
case -EPIPE:
case -ECONNREFUSED:
/* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED. */
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- break;
- default:
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- dprintk("RPC: sendmsg returned unrecognized error %d\n",
- -status);
}
-
+out:
return status;
}
@@ -697,6 +711,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
status = -EAGAIN;
break;
}
+ if (!transport->sock)
+ goto out;
switch (status) {
case -ENOTSOCK:
@@ -704,23 +720,19 @@ static int xs_tcp_send_request(struct rpc_task *task)
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- xs_nospace(task);
+ status = xs_nospace(task);
break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
case -ECONNRESET:
+ case -EPIPE:
xs_tcp_shutdown(xprt);
case -ECONNREFUSED:
case -ENOTCONN:
- case -EPIPE:
- status = -ENOTCONN;
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- break;
- default:
- dprintk("RPC: sendmsg returned unrecognized error %d\n",
- -status);
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
- xs_tcp_shutdown(xprt);
}
-
+out:
return status;
}
@@ -767,23 +779,13 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s
sk->sk_error_report = transport->old_error_report;
}
-/**
- * xs_close - close a socket
- * @xprt: transport
- *
- * This is used when all requests are complete; ie, no DRC state remains
- * on the server we want to save.
- */
-static void xs_close(struct rpc_xprt *xprt)
+static void xs_reset_transport(struct sock_xprt *transport)
{
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct socket *sock = transport->sock;
struct sock *sk = transport->inet;
- if (!sk)
- goto clear_close_wait;
-
- dprintk("RPC: xs_close xprt %p\n", xprt);
+ if (sk == NULL)
+ return;
write_lock_bh(&sk->sk_callback_lock);
transport->inet = NULL;
@@ -797,8 +799,25 @@ static void xs_close(struct rpc_xprt *xprt)
sk->sk_no_check = 0;
sock_release(sock);
-clear_close_wait:
+}
+
+/**
+ * xs_close - close a socket
+ * @xprt: transport
+ *
+ * This is used when all requests are complete; ie, no DRC state remains
+ * on the server we want to save.
+ */
+static void xs_close(struct rpc_xprt *xprt)
+{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
+ dprintk("RPC: xs_close xprt %p\n", xprt);
+
+ xs_reset_transport(transport);
+
smp_mb__before_clear_bit();
+ clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
smp_mb__after_clear_bit();
@@ -1126,6 +1145,47 @@ out:
read_unlock(&sk->sk_callback_lock);
}
+/*
+ * Do the equivalent of linger/linger2 handling for dealing with
+ * broken servers that don't close the socket in a timely
+ * fashion
+ */
+static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt,
+ unsigned long timeout)
+{
+ struct sock_xprt *transport;
+
+ if (xprt_test_and_set_connecting(xprt))
+ return;
+ set_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+ transport = container_of(xprt, struct sock_xprt, xprt);
+ queue_delayed_work(rpciod_workqueue, &transport->connect_worker,
+ timeout);
+}
+
+static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
+{
+ struct sock_xprt *transport;
+
+ transport = container_of(xprt, struct sock_xprt, xprt);
+
+ if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) ||
+ !cancel_delayed_work(&transport->connect_worker))
+ return;
+ clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+ xprt_clear_connecting(xprt);
+}
+
+static void xs_sock_mark_closed(struct rpc_xprt *xprt)
+{
+ smp_mb__before_clear_bit();
+ clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ clear_bit(XPRT_CLOSING, &xprt->state);
+ smp_mb__after_clear_bit();
+ /* Mark transport as closed and wake up all pending tasks */
+ xprt_disconnect_done(xprt);
+}
+
/**
* xs_tcp_state_change - callback to handle TCP socket state changes
* @sk: socket whose state has changed
@@ -1158,7 +1218,7 @@ static void xs_tcp_state_change(struct sock *sk)
transport->tcp_flags =
TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
- xprt_wake_pending_tasks(xprt, 0);
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
}
spin_unlock_bh(&xprt->transport_lock);
break;
@@ -1171,10 +1231,10 @@ static void xs_tcp_state_change(struct sock *sk)
clear_bit(XPRT_CONNECTED, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
smp_mb__after_clear_bit();
+ xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
break;
case TCP_CLOSE_WAIT:
/* The server initiated a shutdown of the socket */
- set_bit(XPRT_CLOSING, &xprt->state);
xprt_force_disconnect(xprt);
case TCP_SYN_SENT:
xprt->connect_cookie++;
@@ -1187,40 +1247,35 @@ static void xs_tcp_state_change(struct sock *sk)
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
break;
case TCP_LAST_ACK:
+ set_bit(XPRT_CLOSING, &xprt->state);
+ xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
smp_mb__before_clear_bit();
clear_bit(XPRT_CONNECTED, &xprt->state);
smp_mb__after_clear_bit();
break;
case TCP_CLOSE:
- smp_mb__before_clear_bit();
- clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
- clear_bit(XPRT_CLOSING, &xprt->state);
- smp_mb__after_clear_bit();
- /* Mark transport as closed and wake up all pending tasks */
- xprt_disconnect_done(xprt);
+ xs_tcp_cancel_linger_timeout(xprt);
+ xs_sock_mark_closed(xprt);
}
out:
read_unlock(&sk->sk_callback_lock);
}
/**
- * xs_tcp_error_report - callback mainly for catching RST events
+ * xs_error_report - callback mainly for catching socket errors
* @sk: socket
*/
-static void xs_tcp_error_report(struct sock *sk)
+static void xs_error_report(struct sock *sk)
{
struct rpc_xprt *xprt;
read_lock(&sk->sk_callback_lock);
- if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED)
- goto out;
if (!(xprt = xprt_from_sock(sk)))
goto out;
dprintk("RPC: %s client %p...\n"
"RPC: error %d\n",
__func__, xprt, sk->sk_err);
-
- xprt_force_disconnect(xprt);
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
out:
read_unlock(&sk->sk_callback_lock);
}
@@ -1494,6 +1549,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_udp_data_ready;
sk->sk_write_space = xs_udp_write_space;
+ sk->sk_error_report = xs_error_report;
sk->sk_no_check = UDP_CSUM_NORCV;
sk->sk_allocation = GFP_ATOMIC;
@@ -1526,9 +1582,10 @@ static void xs_udp_connect_worker4(struct work_struct *work)
goto out;
/* Start by resetting any existing state */
- xs_close(xprt);
+ xs_reset_transport(transport);
- if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+ err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (err < 0) {
dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
goto out;
}
@@ -1545,8 +1602,8 @@ static void xs_udp_connect_worker4(struct work_struct *work)
xs_udp_finish_connecting(xprt, sock);
status = 0;
out:
- xprt_wake_pending_tasks(xprt, status);
xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, status);
}
/**
@@ -1567,9 +1624,10 @@ static void xs_udp_connect_worker6(struct work_struct *work)
goto out;
/* Start by resetting any existing state */
- xs_close(xprt);
+ xs_reset_transport(transport);
- if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+ err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (err < 0) {
dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
goto out;
}
@@ -1586,18 +1644,17 @@ static void xs_udp_connect_worker6(struct work_struct *work)
xs_udp_finish_connecting(xprt, sock);
status = 0;
out:
- xprt_wake_pending_tasks(xprt, status);
xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, status);
}
/*
* We need to preserve the port number so the reply cache on the server can
* find our cached RPC replies when we get around to reconnecting.
*/
-static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
+static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
{
int result;
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct sockaddr any;
dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);
@@ -1609,11 +1666,24 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
memset(&any, 0, sizeof(any));
any.sa_family = AF_UNSPEC;
result = kernel_connect(transport->sock, &any, sizeof(any), 0);
- if (result)
+ if (!result)
+ xs_sock_mark_closed(xprt);
+ else
dprintk("RPC: AF_UNSPEC connect return code %d\n",
result);
}
+static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
+{
+ unsigned int state = transport->inet->sk_state;
+
+ if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED)
+ return;
+ if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT))
+ return;
+ xs_abort_connection(xprt, transport);
+}
+
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1629,7 +1699,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_data_ready = xs_tcp_data_ready;
sk->sk_state_change = xs_tcp_state_change;
sk->sk_write_space = xs_tcp_write_space;
- sk->sk_error_report = xs_tcp_error_report;
+ sk->sk_error_report = xs_error_report;
sk->sk_allocation = GFP_ATOMIC;
/* socket options */
@@ -1657,37 +1727,42 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
}
/**
- * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
- * @work: RPC transport to connect
+ * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
+ * @xprt: RPC transport to connect
+ * @transport: socket transport to connect
+ * @create_sock: function to create a socket of the correct type
*
* Invoked by a work queue tasklet.
*/
-static void xs_tcp_connect_worker4(struct work_struct *work)
+static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
+ struct sock_xprt *transport,
+ struct socket *(*create_sock)(struct rpc_xprt *,
+ struct sock_xprt *))
{
- struct sock_xprt *transport =
- container_of(work, struct sock_xprt, connect_worker.work);
- struct rpc_xprt *xprt = &transport->xprt;
struct socket *sock = transport->sock;
- int err, status = -EIO;
+ int status = -EIO;
if (xprt->shutdown)
goto out;
if (!sock) {
- /* start from scratch */
- if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
- dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
+ clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+ sock = create_sock(xprt, transport);
+ if (IS_ERR(sock)) {
+ status = PTR_ERR(sock);
goto out;
}
- xs_reclassify_socket4(sock);
+ } else {
+ int abort_and_exit;
- if (xs_bind4(transport, sock) < 0) {
- sock_release(sock);
- goto out;
- }
- } else
+ abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
+ &xprt->state);
/* "close" the socket, preserving the local port */
- xs_tcp_reuse_connection(xprt);
+ xs_tcp_reuse_connection(xprt, transport);
+
+ if (abort_and_exit)
+ goto out_eagain;
+ }
dprintk("RPC: worker connecting xprt %p to address: %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
@@ -1696,83 +1771,104 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
xprt, -status, xprt_connected(xprt),
sock->sk->sk_state);
- if (status < 0) {
- switch (status) {
- case -EINPROGRESS:
- case -EALREADY:
- goto out_clear;
- case -ECONNREFUSED:
- case -ECONNRESET:
- /* retry with existing socket, after a delay */
- break;
- default:
- /* get rid of existing socket, and retry */
- xs_tcp_shutdown(xprt);
- }
+ switch (status) {
+ case -ECONNREFUSED:
+ case -ECONNRESET:
+ case -ENETUNREACH:
+ /* retry with existing socket, after a delay */
+ case 0:
+ case -EINPROGRESS:
+ case -EALREADY:
+ xprt_clear_connecting(xprt);
+ return;
}
+ /* get rid of existing socket, and retry */
+ xs_tcp_shutdown(xprt);
+ printk("%s: connect returned unhandled error %d\n",
+ __func__, status);
+out_eagain:
+ status = -EAGAIN;
out:
- xprt_wake_pending_tasks(xprt, status);
-out_clear:
xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, status);
+}
+
+static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
+ struct sock_xprt *transport)
+{
+ struct socket *sock;
+ int err;
+
+ /* start from scratch */
+ err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+ if (err < 0) {
+ dprintk("RPC: can't create TCP transport socket (%d).\n",
+ -err);
+ goto out_err;
+ }
+ xs_reclassify_socket4(sock);
+
+ if (xs_bind4(transport, sock) < 0) {
+ sock_release(sock);
+ goto out_err;
+ }
+ return sock;
+out_err:
+ return ERR_PTR(-EIO);
}
/**
- * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
+ * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
* @work: RPC transport to connect
*
* Invoked by a work queue tasklet.
*/
-static void xs_tcp_connect_worker6(struct work_struct *work)
+static void xs_tcp_connect_worker4(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, connect_worker.work);
struct rpc_xprt *xprt = &transport->xprt;
- struct socket *sock = transport->sock;
- int err, status = -EIO;
- if (xprt->shutdown)
- goto out;
+ xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
+}
- if (!sock) {
- /* start from scratch */
- if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
- dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
- goto out;
- }
- xs_reclassify_socket6(sock);
+static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
+ struct sock_xprt *transport)
+{
+ struct socket *sock;
+ int err;
+
+ /* start from scratch */
+ err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
+ if (err < 0) {
+ dprintk("RPC: can't create TCP transport socket (%d).\n",
+ -err);
+ goto out_err;
+ }
+ xs_reclassify_socket6(sock);
- if (xs_bind6(transport, sock) < 0) {
- sock_release(sock);
- goto out;
- }
- } else
- /* "close" the socket, preserving the local port */
- xs_tcp_reuse_connection(xprt);
+ if (xs_bind6(transport, sock) < 0) {
+ sock_release(sock);
+ goto out_err;
+ }
+ return sock;
+out_err:
+ return ERR_PTR(-EIO);
+}
- dprintk("RPC: worker connecting xprt %p to address: %s\n",
- xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+/**
+ * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_tcp_connect_worker6(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_xprt *xprt = &transport->xprt;
- status = xs_tcp_finish_connecting(xprt, sock);
- dprintk("RPC: %p connect status %d connected %d sock state %d\n",
- xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
- if (status < 0) {
- switch (status) {
- case -EINPROGRESS:
- case -EALREADY:
- goto out_clear;
- case -ECONNREFUSED:
- case -ECONNRESET:
- /* retry with existing socket, after a delay */
- break;
- default:
- /* get rid of existing socket, and retry */
- xs_tcp_shutdown(xprt);
- }
- }
-out:
- xprt_wake_pending_tasks(xprt, status);
-out_clear:
- xprt_clear_connecting(xprt);
+ xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
}
/**
@@ -1817,9 +1913,6 @@ static void xs_tcp_connect(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
- /* Initiate graceful shutdown of the socket if not already done */
- if (test_bit(XPRT_CONNECTED, &xprt->state))
- xs_tcp_shutdown(xprt);
/* Exit if we need to wait for socket shutdown to complete */
if (test_bit(XPRT_CLOSING, &xprt->state))
return;